/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Diff of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1275 by zherczeg, Sun Mar 10 05:32:10 2013 UTC revision 1435 by zherczeg, Tue Jan 7 07:47:12 2014 UTC
# Line 168  typedef struct jit_arguments { Line 168  typedef struct jit_arguments {
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169    void *callout_data;    void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171      pcre_uint32 limit_match;
172    int real_offset_count;    int real_offset_count;
173    int offset_count;    int offset_count;
   int call_limit;  
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 179  typedef struct jit_arguments { Line 179  typedef struct jit_arguments {
179    
180  typedef struct executable_functions {  typedef struct executable_functions {
181    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182      sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186    pcre_uint32 top_bracket;    pcre_uint32 top_bracket;
187    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    pcre_uint32 limit_match;
188  } executable_functions;  } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
# Line 196  typedef struct stub_list { Line 198  typedef struct stub_list {
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201    typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207  enum frame_types {  enum frame_types {
208    no_frame = -1,    no_frame = -1,
209    no_stack = -2    no_stack = -2
210  };  };
211    
212  enum control_types {  enum control_types {
213    type_commit = 0,    type_mark = 0,
214    type_prune = 1,    type_then_trap = 1
   type_skip = 2  
215  };  };
216    
217  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218    
219  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
220  code generator. It is allocated by compile_matchingpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
221  the aguments for compile_backtrackingpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
222  of its descendants. */  of its descendants. */
223  typedef struct backtrack_common {  typedef struct backtrack_common {
224    /* Concatenation stack. */    /* Concatenation stack. */
# Line 227  typedef struct backtrack_common { Line 234  typedef struct backtrack_common {
234  typedef struct assert_backtrack {  typedef struct assert_backtrack {
235    backtrack_common common;    backtrack_common common;
236    jump_list *condfailed;    jump_list *condfailed;
237    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
238    int framesize;    int framesize;
239    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
240    int private_data_ptr;    int private_data_ptr;
# Line 248  typedef struct bracket_backtrack { Line 255  typedef struct bracket_backtrack {
255      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
256      jump_list *condfailed;      jump_list *condfailed;
257      assert_backtrack *assert;      assert_backtrack *assert;
258      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
259      int framesize;      int framesize;
260    } u;    } u;
261    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
# Line 283  typedef struct recurse_entry { Line 290  typedef struct recurse_entry {
290    /* Collects the calls until the function is created. */    /* Collects the calls until the function is created. */
291    jump_list *calls;    jump_list *calls;
292    /* Points to the starting opcode. */    /* Points to the starting opcode. */
293    int start;    sljit_sw start;
294  } recurse_entry;  } recurse_entry;
295    
296  typedef struct recurse_backtrack {  typedef struct recurse_backtrack {
# Line 291  typedef struct recurse_backtrack { Line 298  typedef struct recurse_backtrack {
298    BOOL inlined_pattern;    BOOL inlined_pattern;
299  } recurse_backtrack;  } recurse_backtrack;
300    
301  #define MAX_RANGE_SIZE 6  #define OP_THEN_TRAP OP_TABLE_LENGTH
302    
303    typedef struct then_trap_backtrack {
304      backtrack_common common;
305      /* If then_trap is not NULL, this structure contains the real
306      then_trap for the backtracking path. */
307      struct then_trap_backtrack *then_trap;
308      /* Points to the starting opcode. */
309      sljit_sw start;
310      /* Exit point for the then opcodes of this alternative. */
311      jump_list *quit;
312      /* Frame size of the current alternative. */
313      int framesize;
314    } then_trap_backtrack;
315    
316    #define MAX_RANGE_SIZE 4
317    
318  typedef struct compiler_common {  typedef struct compiler_common {
319    /* The sljit generic compiler. */    /* The sljit generic compiler. */
# Line 299  typedef struct compiler_common { Line 321  typedef struct compiler_common {
321    /* First byte code. */    /* First byte code. */
322    pcre_uchar *start;    pcre_uchar *start;
323    /* Maps private data offset to each opcode. */    /* Maps private data offset to each opcode. */
324    int *private_data_ptrs;    sljit_si *private_data_ptrs;
325      /* This read-only data is available during runtime. */
326      sljit_uw *read_only_data;
327      /* The total size of the read-only data. */
328      sljit_uw read_only_data_size;
329      /* The next free entry of the read_only_data. */
330      sljit_uw *read_only_data_ptr;
331    /* Tells whether the capturing bracket is optimized. */    /* Tells whether the capturing bracket is optimized. */
332    pcre_uint8 *optimized_cbracket;    pcre_uint8 *optimized_cbracket;
333      /* Tells whether the starting offset is a target of then. */
334      pcre_uint8 *then_offsets;
335      /* Current position where a THEN must jump. */
336      then_trap_backtrack *then_trap;
337    /* Starting offset of private data for capturing brackets. */    /* Starting offset of private data for capturing brackets. */
338    int cbra_ptr;    int cbra_ptr;
339    /* Output vector starting point. Must be divisible by 2. */    /* Output vector starting point. Must be divisible by 2. */
# Line 330  typedef struct compiler_common { Line 362  typedef struct compiler_common {
362    sljit_sw lcc;    sljit_sw lcc;
363    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364    int mode;    int mode;
365    /* \K is in the pattern. */    /* TRUE, when minlength is greater than 0. */
366      BOOL might_be_empty;
367      /* \K is found in the pattern. */
368    BOOL has_set_som;    BOOL has_set_som;
369      /* (*SKIP:arg) is found in the pattern. */
370      BOOL has_skip_arg;
371      /* (*THEN) is found in the pattern. */
372      BOOL has_then;
373    /* Needs to know the start position anytime. */    /* Needs to know the start position anytime. */
374    BOOL needs_start_ptr;    BOOL needs_start_ptr;
375    /* Currently in recurse or assert. */    /* Currently in recurse or negative assert. */
376    BOOL local_exit;    BOOL local_exit;
377      /* Currently in a positive assert. */
378      BOOL positive_assert;
379    /* Newline control. */    /* Newline control. */
380    int nltype;    int nltype;
381      pcre_uint32 nlmax;
382      pcre_uint32 nlmin;
383    int newline;    int newline;
384    int bsr_nltype;    int bsr_nltype;
385      pcre_uint32 bsr_nlmax;
386      pcre_uint32 bsr_nlmin;
387    /* Dollar endonly. */    /* Dollar endonly. */
388    int endonly;    int endonly;
389    /* Tables. */    /* Tables. */
390    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
391    /* Named capturing brackets. */    /* Named capturing brackets. */
392    sljit_uw name_table;    pcre_uchar *name_table;
393    sljit_sw name_count;    sljit_sw name_count;
394    sljit_sw name_entry_size;    sljit_sw name_entry_size;
395    
# Line 356  typedef struct compiler_common { Line 399  typedef struct compiler_common {
399    struct sljit_label *forced_quit_label;    struct sljit_label *forced_quit_label;
400    struct sljit_label *accept_label;    struct sljit_label *accept_label;
401    stub_list *stubs;    stub_list *stubs;
402      label_addr_list *label_addrs;
403    recurse_entry *entries;    recurse_entry *entries;
404    recurse_entry *currententry;    recurse_entry *currententry;
405    jump_list *partialmatch;    jump_list *partialmatch;
406    jump_list *quit;    jump_list *quit;
407      jump_list *positive_assert_quit;
408    jump_list *forced_quit;    jump_list *forced_quit;
409    jump_list *accept;    jump_list *accept;
410    jump_list *calllimit;    jump_list *calllimit;
# Line 378  typedef struct compiler_common { Line 423  typedef struct compiler_common {
423  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
424    BOOL use_ucp;    BOOL use_ucp;
425  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
426  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
427      jump_list *utfreadchar;
428      jump_list *utfreadchar16;
429    jump_list *utfreadtype8;    jump_list *utfreadtype8;
430  #endif  #endif
431  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 438  typedef struct compare_context { Line 482  typedef struct compare_context {
482  #define STACK_TOP     SLJIT_SCRATCH_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
483  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
484  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
485  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define COUNT_MATCH   SLJIT_SAVED_EREG2
486  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
487    
488  /* Local space layout. */  /* Local space layout. */
# Line 449  typedef struct compare_context { Line 493  typedef struct compare_context {
493  #define POSSESSIVE0      (2 * sizeof(sljit_sw))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
494  #define POSSESSIVE1      (3 * sizeof(sljit_sw))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
495  /* Max limit of recursions. */  /* Max limit of recursions. */
496  #define CALL_LIMIT       (4 * sizeof(sljit_sw))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
497  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
498  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
499  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
500  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
501  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
502  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
503  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * sizeof(sljit_sw))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
504  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
505    
506  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
# Line 498  the start pointers when the end of the c Line 542  the start pointers when the end of the c
542  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
543    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
544    
545    #define READ_CHAR_MAX 0x7fffffff
546    
547  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
548  {  {
549  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 507  cc += 1 + LINK_SIZE; Line 553  cc += 1 + LINK_SIZE;
553  return cc;  return cc;
554  }  }
555    
556    static int no_alternatives(pcre_uchar* cc)
557    {
558    int count = 0;
559    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
560    do
561      {
562      cc += GET(cc, 1);
563      count++;
564      }
565    while (*cc == OP_ALT);
566    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567    return count;
568    }
569    
570    static int ones_in_half_byte[16] = {
571      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
572      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
573    };
574    
575  /* Functions which might need modification for all newly supported opcodes:  /* Functions which might need modification for all newly supported opcodes:
576   next_opcode   next_opcode
577   get_private_data_length   check_opcode_types
578   set_private_data_ptrs   set_private_data_ptrs
579   get_framesize   get_framesize
580   init_frame   init_frame
581   get_private_data_length_for_copy   get_private_data_copy_length
582   copy_private_data   copy_private_data
583   compile_matchingpath   compile_matchingpath
584   compile_backtrackingpath   compile_backtrackingpath
# Line 559  switch(*cc) Line 624  switch(*cc)
624    case OP_CRMINQUERY:    case OP_CRMINQUERY:
625    case OP_CRRANGE:    case OP_CRRANGE:
626    case OP_CRMINRANGE:    case OP_CRMINRANGE:
627      case OP_CRPOSSTAR:
628      case OP_CRPOSPLUS:
629      case OP_CRPOSQUERY:
630      case OP_CRPOSRANGE:
631    case OP_CLASS:    case OP_CLASS:
632    case OP_NCLASS:    case OP_NCLASS:
633    case OP_REF:    case OP_REF:
634    case OP_REFI:    case OP_REFI:
635      case OP_DNREF:
636      case OP_DNREFI:
637    case OP_RECURSE:    case OP_RECURSE:
638    case OP_CALLOUT:    case OP_CALLOUT:
639    case OP_ALT:    case OP_ALT:
# Line 588  switch(*cc) Line 659  switch(*cc)
659    case OP_SCBRAPOS:    case OP_SCBRAPOS:
660    case OP_SCOND:    case OP_SCOND:
661    case OP_CREF:    case OP_CREF:
662    case OP_NCREF:    case OP_DNCREF:
663    case OP_RREF:    case OP_RREF:
664    case OP_NRREF:    case OP_DNRREF:
665    case OP_DEF:    case OP_DEF:
666    case OP_BRAZERO:    case OP_BRAZERO:
667    case OP_BRAMINZERO:    case OP_BRAMINZERO:
668    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
669    case OP_PRUNE:    case OP_PRUNE:
670    case OP_SKIP:    case OP_SKIP:
671      case OP_THEN:
672    case OP_COMMIT:    case OP_COMMIT:
673    case OP_FAIL:    case OP_FAIL:
674    case OP_ACCEPT:    case OP_ACCEPT:
# Line 696  switch(*cc) Line 768  switch(*cc)
768    
769    case OP_MARK:    case OP_MARK:
770    case OP_PRUNE_ARG:    case OP_PRUNE_ARG:
771      case OP_SKIP_ARG:
772      case OP_THEN_ARG:
773    return cc + 1 + 2 + cc[1];    return cc + 1 + 2 + cc[1];
774    
775    default:    default:
776      /* All opcodes are supported now! */
777      SLJIT_ASSERT_STOP();
778    return NULL;    return NULL;
779    }    }
780  }  }
781    
782  #define CASE_ITERATOR_PRIVATE_DATA_1 \  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
     case OP_MINSTAR: \  
     case OP_MINPLUS: \  
     case OP_QUERY: \  
     case OP_MINQUERY: \  
     case OP_MINSTARI: \  
     case OP_MINPLUSI: \  
     case OP_QUERYI: \  
     case OP_MINQUERYI: \  
     case OP_NOTMINSTAR: \  
     case OP_NOTMINPLUS: \  
     case OP_NOTQUERY: \  
     case OP_NOTMINQUERY: \  
     case OP_NOTMINSTARI: \  
     case OP_NOTMINPLUSI: \  
     case OP_NOTQUERYI: \  
     case OP_NOTMINQUERYI:  
   
 #define CASE_ITERATOR_PRIVATE_DATA_2A \  
     case OP_STAR: \  
     case OP_PLUS: \  
     case OP_STARI: \  
     case OP_PLUSI: \  
     case OP_NOTSTAR: \  
     case OP_NOTPLUS: \  
     case OP_NOTSTARI: \  
     case OP_NOTPLUSI:  
   
 #define CASE_ITERATOR_PRIVATE_DATA_2B \  
     case OP_UPTO: \  
     case OP_MINUPTO: \  
     case OP_UPTOI: \  
     case OP_MINUPTOI: \  
     case OP_NOTUPTO: \  
     case OP_NOTMINUPTO: \  
     case OP_NOTUPTOI: \  
     case OP_NOTMINUPTOI:  
   
 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \  
     case OP_TYPEMINSTAR: \  
     case OP_TYPEMINPLUS: \  
     case OP_TYPEQUERY: \  
     case OP_TYPEMINQUERY:  
   
 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \  
     case OP_TYPESTAR: \  
     case OP_TYPEPLUS:  
   
 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \  
     case OP_TYPEUPTO: \  
     case OP_TYPEMINUPTO:  
   
 static int get_class_iterator_size(pcre_uchar *cc)  
 {  
 switch(*cc)  
   {  
   case OP_CRSTAR:  
   case OP_CRPLUS:  
   return 2;  
   
   case OP_CRMINSTAR:  
   case OP_CRMINPLUS:  
   case OP_CRQUERY:  
   case OP_CRMINQUERY:  
   return 1;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))  
     return 0;  
   return 2;  
   
   default:  
   return 0;  
   }  
 }  
   
 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  
783  {  {
784  int private_data_length = 0;  int count;
785  pcre_uchar *alternative;  pcre_uchar *slot;
 pcre_uchar *name;  
 pcre_uchar *end = NULL;  
 int space, size, i;  
 pcre_uint32 bracketlen;  
786    
787  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
788  while (cc < ccend)  while (cc < ccend)
789    {    {
   space = 0;  
   size = 0;  
   bracketlen = 0;  
790    switch(*cc)    switch(*cc)
791      {      {
792      case OP_SET_SOM:      case OP_SET_SOM:
793      common->has_set_som = TRUE;      common->has_set_som = TRUE;
794        common->might_be_empty = TRUE;
795      cc += 1;      cc += 1;
796      break;      break;
797    
# Line 808  while (cc < ccend) Line 801  while (cc < ccend)
801      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
802      break;      break;
803    
804      case OP_ASSERT:      case OP_BRA:
805      case OP_ASSERT_NOT:      case OP_CBRA:
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
806      case OP_SBRA:      case OP_SBRA:
807      case OP_SBRAPOS:      case OP_SCBRA:
808      private_data_length += sizeof(sljit_sw);      count = no_alternatives(cc);
809      bracketlen = 1 + LINK_SIZE;      if (count > 4)
810          common->read_only_data_size += count * sizeof(sljit_uw);
811        cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
812      break;      break;
813    
814      case OP_CBRAPOS:      case OP_CBRAPOS:
815      case OP_SCBRAPOS:      case OP_SCBRAPOS:
     private_data_length += sizeof(sljit_sw);  
816      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
817      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
818      break;      break;
819    
820      case OP_COND:      case OP_COND:
# Line 833  while (cc < ccend) Line 822  while (cc < ccend)
822      /* Only AUTO_CALLOUT can insert this opcode. We do      /* Only AUTO_CALLOUT can insert this opcode. We do
823         not intend to support this case. */         not intend to support this case. */
824      if (cc[1 + LINK_SIZE] == OP_CALLOUT)      if (cc[1 + LINK_SIZE] == OP_CALLOUT)
825        return -1;        return FALSE;
826        cc += 1 + LINK_SIZE;
     if (*cc == OP_COND)  
       {  
       /* Might be a hidden SCOND. */  
       alternative = cc + GET(cc, 1);  
       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
         private_data_length += sizeof(sljit_sw);  
       }  
     else  
       private_data_length += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
827      break;      break;
828    
829      case OP_CREF:      case OP_CREF:
830      i = GET2(cc, 1);      common->optimized_cbracket[GET2(cc, 1)] = 0;
     common->optimized_cbracket[i] = 0;  
831      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
832      break;      break;
833    
834      case OP_NCREF:      case OP_DNREF:
835      bracketlen = GET2(cc, 1);      case OP_DNREFI:
836      name = (pcre_uchar *)common->name_table;      case OP_DNCREF:
837      alternative = name;      count = GET2(cc, 1 + IMM2_SIZE);
838      for (i = 0; i < common->name_count; i++)      slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
839        {      while (count-- > 0)
       if (GET2(name, 0) == bracketlen) break;  
       name += common->name_entry_size;  
       }  
     SLJIT_ASSERT(i != common->name_count);  
   
     for (i = 0; i < common->name_count; i++)  
840        {        {
841        if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)        common->optimized_cbracket[GET2(slot, 0)] = 0;
842          common->optimized_cbracket[GET2(alternative, 0)] = 0;        slot += common->name_entry_size;
       alternative += common->name_entry_size;  
843        }        }
844      bracketlen = 0;      cc += 1 + 2 * IMM2_SIZE;
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_BRA:  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRA:  
     case OP_SCBRA:  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_1  
     space = 1;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2A  
     space = 2;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2B  
     space = 2;  
     size = -(2 + IMM2_SIZE);  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_1  
     space = 1;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A  
     if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)  
       space = 2;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B  
     if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)  
       space = 2;  
     size = 1 + IMM2_SIZE;  
     break;  
   
     case OP_CLASS:  
     case OP_NCLASS:  
     size += 1 + 32 / sizeof(pcre_uchar);  
     space = get_class_iterator_size(cc + size);  
845      break;      break;
846    
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
     case OP_XCLASS:  
     size = GET(cc, 1);  
     space = get_class_iterator_size(cc + size);  
     break;  
 #endif  
   
847      case OP_RECURSE:      case OP_RECURSE:
848      /* Set its value only once. */      /* Set its value only once. */
849      if (common->recursive_head_ptr == 0)      if (common->recursive_head_ptr == 0)
# Line 947  while (cc < ccend) Line 863  while (cc < ccend)
863      cc += 2 + 2 * LINK_SIZE;      cc += 2 + 2 * LINK_SIZE;
864      break;      break;
865    
866        case OP_THEN_ARG:
867        common->has_then = TRUE;
868        common->control_head_ptr = 1;
869        /* Fall through. */
870    
871      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
872      common->needs_start_ptr = TRUE;      common->needs_start_ptr = TRUE;
     common->control_head_ptr = 1;  
873      /* Fall through. */      /* Fall through. */
874    
875      case OP_MARK:      case OP_MARK:
# Line 961  while (cc < ccend) Line 881  while (cc < ccend)
881      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
882      break;      break;
883    
884        case OP_THEN:
885        common->has_then = TRUE;
886        common->control_head_ptr = 1;
887        /* Fall through. */
888    
889      case OP_PRUNE:      case OP_PRUNE:
890      case OP_SKIP:      case OP_SKIP:
891      common->needs_start_ptr = TRUE;      common->needs_start_ptr = TRUE;
892      /* Fall through. */      cc += 1;
893        break;
894    
895      case OP_COMMIT:      case OP_SKIP_ARG:
896      common->control_head_ptr = 1;      common->control_head_ptr = 1;
897      cc += 1;      common->has_skip_arg = TRUE;
898        cc += 1 + 2 + cc[1];
899      break;      break;
900    
901      default:      default:
902      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
903      if (cc == NULL)      if (cc == NULL)
904        return -1;        return FALSE;
905      break;      break;
906      }      }
907      }
908    return TRUE;
909    }
910    
911    if (space > 0 && cc >= end)  static int get_class_iterator_size(pcre_uchar *cc)
912      private_data_length += sizeof(sljit_sw) * space;  {
913    switch(*cc)
914      {
915      case OP_CRSTAR:
916      case OP_CRPLUS:
917      return 2;
918    
919    if (size != 0)    case OP_CRMINSTAR:
920      {    case OP_CRMINPLUS:
921      if (size < 0)    case OP_CRQUERY:
922        {    case OP_CRMINQUERY:
923        cc += -size;    return 1;
 #ifdef SUPPORT_UTF  
       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
       }  
     else  
       cc += size;  
     }  
924    
925    if (bracketlen != 0)    case OP_CRRANGE:
926      {    case OP_CRMINRANGE:
927      if (cc >= end)    if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
928        {      return 0;
929        end = bracketend(cc);    return 2;
930        if (end[-1 - LINK_SIZE] == OP_KET)  
931          end = NULL;    default:
932        }    return 0;
     cc += bracketlen;  
     }  
933    }    }
 return private_data_length;  
934  }  }
935    
936  static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)  static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
937  {  {
938  pcre_uchar *cc = common->start;  pcre_uchar *end = bracketend(begin);
939  pcre_uchar *alternative;  pcre_uchar *next;
940  pcre_uchar *end = NULL;  pcre_uchar *next_end;
941  int space, size, bracketlen;  pcre_uchar *max_end;
942    pcre_uchar type;
943    sljit_sw length = end - begin;
944    int min, max, i;
945    
946    /* Detect fixed iterations first. */
947    if (end[-(1 + LINK_SIZE)] != OP_KET)
948      return FALSE;
949    
950    /* Already detected repeat. */
951    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
952      return TRUE;
953    
954    next = end;
955    min = 1;
956    while (1)
957      {
958      if (*next != *begin)
959        break;
960      next_end = bracketend(next);
961      if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
962        break;
963      next = next_end;
964      min++;
965      }
966    
967    if (min == 2)
968      return FALSE;
969    
970    max = 0;
971    max_end = next;
972    if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
973      {
974      type = *next;
975      while (1)
976        {
977        if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
978          break;
979        next_end = bracketend(next + 2 + LINK_SIZE);
980        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
981          break;
982        next = next_end;
983        max++;
984        }
985    
986      if (next[0] == type && next[1] == *begin && max >= 1)
987        {
988        next_end = bracketend(next + 1);
989        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
990          {
991          for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
992            if (*next_end != OP_KET)
993              break;
994    
995          if (i == max)
996            {
997            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
998            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
999            /* +2 the original and the last. */
1000            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1001            if (min == 1)
1002              return TRUE;
1003            min--;
1004            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1005            }
1006          }
1007        }
1008      }
1009    
1010    if (min >= 3)
1011      {
1012      common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1013      common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1014      common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1015      return TRUE;
1016      }
1017    
1018    return FALSE;
1019    }
1020    
1021    #define CASE_ITERATOR_PRIVATE_DATA_1 \
1022        case OP_MINSTAR: \
1023        case OP_MINPLUS: \
1024        case OP_QUERY: \
1025        case OP_MINQUERY: \
1026        case OP_MINSTARI: \
1027        case OP_MINPLUSI: \
1028        case OP_QUERYI: \
1029        case OP_MINQUERYI: \
1030        case OP_NOTMINSTAR: \
1031        case OP_NOTMINPLUS: \
1032        case OP_NOTQUERY: \
1033        case OP_NOTMINQUERY: \
1034        case OP_NOTMINSTARI: \
1035        case OP_NOTMINPLUSI: \
1036        case OP_NOTQUERYI: \
1037        case OP_NOTMINQUERYI:
1038    
1039    #define CASE_ITERATOR_PRIVATE_DATA_2A \
1040        case OP_STAR: \
1041        case OP_PLUS: \
1042        case OP_STARI: \
1043        case OP_PLUSI: \
1044        case OP_NOTSTAR: \
1045        case OP_NOTPLUS: \
1046        case OP_NOTSTARI: \
1047        case OP_NOTPLUSI:
1048    
1049    #define CASE_ITERATOR_PRIVATE_DATA_2B \
1050        case OP_UPTO: \
1051        case OP_MINUPTO: \
1052        case OP_UPTOI: \
1053        case OP_MINUPTOI: \
1054        case OP_NOTUPTO: \
1055        case OP_NOTMINUPTO: \
1056        case OP_NOTUPTOI: \
1057        case OP_NOTMINUPTOI:
1058    
1059    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1060        case OP_TYPEMINSTAR: \
1061        case OP_TYPEMINPLUS: \
1062        case OP_TYPEQUERY: \
1063        case OP_TYPEMINQUERY:
1064    
1065    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1066        case OP_TYPESTAR: \
1067        case OP_TYPEPLUS:
1068    
1069    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1070        case OP_TYPEUPTO: \
1071        case OP_TYPEMINUPTO:
1072    
1073    static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1074    {
1075    pcre_uchar *cc = common->start;
1076    pcre_uchar *alternative;
1077    pcre_uchar *end = NULL;
1078    int private_data_ptr = *private_data_start;
1079    int space, size, bracketlen;
1080    
1081  while (cc < ccend)  while (cc < ccend)
1082    {    {
1083    space = 0;    space = 0;
1084    size = 0;    size = 0;
1085    bracketlen = 0;    bracketlen = 0;
1086      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1087        return;
1088    
1089      if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1090        if (detect_repeat(common, cc))
1091          {
1092          /* These brackets are converted to repeats, so no global
1093          based single character repeat is allowed. */
1094          if (cc >= end)
1095            end = bracketend(cc);
1096          }
1097    
1098    switch(*cc)    switch(*cc)
1099      {      {
1100        case OP_KET:
1101        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1102          {
1103          common->private_data_ptrs[cc - common->start] = private_data_ptr;
1104          private_data_ptr += sizeof(sljit_sw);
1105          cc += common->private_data_ptrs[cc + 1 - common->start];
1106          }
1107        cc += 1 + LINK_SIZE;
1108        break;
1109    
1110      case OP_ASSERT:      case OP_ASSERT:
1111      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1112      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 1115  while (cc < ccend) Line 1200  while (cc < ccend)
1200      break;      break;
1201      }      }
1202    
1203      /* Character iterators, which are not inside a repeated bracket,
1204         gets a private slot instead of allocating it on the stack. */
1205    if (space > 0 && cc >= end)    if (space > 0 && cc >= end)
1206      {      {
1207      common->private_data_ptrs[cc - common->start] = private_data_ptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
# Line 1145  while (cc < ccend) Line 1232  while (cc < ccend)
1232      cc += bracketlen;      cc += bracketlen;
1233      }      }
1234    }    }
1235    *private_data_start = private_data_ptr;
1236  }  }
1237    
1238  /* Returns with a frame_types (always < 0) if no need for frame. */  /* Returns with a frame_types (always < 0) if no need for frame. */
1239  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1240  {  {
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1241  int length = 0;  int length = 0;
1242  int possessive = 0;  int possessive = 0;
1243  BOOL stack_restore = FALSE;  BOOL stack_restore = FALSE;
# Line 1159  BOOL setmark_found = recursive; Line 1246  BOOL setmark_found = recursive;
1246  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
1247  BOOL capture_last_found = FALSE;  BOOL capture_last_found = FALSE;
1248    
1249  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1250    SLJIT_ASSERT(common->control_head_ptr != 0);
1251    *needs_control_head = TRUE;
1252    #else
1253    *needs_control_head = FALSE;
1254    #endif
1255    
1256    if (ccend == NULL)
1257    {    {
1258    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;    ccend = bracketend(cc) - (1 + LINK_SIZE);
1259    /* This is correct regardless of common->capture_last_ptr. */    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1260    capture_last_found = TRUE;      {
1261        possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1262        /* This is correct regardless of common->capture_last_ptr. */
1263        capture_last_found = TRUE;
1264        }
1265      cc = next_opcode(common, cc);
1266    }    }
1267    
 cc = next_opcode(common, cc);  
1268  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1269  while (cc < ccend)  while (cc < ccend)
1270    switch(*cc)    switch(*cc)
# Line 1184  while (cc < ccend) Line 1282  while (cc < ccend)
1282    
1283      case OP_MARK:      case OP_MARK:
1284      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
1285        case OP_THEN_ARG:
1286      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1287      stack_restore = TRUE;      stack_restore = TRUE;
1288      if (!setmark_found)      if (!setmark_found)
# Line 1191  while (cc < ccend) Line 1290  while (cc < ccend)
1290        length += 2;        length += 2;
1291        setmark_found = TRUE;        setmark_found = TRUE;
1292        }        }
1293        if (common->control_head_ptr != 0)
1294          *needs_control_head = TRUE;
1295      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
1296      break;      break;
1297    
# Line 1310  if (length > 0) Line 1411  if (length > 0)
1411  return stack_restore ? no_frame : no_stack;  return stack_restore ? no_frame : no_stack;
1412  }  }
1413    
1414  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1415  {  {
1416  DEFINE_COMPILER;  DEFINE_COMPILER;
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1417  BOOL setsom_found = recursive;  BOOL setsom_found = recursive;
1418  BOOL setmark_found = recursive;  BOOL setmark_found = recursive;
1419  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
# Line 1325  SLJIT_UNUSED_ARG(stacktop); Line 1425  SLJIT_UNUSED_ARG(stacktop);
1425  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1426    
1427  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
1428  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (ccend == NULL)
1429    cc = next_opcode(common, cc);    {
1430      ccend = bracketend(cc) - (1 + LINK_SIZE);
1431      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1432        cc = next_opcode(common, cc);
1433      }
1434    
1435  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1436  while (cc < ccend)  while (cc < ccend)
1437    switch(*cc)    switch(*cc)
# Line 1347  while (cc < ccend) Line 1452  while (cc < ccend)
1452    
1453      case OP_MARK:      case OP_MARK:
1454      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
1455        case OP_THEN_ARG:
1456      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1457      if (!setmark_found)      if (!setmark_found)
1458        {        {
# Line 1427  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), st Line 1533  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), st
1533  SLJIT_ASSERT(stackpos == STACK(stacktop));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1534  }  }
1535    
1536  static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1537  {  {
1538  int private_data_length = common->control_head_ptr ? 3 : 2;  int private_data_length = needs_control_head ? 3 : 2;
1539  int size;  int size;
1540  pcre_uchar *alternative;  pcre_uchar *alternative;
1541  /* Calculate the sum of the private machine words. */  /* Calculate the sum of the private machine words. */
# Line 1438  while (cc < ccend) Line 1544  while (cc < ccend)
1544    size = 0;    size = 0;
1545    switch(*cc)    switch(*cc)
1546      {      {
1547        case OP_KET:
1548        if (PRIVATE_DATA(cc) != 0)
1549          private_data_length++;
1550        cc += 1 + LINK_SIZE;
1551        break;
1552    
1553      case OP_ASSERT:      case OP_ASSERT:
1554      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1555      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 1542  return private_data_length; Line 1654  return private_data_length;
1654  }  }
1655    
1656  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1657    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1658  {  {
1659  DEFINE_COMPILER;  DEFINE_COMPILER;
1660  int srcw[2];  int srcw[2];
# Line 1563  stacktop = STACK(stacktop - 1); Line 1675  stacktop = STACK(stacktop - 1);
1675    
1676  if (!save)  if (!save)
1677    {    {
1678    stackptr += (common->control_head_ptr ? 2 : 1) * sizeof(sljit_sw);    stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1679    if (stackptr < stacktop)    if (stackptr < stacktop)
1680      {      {
1681      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
# Line 1588  do Line 1700  do
1700      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1701      count = 1;      count = 1;
1702      srcw[0] = common->recursive_head_ptr;      srcw[0] = common->recursive_head_ptr;
1703      if (common->control_head_ptr != 0)      if (needs_control_head)
1704        {        {
1705          SLJIT_ASSERT(common->control_head_ptr != 0);
1706        count = 2;        count = 2;
1707        srcw[1] = common->control_head_ptr;        srcw[1] = common->control_head_ptr;
1708        }        }
# Line 1605  do Line 1718  do
1718    
1719      switch(*cc)      switch(*cc)
1720        {        {
1721          case OP_KET:
1722          if (PRIVATE_DATA(cc) != 0)
1723            {
1724            count = 1;
1725            srcw[0] = PRIVATE_DATA(cc);
1726            }
1727          cc += 1 + LINK_SIZE;
1728          break;
1729    
1730        case OP_ASSERT:        case OP_ASSERT:
1731        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
1732        case OP_ASSERTBACK:        case OP_ASSERTBACK:
# Line 1851  if (save) Line 1973  if (save)
1973  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1974  }  }
1975    
1976    static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1977    {
1978    pcre_uchar *end = bracketend(cc);
1979    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1980    
1981    /* Assert captures then. */
1982    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1983      current_offset = NULL;
1984    /* Conditional block does not. */
1985    if (*cc == OP_COND || *cc == OP_SCOND)
1986      has_alternatives = FALSE;
1987    
1988    cc = next_opcode(common, cc);
1989    if (has_alternatives)
1990      current_offset = common->then_offsets + (cc - common->start);
1991    
1992    while (cc < end)
1993      {
1994      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1995        cc = set_then_offsets(common, cc, current_offset);
1996      else
1997        {
1998        if (*cc == OP_ALT && has_alternatives)
1999          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2000        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2001          *current_offset = 1;
2002        cc = next_opcode(common, cc);
2003        }
2004      }
2005    
2006    return end;
2007    }
2008    
2009  #undef CASE_ITERATOR_PRIVATE_DATA_1  #undef CASE_ITERATOR_PRIVATE_DATA_1
2010  #undef CASE_ITERATOR_PRIVATE_DATA_2A  #undef CASE_ITERATOR_PRIVATE_DATA_2A
2011  #undef CASE_ITERATOR_PRIVATE_DATA_2B  #undef CASE_ITERATOR_PRIVATE_DATA_2B
# Line 1914  while (list_item) Line 2069  while (list_item)
2069  common->stubs = NULL;  common->stubs = NULL;
2070  }  }
2071    
2072  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  static void add_label_addr(compiler_common *common)
2073    {
2074    DEFINE_COMPILER;
2075    label_addr_list *label_addr;
2076    
2077    label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2078    if (label_addr == NULL)
2079      return;
2080    label_addr->label = LABEL();
2081    label_addr->addr = common->read_only_data_ptr;
2082    label_addr->next = common->label_addrs;
2083    common->label_addrs = label_addr;
2084    common->read_only_data_ptr++;
2085    }
2086    
2087    static SLJIT_INLINE void count_match(compiler_common *common)
2088  {  {
2089  DEFINE_COMPILER;  DEFINE_COMPILER;
2090    
2091  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093  }  }
2094    
# Line 1998  else Line 2168  else
2168  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2169  if (common->mark_ptr != 0)  if (common->mark_ptr != 0)
2170    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2171  SLJIT_ASSERT(common->control_head_ptr != 0);  if (common->control_head_ptr != 0)
2172  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2173  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2174  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2175  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2176  }  }
2177    
2178  static sljit_sw do_check_control_chain(sljit_sw *current)  static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2179  {  {
2180  sljit_sw return_value = 0;  while (current != NULL)
   
 SLJIT_ASSERT(current != NULL);  
 do  
2181    {    {
2182    switch (current[-2])    switch (current[-2])
2183      {      {
2184      case type_commit:      case type_then_trap:
     /* Commit overwrites all. */  
     return -1;  
   
     case type_prune:  
2185      break;      break;
2186    
2187      case type_skip:      case type_mark:
2188      /* Overwrites prune, but not other skips. */      if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2189      if (return_value == 0)        return current[-4];
       return_value = current[-3];  
2190      break;      break;
2191    
2192      default:      default:
# Line 2033  do Line 2195  do
2195      }      }
2196    current = (sljit_sw*)current[-1];    current = (sljit_sw*)current[-1];
2197    }    }
2198  while (current != NULL);  return -1;
 return return_value;  
2199  }  }
2200    
2201  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
# Line 2106  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJI Line 2267  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJI
2267  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2268    
2269  jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);  jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2270  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2271  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2272  OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);  OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2273  #endif  #endif
# Line 2274  return (bit < 256) ? ((0 << 8) | bit) : Line 2435  return (bit < 256) ? ((0 << 8) | bit) :
2435    
2436  static void check_partial(compiler_common *common, BOOL force)  static void check_partial(compiler_common *common, BOOL force)
2437  {  {
2438  /* Checks whether a partial matching is occured. Does not modify registers. */  /* Checks whether a partial matching is occurred. Does not modify registers. */
2439  DEFINE_COMPILER;  DEFINE_COMPILER;
2440  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
2441    
# Line 2361  else Line 2522  else
2522  JUMPHERE(jump);  JUMPHERE(jump);
2523  }  }
2524    
2525  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common, pcre_uint32 max)
2526  {  {
2527  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2528  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2529  DEFINE_COMPILER;  DEFINE_COMPILER;
2530  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2531  struct sljit_jump *jump;  struct sljit_jump *jump;
2532  #endif  #endif
2533    
2534    SLJIT_UNUSED_ARG(max);
2535    
2536  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2537  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2538  if (common->utf)  if (common->utf)
2539    {    {
2540  #if defined COMPILE_PCRE8    if (max < 128) return;
2541    
2542    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2544    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2545      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546    JUMPHERE(jump);    JUMPHERE(jump);
2547    }    }
2548  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2549  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
2550    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2551    if (common->utf)
2552      {
2553      if (max < 0xd800) return;
2554    
2555      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2556      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2557      /* TMP2 contains the high surrogate. */
2558      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2559      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2560      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2561      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2562      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2563      JUMPHERE(jump);
2564      }
2565    #endif
2566  }  }
2567    
2568  static void peek_char(compiler_common *common)  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569    
2570    static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2571  {  {
2572  /* Reads the character into TMP1, keeps STR_PTR.  /* Tells whether the character codes below 128 are enough
2573  Does not check STR_END. TMP2 Destroyed. */  to determine a match. */
2574  DEFINE_COMPILER;  const pcre_uint8 value = nclass ? 0xff : 0;
2575  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  const pcre_uint8* end = bitset + 32;
 struct sljit_jump *jump;  
 #endif  
2576    
2577  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  bitset += 16;
2578  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  do
 if (common->utf)  
2579    {    {
2580  #if defined COMPILE_PCRE8    if (*bitset++ != value)
2581    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);      return FALSE;
 #elif defined COMPILE_PCRE16  
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
   add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));  
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  
   JUMPHERE(jump);  
2582    }    }
2583  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  while (bitset < end);
2584    return TRUE;
2585  }  }
2586    
2587  static void read_char8_type(compiler_common *common)  static void read_char7_type(compiler_common *common, BOOL full_read)
2588  {  {
2589  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the precise character type of a character into TMP1, if the character
2590    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2591    full_read argument tells whether characters above max are accepted or not. */
2592  DEFINE_COMPILER;  DEFINE_COMPILER;
 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2593  struct sljit_jump *jump;  struct sljit_jump *jump;
 #endif  
2594    
2595  #ifdef SUPPORT_UTF  SLJIT_ASSERT(common->utf);
2596  if (common->utf)  
2597    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2598    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599    
2600    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601    
2602    if (full_read)
2603    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
   /* This can be an extra read in some situations, but hopefully  
   it is needed in most cases. */  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2604    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2605    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
   JUMPHERE(jump);  
 #elif defined COMPILE_PCRE16  
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
   JUMPHERE(jump);  
   /* Skip low surrogate if necessary. */  
   OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);  
   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);  
   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);  
2606    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 #elif defined COMPILE_PCRE32  
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2607    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
   return;  
2608    }    }
 #endif /* SUPPORT_UTF */  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
 /* The ctypes array contains only 256 values. */  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
 #endif  
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
 JUMPHERE(jump);  
 #endif  
2609  }  }
2610    
2611  static void skip_char_back(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612    
2613    static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2614  {  {
2615  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */  /* Reads the precise value of a character into TMP1, if the character is
2616    between min and max (c >= min && c <= max). Otherwise it returns with a value
2617    outside the range. Does not check STR_END. */
2618  DEFINE_COMPILER;  DEFINE_COMPILER;
2619  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2620  #if defined COMPILE_PCRE8  struct sljit_jump *jump;
2621  struct sljit_label *label;  #endif
2622    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623    struct sljit_jump *jump2;
2624    #endif
2625    
2626    SLJIT_UNUSED_ARG(update_str_ptr);
2627    SLJIT_UNUSED_ARG(min);
2628    SLJIT_UNUSED_ARG(max);
2629    SLJIT_ASSERT(min <= max);
2630    
2631    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2632    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633    
2634    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2635  if (common->utf)  if (common->utf)
2636    {    {
2637    label = LABEL();    if (max < 128 && !update_str_ptr) return;
2638    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));  
2639    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2640    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    if (min >= 0x10000)
2641        {
2642        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2643        if (update_str_ptr)
2644          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2646        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2647        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2648        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2649        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2652        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2655        if (!update_str_ptr)
2656          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2657        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2658        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660        JUMPHERE(jump2);
2661        if (update_str_ptr)
2662          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2663        }
2664      else if (min >= 0x800 && max <= 0xffff)
2665        {
2666        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2667        if (update_str_ptr)
2668          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2669        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2670        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2671        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2675        if (!update_str_ptr)
2676          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2677        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680        JUMPHERE(jump2);
2681        if (update_str_ptr)
2682          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683        }
2684      else if (max >= 0x800)
2685        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2686      else if (max < 128)
2687        {
2688        OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2690        }
2691      else
2692        {
2693        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694        if (!update_str_ptr)
2695          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696        else
2697          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2698        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2700        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2701        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2702        if (update_str_ptr)
2703          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2704        }
2705      JUMPHERE(jump);
2706      }
2707    #endif
2708    
2709    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2710    if (common->utf)
2711      {
2712      if (max >= 0x10000)
2713        {
2714        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2715        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2716        /* TMP2 contains the high surrogate. */
2717        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2718        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2719        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2720        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2722        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723        JUMPHERE(jump);
2724        return;
2725        }
2726    
2727      if (max < 0xd800 && !update_str_ptr) return;
2728    
2729      /* Skip low surrogate if necessary. */
2730      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2731      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2732      if (update_str_ptr)
2733        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2734      if (max >= 0xd800)
2735        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2736      JUMPHERE(jump);
2737      }
2738    #endif
2739    }
2740    
2741    static SLJIT_INLINE void read_char(compiler_common *common)
2742    {
2743    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744    }
2745    
2746    static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2747    {
2748    /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2749    DEFINE_COMPILER;
2750    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2751    struct sljit_jump *jump;
2752    #endif
2753    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2754    struct sljit_jump *jump2;
2755    #endif
2756    
2757    SLJIT_UNUSED_ARG(update_str_ptr);
2758    
2759    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2760    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761    
2762    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2763    if (common->utf)
2764      {
2765      /* This can be an extra read in some situations, but hopefully
2766      it is needed in most cases. */
2767      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2768      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2769      if (!update_str_ptr)
2770        {
2771        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2774        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2775        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2776        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2777        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2778        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2779        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2780        JUMPHERE(jump2);
2781        }
2782      else
2783        add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2784      JUMPHERE(jump);
2785      return;
2786      }
2787    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2788    
2789    #if !defined COMPILE_PCRE8
2790    /* The ctypes array contains only 256 values. */
2791    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2792    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2793    #endif
2794    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795    #if !defined COMPILE_PCRE8
2796    JUMPHERE(jump);
2797    #endif
2798    
2799    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2800    if (common->utf && update_str_ptr)
2801      {
2802      /* Skip low surrogate if necessary. */
2803      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2804      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2805      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806      JUMPHERE(jump);
2807      }
2808    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2809    }
2810    
2811    static void skip_char_back(compiler_common *common)
2812    {
2813    /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2814    DEFINE_COMPILER;
2815    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2816    #if defined COMPILE_PCRE8
2817    struct sljit_label *label;
2818    
2819    if (common->utf)
2820      {
2821      label = LABEL();
2822      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2823      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2825    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2826    return;    return;
2827    }    }
# Line 2499  if (common->utf) Line 2843  if (common->utf)
2843  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844  }  }
2845    
2846  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2847  {  {
2848  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2849  DEFINE_COMPILER;  DEFINE_COMPILER;
2850    struct sljit_jump *jump;
2851    
2852  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2853    {    {
2854    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2855    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2856    }    }
2857  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2858    {    {
2859    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2860    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2861    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2862    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2863    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2864      else
2865        {
2866        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2867        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2868        JUMPHERE(jump);
2869        }
2870    }    }
2871  else  else
2872    {    {
2873    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2874    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2875    }    }
2876  }  }
2877    
# Line 2530  else Line 2881  else
2881  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2882  {  {
2883  /* Fast decoding of a UTF-8 character. TMP1 contains the first byte  /* Fast decoding of a UTF-8 character. TMP1 contains the first byte
2884  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2885  DEFINE_COMPILER;  DEFINE_COMPILER;
2886  struct sljit_jump *jump;  struct sljit_jump *jump;
2887    
2888  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2889    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2891    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2892    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2893    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894    
2895  /* Searching for the first zero. */  /* Searching for the first zero. */
2896  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2897  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2898  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2899  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2901    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2902    
2903    JUMPHERE(jump);
2904    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2905    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2906  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2907  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2908  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2909    
2910  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2911  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2912  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2913  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2914  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2915  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2916    
2917  /* Four byte sequence. */  /* Four byte sequence. */
2918  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2919  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2921    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2922    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2923  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2924  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2926    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927    }
2928    
2929    static void do_utfreadchar16(compiler_common *common)
2930    {
2931    /* Fast decoding of a UTF-8 character. TMP1 contains the first byte
2932    of the character (>= 0xc0). Return value in TMP1. */
2933    DEFINE_COMPILER;
2934    struct sljit_jump *jump;
2935    
2936    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2937    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2941  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2943  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2944    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945    jump = JUMP(SLJIT_C_NOT_ZERO);
2946    /* Two byte sequence. */
2947    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949    
2950    JUMPHERE(jump);
2951    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953    /* This code runs only in 8 bit mode. No need to shift the value. */
2954    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2961    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963  }  }
2964    
# Line 2601  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2978  jump = JUMP(SLJIT_C_NOT_ZERO);
2978  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981    /* The upper 5 bits are known at this point. */
2982    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2983  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2986  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988    
2989  JUMPHERE(compare);  JUMPHERE(compare);
2990  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2992    
2993  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2994  JUMPHERE(jump);  JUMPHERE(jump);
2995  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2998  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999  }  }
3000    
3001  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
3002    
3003  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
3004    
# Line 2722  if (firstline) Line 3073  if (firstline)
3073      mainloop = LABEL();      mainloop = LABEL();
3074      /* Continual stores do not cause a data dependency. */      /* Continual stores do not cause a data dependency. */
3075      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3076      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
3078      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079      JUMPHERE(end);      JUMPHERE(end);
# Line 2798  if (newlinecheck) Line 3149  if (newlinecheck)
3149  return mainloop;  return mainloop;
3150  }  }
3151    
3152  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
3153  {  {
3154  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
3155  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
3156  struct sljit_jump *quit;  pcre_uint32 caseless, chr, mask;
3157  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save;
3158  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any;
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
   
 /* We do not support alternatives now. */  
 if (*(common->start + GET(common->start, 1)) == OP_ALT)  
   return FALSE;  
3159    
3160    repeat = 1;
3161  while (TRUE)  while (TRUE)
3162    {    {
3163      last = TRUE;
3164      any = FALSE;
3165    caseless = 0;    caseless = 0;
3166    must_stop = 1;    switch (*cc)
   switch(*cc)  
3167      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3168      case OP_CHARI:      case OP_CHARI:
3169      caseless = 1;      caseless = 1;
3170      must_stop = 0;      case OP_CHAR:
3171        last = FALSE;
3172      cc++;      cc++;
3173      break;      break;
3174    
# Line 2853  while (TRUE) Line 3193  while (TRUE)
3193      cc++;      cc++;
3194      break;      break;
3195    
3196        case OP_EXACTI:
3197        caseless = 1;
3198      case OP_EXACT:      case OP_EXACT:
3199        repeat = GET2(cc, 1);
3200        last = FALSE;
3201      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3202      break;      break;
3203    
3204      case OP_PLUSI:      case OP_PLUSI:
3205      case OP_MINPLUSI:      case OP_MINPLUSI:
3206      case OP_POSPLUSI:      case OP_POSPLUSI:
3207      caseless = 1;      caseless = 1;
3208      cc++;      cc++;
3209      break;      break;
3210    
3211        case OP_KET:
3212        cc += 1 + LINK_SIZE;
3213        continue;
3214    
3215        case OP_ALT:
3216        cc += GET(cc, 1);
3217        continue;
3218    
3219        case OP_ONCE:
3220        case OP_ONCE_NC:
3221        case OP_BRA:
3222        case OP_BRAPOS:
3223        case OP_CBRA:
3224        case OP_CBRAPOS:
3225        alternative = cc + GET(cc, 1);
3226        while (*alternative == OP_ALT)
3227          {
3228          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3229          if (max_chars == 0)
3230            return consumed;
3231          alternative += GET(alternative, 1);
3232          }
3233    
3234        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3235          cc += IMM2_SIZE;
3236        cc += 1 + LINK_SIZE;
3237        continue;
3238    
3239        case OP_CLASS:
3240        case OP_NCLASS:
3241        any = TRUE;
3242        cc += 1 + 32 / sizeof(pcre_uchar);
3243        break;
3244    
3245    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3246        case OP_XCLASS:
3247        any = TRUE;
3248        cc += GET(cc, 1);
3249        break;
3250    #endif
3251    
3252        case OP_NOT_DIGIT:
3253        case OP_DIGIT:
3254        case OP_NOT_WHITESPACE:
3255        case OP_WHITESPACE:
3256        case OP_NOT_WORDCHAR:
3257        case OP_WORDCHAR:
3258        case OP_ANY:
3259        case OP_ALLANY:
3260        any = TRUE;
3261        cc++;
3262        break;
3263    
3264    #ifdef SUPPORT_UCP
3265        case OP_NOTPROP:
3266        case OP_PROP:
3267        any = TRUE;
3268        cc += 1 + 2;
3269        break;
3270    #endif
3271    
3272        case OP_TYPEEXACT:
3273        repeat = GET2(cc, 1);
3274        cc += 1 + IMM2_SIZE;
3275        continue;
3276    
3277        default:
3278        return consumed;
3279        }
3280    
3281      if (any)
3282        {
3283    #ifdef SUPPORT_UTF
3284        if (common->utf) return consumed;
3285    #endif
3286    #if defined COMPILE_PCRE8
3287        mask = 0xff;
3288    #elif defined COMPILE_PCRE16
3289        mask = 0xffff;
3290    #elif defined COMPILE_PCRE32
3291        mask = 0xffffffff;
3292    #else
3293        SLJIT_ASSERT_STOP();
3294    #endif
3295    
3296        do
3297          {
3298          chars[0] = mask;
3299          chars[1] = mask;
3300    
3301          if (--max_chars == 0)
3302            return consumed;
3303          consumed++;
3304          chars += 2;
3305          }
3306        while (--repeat > 0);
3307    
3308        repeat = 1;
3309        continue;
3310        }
3311    
3312      len = 1;
3313    #ifdef SUPPORT_UTF
3314      if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3315    #endif
3316    
3317      if (caseless != 0 && char_has_othercase(common, cc))
3318        {
3319        caseless = char_get_othercase_bit(common, cc);
3320        if (caseless == 0)
3321          return consumed;
3322    #ifdef COMPILE_PCRE8
3323        caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3324    #else
3325        if ((caseless & 0x100) != 0)
3326          caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3327        else
3328          caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3329    #endif
3330        }
3331      else
3332        caseless = 0;
3333    
3334      len_save = len;
3335      cc_save = cc;
3336      while (TRUE)
3337        {
3338        do
3339          {
3340          chr = *cc;
3341    #ifdef COMPILE_PCRE32
3342          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3343            return consumed;
3344    #endif
3345          mask = 0;
3346          if ((pcre_uint32)len == (caseless & 0xff))
3347            {
3348            mask = caseless >> 8;
3349            chr |= mask;
3350            }
3351    
3352          if (chars[0] == NOTACHAR)
3353            {
3354            chars[0] = chr;
3355            chars[1] = mask;
3356            }
3357          else
3358            {
3359            mask |= chars[0] ^ chr;
3360            chr |= mask;
3361            chars[0] = chr;
3362            chars[1] |= mask;
3363            }
3364    
3365          len--;
3366          if (--max_chars == 0)
3367            return consumed;
3368          consumed++;
3369          chars += 2;
3370          cc++;
3371          }
3372        while (len > 0);
3373    
3374        if (--repeat == 0)
3375          break;
3376    
3377        len = len_save;
3378        cc = cc_save;
3379        }
3380    
3381      repeat = 1;
3382      if (last)
3383        return consumed;
3384      }
3385    }
3386    
3387    #define MAX_N_CHARS 16
3388    
3389    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3390    {
3391    DEFINE_COMPILER;
3392    struct sljit_label *start;
3393    struct sljit_jump *quit;
3394    pcre_uint32 chars[MAX_N_CHARS * 2];
3395    pcre_uint8 ones[MAX_N_CHARS];
3396    pcre_uint32 mask;
3397    int i, max;
3398    int offsets[3];
3399    
3400    for (i = 0; i < MAX_N_CHARS; i++)
3401      {
3402      chars[i << 1] = NOTACHAR;
3403      chars[(i << 1) + 1] = 0;
3404      }
3405    
3406    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3407    
3408      case OP_EXACTI:  if (max <= 1)
3409      caseless = 1;    return FALSE;
     cc += 1 + IMM2_SIZE;  
     break;  
3410    
3411      default:  for (i = 0; i < max; i++)
3412      must_stop = 2;    {
3413      break;    mask = chars[(i << 1) + 1];
3414      ones[i] = ones_in_half_byte[mask & 0xf];
3415      mask >>= 4;
3416      while (mask != 0)
3417        {
3418        ones[i] += ones_in_half_byte[mask & 0xf];
3419        mask >>= 4;
3420      }      }
3421      }
3422    
3423    if (must_stop == 2)  offsets[0] = -1;
3424        break;  /* Scan forward. */
3425    for (i = 0; i < max; i++)
3426      if (ones[i] <= 2) {
3427        offsets[0] = i;
3428        break;
3429      }
3430    
3431    len = 1;  if (offsets[0] == -1)
3432  #ifdef SUPPORT_UTF    return FALSE;
   if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);  
 #endif  
3433    
3434    if (caseless && char_has_othercase(common, cc))  /* Scan backward. */
3435      {  offsets[1] = -1;
3436      caseless = char_get_othercase_bit(common, cc);  for (i = max - 1; i > offsets[0]; i--)
3437      if (caseless == 0)    if (ones[i] <= 2) {
3438        return FALSE;      offsets[1] = i;
3439  #ifdef COMPILE_PCRE8      break;
3440      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));    }
 #else  
     if ((caseless & 0x100) != 0)  
       caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));  
     else  
       caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));  
 #endif  
     }  
   else  
     caseless = 0;  
3441    
3442    while (len > 0 && location < MAX_N_CHARS * 2)  offsets[2] = -1;
3443      {  if (offsets[1] >= 0)
3444      c = *cc;    {
3445      bit = 0;    /* Scan from middle. */
3446      if (len == (caseless & 0xff))    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3447        if (ones[i] <= 2)
3448        {        {
3449        bit = caseless >> 8;        offsets[2] = i;
3450        c |= bit;        break;
3451        }        }
3452    
3453      chars[location] = c;    if (offsets[2] == -1)
3454      chars[location + 1] = bit;      {
3455        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3456      len--;        if (ones[i] <= 2)
3457      location += 2;          {
3458      cc++;          offsets[2] = i;
3459            break;
3460            }
3461      }      }
   
   if (location >= MAX_N_CHARS * 2 || must_stop != 0)  
     break;  
3462    }    }
3463    
3464  /* At least two characters are required. */  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3465  if (location < 2 * 2)  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3466      return FALSE;  
3467    chars[0] = chars[offsets[0] << 1];
3468    chars[1] = chars[(offsets[0] << 1) + 1];
3469    if (offsets[2] >= 0)
3470      {
3471      chars[2] = chars[offsets[2] << 1];
3472      chars[3] = chars[(offsets[2] << 1) + 1];
3473      }
3474    if (offsets[1] >= 0)
3475      {
3476      chars[4] = chars[offsets[1] << 1];
3477      chars[5] = chars[(offsets[1] << 1) + 1];
3478      }
3479    
3480    max -= 1;
3481  if (firstline)  if (firstline)
3482    {    {
3483    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3484    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3485    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3486    }    }
3487  else  else
3488    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3489    
3490  start = LABEL();  start = LABEL();
3491  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3492    
3493  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3494  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  if (offsets[1] >= 0)
3495      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3496  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3497    
3498  if (chars[1] != 0)  if (chars[1] != 0)
3499    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3500  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3501  if (location > 2 * 2)  if (offsets[2] >= 0)
3502    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3503  if (chars[3] != 0)  
3504    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3505    {    {
3506    if (chars[5] != 0)    if (chars[5] != 0)
3507      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3508    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3509      }
3510    
3511    if (offsets[2] >= 0)
3512      {
3513      if (chars[3] != 0)
3514        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3515      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3516    }    }
3517  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3518    
# Line 2961  JUMPHERE(quit); Line 3521  JUMPHERE(quit);
3521  if (firstline)  if (firstline)
3522    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3523  else  else
3524    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3525  return TRUE;  return TRUE;
3526  }  }
3527    
# Line 3081  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3641  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3641  skip_char_back(common);  skip_char_back(common);
3642    
3643  loop = LABEL();  loop = LABEL();
3644  read_char(common);  read_char_range(common, common->nlmin, common->nlmax, TRUE);
3645  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3646  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3647    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 3110  if (firstline) Line 3670  if (firstline)
3670    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3671  }  }
3672    
3673  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3674    
3675  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3676  {  {
3677  DEFINE_COMPILER;  DEFINE_COMPILER;
3678  struct sljit_label *start;  struct sljit_label *start;
3679  struct sljit_jump *quit;  struct sljit_jump *quit;
3680  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3681  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3682  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3683  struct sljit_jump *jump;  struct sljit_jump *jump;
3684  #endif  #endif
3685    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3686  if (firstline)  if (firstline)
3687    {    {
3688    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3143  if (common->utf) Line 3698  if (common->utf)
3698    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3699  #endif  #endif
3700    
3701  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3702    {    {
3703  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3704    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3152  if (!check_class_ranges(common, inverted Line 3707  if (!check_class_ranges(common, inverted
3707  #endif  #endif
3708    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3709    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3710    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3711    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3712    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3713    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3355  JUMPHERE(skipread); Line 3910  JUMPHERE(skipread);
3910    
3911  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3912  check_str_end(common, &skipread_list);  check_str_end(common, &skipread_list);
3913  peek_char(common);  peek_char(common, READ_CHAR_MAX);
3914    
3915  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
3916  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 3401  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 3956  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
3956  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3957  }  }
3958    
3959  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
3960  {  {
3961  DEFINE_COMPILER;  DEFINE_COMPILER;
3962  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
   
 if (ranges[0] < 0)  
   return FALSE;  
   
 switch(ranges[0])  
   {  
   case 1:  
   if (readch)  
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
   return TRUE;  
   
   case 2:  
   if (readch)  
     read_char(common);  
   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);  
   add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));  
   return TRUE;  
   
   case 4:  
   if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])  
     {  
     if (readch)  
       read_char(common);  
     if (ranges[1] != 0)  
       {  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       }  
     else  
       {  
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
     return TRUE;  
     }  
   if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
     {  
     if (readch)  
       read_char(common);  
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
     }  
   return FALSE;  
   
   default:  
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
   
 for (i = 1; i < 256; i++)  
   if ((ctypes[i] & flag) != bit)  
     {  
     if (length >= MAX_RANGE_SIZE)  
       return;  
     ranges[2 + length] = i;  
     length++;  
     bit ^= flag;  
     }  
   
 if (bit != 0)  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
3963  pcre_uint8 bit, cbit, all;  pcre_uint8 bit, cbit, all;
3964  int i, byte, length = 0;  int i, byte, length = 0;
3965    
3966  bit = bits[0] & 0x1;  bit = bits[0] & 0x1;
3967  ranges[1] = bit;  /* All bits will be zero or one (since bit is zero or one). */
 /* Can be 0 or 255. */  
3968  all = -bit;  all = -bit;
3969    
3970  for (i = 0; i < 256; )  for (i = 0; i < 256; )
# Line 3519  for (i = 0; i < 256; ) Line 3979  for (i = 0; i < 256; )
3979        {        {
3980        if (length >= MAX_RANGE_SIZE)        if (length >= MAX_RANGE_SIZE)
3981          return FALSE;          return FALSE;
3982        ranges[2 + length] = i;        ranges[length] = i;
3983        length++;        length++;
3984        bit = cbit;        bit = cbit;
3985        all = -cbit;        all = -cbit;
# Line 3532  if (((bit == 0) && nclass) || ((bit == 1 Line 3992  if (((bit == 0) && nclass) || ((bit == 1
3992    {    {
3993    if (length >= MAX_RANGE_SIZE)    if (length >= MAX_RANGE_SIZE)
3994      return FALSE;      return FALSE;
3995    ranges[2 + length] = 256;    ranges[length] = 256;
3996    length++;    length++;
3997    }    }
 ranges[0] = length;  
3998    
3999  return check_ranges(common, ranges, backtracks, FALSE);  if (length < 0 || length > 4)
4000      return FALSE;
4001    
4002    bit = bits[0] & 0x1;
4003    if (invert) bit ^= 0x1;
4004    
4005    /* No character is accepted. */
4006    if (length == 0 && bit == 0)
4007      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4008    
4009    switch(length)
4010      {
4011      case 0:
4012      /* When bit != 0, all characters are accepted. */
4013      return TRUE;
4014    
4015      case 1:
4016      add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4017      return TRUE;
4018    
4019      case 2:
4020      if (ranges[0] + 1 != ranges[1])
4021        {
4022        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4023        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4024        }
4025      else
4026        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4027      return TRUE;
4028    
4029      case 3:
4030      if (bit != 0)
4031        {
4032        add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4033        if (ranges[0] + 1 != ranges[1])
4034          {
4035          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4036          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4037          }
4038        else
4039          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4040        return TRUE;
4041        }
4042    
4043      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4044      if (ranges[1] + 1 != ranges[2])
4045        {
4046        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4047        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4048        }
4049      else
4050        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4051      return TRUE;
4052    
4053      case 4:
4054      if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4055          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4056          && is_powerof2(ranges[2] - ranges[0]))
4057        {
4058        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4059        if (ranges[2] + 1 != ranges[3])
4060          {
4061          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4062          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4063          }
4064        else
4065          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4066        return TRUE;
4067        }
4068    
4069      if (bit != 0)
4070        {
4071        i = 0;
4072        if (ranges[0] + 1 != ranges[1])
4073          {
4074          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4075          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4076          i = ranges[0];
4077          }
4078        else
4079          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4080    
4081        if (ranges[2] + 1 != ranges[3])
4082          {
4083          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4084          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4085          }
4086        else
4087          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4088        return TRUE;
4089        }
4090    
4091      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4092      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4093      if (ranges[1] + 1 != ranges[2])
4094        {
4095        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4096        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4097        }
4098      else
4099        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4100      return TRUE;
4101    
4102      default:
4103      SLJIT_ASSERT_STOP();
4104      return FALSE;
4105      }
4106  }  }
4107    
4108  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 3904  return cc; Line 4469  return cc;
4469  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
4470    if ((value) != typeoffset) \    if ((value) != typeoffset) \
4471      { \      { \
4472      if ((value) > typeoffset) \      if ((value) < typeoffset) \
       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \  
     else \  
4473        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4474        else \
4475          OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4476      } \      } \
4477    typeoffset = (value);    typeoffset = (value);
4478    
4479  #define SET_CHAR_OFFSET(value) \  #define SET_CHAR_OFFSET(value) \
4480    if ((value) != charoffset) \    if ((value) != charoffset) \
4481      { \      { \
4482      if ((value) > charoffset) \      if ((value) < charoffset) \
4483        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4484      else \      else \
4485        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4486      } \      } \
4487    charoffset = (value);    charoffset = (value);
4488    
# Line 3925  static void compile_xclass_matchingpath( Line 4490  static void compile_xclass_matchingpath(
4490  {  {
4491  DEFINE_COMPILER;  DEFINE_COMPILER;
4492  jump_list *found = NULL;  jump_list *found = NULL;
4493  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4494  pcre_int32 c, charoffset;  sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
 const pcre_uint32 *other_cases;  
4495  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4496  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4497  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4498    #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4499    BOOL utf = common->utf;
4500    #endif
4501    
4502  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4503  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4504  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4505  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4506  pcre_int32 typeoffset;  const pcre_uint32 *other_cases;
4507    sljit_uw typeoffset;
4508  #endif  #endif
4509    
4510  /* Although SUPPORT_UTF must be defined, we are  /* Scanning the necessary info. */
4511     not necessary in utf mode even in 8 bit mode. */  cc++;
4512  detect_partial_match(common, backtracks);  ccbegin = cc;
4513  read_char(common);  compares = 0;
4514    if (cc[-1] & XCL_MAP)
 if ((*cc++ & XCL_MAP) != 0)  
4515    {    {
4516    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    min = 0;
 #ifndef COMPILE_PCRE8  
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #endif  
   
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))  
     {  
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));  
     }  
   
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     JUMPHERE(jump);  
 #endif  
   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4517    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4518    }    }
4519    
 /* Scanning the necessary info. */  
 ccbegin = cc;  
 compares = 0;  
4520  while (*cc != XCL_END)  while (*cc != XCL_END)
4521    {    {
4522    compares++;    compares++;
4523    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4524      {      {
4525      cc += 2;      cc ++;
4526  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4527      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c > max) max = c;
4528  #endif      if (c < min) min = c;
4529  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4530      needschar = TRUE;      needschar = TRUE;
4531  #endif  #endif
4532      }      }
4533    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4534      {      {
4535      cc += 2;      cc ++;
4536  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4537      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c < min) min = c;
4538  #endif      GETCHARINCTEST(c, cc);
4539      cc++;      if (c > max) max = c;
 #ifdef SUPPORT_UTF  
     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
4540  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4541      needschar = TRUE;      needschar = TRUE;
4542  #endif  #endif
# Line 4011  while (*cc != XCL_END) Line 4546  while (*cc != XCL_END)
4546      {      {
4547      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4548      cc++;      cc++;
4549        if (*cc == PT_CLIST)
4550          {
4551          other_cases = PRIV(ucd_caseless_sets) + cc[1];
4552          while (*other_cases != NOTACHAR)
4553            {
4554            if (*other_cases > max) max = *other_cases;
4555            if (*other_cases < min) min = *other_cases;
4556            other_cases++;
4557            }
4558          }
4559        else
4560          {
4561          max = READ_CHAR_MAX;
4562          min = 0;
4563          }
4564    
4565      switch(*cc)      switch(*cc)
4566        {        {
4567        case PT_ANY:        case PT_ANY:
# Line 4030  while (*cc != XCL_END) Line 4581  while (*cc != XCL_END)
4581        case PT_SPACE:        case PT_SPACE:
4582        case PT_PXSPACE:        case PT_PXSPACE:
4583        case PT_WORD:        case PT_WORD:
4584          case PT_PXGRAPH:
4585          case PT_PXPRINT:
4586          case PT_PXPUNCT:
4587        needstype = TRUE;        needstype = TRUE;
4588        needschar = TRUE;        needschar = TRUE;
4589        break;        break;
# Line 4043  while (*cc != XCL_END) Line 4597  while (*cc != XCL_END)
4597        SLJIT_ASSERT_STOP();        SLJIT_ASSERT_STOP();
4598        break;        break;
4599        }        }
4600      cc += 2;      cc += 2;
4601        }
4602    #endif
4603      }
4604    
4605    /* We are not necessary in utf mode even in 8 bit mode. */
4606    cc = ccbegin;
4607    detect_partial_match(common, backtracks);
4608    read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4609    
4610    if ((cc[-1] & XCL_HASPROP) == 0)
4611      {
4612      if ((cc[-1] & XCL_MAP) != 0)
4613        {
4614        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4615        if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4616          {
4617          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4618          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4619          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4620          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4621          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4622          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4623          }
4624    
4625        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4626        JUMPHERE(jump);
4627    
4628        cc += 32 / sizeof(pcre_uchar);
4629        }
4630      else
4631        {
4632        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4633        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4634      }      }
4635      }
4636    else if ((cc[-1] & XCL_MAP) != 0)
4637      {
4638      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4639    #ifdef SUPPORT_UCP
4640      charsaved = TRUE;
4641    #endif
4642      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4643        {
4644    #ifdef COMPILE_PCRE8
4645        SLJIT_ASSERT(common->utf);
4646  #endif  #endif
4647        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4648    
4649        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4650        OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4651        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4652        OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4653        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4654        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4655    
4656        JUMPHERE(jump);
4657        }
4658    
4659      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4660      cc += 32 / sizeof(pcre_uchar);
4661    }    }
4662    
4663  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4089  if (needstype || needsscript) Line 4701  if (needstype || needsscript)
4701  #endif  #endif
4702    
4703  /* Generating code. */  /* Generating code. */
 cc = ccbegin;  
4704  charoffset = 0;  charoffset = 0;
4705  numberofcmps = 0;  numberofcmps = 0;
4706  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4105  while (*cc != XCL_END) Line 4716  while (*cc != XCL_END)
4716    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4717      {      {
4718      cc ++;      cc ++;
4719  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4720    
4721      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4722        {        {
4723        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4724        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4725        numberofcmps++;        numberofcmps++;
4726        }        }
4727      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4728        {        {
4729        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4730        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4731        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4732        numberofcmps = 0;        numberofcmps = 0;
4733        }        }
4734      else      else
4735        {        {
4736        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4737        numberofcmps = 0;        numberofcmps = 0;
4738        }        }
4739      }      }
4740    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4741      {      {
4742      cc ++;      cc ++;
4743  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4744      SET_CHAR_OFFSET(c);      SET_CHAR_OFFSET(c);
4745  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4746      if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4747      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4748        {        {
4749        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4750        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4751        numberofcmps++;        numberofcmps++;
4752        }        }
4753      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4754        {        {
4755        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4756        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4757        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4758        numberofcmps = 0;        numberofcmps = 0;
4759        }        }
4760      else      else
4761        {        {
4762        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4763        numberofcmps = 0;        numberofcmps = 0;
4764        }        }
4765      }      }
# Line 4217  while (*cc != XCL_END) Line 4808  while (*cc != XCL_END)
4808    
4809        case PT_SPACE:        case PT_SPACE:
4810        case PT_PXSPACE:        case PT_PXSPACE:
       if (*cc == PT_SPACE)  
         {  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  
         jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);  
         }  
4811        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
4812        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4813        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4814        if (*cc == PT_SPACE)  
4815          JUMPHERE(jump);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4816          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4817    
4818          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4819          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4820    
4821        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
4822        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
# Line 4235  while (*cc != XCL_END) Line 4825  while (*cc != XCL_END)
4825        break;        break;
4826    
4827        case PT_WORD:        case PT_WORD:
4828        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4829        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4830        /* Fall through. */        /* Fall through. */
4831    
# Line 4283  while (*cc != XCL_END) Line 4873  while (*cc != XCL_END)
4873          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4874          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4875    
4876          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4877          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4878    
4879          other_cases += 3;          other_cases += 3;
4880          }          }
4881        else        else
4882          {          {
4883          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4884          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4885          }          }
4886    
4887        while (*other_cases != NOTACHAR)        while (*other_cases != NOTACHAR)
4888          {          {
4889          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4890          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4891          }          }
4892        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4893        break;        break;
4894    
4895        case PT_UCNC:        case PT_UCNC:
4896        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4897        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4898        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4899        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4900        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4901        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4902    
4903        SET_CHAR_OFFSET(0xa0);        SET_CHAR_OFFSET(0xa0);
4904        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4905        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4906        SET_CHAR_OFFSET(0);        SET_CHAR_OFFSET(0);
4907        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4908        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4909        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4910        break;        break;
4911    
4912          case PT_PXGRAPH:
4913          /* C and Z groups are the farthest two groups. */
4914          SET_TYPE_OFFSET(ucp_Ll);
4915          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4916          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4917    
4918          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4919    
4920          /* In case of ucp_Cf, we overwrite the result. */
4921          SET_CHAR_OFFSET(0x2066);
4922          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4923          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4924    
4925          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4926          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4927    
4928          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4929          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4930    
4931          JUMPHERE(jump);
4932          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4933          break;
4934    
4935          case PT_PXPRINT:
4936          /* C and Z groups are the farthest two groups. */
4937          SET_TYPE_OFFSET(ucp_Ll);
4938          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4939          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4940    
4941          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4942          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4943    
4944          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4945    
4946          /* In case of ucp_Cf, we overwrite the result. */
4947          SET_CHAR_OFFSET(0x2066);
4948          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4949          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4950    
4951          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4952          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4953    
4954          JUMPHERE(jump);
4955          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4956          break;
4957    
4958          case PT_PXPUNCT:
4959          SET_TYPE_OFFSET(ucp_Sc);
4960          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4961          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4962    
4963          SET_CHAR_OFFSET(0);
4964          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4965          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4966    
4967          SET_TYPE_OFFSET(ucp_Pc);
4968          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4969          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4970          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4971          break;
4972        }        }
4973      cc += 2;      cc += 2;
4974      }      }
# Line 4349  struct sljit_label *label; Line 5000  struct sljit_label *label;
5000  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5001  pcre_uchar propdata[5];  pcre_uchar propdata[5];
5002  #endif  #endif
5003  #endif  #endif /* SUPPORT_UTF */
5004    
5005  switch(type)  switch(type)
5006    {    {
# Line 4374  switch(type) Line 5025  switch(type)
5025    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
5026    case OP_DIGIT:    case OP_DIGIT:
5027    /* Digits are usually 0-9, so it is worth optimizing them. */    /* Digits are usually 0-9, so it is worth optimizing them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
5028    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5029    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5030    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5031      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
5032    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
5033      {  #endif
5034      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
5035      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
5036      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5037      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
5038    return cc;    return cc;
5039    
5040    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
5041    case OP_WHITESPACE:    case OP_WHITESPACE:
5042    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5043    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5044      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5045        read_char7_type(common, type == OP_NOT_WHITESPACE);
5046      else
5047    #endif
5048        read_char8_type(common, type == OP_NOT_WHITESPACE);
5049    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5050    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5051    return cc;    return cc;
# Line 4401  switch(type) Line 5053  switch(type)
5053    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
5054    case OP_WORDCHAR:    case OP_WORDCHAR:
5055    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5056    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5057      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5058        read_char7_type(common, type == OP_NOT_WORDCHAR);
5059      else
5060    #endif
5061        read_char8_type(common, type == OP_NOT_WORDCHAR);
5062    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5063    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5064    return cc;    return cc;
5065    
5066    case OP_ANY:    case OP_ANY:
5067    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5068    read_char(common);    read_char_range(common, common->nlmin, common->nlmax, TRUE);
5069    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5070      {      {
5071      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4464  switch(type) Line 5121  switch(type)
5121  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5122    case OP_NOTPROP:    case OP_NOTPROP:
5123    case OP_PROP:    case OP_PROP:
5124    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5125    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5126    propdata[2] = cc[0];    propdata[2] = cc[0];
5127    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4476  switch(type) Line 5133  switch(type)
5133    
5134    case OP_ANYNL:    case OP_ANYNL:
5135    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5136    read_char(common);    read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5137    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5138    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5139    end_list = NULL;    end_list = NULL;
# Line 4498  switch(type) Line 5155  switch(type)
5155    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5156    case OP_HSPACE:    case OP_HSPACE:
5157    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5158    read_char(common);    read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5159    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5160    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5161    return cc;    return cc;
# Line 4506  switch(type) Line 5163  switch(type)
5163    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5164    case OP_VSPACE:    case OP_VSPACE:
5165    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5166    read_char(common);    read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5167    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5168    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5169    return cc;    return cc;
# Line 4605  switch(type) Line 5262  switch(type)
5262      else      else
5263        {        {
5264        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5265        read_char(common);        read_char_range(common, common->nlmin, common->nlmax, TRUE);
5266        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5267        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5268        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 4653  switch(type) Line 5310  switch(type)
5310    else    else
5311      {      {
5312      skip_char_back(common);      skip_char_back(common);
5313      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
5314      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5315      }      }
5316    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4704  switch(type) Line 5361  switch(type)
5361      }      }
5362    else    else
5363      {      {
5364      peek_char(common);      peek_char(common, common->nlmax);
5365      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5366      }      }
5367    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4728  switch(type) Line 5385  switch(type)
5385  #endif  #endif
5386      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5387      }      }
5388    
5389    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5390  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5391    if (common->utf)    if (common->utf)
5392      {      {
# Line 4738  switch(type) Line 5395  switch(type)
5395    else    else
5396  #endif  #endif
5397      c = *cc;      c = *cc;
5398    
5399    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5400      {      {
5401        read_char_range(common, c, c, FALSE);
5402      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5403      return cc + length;      return cc + length;
5404      }      }
5405    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5406      read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5407    bit = c ^ oc;    bit = c ^ oc;
5408    if (is_powerof2(bit))    if (is_powerof2(bit))
5409      {      {
# Line 4751  switch(type) Line 5411  switch(type)
5411      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5412      return cc + length;      return cc + length;
5413      }      }
5414    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5415    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5416    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5417    return cc + length;    return cc + length;
5418    
5419    case OP_NOT:    case OP_NOT:
# Line 4790  switch(type) Line 5448  switch(type)
5448  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5449        {        {
5450        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5451        }        }
5452      }      }
5453    else    else
5454  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5455      c = *cc;      c = *cc;
     }  
5456    
5457    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5458        {
5459        read_char_range(common, c, c, TRUE);
5460      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5461        }
5462    else    else
5463      {      {
5464      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5465        read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5466      bit = c ^ oc;      bit = c ^ oc;
5467      if (is_powerof2(bit))      if (is_powerof2(bit))
5468        {        {
# Line 4822  switch(type) Line 5480  switch(type)
5480    case OP_CLASS:    case OP_CLASS:
5481    case OP_NCLASS:    case OP_NCLASS:
5482    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5483    read_char(common);  
5484    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5485      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5486      read_char_range(common, 0, bit, type == OP_NCLASS);
5487    #else
5488      read_char_range(common, 0, 255, type == OP_NCLASS);
5489    #endif
5490    
5491      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5492      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5493    
5494  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5495    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5496    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5497      {      {
5498      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5499      if (type == OP_CLASS)      if (type == OP_CLASS)
5500        {        {
5501        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5502        jump[0] = NULL;        jump[0] = NULL;
5503        }        }
5504      }      }
5505  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5506      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5507      if (type == OP_CLASS)
5508        {
5509        add_jump(compiler, backtracks, jump[0]);
5510        jump[0] = NULL;
5511        }
5512    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5513    
5514    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5515    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5516    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5517    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5518    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5519    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5520    
5521  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5522    if (jump[0] != NULL)    if (jump[0] != NULL)
5523      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5524  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5525    
5526    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5527    
5528  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 4956  if (context.length > 0) Line 5627  if (context.length > 0)
5627  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5628  }  }
5629    
 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  
 {  
 DEFINE_COMPILER;  
 int offset = GET2(cc, 1) << 1;  
   
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
 if (!common->jscript_compat)  
   {  
   if (backtracks == NULL)  
     {  
     /* OVECTOR(1) contains the "string begin - 1" constant. */  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
     return JUMP(SLJIT_C_NOT_ZERO);  
     }  
   add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));  
   }  
 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
 }  
   
5630  /* Forward definitions. */  /* Forward definitions. */
5631  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5632  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
# Line 5010  static void compile_backtrackingpath(com Line 5659  static void compile_backtrackingpath(com
5659    
5660  #define BACKTRACK_AS(type) ((type *)backtrack)  #define BACKTRACK_AS(type) ((type *)backtrack)
5661    
5662  static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)  static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5663    {
5664    /* The OVECTOR offset goes to TMP2. */
5665    DEFINE_COMPILER;
5666    int count = GET2(cc, 1 + IMM2_SIZE);
5667    pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5668    unsigned int offset;
5669    jump_list *found = NULL;
5670    
5671    SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5672    
5673    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5674    
5675    count--;
5676    while (count-- > 0)
5677      {
5678      offset = GET2(slot, 0) << 1;
5679      GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5680      add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5681      slot += common->name_entry_size;
5682      }
5683    
5684    offset = GET2(slot, 0) << 1;
5685    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5686    if (backtracks != NULL && !common->jscript_compat)
5687      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5688    
5689    set_jumps(found, LABEL());
5690    }
5691    
5692    static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5693  {  {
5694  DEFINE_COMPILER;  DEFINE_COMPILER;
5695  int offset = GET2(cc, 1) << 1;  BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5696    int offset = 0;
5697  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
5698  struct sljit_jump *partial;  struct sljit_jump *partial;
5699  struct sljit_jump *nopartial;  struct sljit_jump *nopartial;
5700    
5701  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  if (ref)
5702  /* OVECTOR(1) contains the "string begin - 1" constant. */    {
5703  if (withchecks && !common->jscript_compat)    offset = GET2(cc, 1) << 1;
5704    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5705      /* OVECTOR(1) contains the "string begin - 1" constant. */
5706      if (withchecks && !common->jscript_compat)
5707        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5708      }
5709    else
5710      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5711    
5712  #if defined SUPPORT_UTF && defined SUPPORT_UCP  #if defined SUPPORT_UTF && defined SUPPORT_UCP
5713  if (common->utf && *cc == OP_REFI)  if (common->utf && *cc == OP_REFI)
5714    {    {
5715    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5716    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));    if (ref)
5717        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5718      else
5719        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5720    
5721    if (withchecks)    if (withchecks)
5722      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5723    
# Line 5052  if (common->utf && *cc == OP_REFI) Line 5742  if (common->utf && *cc == OP_REFI)
5742  else  else
5743  #endif /* SUPPORT_UTF && SUPPORT_UCP */  #endif /* SUPPORT_UTF && SUPPORT_UCP */
5744    {    {
5745    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);    if (ref)
5746        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5747      else
5748        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5749    
5750    if (withchecks)    if (withchecks)
5751      jump = JUMP(SLJIT_C_ZERO);      jump = JUMP(SLJIT_C_ZERO);
5752    
# Line 5089  if (jump != NULL) Line 5783  if (jump != NULL)
5783    else    else
5784      JUMPHERE(jump);      JUMPHERE(jump);
5785    }    }
 return cc + 1 + IMM2_SIZE;  
5786  }  }
5787    
5788  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5789  {  {
5790  DEFINE_COMPILER;  DEFINE_COMPILER;
5791    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5792  backtrack_common *backtrack;  backtrack_common *backtrack;
5793  pcre_uchar type;  pcre_uchar type;
5794    int offset = 0;
5795  struct sljit_label *label;  struct sljit_label *label;
5796  struct sljit_jump *zerolength;  struct sljit_jump *zerolength;
5797  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 5106  BOOL minimize; Line 5801  BOOL minimize;
5801    
5802  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5803    
5804    if (ref)
5805      offset = GET2(cc, 1) << 1;
5806    else
5807      cc += IMM2_SIZE;
5808  type = cc[1 + IMM2_SIZE];  type = cc[1 + IMM2_SIZE];
5809    
5810    SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5811  minimize = (type & 0x1) != 0;  minimize = (type & 0x1) != 0;
5812  switch(type)  switch(type)
5813    {    {
# Line 5144  if (!minimize) Line 5845  if (!minimize)
5845    if (min == 0)    if (min == 0)
5846      {      {
5847      allocate_stack(common, 2);      allocate_stack(common, 2);
5848        if (ref)
5849          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5850      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5851      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5852      /* Temporary release of STR_PTR. */      /* Temporary release of STR_PTR. */
5853      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5854      zerolength = compile_ref_checks(common, ccbegin, NULL);      /* Handles both invalid and empty cases. Since the minimum repeat
5855        is zero, the invalid case is basically the same as an empty case. */
5856        if (ref)
5857          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5858        else
5859          {
5860          compile_dnref_search(common, ccbegin, NULL);
5861          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5862          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5863          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5864          }
5865      /* Restore if not zero length. */      /* Restore if not zero length. */
5866      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5867      }      }
5868    else    else
5869      {      {
5870      allocate_stack(common, 1);      allocate_stack(common, 1);
5871        if (ref)
5872          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5873      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5874      zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);      if (ref)
5875          {
5876          add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5877          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5878          }
5879        else
5880          {
5881          compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5882          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5883          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5884          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5885          }
5886      }      }
5887    
5888    if (min > 1 || max > 1)    if (min > 1 || max > 1)
5889      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5890    
5891    label = LABEL();    label = LABEL();
5892      if (!ref)
5893        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5894    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5895    
5896    if (min > 1 || max > 1)    if (min > 1 || max > 1)
# Line 5193  if (!minimize) Line 5921  if (!minimize)
5921    JUMPHERE(zerolength);    JUMPHERE(zerolength);
5922    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5923    
5924    decrease_call_count(common);    count_match(common);
5925    return cc;    return cc;
5926    }    }
5927    
5928  allocate_stack(common, 2);  allocate_stack(common, ref ? 2 : 3);
5929    if (ref)
5930      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5931  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5932  if (type != OP_CRMINSTAR)  if (type != OP_CRMINSTAR)
5933    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5934    
5935  if (min == 0)  if (min == 0)
5936    {    {
5937    zerolength = compile_ref_checks(common, ccbegin, NULL);    /* Handles both invalid and empty cases. Since the minimum repeat
5938      is zero, the invalid case is basically the same as an empty case. */
5939      if (ref)
5940        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5941      else
5942        {
5943        compile_dnref_search(common, ccbegin, NULL);
5944        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5945        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5946        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5947        }
5948      /* Length is non-zero, we can match real repeats. */
5949    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5950    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
5951    }    }
5952  else  else
5953    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);    {
5954      if (ref)
5955        {
5956        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5957        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5958        }
5959      else
5960        {
5961        compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5962        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5963        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5964        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5965        }
5966      }
5967    
5968  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5969  if (max > 0)  if (max > 0)
5970    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5971    
5972    if (!ref)
5973      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5974  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5975  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5976    
# Line 5232  if (jump != NULL) Line 5988  if (jump != NULL)
5988    JUMPHERE(jump);    JUMPHERE(jump);
5989  JUMPHERE(zerolength);  JUMPHERE(zerolength);
5990    
5991  decrease_call_count(common);  count_match(common);
5992  return cc;  return cc;
5993  }  }
5994    
# Line 5242  DEFINE_COMPILER; Line 5998  DEFINE_COMPILER;
5998  backtrack_common *backtrack;  backtrack_common *backtrack;
5999  recurse_entry *entry = common->entries;  recurse_entry *entry = common->entries;
6000  recurse_entry *prev = NULL;  recurse_entry *prev = NULL;
6001  int start = GET(cc, 1);  sljit_sw start = GET(cc, 1);
6002  pcre_uchar *start_cc;  pcre_uchar *start_cc;
6003    BOOL needs_control_head;
6004    
6005  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6006    
6007  /* Inlining simple patterns. */  /* Inlining simple patterns. */
6008  if (get_framesize(common, common->start + start, TRUE) == no_stack)  if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6009    {    {
6010    start_cc = common->start + start;    start_cc = common->start + start;
6011    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
# Line 5408  static pcre_uchar *compile_assert_matchi Line 6165  static pcre_uchar *compile_assert_matchi
6165  DEFINE_COMPILER;  DEFINE_COMPILER;
6166  int framesize;  int framesize;
6167  int extrasize;  int extrasize;
6168  BOOL needs_control_head = common->control_head_ptr != 0;  BOOL needs_control_head;
6169  int private_data_ptr;  int private_data_ptr;
6170  backtrack_common altbacktrack;  backtrack_common altbacktrack;
6171  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
# Line 5418  jump_list *tmp = NULL; Line 6175  jump_list *tmp = NULL;
6175  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6176  jump_list **found;  jump_list **found;
6177  /* Saving previous accept variables. */  /* Saving previous accept variables. */
6178    BOOL save_local_exit = common->local_exit;
6179    BOOL save_positive_assert = common->positive_assert;
6180    then_trap_backtrack *save_then_trap = common->then_trap;
6181  struct sljit_label *save_quit_label = common->quit_label;  struct sljit_label *save_quit_label = common->quit_label;
6182  struct sljit_label *save_accept_label = common->accept_label;  struct sljit_label *save_accept_label = common->accept_label;
6183  jump_list *save_quit = common->quit;  jump_list *save_quit = common->quit;
6184    jump_list *save_positive_assert_quit = common->positive_assert_quit;
6185  jump_list *save_accept = common->accept;  jump_list *save_accept = common->accept;
 BOOL save_local_exit = common->local_exit;  
6186  struct sljit_jump *jump;  struct sljit_jump *jump;
6187  struct sljit_jump *brajump = NULL;  struct sljit_jump *brajump = NULL;
6188    
6189    /* Assert captures then. */
6190    common->then_trap = NULL;
6191    
6192  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6193    {    {
6194    SLJIT_ASSERT(!conditional);    SLJIT_ASSERT(!conditional);
# Line 5434  if (*cc == OP_BRAZERO || *cc == OP_BRAMI Line 6197  if (*cc == OP_BRAZERO || *cc == OP_BRAMI
6197    }    }
6198  private_data_ptr = PRIVATE_DATA(cc);  private_data_ptr = PRIVATE_DATA(cc);
6199  SLJIT_ASSERT(private_data_ptr != 0);  SLJIT_ASSERT(private_data_ptr != 0);
6200  framesize = get_framesize(common, cc, FALSE);  framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6201  backtrack->framesize = framesize;  backtrack->framesize = framesize;
6202  backtrack->private_data_ptr = private_data_ptr;  backtrack->private_data_ptr = private_data_ptr;
6203  opcode = *cc;  opcode = *cc;
# Line 5454  if (bra == OP_BRAMINZERO) Line 6217  if (bra == OP_BRAMINZERO)
6217  if (framesize < 0)  if (framesize < 0)
6218    {    {
6219    extrasize = needs_control_head ? 2 : 1;    extrasize = needs_control_head ? 2 : 1;
6220    if (framesize != no_stack)    if (framesize == no_frame)
6221      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6222    allocate_stack(common, extrasize);    allocate_stack(common, extrasize);
6223    if (needs_control_head)    if (needs_control_head)
# Line 5484  else Line 6247  else
6247      }      }
6248    else    else
6249      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6250    init_frame(common, ccbegin, framesize + extrasize - 1, extrasize, FALSE);    init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6251    }    }
6252    
6253  memset(&altbacktrack, 0, sizeof(backtrack_common));  memset(&altbacktrack, 0, sizeof(backtrack_common));
6254  common->local_exit = TRUE;  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6255  common->quit_label = NULL;    {
6256  common->quit = NULL;    /* Negative assert is stronger than positive assert. */
6257      common->local_exit = TRUE;
6258      common->quit_label = NULL;
6259      common->quit = NULL;
6260      common->positive_assert = FALSE;
6261      }
6262    else
6263      common->positive_assert = TRUE;
6264    common->positive_assert_quit = NULL;
6265    
6266  while (1)  while (1)
6267    {    {
6268    common->accept_label = NULL;    common->accept_label = NULL;
# Line 5505  while (1) Line 6277  while (1)
6277    compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);    compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6278    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6279      {      {
6280      common->local_exit = save_local_exit;      if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6281      common->quit_label = save_quit_label;        {
6282          common->local_exit = save_local_exit;
6283          common->quit_label = save_quit_label;
6284          common->quit = save_quit;
6285          }
6286        common->positive_assert = save_positive_assert;
6287        common->then_trap = save_then_trap;
6288      common->accept_label = save_accept_label;      common->accept_label = save_accept_label;
6289      common->quit = save_quit;      common->positive_assert_quit = save_positive_assert_quit;
6290      common->accept = save_accept;      common->accept = save_accept;
6291      return NULL;      return NULL;
6292      }      }
# Line 5519  while (1) Line 6297  while (1)
6297    /* Reset stack. */    /* Reset stack. */
6298    if (framesize < 0)    if (framesize < 0)
6299      {      {
6300      if (framesize != no_stack)      if (framesize == no_frame)
6301        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6302      else      else
6303        free_stack(common, extrasize);        free_stack(common, extrasize);
# Line 5573  while (1) Line 6351  while (1)
6351    compile_backtrackingpath(common, altbacktrack.top);    compile_backtrackingpath(common, altbacktrack.top);
6352    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6353      {      {
6354      common->local_exit = save_local_exit;      if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6355      common->quit_label = save_quit_label;        {
6356          common->local_exit = save_local_exit;
6357          common->quit_label = save_quit_label;
6358          common->quit = save_quit;
6359          }
6360        common->positive_assert = save_positive_assert;
6361        common->then_trap = save_then_trap;
6362      common->accept_label = save_accept_label;      common->accept_label = save_accept_label;
6363      common->quit = save_quit;      common->positive_assert_quit = save_positive_assert_quit;
6364      common->accept = save_accept;      common->accept = save_accept;
6365      return NULL;      return NULL;
6366      }      }
# Line 5589  while (1) Line 6373  while (1)
6373    cc += GET(cc, 1);    cc += GET(cc, 1);
6374    }    }
6375    
6376    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6377      {
6378      SLJIT_ASSERT(common->positive_assert_quit == NULL);
6379      /* Makes the check less complicated below. */
6380      common->positive_assert_quit = common->quit;
6381      }
6382    
6383  /* None of them matched. */  /* None of them matched. */
6384  if (common->quit != NULL)  if (common->positive_assert_quit != NULL)
6385    {    {
6386    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
6387    set_jumps(common->quit, LABEL());    set_jumps(common->positive_assert_quit, LABEL());
6388    SLJIT_ASSERT(framesize != no_stack);    SLJIT_ASSERT(framesize != no_stack);
6389    if (framesize < 0)    if (framesize < 0)
6390      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
# Line 5753  else Line 6544  else
6544      }      }
6545    }    }
6546    
6547  common->local_exit = save_local_exit;  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6548  common->quit_label = save_quit_label;    {
6549      common->local_exit = save_local_exit;
6550      common->quit_label = save_quit_label;
6551      common->quit = save_quit;
6552      }
6553    common->positive_assert = save_positive_assert;
6554    common->then_trap = save_then_trap;
6555  common->accept_label = save_accept_label;  common->accept_label = save_accept_label;
6556  common->quit = save_quit;  common->positive_assert_quit = save_positive_assert_quit;
6557  common->accept = save_accept;  common->accept = save_accept;
6558  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
6559  }  }
6560    
6561  static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6562  {  {
6563  int condition = FALSE;  DEFINE_COMPILER;
6564  pcre_uchar *slotA = name_table;  int stacksize;
 pcre_uchar *slotB;  
 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_sw no_capture;  
 int i;  
   
 locals += refno & 0xff;  
 refno >>= 8;  
 no_capture = locals[1];  
6565    
6566  for (i = 0; i < name_count; i++)  if (framesize < 0)
6567    {    {
6568    if (GET2(slotA, 0) == refno) break;    if (framesize == no_frame)
6569    slotA += name_entry_size;      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6570    }    else
6571        {
6572        stacksize = needs_control_head ? 1 : 0;
6573        if (ket != OP_KET || has_alternatives)
6574          stacksize++;
6575        free_stack(common, stacksize);
6576        }
6577    
6578  if (i < name_count)    if (needs_control_head)
6579    {      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
   /* Found a name for the number - there can be only one; duplicate names  
   for different numbers are allowed, but not vice versa. First scan down  
   for duplicates. */  
6580    
6581    slotB = slotA;    /* TMP2 which is set here used by OP_KETRMAX below. */
6582    while (slotB > name_table)    if (ket == OP_KETRMAX)
6583        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6584      else if (ket == OP_KETRMIN)
6585      {      {
6586      slotB -= name_entry_size;      /* Move the STR_PTR to the private_data_ptr. */
6587      if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
       {  
       condition = locals[GET2(slotB, 0) << 1] != no_capture;  
       if (condition) break;  
       }  
     else break;  
6588      }      }
6589      }
6590    else
6591      {
6592      stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6593      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6594      if (needs_control_head)
6595        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6596    
6597    /* Scan up for duplicates */    if (ket == OP_KETRMAX)
   if (!condition)  
6598      {      {
6599      slotB = slotA;      /* TMP2 which is set here used by OP_KETRMAX below. */
6600      for (i++; i < name_count; i++)      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = locals[GET2(slotB, 0) << 1] != no_capture;  
         if (condition) break;  
         }  
       else break;  
       }  
6601      }      }
6602    }    }
6603  return condition;  if (needs_control_head)
6604      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6605  }  }
6606    
6607  static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)  static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6608  {  {
6609  int condition = FALSE;  DEFINE_COMPILER;
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];  
 sljit_uw i;  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == recno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate  
   names for different numbers are allowed, but not vice versa. First  
   scan down for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = GET2(slotB, 0) == group_num;  
       if (condition) break;  
       }  
     else break;  
     }  
6610    
6611    /* Scan up for duplicates */  if (common->capture_last_ptr != 0)
6612    if (!condition)    {
6613      {    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6614      slotB = slotA;    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6615      for (i++; i < name_count; i++)    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6616        {    stacksize++;
6617        slotB += name_entry_size;    }
6618        if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  if (common->optimized_cbracket[offset >> 1] == 0)
6619          {    {
6620          condition = GET2(slotB, 0) == group_num;    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6621          if (condition) break;    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6622          }    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6623        else break;    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6624        }    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6625      }    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6626      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6627      stacksize += 2;
6628    }    }
6629  return condition;  return stacksize;
6630  }  }
6631    
6632  /*  /*
# Line 5932  backtrack_common *backtrack; Line 6690  backtrack_common *backtrack;
6690  pcre_uchar opcode;  pcre_uchar opcode;
6691  int private_data_ptr = 0;  int private_data_ptr = 0;
6692  int offset = 0;  int offset = 0;
6693  int stacksize;  int i, stacksize;
6694    int repeat_ptr = 0, repeat_length = 0;
6695    int repeat_type = 0, repeat_count = 0;
6696  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
6697  pcre_uchar *matchingpath;  pcre_uchar *matchingpath;
6698    pcre_uchar *slot;
6699  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
6700  pcre_uchar ket;  pcre_uchar ket;
6701  assert_backtrack *assert;  assert_backtrack *assert;
6702  BOOL has_alternatives;  BOOL has_alternatives;
6703    BOOL needs_control_head = FALSE;
6704  struct sljit_jump *jump;  struct sljit_jump *jump;
6705  struct sljit_jump *skip;  struct sljit_jump *skip;
6706  struct sljit_label *rmaxlabel = NULL;  struct sljit_label *rmax_label = NULL;
6707  struct sljit_jump *braminzerojump = NULL;  struct sljit_jump *braminzero = NULL;
6708    
6709  PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6710    
# Line 5955  if (*cc == OP_BRAZERO || *cc == OP_BRAMI Line 6717  if (*cc == OP_BRAZERO || *cc == OP_BRAMI
6717    
6718  opcode = *cc;  opcode = *cc;
6719  ccbegin = cc;  ccbegin = cc;
6720  matchingpath = ccbegin + 1 + LINK_SIZE;  matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6721    ket = *matchingpath;
6722    if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6723      {
6724      repeat_ptr = PRIVATE_DATA(matchingpath);
6725      repeat_length = PRIVATE_DATA(matchingpath + 1);
6726      repeat_type = PRIVATE_DATA(matchingpath + 2);
6727      repeat_count = PRIVATE_DATA(matchingpath + 3);
6728      SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6729      if (repeat_type == OP_UPTO)
6730        ket = OP_KETRMAX;
6731      if (repeat_type == OP_MINUPTO)
6732        ket = OP_KETRMIN;
6733      }
6734    
6735  if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)  if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6736    {    {
6737    /* Drop this bracket_backtrack. */    /* Drop this bracket_backtrack. */
6738    parent->top = backtrack->prev;    parent->top = backtrack->prev;
6739    return bracketend(cc);    return matchingpath + 1 + LINK_SIZE + repeat_length;
6740    }    }
6741    
6742  ket = *(bracketend(cc) - 1 - LINK_SIZE);  matchingpath = ccbegin + 1 + LINK_SIZE;
6743  SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);  SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6744  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6745  cc += GET(cc, 1);  cc += GET(cc, 1);
6746    
6747  has_alternatives = *cc == OP_ALT;  has_alternatives = *cc == OP_ALT;
6748  if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))  if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6749    {    has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
   has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;  
   if (*matchingpath == OP_NRREF)  
     {  
     stacksize = GET2(matchingpath, 1);  
     if (common->currententry == NULL || stacksize == RREF_ANY)  
       has_alternatives = FALSE;  
     else if (common->currententry->start == 0)  
       has_alternatives = stacksize != 0;  
     else  
       has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
     }  
   }  
6750    
6751  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6752    opcode = OP_SCOND;    opcode = OP_SCOND;
# Line 6014  else if (opcode == OP_ONCE || opcode == Line 6777  else if (opcode == OP_ONCE || opcode ==
6777    SLJIT_ASSERT(private_data_ptr != 0);    SLJIT_ASSERT(private_data_ptr != 0);
6778    BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;    BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6779    if (opcode == OP_ONCE)    if (opcode == OP_ONCE)
6780      BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);      BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6781    }    }
6782    
6783  /* Instructions before the first alternative. */  /* Instructions before the first alternative. */
6784  stacksize = 0;  stacksize = 0;
6785  if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))  if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6786    stacksize++;    stacksize++;
6787  if (bra == OP_BRAZERO)  if (bra == OP_BRAZERO)
6788    stacksize++;    stacksize++;
# Line 6028  if (stacksize > 0) Line 6791  if (stacksize > 0)
6791    allocate_stack(common, stacksize);    allocate_stack(common, stacksize);
6792    
6793  stacksize = 0;  stacksize = 0;
6794  if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))  if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6795    {    {
6796    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6797    stacksize++;    stacksize++;
# Line 6044  if (bra ==