/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1277 by zherczeg, Mon Mar 11 09:50:29 2013 UTC | revision 1426 by zherczeg, Wed Jan 1 13:14:19 2014 UTC
# Line 168  typedef struct jit_arguments { Line 168  typedef struct jit_arguments {
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169    void *callout_data;    void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171      pcre_uint32 limit_match;
172    int real_offset_count;    int real_offset_count;
173    int offset_count;    int offset_count;
   int call_limit;  
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 182  typedef struct executable_functions { Line 182  typedef struct executable_functions {
182    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
183    void *userdata;    void *userdata;
184    pcre_uint32 top_bracket;    pcre_uint32 top_bracket;
185      pcre_uint32 limit_match;
186    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187  } executable_functions;  } executable_functions;
188    
# Line 202  enum frame_types { Line 203  enum frame_types {
203  };  };
204    
205  enum control_types {  enum control_types {
206    type_commit = 0,    type_mark = 0,
207    type_prune = 1,    type_then_trap = 1
   type_skip = 2  
208  };  };
209    
210  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211    
212  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
213  code generator. It is allocated by compile_matchingpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
214  the aguments for compile_backtrackingpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
215  of its descendants. */  of its descendants. */
216  typedef struct backtrack_common {  typedef struct backtrack_common {
217    /* Concatenation stack. */    /* Concatenation stack. */
# Line 283  typedef struct recurse_entry { Line 283  typedef struct recurse_entry {
283    /* Collects the calls until the function is not created. */    /* Collects the calls until the function is not created. */
284    jump_list *calls;    jump_list *calls;
285    /* Points to the starting opcode. */    /* Points to the starting opcode. */
286    int start;    sljit_sw start;
287  } recurse_entry;  } recurse_entry;
288    
289  typedef struct recurse_backtrack {  typedef struct recurse_backtrack {
# Line 291  typedef struct recurse_backtrack { Line 291  typedef struct recurse_backtrack {
291    BOOL inlined_pattern;    BOOL inlined_pattern;
292  } recurse_backtrack;  } recurse_backtrack;
293    
294  #define MAX_RANGE_SIZE 6  #define OP_THEN_TRAP OP_TABLE_LENGTH
295    
296    typedef struct then_trap_backtrack {
297      backtrack_common common;
298      /* If then_trap is not NULL, this structure contains the real
299      then_trap for the backtracking path. */
300      struct then_trap_backtrack *then_trap;
301      /* Points to the starting opcode. */
302      sljit_sw start;
303      /* Exit point for the then opcodes of this alternative. */
304      jump_list *quit;
305      /* Frame size of the current alternative. */
306      int framesize;
307    } then_trap_backtrack;
308    
309    #define MAX_RANGE_SIZE 4
310    
311  typedef struct compiler_common {  typedef struct compiler_common {
312    /* The sljit generic compiler. */    /* The sljit generic compiler. */
# Line 299  typedef struct compiler_common { Line 314  typedef struct compiler_common {
314    /* First byte code. */    /* First byte code. */
315    pcre_uchar *start;    pcre_uchar *start;
316    /* Maps private data offset to each opcode. */    /* Maps private data offset to each opcode. */
317    int *private_data_ptrs;    sljit_si *private_data_ptrs;
318    /* Tells whether the capturing bracket is optimized. */    /* Tells whether the capturing bracket is optimized. */
319    pcre_uint8 *optimized_cbracket;    pcre_uint8 *optimized_cbracket;
320      /* Tells whether the starting offset is a target of then. */
321      pcre_uint8 *then_offsets;
322      /* Current position where a THEN must jump. */
323      then_trap_backtrack *then_trap;
324    /* Starting offset of private data for capturing brackets. */    /* Starting offset of private data for capturing brackets. */
325    int cbra_ptr;    int cbra_ptr;
326    /* Output vector starting point. Must be divisible by 2. */    /* Output vector starting point. Must be divisible by 2. */
# Line 330  typedef struct compiler_common { Line 349  typedef struct compiler_common {
349    sljit_sw lcc;    sljit_sw lcc;
350    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351    int mode;    int mode;
352    /* \K is in the pattern. */    /* \K is found in the pattern. */
353    BOOL has_set_som;    BOOL has_set_som;
354      /* (*SKIP:arg) is found in the pattern. */
355      BOOL has_skip_arg;
356      /* (*THEN) is found in the pattern. */
357      BOOL has_then;
358    /* Needs to know the start position anytime. */    /* Needs to know the start position anytime. */
359    BOOL needs_start_ptr;    BOOL needs_start_ptr;
360    /* Currently in recurse or assert. */    /* Currently in recurse or negative assert. */
361    BOOL local_exit;    BOOL local_exit;
362      /* Currently in a positive assert. */
363      BOOL positive_assert;
364    /* Newline control. */    /* Newline control. */
365    int nltype;    int nltype;
366      pcre_uint32 nlmax;
367      pcre_uint32 nlmin;
368    int newline;    int newline;
369    int bsr_nltype;    int bsr_nltype;
370      pcre_uint32 bsr_nlmax;
371      pcre_uint32 bsr_nlmin;
372    /* Dollar endonly. */    /* Dollar endonly. */
373    int endonly;    int endonly;
374    /* Tables. */    /* Tables. */
375    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
376    /* Named capturing brackets. */    /* Named capturing brackets. */
377    sljit_uw name_table;    pcre_uchar *name_table;
378    sljit_sw name_count;    sljit_sw name_count;
379    sljit_sw name_entry_size;    sljit_sw name_entry_size;
380    
# Line 360  typedef struct compiler_common { Line 388  typedef struct compiler_common {
388    recurse_entry *currententry;    recurse_entry *currententry;
389    jump_list *partialmatch;    jump_list *partialmatch;
390    jump_list *quit;    jump_list *quit;
391      jump_list *positive_assert_quit;
392    jump_list *forced_quit;    jump_list *forced_quit;
393    jump_list *accept;    jump_list *accept;
394    jump_list *calllimit;    jump_list *calllimit;
# Line 378  typedef struct compiler_common { Line 407  typedef struct compiler_common {
407  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
408    BOOL use_ucp;    BOOL use_ucp;
409  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
410  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
411      jump_list *utfreadchar;
412      jump_list *utfreadchar16;
413    jump_list *utfreadtype8;    jump_list *utfreadtype8;
414  #endif  #endif
415  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 438  typedef struct compare_context { Line 466  typedef struct compare_context {
466  #define STACK_TOP     SLJIT_SCRATCH_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
467  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
468  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
469  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define COUNT_MATCH   SLJIT_SAVED_EREG2
470  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
471    
472  /* Local space layout. */  /* Local space layout. */
# Line 449  typedef struct compare_context { Line 477  typedef struct compare_context {
477  #define POSSESSIVE0      (2 * sizeof(sljit_sw))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
478  #define POSSESSIVE1      (3 * sizeof(sljit_sw))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
479  /* Max limit of recursions. */  /* Max limit of recursions. */
480  #define CALL_LIMIT       (4 * sizeof(sljit_sw))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
481  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
482  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
483  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
484  the start pointers when the end of the capturing group has not yet reached. */  the start pointers when the end of the capturing group has not yet reached. */
485  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
486  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
487  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * sizeof(sljit_sw))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
488  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
489    
490  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
# Line 498  the start pointers when the end of the c Line 526  the start pointers when the end of the c
526  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
527    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
528    
529    #define READ_CHAR_MAX 0x7fffffff
530    
531  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
532  {  {
533  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 507  cc += 1 + LINK_SIZE; Line 537  cc += 1 + LINK_SIZE;
537  return cc;  return cc;
538  }  }
539    
540    static int ones_in_half_byte[16] = {
541      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
542      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
543    };
544    
545  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
546   next_opcode   next_opcode
547   get_private_data_length   check_opcode_types
548   set_private_data_ptrs   set_private_data_ptrs
549   get_framesize   get_framesize
550   init_frame   init_frame
# Line 559  switch(*cc) Line 594  switch(*cc)
594    case OP_CRMINQUERY:    case OP_CRMINQUERY:
595    case OP_CRRANGE:    case OP_CRRANGE:
596    case OP_CRMINRANGE:    case OP_CRMINRANGE:
597      case OP_CRPOSSTAR:
598      case OP_CRPOSPLUS:
599      case OP_CRPOSQUERY:
600      case OP_CRPOSRANGE:
601    case OP_CLASS:    case OP_CLASS:
602    case OP_NCLASS:    case OP_NCLASS:
603    case OP_REF:    case OP_REF:
604    case OP_REFI:    case OP_REFI:
605      case OP_DNREF:
606      case OP_DNREFI:
607    case OP_RECURSE:    case OP_RECURSE:
608    case OP_CALLOUT:    case OP_CALLOUT:
609    case OP_ALT:    case OP_ALT:
# Line 588  switch(*cc) Line 629  switch(*cc)
629    case OP_SCBRAPOS:    case OP_SCBRAPOS:
630    case OP_SCOND:    case OP_SCOND:
631    case OP_CREF:    case OP_CREF:
632    case OP_NCREF:    case OP_DNCREF:
633    case OP_RREF:    case OP_RREF:
634    case OP_NRREF:    case OP_DNRREF:
635    case OP_DEF:    case OP_DEF:
636    case OP_BRAZERO:    case OP_BRAZERO:
637    case OP_BRAMINZERO:    case OP_BRAMINZERO:
638    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
639    case OP_PRUNE:    case OP_PRUNE:
640    case OP_SKIP:    case OP_SKIP:
641      case OP_THEN:
642    case OP_COMMIT:    case OP_COMMIT:
643    case OP_FAIL:    case OP_FAIL:
644    case OP_ACCEPT:    case OP_ACCEPT:
# Line 696  switch(*cc) Line 738  switch(*cc)
738    
739    case OP_MARK:    case OP_MARK:
740    case OP_PRUNE_ARG:    case OP_PRUNE_ARG:
741      case OP_SKIP_ARG:
742      case OP_THEN_ARG:
743    return cc + 1 + 2 + cc[1];    return cc + 1 + 2 + cc[1];
744    
745    default:    default:
746      /* All opcodes are supported now! */
747      SLJIT_ASSERT_STOP();
748    return NULL;    return NULL;
749    }    }
750  }  }
751    
752    static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
753    {
754    int count;
755    pcre_uchar *slot;
756    
757    /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
758    while (cc < ccend)
759      {
760      switch(*cc)
761        {
762        case OP_SET_SOM:
763        common->has_set_som = TRUE;
764        cc += 1;
765        break;
766    
767        case OP_REF:
768        case OP_REFI:
769        common->optimized_cbracket[GET2(cc, 1)] = 0;
770        cc += 1 + IMM2_SIZE;
771        break;
772    
773        case OP_CBRAPOS:
774        case OP_SCBRAPOS:
775        common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
776        cc += 1 + LINK_SIZE + IMM2_SIZE;
777        break;
778    
779        case OP_COND:
780        case OP_SCOND:
781        /* Only AUTO_CALLOUT can insert this opcode. We do
782           not intend to support this case. */
783        if (cc[1 + LINK_SIZE] == OP_CALLOUT)
784          return FALSE;
785        cc += 1 + LINK_SIZE;
786        break;
787    
788        case OP_CREF:
789        common->optimized_cbracket[GET2(cc, 1)] = 0;
790        cc += 1 + IMM2_SIZE;
791        break;
792    
793        case OP_DNREF:
794        case OP_DNREFI:
795        case OP_DNCREF:
796        count = GET2(cc, 1 + IMM2_SIZE);
797        slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
798        while (count-- > 0)
799          {
800          common->optimized_cbracket[GET2(slot, 0)] = 0;
801          slot += common->name_entry_size;
802          }
803        cc += 1 + 2 * IMM2_SIZE;
804        break;
805    
806        case OP_RECURSE:
807        /* Set its value only once. */
808        if (common->recursive_head_ptr == 0)
809          {
810          common->recursive_head_ptr = common->ovector_start;
811          common->ovector_start += sizeof(sljit_sw);
812          }
813        cc += 1 + LINK_SIZE;
814        break;
815    
816        case OP_CALLOUT:
817        if (common->capture_last_ptr == 0)
818          {
819          common->capture_last_ptr = common->ovector_start;
820          common->ovector_start += sizeof(sljit_sw);
821          }
822        cc += 2 + 2 * LINK_SIZE;
823        break;
824    
825        case OP_THEN_ARG:
826        common->has_then = TRUE;
827        common->control_head_ptr = 1;
828        /* Fall through. */
829    
830        case OP_PRUNE_ARG:
831        common->needs_start_ptr = TRUE;
832        /* Fall through. */
833    
834        case OP_MARK:
835        if (common->mark_ptr == 0)
836          {
837          common->mark_ptr = common->ovector_start;
838          common->ovector_start += sizeof(sljit_sw);
839          }
840        cc += 1 + 2 + cc[1];
841        break;
842    
843        case OP_THEN:
844        common->has_then = TRUE;
845        common->control_head_ptr = 1;
846        /* Fall through. */
847    
848        case OP_PRUNE:
849        case OP_SKIP:
850        common->needs_start_ptr = TRUE;
851        cc += 1;
852        break;
853    
854        case OP_SKIP_ARG:
855        common->control_head_ptr = 1;
856        common->has_skip_arg = TRUE;
857        cc += 1 + 2 + cc[1];
858        break;
859    
860        default:
861        cc = next_opcode(common, cc);
862        if (cc == NULL)
863          return FALSE;
864        break;
865        }
866      }
867    return TRUE;
868    }
869    
870    static int get_class_iterator_size(pcre_uchar *cc)
871    {
872    switch(*cc)
873      {
874      case OP_CRSTAR:
875      case OP_CRPLUS:
876      return 2;
877    
878      case OP_CRMINSTAR:
879      case OP_CRMINPLUS:
880      case OP_CRQUERY:
881      case OP_CRMINQUERY:
882      return 1;
883    
884      case OP_CRRANGE:
885      case OP_CRMINRANGE:
886      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
887        return 0;
888      return 2;
889    
890      default:
891      return 0;
892      }
893    }
894    
895    static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
896    {
897    pcre_uchar *end = bracketend(begin);
898    pcre_uchar *next;
899    pcre_uchar *next_end;
900    pcre_uchar *max_end;
901    pcre_uchar type;
902    sljit_sw length = end - begin;
903    int min, max, i;
904    
905    /* Detect fixed iterations first. */
906    if (end[-(1 + LINK_SIZE)] != OP_KET)
907      return FALSE;
908    
909    /* Already detected repeat. */
910    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
911      return TRUE;
912    
913    next = end;
914    min = 1;
915    while (1)
916      {
917      if (*next != *begin)
918        break;
919      next_end = bracketend(next);
920      if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
921        break;
922      next = next_end;
923      min++;
924      }
925    
926    if (min == 2)
927      return FALSE;
928    
929    max = 0;
930    max_end = next;
931    if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
932      {
933      type = *next;
934      while (1)
935        {
936        if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
937          break;
938        next_end = bracketend(next + 2 + LINK_SIZE);
939        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
940          break;
941        next = next_end;
942        max++;
943        }
944    
945      if (next[0] == type && next[1] == *begin && max >= 1)
946        {
947        next_end = bracketend(next + 1);
948        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
949          {
950          for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
951            if (*next_end != OP_KET)
952              break;
953    
954          if (i == max)
955            {
956            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
957            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
958            /* +2 the original and the last. */
959            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
960            if (min == 1)
961              return TRUE;
962            min--;
963            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
964            }
965          }
966        }
967      }
968    
969    if (min >= 3)
970      {
971      common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
972      common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
973      common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
974      return TRUE;
975      }
976    
977    return FALSE;
978    }
979    
980  #define CASE_ITERATOR_PRIVATE_DATA_1 \  #define CASE_ITERATOR_PRIVATE_DATA_1 \
981      case OP_MINSTAR: \      case OP_MINSTAR: \
982      case OP_MINPLUS: \      case OP_MINPLUS: \
# Line 755  switch(*cc) Line 1029  switch(*cc)
1029      case OP_TYPEUPTO: \      case OP_TYPEUPTO: \
1030      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
1031    
1032  static int get_class_iterator_size(pcre_uchar *cc)  static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
 {  
 switch(*cc)  
   {  
   case OP_CRSTAR:  
   case OP_CRPLUS:  
   return 2;  
   
   case OP_CRMINSTAR:  
   case OP_CRMINPLUS:  
   case OP_CRQUERY:  
   case OP_CRMINQUERY:  
   return 1;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))  
     return 0;  
   return 2;  
   
   default:  
   return 0;  
   }  
 }  
   
 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  
1033  {  {
1034  int private_data_length = 0;  pcre_uchar *cc = common->start;
1035  pcre_uchar *alternative;  pcre_uchar *alternative;
 pcre_uchar *name;  
1036  pcre_uchar *end = NULL;  pcre_uchar *end = NULL;
1037  int space, size, i;  int private_data_ptr = *private_data_start;
1038  pcre_uint32 bracketlen;  int space, size, bracketlen;
1039    
 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  
1040  while (cc < ccend)  while (cc < ccend)
1041    {    {
1042    space = 0;    space = 0;
1043    size = 0;    size = 0;
1044    bracketlen = 0;    bracketlen = 0;
1045      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1046        return;
1047    
1048      if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1049        if (detect_repeat(common, cc))
1050          {
1051          /* These brackets are converted to repeats, so no global
1052          based single character repeat is allowed. */
1053          if (cc >= end)
1054            end = bracketend(cc);
1055          }
1056    
1057    switch(*cc)    switch(*cc)
1058      {      {
1059      case OP_SET_SOM:      case OP_KET:
1060      common->has_set_som = TRUE;      if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1061      cc += 1;        {
1062      break;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1063          private_data_ptr += sizeof(sljit_sw);
1064      case OP_REF:        cc += common->private_data_ptrs[cc + 1 - common->start];
1065      case OP_REFI:        }
1066      common->optimized_cbracket[GET2(cc, 1)] = 0;      cc += 1 + LINK_SIZE;
     cc += 1 + IMM2_SIZE;  
1067      break;      break;
1068    
1069      case OP_ASSERT:      case OP_ASSERT:
# Line 817  while (cc < ccend) Line 1075  while (cc < ccend)
1075      case OP_BRAPOS:      case OP_BRAPOS:
1076      case OP_SBRA:      case OP_SBRA:
1077      case OP_SBRAPOS:      case OP_SBRAPOS:
1078      private_data_length += sizeof(sljit_sw);      case OP_SCOND:
1079        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080        private_data_ptr += sizeof(sljit_sw);
1081      bracketlen = 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1082      break;      break;
1083    
1084      case OP_CBRAPOS:      case OP_CBRAPOS:
1085      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1086      private_data_length += sizeof(sljit_sw);      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1087      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;      private_data_ptr += sizeof(sljit_sw);
1088      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1089      break;      break;
1090    
1091      case OP_COND:      case OP_COND:
1092      case OP_SCOND:      /* Might be a hidden SCOND. */
1093      /* Only AUTO_CALLOUT can insert this opcode. We do      alternative = cc + GET(cc, 1);
1094         not intend to support this case. */      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
     if (cc[1 + LINK_SIZE] == OP_CALLOUT)  
       return -1;  
   
     if (*cc == OP_COND)  
1095        {        {
1096        /* Might be a hidden SCOND. */        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1097        alternative = cc + GET(cc, 1);        private_data_ptr += sizeof(sljit_sw);
       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
         private_data_length += sizeof(sljit_sw);  
       }  
     else  
       private_data_length += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CREF:  
     i = GET2(cc, 1);  
     common->optimized_cbracket[i] = 0;  
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_NCREF:  
     bracketlen = GET2(cc, 1);  
     name = (pcre_uchar *)common->name_table;  
     alternative = name;  
     for (i = 0; i < common->name_count; i++)  
       {  
       if (GET2(name, 0) == bracketlen) break;  
       name += common->name_entry_size;  
       }  
     SLJIT_ASSERT(i != common->name_count);  
   
     for (i = 0; i < common->name_count; i++)  
       {  
       if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)  
         common->optimized_cbracket[GET2(alternative, 0)] = 0;  
       alternative += common->name_entry_size;  
       }  
     bracketlen = 0;  
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_BRA:  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRA:  
     case OP_SCBRA:  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_1  
     space = 1;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2A  
     space = 2;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2B  
     space = 2;  
     size = -(2 + IMM2_SIZE);  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_1  
     space = 1;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A  
     if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)  
       space = 2;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B  
     if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)  
       space = 2;  
     size = 1 + IMM2_SIZE;  
     break;  
   
     case OP_CLASS:  
     case OP_NCLASS:  
     size += 1 + 32 / sizeof(pcre_uchar);  
     space = get_class_iterator_size(cc + size);  
     break;  
   
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
     case OP_XCLASS:  
     size = GET(cc, 1);  
     space = get_class_iterator_size(cc + size);  
     break;  
 #endif  
   
     case OP_RECURSE:  
     /* Set its value only once. */  
     if (common->recursive_head_ptr == 0)  
       {  
       common->recursive_head_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 1 + LINK_SIZE;  
     break;  
   
     case OP_CALLOUT:  
     if (common->capture_last_ptr == 0)  
       {  
       common->capture_last_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 2 + 2 * LINK_SIZE;  
     break;  
   
     case OP_PRUNE_ARG:  
     common->needs_start_ptr = TRUE;  
     common->control_head_ptr = 1;  
     /* Fall through. */  
   
     case OP_MARK:  
     if (common->mark_ptr == 0)  
       {  
       common->mark_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 1 + 2 + cc[1];  
     break;  
   
     case OP_PRUNE:  
     case OP_SKIP:  
     common->needs_start_ptr = TRUE;  
     /* Fall through. */  
   
     case OP_COMMIT:  
     common->control_head_ptr = 1;  
     cc += 1;  
     break;  
   
     default:  
     cc = next_opcode(common, cc);  
     if (cc == NULL)  
       return -1;  
     break;  
     }  
   
   if (space > 0 && cc >= end)  
     private_data_length += sizeof(sljit_sw) * space;  
   
   if (size != 0)  
     {  
     if (size < 0)  
       {  
       cc += -size;  
 #ifdef SUPPORT_UTF  
       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
       }  
     else  
       cc += size;  
     }  
   
   if (bracketlen != 0)  
     {  
     if (cc >= end)  
       {  
       end = bracketend(cc);  
       if (end[-1 - LINK_SIZE] == OP_KET)  
         end = NULL;  
       }  
     cc += bracketlen;  
     }  
   }  
 return private_data_length;  
 }  
   
 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)  
 {  
 pcre_uchar *cc = common->start;  
 pcre_uchar *alternative;  
 pcre_uchar *end = NULL;  
 int space, size, bracketlen;  
   
 while (cc < ccend)  
   {  
   space = 0;  
   size = 0;  
   bracketlen = 0;  
   switch(*cc)  
     {  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
     case OP_SCOND:  
     common->private_data_ptrs[cc - common->start] = private_data_ptr;  
     private_data_ptr += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRAPOS:  
     case OP_SCBRAPOS:  
     common->private_data_ptrs[cc - common->start] = private_data_ptr;  
     private_data_ptr += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     case OP_COND:  
     /* Might be a hidden SCOND. */  
     alternative = cc + GET(cc, 1);  
     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
       {  
       common->private_data_ptrs[cc - common->start] = private_data_ptr;  
       private_data_ptr += sizeof(sljit_sw);  
1098        }        }
1099      bracketlen = 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1100      break;      break;
# Line 1115  while (cc < ccend) Line 1159  while (cc < ccend)
1159      break;      break;
1160      }      }
1161    
1162      /* Character iterators, which are not inside a repeated bracket,
1163         get a private slot instead of allocating it on the stack. */
1164    if (space > 0 && cc >= end)    if (space > 0 && cc >= end)
1165      {      {
1166      common->private_data_ptrs[cc - common->start] = private_data_ptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
# Line 1145  while (cc < ccend) Line 1191  while (cc < ccend)
1191      cc += bracketlen;      cc += bracketlen;
1192      }      }
1193    }    }
1194    *private_data_start = private_data_ptr;
1195  }  }
1196    
1197  /* Returns with a frame_types (always < 0) if no need for frame. */  /* Returns with a frame_types (always < 0) if no need for frame. */
1198  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive, BOOL* needs_control_head)  static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1199  {  {
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1200  int length = 0;  int length = 0;
1201  int possessive = 0;  int possessive = 0;
1202  BOOL stack_restore = FALSE;  BOOL stack_restore = FALSE;
# Line 1166  SLJIT_ASSERT(common->control_head_ptr != Line 1212  SLJIT_ASSERT(common->control_head_ptr !=
1212  *needs_control_head = FALSE;  *needs_control_head = FALSE;
1213  #endif  #endif
1214    
1215  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  if (ccend == NULL)
1216    {    {
1217    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;    ccend = bracketend(cc) - (1 + LINK_SIZE);
1218    /* This is correct regardless of common->capture_last_ptr. */    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1219    capture_last_found = TRUE;      {
1220        possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1221        /* This is correct regardless of common->capture_last_ptr. */
1222        capture_last_found = TRUE;
1223        }
1224      cc = next_opcode(common, cc);
1225    }    }
1226    
 cc = next_opcode(common, cc);  
1227  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1228  while (cc < ccend)  while (cc < ccend)
1229    switch(*cc)    switch(*cc)
# Line 1191  while (cc < ccend) Line 1241  while (cc < ccend)
1241    
1242      case OP_MARK:      case OP_MARK:
1243      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
1244        case OP_THEN_ARG:
1245      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1246      stack_restore = TRUE;      stack_restore = TRUE;
1247      if (!setmark_found)      if (!setmark_found)
# Line 1237  while (cc < ccend) Line 1288  while (cc < ccend)
1288      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1289      break;      break;
1290    
     case OP_PRUNE:  
     case OP_SKIP:  
     case OP_COMMIT:  
     if (common->control_head_ptr != 0)  
       *needs_control_head = TRUE;  
     /* Fall through. */  
   
1291      default:      default:
1292      stack_restore = TRUE;      stack_restore = TRUE;
1293      /* Fall through. */      /* Fall through. */
# Line 1326  if (length > 0) Line 1370  if (length > 0)
1370  return stack_restore ? no_frame : no_stack;  return stack_restore ? no_frame : no_stack;
1371  }  }
1372    
1373  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1374  {  {
1375  DEFINE_COMPILER;  DEFINE_COMPILER;
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1376  BOOL setsom_found = recursive;  BOOL setsom_found = recursive;
1377  BOOL setmark_found = recursive;  BOOL setmark_found = recursive;
1378  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
# Line 1341  SLJIT_UNUSED_ARG(stacktop); Line 1384  SLJIT_UNUSED_ARG(stacktop);
1384  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1385    
1386  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
1387  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (ccend == NULL)
1388    cc = next_opcode(common, cc);    {
1389      ccend = bracketend(cc) - (1 + LINK_SIZE);
1390      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1391        cc = next_opcode(common, cc);
1392      }
1393    
1394  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1395  while (cc < ccend)  while (cc < ccend)
1396    switch(*cc)    switch(*cc)
# Line 1363  while (cc < ccend) Line 1411  while (cc < ccend)
1411    
1412      case OP_MARK:      case OP_MARK:
1413      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
1414        case OP_THEN_ARG:
1415      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1416      if (!setmark_found)      if (!setmark_found)
1417        {        {
# Line 1454  while (cc < ccend) Line 1503  while (cc < ccend)
1503    size = 0;    size = 0;
1504    switch(*cc)    switch(*cc)
1505      {      {
1506        case OP_KET:
1507        if (PRIVATE_DATA(cc) != 0)
1508          private_data_length++;
1509        cc += 1 + LINK_SIZE;
1510        break;
1511    
1512      case OP_ASSERT:      case OP_ASSERT:
1513      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1514      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 1622  do Line 1677  do
1677    
1678      switch(*cc)      switch(*cc)
1679        {        {
1680          case OP_KET:
1681          if (PRIVATE_DATA(cc) != 0)
1682            {
1683            count = 1;
1684            srcw[0] = PRIVATE_DATA(cc);
1685            }
1686          cc += 1 + LINK_SIZE;
1687          break;
1688    
1689        case OP_ASSERT:        case OP_ASSERT:
1690        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
1691        case OP_ASSERTBACK:        case OP_ASSERTBACK:
# Line 1868  if (save) Line 1932  if (save)
1932  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1933  }  }
1934    
1935    static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1936    {
1937    pcre_uchar *end = bracketend(cc);
1938    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1939    
1940    /* Assert captures then. */
1941    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1942      current_offset = NULL;
1943    /* Conditional block does not. */
1944    if (*cc == OP_COND || *cc == OP_SCOND)
1945      has_alternatives = FALSE;
1946    
1947    cc = next_opcode(common, cc);
1948    if (has_alternatives)
1949      current_offset = common->then_offsets + (cc - common->start);
1950    
1951    while (cc < end)
1952      {
1953      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1954        cc = set_then_offsets(common, cc, current_offset);
1955      else
1956        {
1957        if (*cc == OP_ALT && has_alternatives)
1958          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1959        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1960          *current_offset = 1;
1961        cc = next_opcode(common, cc);
1962        }
1963      }
1964    
1965    return end;
1966    }
1967    
1968  #undef CASE_ITERATOR_PRIVATE_DATA_1  #undef CASE_ITERATOR_PRIVATE_DATA_1
1969  #undef CASE_ITERATOR_PRIVATE_DATA_2A  #undef CASE_ITERATOR_PRIVATE_DATA_2A
1970  #undef CASE_ITERATOR_PRIVATE_DATA_2B  #undef CASE_ITERATOR_PRIVATE_DATA_2B
# Line 1931  while (list_item) Line 2028  while (list_item)
2028  common->stubs = NULL;  common->stubs = NULL;
2029  }  }
2030    
2031  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  static SLJIT_INLINE void count_match(compiler_common *common)
2032  {  {
2033  DEFINE_COMPILER;  DEFINE_COMPILER;
2034    
2035  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2036  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2037  }  }
2038    
# Line 2015  else Line 2112  else
2112  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2113  if (common->mark_ptr != 0)  if (common->mark_ptr != 0)
2114    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2115  SLJIT_ASSERT(common->control_head_ptr != 0);  if (common->control_head_ptr != 0)
2116  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2117  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2118  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2119  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2120  }  }
2121    
2122  static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)  static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2123  {  {
2124  sljit_sw return_value = 0;  while (current != NULL)
   
 SLJIT_ASSERT(current != NULL);  
 do  
2125    {    {
2126    switch (current[-2])    switch (current[-2])
2127      {      {
2128      case type_commit:      case type_then_trap:
     /* Commit overwrites all. */  
     return -1;  
   
     case type_prune:  
2129      break;      break;
2130    
2131      case type_skip:      case type_mark:
2132      /* Overwrites prune, but not other skips. */      if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2133      if (return_value == 0)        return current[-4];
       return_value = current[-3];  
2134      break;      break;
2135    
2136      default:      default:
# Line 2050  do Line 2139  do
2139      }      }
2140    current = (sljit_sw*)current[-1];    current = (sljit_sw*)current[-1];
2141    }    }
2142  while (current != NULL);  return -1;
 return return_value;  
2143  }  }
2144    
2145  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
# Line 2291  return (bit < 256) ? ((0 << 8) | bit) : Line 2379  return (bit < 256) ? ((0 << 8) | bit) :
2379    
2380  static void check_partial(compiler_common *common, BOOL force)  static void check_partial(compiler_common *common, BOOL force)
2381  {  {
2382  /* Checks whether a partial matching is occured. Does not modify registers. */  /* Checks whether a partial matching is occurred. Does not modify registers. */
2383  DEFINE_COMPILER;  DEFINE_COMPILER;
2384  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
2385    
# Line 2378  else Line 2466  else
2466  JUMPHERE(jump);  JUMPHERE(jump);
2467  }  }
2468    
2469  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common, pcre_uint32 max)
2470  {  {
2471  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2472  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2473  DEFINE_COMPILER;  DEFINE_COMPILER;
2474  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475  struct sljit_jump *jump;  struct sljit_jump *jump;
2476  #endif  #endif
2477    
2478    SLJIT_UNUSED_ARG(max);
2479    
2480  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2481  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2482  if (common->utf)  if (common->utf)
2483    {    {
2484  #if defined COMPILE_PCRE8    if (max < 128) return;
2485    
2486    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2487  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2488    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2489      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2490    JUMPHERE(jump);    JUMPHERE(jump);
2491    }    }
2492  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 }  
   
 static void peek_char(compiler_common *common)  
 {  
 /* Reads the character into TMP1, keeps STR_PTR.  
 Does not check STR_END. TMP2 Destroyed. */  
 DEFINE_COMPILER;  
 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  
 struct sljit_jump *jump;  
 #endif  
2493    
2494  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  #if defined SUPPORT_UTF && defined COMPILE_PCRE16
 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  
2495  if (common->utf)  if (common->utf)
2496    {    {
2497  #if defined COMPILE_PCRE8    if (max < 0xd800) return;
2498    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);  
2499  #elif defined COMPILE_PCRE16    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2500    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2501  #endif /* COMPILE_PCRE[8|16] */    /* TMP2 contains the high surrogate. */
2502    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2503    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2504      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2505      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2506      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2507    JUMPHERE(jump);    JUMPHERE(jump);
2508    }    }
2509  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2510  }  }
2511    
2512  static void read_char8_type(compiler_common *common)  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2513    
2514    static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2515    {
2516    /* Tells whether the character codes below 128 are enough
2517    to determine a match. */
2518    const pcre_uint8 value = nclass ? 0xff : 0;
2519    const pcre_uint8* end = bitset + 32;
2520    
2521    bitset += 16;
2522    do
2523      {
2524      if (*bitset++ != value)
2525        return FALSE;
2526      }
2527    while (bitset < end);
2528    return TRUE;
2529    }
2530    
2531    static void read_char7_type(compiler_common *common, BOOL full_read)
2532    {
2533    /* Reads the precise character type of a character into TMP1, if the character
2534    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2535    full_read argument tells whether characters above max are accepted or not. */
2536    DEFINE_COMPILER;
2537    struct sljit_jump *jump;
2538    
2539    SLJIT_ASSERT(common->utf);
2540    
2541    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2542    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2543    
2544    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545    
2546    if (full_read)
2547      {
2548      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2549      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2550      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2551      JUMPHERE(jump);
2552      }
2553    }
2554    
2555    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2556    
2557    static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2558    {
2559    /* Reads the precise value of a character into TMP1, if the character is
2560    between min and max (c >= min && c <= max). Otherwise it returns with a value
2561    outside the range. Does not check STR_END. */
2562    DEFINE_COMPILER;
2563    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2564    struct sljit_jump *jump;
2565    #endif
2566    
2567    SLJIT_UNUSED_ARG(update_str_ptr);
2568    SLJIT_UNUSED_ARG(min);
2569    SLJIT_UNUSED_ARG(max);
2570    SLJIT_ASSERT(min <= max);
2571    
2572    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2573    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2574    
2575    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2576    if (common->utf)
2577      {
2578      if (max < 128 && !update_str_ptr) return;
2579    
2580      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2581      if (max >= 0x800)
2582        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2583      else if (max < 128)
2584        {
2585        OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2586        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2587        }
2588      else
2589        {
2590        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2591        if (!update_str_ptr)
2592          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2593        else
2594          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2595        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2596        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2597        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2598        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2599        if (update_str_ptr)
2600          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2601        }
2602      JUMPHERE(jump);
2603      }
2604    #endif
2605    
2606    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2607    if (common->utf)
2608      {
2609      if (max >= 0x10000)
2610        {
2611        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2612        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2613        /* TMP2 contains the high surrogate. */
2614        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2615        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2616        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2617        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2618        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2619        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2620        JUMPHERE(jump);
2621        return;
2622        }
2623    
2624      if (max < 0xd800 && !update_str_ptr) return;
2625    
2626      /* Skip low surrogate if necessary. */
2627      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2628      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2629      if (update_str_ptr)
2630        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2631      if (max >= 0xd800)
2632        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2633      JUMPHERE(jump);
2634      }
2635    #endif
2636    }
2637    
2638    static SLJIT_INLINE void read_char(compiler_common *common)
2639    {
2640    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2641    }
2642    
2643    static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2644  {  {
2645  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2646  DEFINE_COMPILER;  DEFINE_COMPILER;
2647  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2648  struct sljit_jump *jump;  struct sljit_jump *jump;
2649  #endif  #endif
2650    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2651    struct sljit_jump *jump2;
2652    #endif
2653    
2654  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(update_str_ptr);
2655    
2656    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2657    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2658    
2659    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2660  if (common->utf)  if (common->utf)
2661    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2662    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2663    it is needed in most cases. */    it is needed in most cases. */
2664    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2665    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2666    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!update_str_ptr)
2667    JUMPHERE(jump);      {
2668  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2669    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2670    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2671    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2674    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2675    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2676    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2677    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2678    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2679  #elif defined COMPILE_PCRE32    else
2680    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2681    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2682    return;    return;
2683    }    }
2684  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2685  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2686  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2687  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2688  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2689  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2690  #endif  #endif
2691  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2692  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2693  JUMPHERE(jump);  JUMPHERE(jump);
2694  #endif  #endif
2695    
2696    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2697    if (common->utf && update_str_ptr)
2698      {
2699      /* Skip low surrogate if necessary. */
2700      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2701      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2702      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2703      JUMPHERE(jump);
2704      }
2705    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2706  }  }
2707    
2708  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2516  if (common->utf) Line 2740  if (common->utf)
2740  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2741  }  }
2742    
2743  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2744  {  {
2745  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2746  DEFINE_COMPILER;  DEFINE_COMPILER;
2747    struct sljit_jump *jump;
2748    
2749  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2750    {    {
2751    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2752    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2753    }    }
2754  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2755    {    {
2756    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2757    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2758    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2759    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2760    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2761      else
2762        {
2763        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2764        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2765        JUMPHERE(jump);
2766        }
2767    }    }
2768  else  else
2769    {    {
2770    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2771    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2772    }    }
2773  }  }
2774    
# Line 2547  else Line 2778  else
2778  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2779  {  {
2780  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2781  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2782  DEFINE_COMPILER;  DEFINE_COMPILER;
2783  struct sljit_jump *jump;  struct sljit_jump *jump;
2784    
2785  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2786    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2787    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2788    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2789    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2790    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2791    
2792  /* Searching for the first zero. */  /* Searching for the first zero. */
2793  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2794  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2795  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2796  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2797  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2798    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2799    
2800    JUMPHERE(jump);
2801    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2802    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2803  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2804  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2805  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2806    
2807  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2808  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2809  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2810  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2811  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2812  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2813    
2814  /* Four byte sequence. */  /* Four byte sequence. */
2815  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2816  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2817  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2818    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2819    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2820  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2821  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2822  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2823    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2824    }
2825    
2826    static void do_utfreadchar16(compiler_common *common)
2827    {
2828    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2829    of the character (>= 0xc0). Return value in TMP1. */
2830    DEFINE_COMPILER;
2831    struct sljit_jump *jump;
2832    
2833    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2834    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2835    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2836    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2837  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2838  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2839  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2840  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2841    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2842    jump = JUMP(SLJIT_C_NOT_ZERO);
2843    /* Two byte sequence. */
2844    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2845    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2846    
2847    JUMPHERE(jump);
2848    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2849    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2850    /* This code runs only in 8 bit mode. No need to shift the value. */
2851    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2852    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2853    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2854    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2855  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2856  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2857  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2858    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2859  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2860  }  }
2861    
# Line 2618  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2875  jump = JUMP(SLJIT_C_NOT_ZERO);
2875  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2876  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2877  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2878    /* The upper 5 bits are known at this point. */
2879    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2880  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2881  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2882  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2883  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2884  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2885    
2886  JUMPHERE(compare);  JUMPHERE(compare);
2887  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2888  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2889    
2890  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2891  JUMPHERE(jump);  JUMPHERE(jump);
2892  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2893  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2894  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2895  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2896  }  }
2897    
2898  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
2899    
2900  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
2901    
# Line 2739  if (firstline) Line 2970  if (firstline)
2970      mainloop = LABEL();      mainloop = LABEL();
2971      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2972      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2973      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
2974      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2975      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2976      JUMPHERE(end);      JUMPHERE(end);
# Line 2815  if (newlinecheck) Line 3046  if (newlinecheck)
3046  return mainloop;  return mainloop;
3047  }  }
3048    
3049  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
3050  {  {
3051  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
3052  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
3053  struct sljit_jump *quit;  pcre_uint32 caseless, chr, mask;
3054  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save;
3055  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any;
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
   
 /* We do not support alternatives now. */  
 if (*(common->start + GET(common->start, 1)) == OP_ALT)  
   return FALSE;  
3056    
3057    repeat = 1;
3058  while (TRUE)  while (TRUE)
3059    {    {
3060      last = TRUE;
3061      any = FALSE;
3062    caseless = 0;    caseless = 0;
3063    must_stop = 1;    switch (*cc)
   switch(*cc)  
3064      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3065      case OP_CHARI:      case OP_CHARI:
3066      caseless = 1;      caseless = 1;
3067      must_stop = 0;      case OP_CHAR:
3068        last = FALSE;
3069      cc++;      cc++;
3070      break;      break;
3071    
# Line 2870  while (TRUE) Line 3090  while (TRUE)
3090      cc++;      cc++;
3091      break;      break;
3092    
3093        case OP_EXACTI:
3094        caseless = 1;
3095      case OP_EXACT:      case OP_EXACT:
3096        repeat = GET2(cc, 1);
3097        last = FALSE;
3098      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3099      break;      break;
3100    
# Line 2881  while (TRUE) Line 3105  while (TRUE)
3105      cc++;      cc++;
3106      break;      break;
3107    
3108      case OP_EXACTI:      case OP_KET:
3109      caseless = 1;      cc += 1 + LINK_SIZE;
3110      cc += 1 + IMM2_SIZE;      continue;
3111    
3112        case OP_ALT:
3113        cc += GET(cc, 1);
3114        continue;
3115    
3116        case OP_ONCE:
3117        case OP_ONCE_NC:
3118        case OP_BRA:
3119        case OP_BRAPOS:
3120        case OP_CBRA:
3121        case OP_CBRAPOS:
3122        alternative = cc + GET(cc, 1);
3123        while (*alternative == OP_ALT)
3124          {
3125          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3126          if (max_chars == 0)
3127            return consumed;
3128          alternative += GET(alternative, 1);
3129          }
3130    
3131        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3132          cc += IMM2_SIZE;
3133        cc += 1 + LINK_SIZE;
3134        continue;
3135    
3136        case OP_CLASS:
3137        case OP_NCLASS:
3138        any = TRUE;
3139        cc += 1 + 32 / sizeof(pcre_uchar);
3140      break;      break;
3141    
3142      default:  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3143      must_stop = 2;      case OP_XCLASS:
3144        any = TRUE;
3145        cc += GET(cc, 1);
3146      break;      break;
3147    #endif
3148    
3149        case OP_NOT_DIGIT:
3150        case OP_DIGIT:
3151        case OP_NOT_WHITESPACE:
3152        case OP_WHITESPACE:
3153        case OP_NOT_WORDCHAR:
3154        case OP_WORDCHAR:
3155        case OP_ANY:
3156        case OP_ALLANY:
3157        any = TRUE;
3158        cc++;
3159        break;
3160    
3161    #ifdef SUPPORT_UCP
3162        case OP_NOTPROP:
3163        case OP_PROP:
3164        any = TRUE;
3165        cc += 1 + 2;
3166        break;
3167    #endif
3168    
3169        case OP_TYPEEXACT:
3170        repeat = GET2(cc, 1);
3171        cc += 1 + IMM2_SIZE;
3172        continue;
3173    
3174        default:
3175        return consumed;
3176      }      }
3177    
3178    if (must_stop == 2)    if (any)
3179        break;      {
3180    #ifdef SUPPORT_UTF
3181        if (common->utf) return consumed;
3182    #endif
3183    #if defined COMPILE_PCRE8
3184        mask = 0xff;
3185    #elif defined COMPILE_PCRE16
3186        mask = 0xffff;
3187    #elif defined COMPILE_PCRE32
3188        mask = 0xffffffff;
3189    #else
3190        SLJIT_ASSERT_STOP();
3191    #endif
3192    
3193        do
3194          {
3195          chars[0] = mask;
3196          chars[1] = mask;
3197    
3198          if (--max_chars == 0)
3199            return consumed;
3200          consumed++;
3201          chars += 2;
3202          }
3203        while (--repeat > 0);
3204    
3205        repeat = 1;
3206        continue;
3207        }
3208    
3209    len = 1;    len = 1;
3210  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3211    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3212  #endif  #endif
3213    
3214    if (caseless && char_has_othercase(common, cc))    if (caseless != 0 && char_has_othercase(common, cc))
3215      {      {
3216      caseless = char_get_othercase_bit(common, cc);      caseless = char_get_othercase_bit(common, cc);
3217      if (caseless == 0)      if (caseless == 0)
3218        return FALSE;        return consumed;
3219  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3220      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3221  #else  #else
# Line 2916  while (TRUE) Line 3228  while (TRUE)
3228    else    else
3229      caseless = 0;      caseless = 0;
3230    
3231    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3232      {    cc_save = cc;
3233      c = *cc;    while (TRUE)
3234      bit = 0;      {
3235      if (len == (caseless & 0xff))      do
3236        {        {
3237        bit = caseless >> 8;        chr = *cc;
3238        c |= bit;  #ifdef COMPILE_PCRE32
3239          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3240            return consumed;
3241    #endif
3242          mask = 0;
3243          if ((pcre_uint32)len == (caseless & 0xff))
3244            {
3245            mask = caseless >> 8;
3246            chr |= mask;
3247            }
3248    
3249          if (chars[0] == NOTACHAR)
3250            {
3251            chars[0] = chr;
3252            chars[1] = mask;
3253            }
3254          else
3255            {
3256            mask |= chars[0] ^ chr;
3257            chr |= mask;
3258            chars[0] = chr;
3259            chars[1] |= mask;
3260            }
3261    
3262          len--;
3263          if (--max_chars == 0)
3264            return consumed;
3265          consumed++;
3266          chars += 2;
3267          cc++;
3268        }        }
3269        while (len > 0);
3270    
3271      chars[location] = c;      if (--repeat == 0)
3272      chars[location + 1] = bit;        break;
3273    
3274      len--;      len = len_save;
3275      location += 2;      cc = cc_save;
3276      cc++;      }
3277    
3278      repeat = 1;
3279      if (last)
3280        return consumed;
3281      }
3282    }
3283    
3284    #define MAX_N_CHARS 16
3285    
3286    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3287    {
3288    DEFINE_COMPILER;
3289    struct sljit_label *start;
3290    struct sljit_jump *quit;
3291    pcre_uint32 chars[MAX_N_CHARS * 2];
3292    pcre_uint8 ones[MAX_N_CHARS];
3293    pcre_uint32 mask;
3294    int i, max;
3295    int offsets[3];
3296    
3297    for (i = 0; i < MAX_N_CHARS; i++)
3298      {
3299      chars[i << 1] = NOTACHAR;
3300      chars[(i << 1) + 1] = 0;
3301      }
3302    
3303    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3304    
3305    if (max <= 1)
3306      return FALSE;
3307    
3308    for (i = 0; i < max; i++)
3309      {
3310      mask = chars[(i << 1) + 1];
3311      ones[i] = ones_in_half_byte[mask & 0xf];
3312      mask >>= 4;
3313      while (mask != 0)
3314        {
3315        ones[i] += ones_in_half_byte[mask & 0xf];
3316        mask >>= 4;
3317      }      }
3318      }
3319    
3320    offsets[0] = -1;
3321    /* Scan forward. */
3322    for (i = 0; i < max; i++)
3323      if (ones[i] <= 2) {
3324        offsets[0] = i;
3325        break;
3326      }
3327    
3328    if (offsets[0] == -1)
3329      return FALSE;
3330    
3331    if (location >= MAX_N_CHARS * 2 || must_stop != 0)  /* Scan backward. */
3332    offsets[1] = -1;
3333    for (i = max - 1; i > offsets[0]; i--)
3334      if (ones[i] <= 2) {
3335        offsets[1] = i;
3336      break;      break;
3337    }    }
3338    
3339  /* At least two characters are required. */  offsets[2] = -1;
3340  if (location < 2 * 2)  if (offsets[1] >= 0)
3341      return FALSE;    {
3342      /* Scan from middle. */
3343      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3344        if (ones[i] <= 2)
3345          {
3346          offsets[2] = i;
3347          break;
3348          }
3349    
3350      if (offsets[2] == -1)
3351        {
3352        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3353          if (ones[i] <= 2)
3354            {
3355            offsets[2] = i;
3356            break;
3357            }
3358        }
3359      }
3360    
3361    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3362    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3363    
3364    chars[0] = chars[offsets[0] << 1];
3365    chars[1] = chars[(offsets[0] << 1) + 1];
3366    if (offsets[2] >= 0)
3367      {
3368      chars[2] = chars[offsets[2] << 1];
3369      chars[3] = chars[(offsets[2] << 1) + 1];
3370      }
3371    if (offsets[1] >= 0)
3372      {
3373      chars[4] = chars[offsets[1] << 1];
3374      chars[5] = chars[(offsets[1] << 1) + 1];
3375      }
3376    
3377    max -= 1;
3378  if (firstline)  if (firstline)
3379    {    {
3380    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3381    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3382    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3383    }    }
3384  else  else
3385    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3386    
3387  start = LABEL();  start = LABEL();
3388  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3389    
3390  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3391  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  if (offsets[1] >= 0)
3392      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3393  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3394    
3395  if (chars[1] != 0)  if (chars[1] != 0)
3396    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3397  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3398  if (location > 2 * 2)  if (offsets[2] >= 0)
3399    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3400  if (chars[3] != 0)  
3401    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3402    {    {
3403    if (chars[5] != 0)    if (chars[5] != 0)
3404      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3405    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3406      }
3407    
3408    if (offsets[2] >= 0)
3409      {
3410      if (chars[3] != 0)
3411        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3412      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3413    }    }
3414  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3415    
# Line 2978  JUMPHERE(quit); Line 3418  JUMPHERE(quit);
3418  if (firstline)  if (firstline)
3419    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3420  else  else
3421    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3422  return TRUE;  return TRUE;
3423  }  }
3424    
# Line 3098  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3538  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3538  skip_char_back(common);  skip_char_back(common);
3539    
3540  loop = LABEL();  loop = LABEL();
3541  read_char(common);  read_char_range(common, common->nlmin, common->nlmax, TRUE);
3542  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3543  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3544    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 3127  if (firstline) Line 3567  if (firstline)
3567    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3568  }  }
3569    
3570  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3571    
3572  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3573  {  {
3574  DEFINE_COMPILER;  DEFINE_COMPILER;
3575  struct sljit_label *start;  struct sljit_label *start;
3576  struct sljit_jump *quit;  struct sljit_jump *quit;
3577  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3578  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3579  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3580  struct sljit_jump *jump;  struct sljit_jump *jump;
3581  #endif  #endif
3582    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3583  if (firstline)  if (firstline)
3584    {    {
3585    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3160  if (common->utf) Line 3595  if (common->utf)
3595    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3596  #endif  #endif
3597    
3598  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3599    {    {
3600  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3601    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3169  if (!check_class_ranges(common, inverted Line 3604  if (!check_class_ranges(common, inverted
3604  #endif  #endif
3605    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3606    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3607    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3608    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3609    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3610    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3372  JUMPHERE(skipread); Line 3807  JUMPHERE(skipread);
3807    
3808  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3809  check_str_end(common, &skipread_list);  check_str_end(common, &skipread_list);
3810  peek_char(common);  peek_char(common, READ_CHAR_MAX);
3811    
3812  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
3813  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 3418  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 3853  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
3853  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3854  }  }
3855    
3856  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
3857  {  {
3858  DEFINE_COMPILER;  DEFINE_COMPILER;
3859  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
   
 if (ranges[0] < 0)  
   return FALSE;  
   
 switch(ranges[0])  
   {  
   case 1:  
   if (readch)  
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
   return TRUE;  
   
   case 2:  
   if (readch)  
     read_char(common);  
   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);  
   add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));  
   return TRUE;  
   
   case 4:  
   if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])  
     {  
     if (readch)  
       read_char(common);  
     if (ranges[1] != 0)  
       {  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       }  
     else  
       {  
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
     return TRUE;  
     }  
   if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
     {  
     if (readch)  
       read_char(common);  
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
     }  
   return FALSE;  
   
   default:  
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
   
 for (i = 1; i < 256; i++)  
   if ((ctypes[i] & flag) != bit)  
     {  
     if (length >= MAX_RANGE_SIZE)  
       return;  
     ranges[2 + length] = i;  
     length++;  
     bit ^= flag;  
     }  
   
 if (bit != 0)  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
3860  pcre_uint8 bit, cbit, all;  pcre_uint8 bit, cbit, all;
3861  int i, byte, length = 0;  int i, byte, length = 0;
3862    
3863  bit = bits[0] & 0x1;  bit = bits[0] & 0x1;
3864  ranges[1] = bit;  /* All bits will be zero or one (since bit is zero or one). */
 /* Can be 0 or 255. */  
3865  all = -bit;  all = -bit;
3866    
3867  for (i = 0; i < 256; )  for (i = 0; i < 256; )
# Line 3536  for (i = 0; i < 256; ) Line 3876  for (i = 0; i < 256; )
3876        {        {
3877        if (length >= MAX_RANGE_SIZE)        if (length >= MAX_RANGE_SIZE)
3878          return FALSE;          return FALSE;
3879        ranges[2 + length] = i;        ranges[length] = i;
3880        length++;        length++;
3881        bit = cbit;        bit = cbit;
3882        all = -cbit;        all = -cbit;
# Line 3549  if (((bit == 0) && nclass) || ((bit == 1 Line 3889  if (((bit == 0) && nclass) || ((bit == 1
3889    {    {
3890    if (length >= MAX_RANGE_SIZE)    if (length >= MAX_RANGE_SIZE)
3891      return FALSE;      return FALSE;
3892    ranges[2 + length] = 256;    ranges[length] = 256;
3893    length++;    length++;
3894    }    }
 ranges[0] = length;  
3895    
3896  return check_ranges(common, ranges, backtracks, FALSE);  if (length < 0 || length > 4)
3897      return FALSE;
3898    
3899    bit = bits[0] & 0x1;
3900    if (invert) bit ^= 0x1;
3901    
3902    /* No character is accepted. */
3903    if (length == 0 && bit == 0)
3904      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3905    
3906    switch(length)
3907      {
3908      case 0:
3909      /* When bit != 0, all characters are accepted. */
3910      return TRUE;
3911    
3912      case 1:
3913      add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3914      return TRUE;
3915    
3916      case 2:
3917      if (ranges[0] + 1 != ranges[1])
3918        {
3919        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3920        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3921        }
3922      else
3923        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3924      return TRUE;
3925    
3926      case 3:
3927      if (bit != 0)
3928        {
3929        add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3930        if (ranges[0] + 1 != ranges[1])
3931          {
3932          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3933          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3934          }
3935        else
3936          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3937        return TRUE;
3938        }
3939    
3940      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3941      if (ranges[1] + 1 != ranges[2])
3942        {
3943        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3944        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3945        }
3946      else
3947        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3948      return TRUE;
3949    
3950      case 4:
3951      if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3952          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3953          && is_powerof2(ranges[2] - ranges[0]))
3954        {
3955        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3956        if (ranges[2] + 1 != ranges[3])
3957          {
3958          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3959          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3960          }
3961        else
3962          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3963        return TRUE;
3964        }
3965    
3966      if (bit != 0)
3967        {
3968        i = 0;
3969        if (ranges[0] + 1 != ranges[1])
3970          {
3971          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3972          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3973          i = ranges[0];
3974          }
3975        else
3976          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3977    
3978        if (ranges[2] + 1 != ranges[3])
3979          {
3980          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3981          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3982          }
3983        else
3984          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3985        return TRUE;
3986        }
3987    
3988      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3989      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3990      if (ranges[1] + 1 != ranges[2])
3991        {
3992        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3993        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3994        }
3995      else
3996        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3997      return TRUE;
3998    
3999      default:
4000      SLJIT_ASSERT_STOP();
4001      return FALSE;
4002      }
4003  }  }
4004    
4005  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 3921  return cc; Line 4366  return cc;
4366  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
4367    if ((value) != typeoffset) \    if ((value) != typeoffset) \
4368      { \      { \
4369      if ((value) > typeoffset) \      if ((value) < typeoffset) \
       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \  
     else \  
4370        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4371        else \
4372          OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4373      } \      } \
4374    typeoffset = (value);    typeoffset = (value);
4375    
4376  #define SET_CHAR_OFFSET(value) \  #define SET_CHAR_OFFSET(value) \
4377    if ((value) != charoffset) \    if ((value) != charoffset) \
4378      { \      { \
4379      if ((value) > charoffset) \      if ((value) < charoffset) \
4380        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4381      else \      else \
4382        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4383      } \      } \
4384    charoffset = (value);    charoffset = (value);
4385    
# Line 3942  static void compile_xclass_matchingpath( Line 4387  static void compile_xclass_matchingpath(
4387  {  {
4388  DEFINE_COMPILER;  DEFINE_COMPILER;
4389  jump_list *found = NULL;  jump_list *found = NULL;
4390  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4391  pcre_int32 c, charoffset;  sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
 const pcre_uint32 *other_cases;  
4392  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4393  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4394  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4395    #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4396    BOOL utf = common->utf;
4397    #endif
4398    
4399  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4400  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4401  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4402  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4403  pcre_int32 typeoffset;  const pcre_uint32 *other_cases;
4404    sljit_uw typeoffset;
4405  #endif  #endif
4406    
4407  /* Although SUPPORT_UTF must be defined, we are  /* Scanning the necessary info. */
4408     not necessary in utf mode even in 8 bit mode. */  cc++;
4409  detect_partial_match(common, backtracks);  ccbegin = cc;
4410  read_char(common);  compares = 0;
4411    if (cc[-1] & XCL_MAP)
 if ((*cc++ & XCL_MAP) != 0)  
4412    {    {
4413    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    min = 0;
 #ifndef COMPILE_PCRE8  
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #endif  
   
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))  
     {  
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));  
     }  
   
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     JUMPHERE(jump);  
 #endif  
   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4414    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4415    }    }
4416    
 /* Scanning the necessary info. */  
 ccbegin = cc;  
 compares = 0;  
4417  while (*cc != XCL_END)  while (*cc != XCL_END)
4418    {    {
4419    compares++;    compares++;
4420    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4421      {      {
4422      cc += 2;      cc ++;
4423  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4424      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c > max) max = c;
4425  #endif      if (c < min) min = c;
4426  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4427      needschar = TRUE;      needschar = TRUE;
4428  #endif  #endif
4429      }      }
4430    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4431      {      {
4432      cc += 2;      cc ++;
4433  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4434      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c < min) min = c;
4435  #endif      GETCHARINCTEST(c, cc);
4436      cc++;      if (c > max) max = c;
 #ifdef SUPPORT_UTF  
     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
4437  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4438      needschar = TRUE;      needschar = TRUE;
4439  #endif  #endif
# Line 4028  while (*cc != XCL_END) Line 4443  while (*cc != XCL_END)
4443      {      {
4444      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4445      cc++;      cc++;
4446        if (*cc == PT_CLIST)
4447          {
4448          other_cases = PRIV(ucd_caseless_sets) + cc[1];
4449          while (*other_cases != NOTACHAR)
4450            {
4451            if (*other_cases > max) max = *other_cases;
4452            if (*other_cases < min) min = *other_cases;
4453            other_cases++;
4454            }
4455          }
4456        else
4457          {
4458          max = READ_CHAR_MAX;
4459          min = 0;
4460          }
4461    
4462      switch(*cc)      switch(*cc)
4463        {        {
4464        case PT_ANY:        case PT_ANY:
# Line 4047  while (*cc != XCL_END) Line 4478  while (*cc != XCL_END)
4478        case PT_SPACE:        case PT_SPACE:
4479        case PT_PXSPACE:        case PT_PXSPACE:
4480        case PT_WORD:        case PT_WORD:
4481          case PT_PXGRAPH:
4482          case PT_PXPRINT:
4483          case PT_PXPUNCT:
4484        needstype = TRUE;        needstype = TRUE;
4485        needschar = TRUE;        needschar = TRUE;
4486        break;        break;
# Line 4065  while (*cc != XCL_END) Line 4499  while (*cc != XCL_END)
4499  #endif  #endif
4500    }    }
4501    
4502    /* We are not necessarily in UTF mode, even in 8-bit mode. */
4503    cc = ccbegin;
4504    detect_partial_match(common, backtracks);
4505    read_char_range(common, min, max, (cc[0] & XCL_NOT) != 0);
4506    
4507    if ((cc[-1] & XCL_HASPROP) == 0)
4508      {
4509      if ((cc[-1] & XCL_MAP) != 0)
4510        {
4511        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4512        if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4513          {
4514          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4515          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4516          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4517          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4518          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4519          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4520          }
4521    
4522        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4523        JUMPHERE(jump);
4524    
4525        cc += 32 / sizeof(pcre_uchar);
4526        }
4527      else
4528        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4529      }
4530    else if ((cc[-1] & XCL_MAP) != 0)
4531      {
4532      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4533    #ifdef SUPPORT_UCP
4534      charsaved = TRUE;
4535    #endif
4536      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4537        {
4538    #ifdef COMPILE_PCRE8
4539        SLJIT_ASSERT(common->utf);
4540    #endif
4541        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4542    
4543        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4544        OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4545        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4546        OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4547        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4548        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4549    
4550        JUMPHERE(jump);
4551        }
4552    
4553      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4554      cc += 32 / sizeof(pcre_uchar);
4555      }
4556    
4557  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4558  /* Simple register allocation. TMP1 is preferred if possible. */  /* Simple register allocation. TMP1 is preferred if possible. */
4559  if (needstype || needsscript)  if (needstype || needsscript)
# Line 4106  if (needstype || needsscript) Line 4595  if (needstype || needsscript)
4595  #endif  #endif
4596    
4597  /* Generating code. */  /* Generating code. */
 cc = ccbegin;  
4598  charoffset = 0;  charoffset = 0;
4599  numberofcmps = 0;  numberofcmps = 0;
4600  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4122  while (*cc != XCL_END) Line 4610  while (*cc != XCL_END)
4610    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4611      {      {
4612      cc ++;      cc ++;
4613  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4614    
4615      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4616        {        {
4617        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4618        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4619        numberofcmps++;        numberofcmps++;
4620        }        }
4621      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4622        {        {
4623        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4624        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4625        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4626        numberofcmps = 0;        numberofcmps = 0;
4627        }        }
4628      else      else
4629        {        {
4630        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4631        numberofcmps = 0;        numberofcmps = 0;
4632        }        }
4633      }      }
4634    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4635      {      {
4636      cc ++;      cc ++;
4637  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4638      SET_CHAR_OFFSET(c);      SET_CHAR_OFFSET(c);
4639  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4640      if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4641      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4642        {        {
4643        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4644        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4645        numberofcmps++;        numberofcmps++;
4646        }        }
4647      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4648        {        {
4649        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4650        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4651        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4652        numberofcmps = 0;        numberofcmps = 0;
4653        }        }
4654      else      else
4655        {        {
4656        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4657        numberofcmps = 0;        numberofcmps = 0;
4658        }        }
4659      }      }
# Line 4234  while (*cc != XCL_END) Line 4702  while (*cc != XCL_END)
4702    
4703        case PT_SPACE:        case PT_SPACE:
4704        case PT_PXSPACE:        case PT_PXSPACE:
       if (*cc == PT_SPACE)  
         {  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  
         jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);  
         }  
4705        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
4706        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4707        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4708        if (*cc == PT_SPACE)  
4709          JUMPHERE(jump);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4710          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4711    
4712          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4713          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4714    
4715        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
4716        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
# Line 4252  while (*cc != XCL_END) Line 4719  while (*cc != XCL_END)
4719        break;        break;
4720    
4721        case PT_WORD:        case PT_WORD:
4722        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4723        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4724        /* Fall through. */        /* Fall through. */
4725    
# Line 4300  while (*cc != XCL_END) Line 4767  while (*cc != XCL_END)
4767          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4768          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4769    
4770          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4771          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4772    
4773          other_cases += 3;          other_cases += 3;
4774          }          }
4775        else        else
4776          {          {
4777          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4778          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4779          }          }
4780    
4781        while (*other_cases != NOTACHAR)        while (*other_cases != NOTACHAR)
4782          {          {
4783          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4784          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4785          }          }
4786        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4787        break;        break;
4788    
4789        case PT_UCNC:        case PT_UCNC:
4790        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4791        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4792        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4793        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4794        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4795        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4796    
4797        SET_CHAR_OFFSET(0xa0);        SET_CHAR_OFFSET(0xa0);
4798        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4799        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4800        SET_CHAR_OFFSET(0);        SET_CHAR_OFFSET(0);
4801        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4802        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4803        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4804        break;        break;
4805    
4806          case PT_PXGRAPH:
4807          /* C and Z groups are the farthest two groups. */
4808          SET_TYPE_OFFSET(ucp_Ll);
4809          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4810          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4811    
4812          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4813    
4814          /* In case of ucp_Cf, we overwrite the result. */
4815          SET_CHAR_OFFSET(0x2066);
4816          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4817          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4818    
4819          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4820          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4821    
4822          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4823          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4824    
4825          JUMPHERE(jump);
4826          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4827          break;
4828    
4829          case PT_PXPRINT:
4830          /* C and Z groups are the farthest two groups. */
4831          SET_TYPE_OFFSET(ucp_Ll);
4832          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4833          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4834    
4835          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4836          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4837    
4838          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4839    
4840          /* In case of ucp_Cf, we overwrite the result. */
4841          SET_CHAR_OFFSET(0x2066);
4842          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4843          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4844    
4845          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4846          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4847    
4848          JUMPHERE(jump);
4849          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4850          break;
4851    
4852          case PT_PXPUNCT:
4853          SET_TYPE_OFFSET(ucp_Sc);
4854          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4855          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4856    
4857          SET_CHAR_OFFSET(0);
4858          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4859          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4860    
4861          SET_TYPE_OFFSET(ucp_Pc);
4862          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4863          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4864          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4865          break;
4866        }        }
4867      cc += 2;      cc += 2;
4868      }      }
# Line 4366  struct sljit_label *label; Line 4894  struct sljit_label *label;
4894  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4895  pcre_uchar propdata[5];  pcre_uchar propdata[5];
4896  #endif  #endif
4897  #endif  #endif /* SUPPORT_UTF */
4898    
4899  switch(type)  switch(type)
4900    {    {
# Line 4391  switch(type) Line 4919  switch(type)
4919    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
4920    case OP_DIGIT:    case OP_DIGIT:
4921    /* Digits are usually 0-9, so it is worth optimizing them. */    /* Digits are usually 0-9, so it is worth optimizing them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
4922    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4923    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4924    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4925      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
4926    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
4927      {  #endif
4928      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
4929      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
4930      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4931      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
4932    return cc;    return cc;
4933    
4934    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
4935    case OP_WHITESPACE:    case OP_WHITESPACE:
4936    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4937    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4938      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4939        read_char7_type(common, type == OP_NOT_WHITESPACE);
4940      else
4941    #endif
4942        read_char8_type(common, type == OP_NOT_WHITESPACE);
4943    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4944    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4945    return cc;    return cc;
# Line 4418  switch(type) Line 4947  switch(type)
4947    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
4948    case OP_WORDCHAR:    case OP_WORDCHAR:
4949    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4950    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4951      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4952        read_char7_type(common, type == OP_NOT_WORDCHAR);
4953      else
4954    #endif
4955        read_char8_type(common, type == OP_NOT_WORDCHAR);
4956    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4957    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4958    return cc;    return cc;
4959    
4960    case OP_ANY:    case OP_ANY:
4961    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4962    read_char(common);    read_char_range(common, common->nlmin, common->nlmax, TRUE);
4963    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4964      {      {
4965      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4481  switch(type) Line 5015  switch(type)
5015  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5016    case OP_NOTPROP:    case OP_NOTPROP:
5017    case OP_PROP:    case OP_PROP:
5018    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5019    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5020    propdata[2] = cc[0];    propdata[2] = cc[0];
5021    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4493  switch(type) Line 5027  switch(type)
5027    
5028    case OP_ANYNL:    case OP_ANYNL:
5029    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5030    read_char(common);    read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5031    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5032    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5033    end_list = NULL;    end_list = NULL;
# Line 4515  switch(type) Line 5049  switch(type)
5049    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5050    case OP_HSPACE:    case OP_HSPACE:
5051    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5052    read_char(common);    read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5053    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5054    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5055    return cc;    return cc;
# Line 4523  switch(type) Line 5057  switch(type)
5057    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5058    case OP_VSPACE:    case OP_VSPACE:
5059    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5060    read_char(common);    read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5061    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5062    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5063    return cc;    return cc;
# Line 4622  switch(type) Line 5156  switch(type)
5156      else      else
5157        {        {
5158        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5159        read_char(common);        read_char_range(common, common->nlmin, common->nlmax, TRUE);
5160        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5161        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5162        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 4670  switch(type) Line 5204  switch(type)
5204    else    else
5205      {      {
5206      skip_char_back(common);      skip_char_back(common);
5207      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
5208      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5209      }      }
5210    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4721  switch(type) Line 5255  switch(type)
5255      }      }
5256    else    else
5257      {      {
5258      peek_char(common);      peek_char(common, common->nlmax);
5259      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5260      }      }
5261    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4745  switch(type) Line 5279  switch(type)
5279  #endif  #endif
5280      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5281      }      }
5282    
5283    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5284  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5285    if (common->utf)    if (common->utf)
5286      {      {
# Line 4755  switch(type) Line 5289  switch(type)
5289    else    else
5290  #endif  #endif
5291      c = *cc;      c = *cc;
5292    
5293    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5294      {      {
5295        read_char_range(common, c, c, FALSE);
5296      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5297      return cc + length;      return cc + length;
5298      }      }
5299    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5300      read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5301    bit = c ^ oc;    bit = c ^ oc;
5302    if (is_powerof2(bit))    if (is_powerof2(bit))
5303      {      {
# Line 4768  switch(type) Line 5305  switch(type)
5305      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5306      return cc + length;      return cc + length;
5307      }      }
5308    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5309    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5310    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5311    return cc + length;    return cc + length;
5312    
5313    case OP_NOT:    case OP_NOT:
# Line 4807  switch(type) Line 5342  switch(type)
5342  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5343        {        {
5344        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5345        }        }
5346      }      }
5347    else    else
5348  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5349      c = *cc;      c = *cc;
     }  
5350    
5351    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5352        {
5353        read_char_range(common, c, c, TRUE);
5354      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5355        }
5356    else    else
5357      {      {
5358      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5359        read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5360      bit = c ^ oc;      bit = c ^ oc;
5361      if (is_powerof2(bit))      if (is_powerof2(bit))
5362        {        {
# Line 4839  switch(type) Line 5374  switch(type)
5374    case OP_CLASS:    case OP_CLASS:
5375    case OP_NCLASS:    case OP_NCLASS:
5376    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5377    read_char(common);  
5378    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5379      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5380      read_char_range(common, 0, bit, type == OP_NCLASS);
5381    #else
5382      read_char_range(common, 0, 255, type == OP_NCLASS);
5383    #endif
5384    
5385      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5386      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5387    
5388  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5389    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5390    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5391      {      {
5392      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5393      if (type == OP_CLASS)      if (type == OP_CLASS)
5394        {        {
5395        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5396        jump[0] = NULL;        jump[0] = NULL;
5397        }        }
5398      }      }
5399  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5400      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5401      if (type == OP_CLASS)
5402        {
5403        add_jump(compiler, backtracks, jump[0]);
5404        jump[0] = NULL;
5405        }
5406    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5407    
5408    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5409    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5410    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5411    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5412    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5413    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5414    
5415  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5416    if (jump[0] != NULL)    if (jump[0] != NULL)
5417      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5418  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5419    
5420    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5421    
5422  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 4973  if (context.length > 0) Line 5521  if (context.length > 0)
5521  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5522  }  }
5523    
 /* Emits the preliminary checks for a back reference and returns a jump that
 the caller has to bind.

 The reference number is read with GET2(cc, 1); it is doubled because the
 code below addresses a pair of slots, OVECTOR(offset) and OVECTOR(offset + 1)
 (presumably the start and end of the captured group - confirm against the
 OVECTOR layout). The value of OVECTOR(offset) is left in TMP1 for the caller.

 common      compiler state; jscript_compat selects the reduced check
 cc          points to the back reference opcode
 backtracks  list that receives the "TMP1 equals OVECTOR(1)" jump, or NULL
             when that case must be folded into the returned jump instead

 Returns: with backtracks == NULL (and no jscript_compat) a jump taken when
 TMP1 equals either OVECTOR(1) or OVECTOR(offset + 1); otherwise a jump taken
 when TMP1 equals OVECTOR(offset + 1). This reading assumes the usual sljit
 meaning of the SET_E / OP_FLAGS / CMP helpers, which are defined elsewhere. */
 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  
 {  
 DEFINE_COMPILER;  
 /* Two OVECTOR slots are used per reference, hence the shift. */
 int offset = GET2(cc, 1) << 1;  
   
 /* TMP1 = OVECTOR(offset); every comparison below is made against it. */
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
 /* In jscript_compat mode the OVECTOR(1) comparison is omitted entirely. */
 if (!common->jscript_compat)  
   {  
   if (backtracks == NULL)  
     {  
     /* OVECTOR(1) contains the "string begin - 1" constant. */  
     /* No backtrack list was supplied, so both equality tests are merged
     into TMP2 and reported through the single returned jump. */
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
     return JUMP(SLJIT_C_NOT_ZERO);  
     }  
   /* A backtrack list exists: TMP1 == OVECTOR(1) is routed to it. */
   add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));  
   }  
 /* Remaining case: both slots of the pair hold the same value. */
 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
 }  
   
5524  /* Forward definitions. */  /* Forward definitions. */
5525  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5526  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
# Line 5027  static void compile_backtrackingpath(com Line 5553  static void compile_backtrackingpath(com
5553    
5554  #define BACKTRACK_AS(type) ((type *)backtrack)  #define BACKTRACK_AS(type) ((type *)backtrack)
5555    
5556  static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)  static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5557    {
5558    /* The OVECTOR offset goes to TMP2. */
5559    DEFINE_COMPILER;
5560    int count = GET2(cc, 1 + IMM2_SIZE);
5561    pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5562    unsigned int offset;
5563    jump_list *found = NULL;
5564    
5565    SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5566    
5567    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5568    
5569    count--;
5570    while (count-- > 0)
5571      {
5572      offset = GET2(slot, 0) << 1;
5573      GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5574      add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5575      slot += common->name_entry_size;
5576      }
5577    
5578    offset = GET2(slot, 0) << 1;
5579    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5580    if (backtracks != NULL && !common->jscript_compat)
5581      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5582    
5583    set_jumps(found, LABEL());
5584    }
5585    
5586    static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5587  {  {
5588  DEFINE_COMPILER;  DEFINE_COMPILER;
5589  int offset = GET2(cc, 1) << 1;  BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5590    int offset = 0;
5591  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
5592  struct sljit_jump *partial;  struct sljit_jump *partial;
5593  struct sljit_jump *nopartial;  struct sljit_jump *nopartial;
5594    
5595  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  if (ref)
5596  /* OVECTOR(1) contains the "string begin - 1" constant. */    {
5597  if (withchecks && !common->jscript_compat)    offset = GET2(cc, 1) << 1;
5598    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5599      /* OVECTOR(1) contains the "string begin - 1" constant. */
5600      if (withchecks && !common->jscript_compat)
5601        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5602      }
5603    else
5604      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5605    
5606  #if defined SUPPORT_UTF && defined SUPPORT_UCP  #if defined SUPPORT_UTF && defined SUPPORT_UCP
5607  if (common->utf && *cc == OP_REFI)  if (common->utf && *cc == OP_REFI)
5608    {    {
5609    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5610    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));    if (ref)
5611        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5612      else
5613        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5614    
5615    if (withchecks)    if (withchecks)
5616      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5617    
# Line 5069  if (common->utf && *cc == OP_REFI) Line 5636  if (common->utf && *cc == OP_REFI)
5636  else  else
5637  #endif /* SUPPORT_UTF && SUPPORT_UCP */  #endif /* SUPPORT_UTF && SUPPORT_UCP */
5638    {    {
5639    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);    if (ref)
5640        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5641      else
5642        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5643    
5644    if (withchecks)    if (withchecks)
5645      jump = JUMP(SLJIT_C_ZERO);      jump = JUMP(SLJIT_C_ZERO);
5646    
# Line 5106  if (jump != NULL) Line 5677  if (jump != NULL)
5677    else    else
5678      JUMPHERE(jump);      JUMPHERE(jump);
5679    }    }
 return cc + 1 + IMM2_SIZE;  
5680  }  }
5681    
5682  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5683  {  {
5684  DEFINE_COMPILER;  DEFINE_COMPILER;
5685    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5686  backtrack_common *backtrack;  backtrack_common *backtrack;
5687  pcre_uchar type;  pcre_uchar type;
5688    int offset = 0;
5689  struct sljit_label *label;  struct sljit_label *label;
5690  struct sljit_jump *zerolength;  struct sljit_jump *zerolength;
5691  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 5123  BOOL minimize; Line 5695  BOOL minimize;
5695    
5696  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5697    
5698    if (ref)
5699      offset = GET2(cc, 1) << 1;
5700    else
5701      cc += IMM2_SIZE;
5702  type = cc[1 + IMM2_SIZE];  type = cc[1 + IMM2_SIZE];
5703    
5704    SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5705  minimize = (type & 0x1) != 0;  minimize = (type & 0x1) != 0;
5706  switch(type)  switch(type)
5707    {    {
# Line 5161  if (!minimize) Line 5739  if (!minimize)
5739    if (min == 0)    if (min == 0)
5740      {      {
5741      allocate_stack(common, 2);      allocate_stack(common, 2);
5742        if (ref)
5743          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5744      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5745      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5746      /* Temporary release of STR_PTR. */      /* Temporary release of STR_PTR. */
5747      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5748      zerolength = compile_ref_checks(common, ccbegin, NULL);      /* Handles both invalid and empty cases. Since the minimum repeat,
5749        is zero the invalid case is basically the same as an empty case. */
5750        if (ref)
5751          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5752        else
5753          {
5754          compile_dnref_search(common, ccbegin, NULL);
5755          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5756          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5757          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5758          }
5759      /* Restore if not zero length. */      /* Restore if not zero length. */
5760      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5761      }      }
5762    else    else
5763      {      {
5764      allocate_stack(common, 1);      allocate_stack(common, 1);
5765        if (ref)
5766          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5767      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5768      zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);      if (ref)
5769          {
5770          add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5771          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5772          }
5773        else
5774          {
5775          compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5776          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5777          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5778          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5779          }
5780      }      }
5781    
5782    if (min > 1 || max > 1)    if (min > 1 || max > 1)
5783      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5784    
5785    label = LABEL();    label = LABEL();
5786      if (!ref)
5787        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5788    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5789    
5790    if (min > 1 || max > 1)    if (min > 1 || max > 1)
# Line 5210  if (!minimize) Line 5815  if (!minimize)
5815    JUMPHERE(zerolength);    JUMPHERE(zerolength);
5816    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5817    
5818    decrease_call_count(common);    count_match(common);
5819    return cc;    return cc;
5820    }    }
5821    
5822  allocate_stack(common, 2);  allocate_stack(common, ref ? 2 : 3);
5823    if (ref)
5824      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5825  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5826  if (type != OP_CRMINSTAR)  if (type != OP_CRMINSTAR)
5827    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5828    
5829  if (min == 0)  if (min == 0)
5830    {    {
5831    zerolength = compile_ref_checks(common, ccbegin, NULL);    /* Handles both invalid and empty cases. Since the minimum repeat,
5832      is zero the invalid case is basically the same as an empty case. */
5833      if (ref)
5834        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5835      else
5836        {
5837        compile_dnref_search(common, ccbegin, NULL);
5838        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5839        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5840        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5841        }
5842      /* Length is non-zero, we can match real repeats. */
5843    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5844    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
5845    }    }
5846  else  else
5847    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);    {
5848      if (ref)
5849        {
5850        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5851        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5852        }
5853      else
5854        {
5855        compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5856        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5857        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5858        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5859        }
5860      }
5861    
5862  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5863  if (max > 0)  if (max > 0)
5864    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5865    
5866    if (!ref)
5867      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5868  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5869  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5870    
# Line 5249  if (jump != NULL) Line 5882  if (jump != NULL)
5882    JUMPHERE(jump);    JUMPHERE(jump);
5883  JUMPHERE(zerolength);  JUMPHERE(zerolength);
5884    
5885  decrease_call_count(common);  count_match(common);
5886  return cc;  return cc;
5887  }  }
5888    
# Line 5259  DEFINE_COMPILER; Line 5892  DEFINE_COMPILER;
5892  backtrack_common *backtrack;  backtrack_common *backtrack;
5893  recurse_entry *entry = common->entries;  recurse_entry *entry = common->entries;
5894  recurse_entry *prev = NULL;  recurse_entry *prev = NULL;
5895  int start = GET(cc, 1);  sljit_sw start = GET(cc, 1);
5896  pcre_uchar *start_cc;  pcre_uchar *start_cc;
5897  BOOL needs_control_head;  BOOL needs_control_head;
5898    
5899  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5900    
5901  /* Inlining simple patterns. */  /* Inlining simple patterns. */
5902  if (get_framesize(common, common->start + start, TRUE, &needs_control_head) == no_stack)  if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5903    {    {
5904    start_cc = common->start + start;    start_cc = common->start + start;
5905    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
# Line 5436  jump_list *tmp = NULL; Line 6069  jump_list *tmp = NULL;
6069  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6070  jump_list **found;  jump_list **found;
6071  /* Saving previous accept variables. */  /* Saving previous accept variables. */
6072    BOOL save_local_exit = common->local_exit;
6073    BOOL save_positive_assert = common->positive_assert;
6074    then_trap_backtrack *save_then_trap = common->then_trap;
6075  struct sljit_label *save_quit_label = common->quit_label;  struct sljit_label *save_quit_label = common->quit_label;
6076  struct sljit_label *save_accept_label = common->accept_label;  struct sljit_label *save_accept_label = common->accept_label;
6077  jump_list *save_quit = common->quit;  jump_list *save_quit = common->quit;
6078    jump_list *save_positive_assert_quit = common->positive_assert_quit;
6079  jump_list *save_accept = common->accept;  jump_list *save_accept = common->accept;
 BOOL save_local_exit = common->local_exit;  
6080  struct sljit_jump *jump;  struct sljit_jump *jump;
6081  struct sljit_jump *brajump = NULL;  struct sljit_jump *brajump = NULL;
6082    
6083    /* Assert captures then. */
6084    common->then_trap = NULL;
6085    
6086  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6087    {    {
6088    SLJIT_ASSERT(!conditional);    SLJIT_ASSERT(!conditional);
# Line 5452  if (*cc == OP_BRAZERO || *cc == OP_BRAMI Line 6091  if (*cc == OP_BRAZERO || *cc == OP_BRAMI
6091    }    }
6092  private_data_ptr = PRIVATE_DATA(cc);  private_data_ptr = PRIVATE_DATA(cc);
6093  SLJIT_ASSERT(private_data_ptr != 0);  SLJIT_ASSERT(private_data_ptr != 0);
6094  framesize = get_framesize(common, cc, FALSE, &needs_control_head);  framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6095  backtrack->framesize = framesize;  backtrack->framesize = framesize;
6096  backtrack->private_data_ptr = private_data_ptr;  backtrack->private_data_ptr = private_data_ptr;
6097  opcode = *cc;  opcode = *cc;
# Line 5502  else Line 6141  else
6141      }      }
6142    else    else
6143      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6144    init_frame(common, ccbegin, framesize + extrasize - 1, extrasize, FALSE);    init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6145    }    }
6146    
6147  memset(&altbacktrack, 0, sizeof(backtrack_common));  memset(&altbacktrack, 0, sizeof(backtrack_common));
6148  common->local_exit = TRUE;  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6149  common->quit_label = NULL;    {
6150  common->quit = NULL;    /* Negative assert is stronger than positive assert. */
6151      common->local_exit = TRUE;
6152      common->quit_label = NULL;
6153      common->quit = NULL;
6154      common->positive_assert = FALSE;
6155      }
6156    else
6157      common->positive_assert = TRUE;
6158    common->positive_assert_quit = NULL;
6159    
6160  while (1)  while (1)
6161    {    {
6162    common->accept_label = NULL;    common->accept_label = NULL;
# Line 5523  while (1) Line 6171  while (1)
6171    compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);    compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6172    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6173      {      {
6174      common->local_exit = save_local_exit;      if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6175      common->quit_label = save_quit_label;        {
6176          common->local_exit = save_local_exit;
6177          common->quit_label = save_quit_label;
6178          common->quit = save_quit;
6179          }
6180        common->positive_assert = save_positive_assert;
6181        common->then_trap = save_then_trap;
6182      common->accept_label = save_accept_label;      common->accept_label = save_accept_label;
6183      common->quit = save_quit;      common->positive_assert_quit = save_positive_assert_quit;
6184      common->accept = save_accept;      common->accept = save_accept;
6185      return NULL;      return NULL;
6186      }      }
# Line 5591  while (1) Line 6245  while (1)
6245    compile_backtrackingpath(common, altbacktrack.top);    compile_backtrackingpath(common, altbacktrack.top);
6246    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6247      {      {
6248      common->local_exit = save_local_exit;      if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6249      common->quit_label = save_quit_label;        {
6250          common->local_exit = save_local_exit;
6251          common->quit_label = save_quit_label;
6252          common->quit = save_quit;
6253          }
6254        common->positive_assert = save_positive_assert;
6255        common->then_trap = save_then_trap;
6256      common->accept_label = save_accept_label;      common->accept_label = save_accept_label;
6257      common->quit = save_quit;      common->positive_assert_quit = save_positive_assert_quit;
6258      common->accept = save_accept;      common->accept = save_accept;
6259      return NULL;      return NULL;
6260      }      }
# Line 5607  while (1) Line 6267  while (1)
6267    cc += GET(cc, 1);    cc += GET(cc, 1);
6268    }    }
6269    
6270    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6271      {
6272      SLJIT_ASSERT(common->positive_assert_quit == NULL);
6273      /* Makes the check less complicated below. */
6274      common->positive_assert_quit = common->quit;
6275      }
6276    
6277  /* None of them matched. */  /* None of them matched. */
6278  if (common->quit != NULL)  if (common->positive_assert_quit != NULL)
6279    {    {
6280    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
6281    set_jumps(common->quit, LABEL());    set_jumps(common->positive_assert_quit, LABEL());
6282    SLJIT_ASSERT(framesize != no_stack);    SLJIT_ASSERT(framesize != no_stack);
6283    if (framesize < 0)    if (framesize < 0)
6284      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
# Line 5771  else Line 6438  else
6438      }      }
6439    }    }
6440    
6441  common->local_exit = save_local_exit;  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6442  common->quit_label = save_quit_label;    {
6443      common->local_exit = save_local_exit;
6444      common->quit_label = save_quit_label;
6445      common->quit = save_quit;
6446      }
6447    common->positive_assert = save_positive_assert;
6448    common->then_trap = save_then_trap;
6449  common->accept_label = save_accept_label;  common->accept_label = save_accept_label;
6450  common->quit = save_quit;  common->positive_assert_quit = save_positive_assert_quit;
6451  common->accept = save_accept;  common->accept = save_accept;
6452  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
6453  }  }
6454    
 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)  
 {  
 int condition = FALSE;  
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_sw no_capture;  
 int i;  
   
 locals += refno & 0xff;  
 refno >>= 8;  
 no_capture = locals[1];  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == refno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate names  
   for different numbers are allowed, but not vice versa. First scan down  
   for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = locals[GET2(slotB, 0) << 1] != no_capture;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = locals[GET2(slotB, 0) << 1] != no_capture;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
   }  
 return condition;  
 }  
   
 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)  
 {  
 int condition = FALSE;  
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];  
 sljit_uw i;  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == recno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate  
   names for different numbers are allowed, but not vice versa. First  
   scan down for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = GET2(slotB, 0) == group_num;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = GET2(slotB, 0) == group_num;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
   }  
 return condition;  
 }  
   
6455  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6456  {  {
6457  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 6021  backtrack_common *backtrack; Line 6584  backtrack_common *backtrack;
6584  pcre_uchar opcode;  pcre_uchar opcode;
6585  int private_data_ptr = 0;  int private_data_ptr = 0;
6586  int offset = 0;  int offset = 0;
6587  int stacksize;  int i, stacksize;
6588    int repeat_ptr = 0, repeat_length = 0;
6589    int repeat_type = 0, repeat_count = 0;
6590  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
6591  pcre_uchar *matchingpath;  pcre_uchar *matchingpath;
6592    pcre_uchar *slot;
6593  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
6594  pcre_uchar ket;  pcre_uchar ket;
6595  assert_backtrack *assert;  assert_backtrack *assert;
# Line 6031  BOOL has_alternatives; Line 6597  BOOL has_alternatives;
6597  BOOL needs_control_head = FALSE;  BOOL needs_control_head = FALSE;
6598  struct sljit_jump *jump;  struct sljit_jump *jump;
6599  struct sljit_jump *skip;  struct sljit_jump *skip;
6600  struct sljit_label *rmaxlabel = NULL;  struct sljit_label *rmax_label = NULL;
6601  struct sljit_jump *braminzerojump = NULL;  struct sljit_jump *braminzero = NULL;
6602    
6603  PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6604    
# Line 6045  if (*cc == OP_BRAZERO || *cc == OP_BRAMI Line 6611  if (*cc == OP_BRAZERO || *cc == OP_BRAMI
6611    
6612  opcode = *cc;  opcode = *cc;
6613  ccbegin = cc;  ccbegin = cc;
6614  matchingpath = ccbegin + 1 + LINK_SIZE;  matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6615    ket = *matchingpath;
6616    if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6617      {
6618      repeat_ptr = PRIVATE_DATA(matchingpath);
6619      repeat_length = PRIVATE_DATA(matchingpath + 1);
6620      repeat_type = PRIVATE_DATA(matchingpath + 2);
6621      repeat_count = PRIVATE_DATA(matchingpath + 3);
6622      SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6623      if (repeat_type == OP_UPTO)
6624        ket = OP_KETRMAX;
6625      if (repeat_type == OP_MINUPTO)
6626        ket = OP_KETRMIN;
6627      }
6628    
6629  if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)  if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6630    {    {
6631    /* Drop this bracket_backtrack. */    /* Drop this bracket_backtrack. */
6632    parent->top = backtrack->prev;    parent->top = backtrack->prev;
6633    return bracketend(cc);    return matchingpath + 1 + LINK_SIZE + repeat_length;
6634    }    }
6635    
6636  ket = *(bracketend(cc) - 1 - LINK_SIZE);  matchingpath = ccbegin + 1 + LINK_SIZE;
6637  SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);  SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6638  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6639  cc += GET(cc, 1);  cc += GET(cc, 1);
6640    
6641  has_alternatives = *cc == OP_ALT;  has_alternatives = *cc == OP_ALT;
6642  if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))  if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6643    {    has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
   has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;  
   if (*matchingpath == OP_NRREF)  
     {  
     stacksize = GET2(matchingpath, 1);  
     if (common->currententry == NULL || stacksize == RREF_ANY)  
       has_alternatives = FALSE;  
     else if (common->currententry->start == 0)  
       has_alternatives = stacksize != 0;  
     else  
       has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
     }  
   }  
6644    
6645  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6646    opcode = OP_SCOND;    opcode = OP_SCOND;
# Line 6104  else if (opcode == OP_ONCE || opcode == Line 6671  else if (opcode == OP_ONCE || opcode ==
6671    SLJIT_ASSERT(private_data_ptr != 0);    SLJIT_ASSERT(private_data_ptr != 0);
6672    BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;    BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6673    if (opcode == OP_ONCE)    if (opcode == OP_ONCE)
6674      BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE, &needs_control_head);      BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6675    }    }
6676    
6677  /* Instructions before the first alternative. */  /* Instructions before the first alternative. */
# Line 6134  if (bra == OP_BRAMINZERO) Line 6701  if (bra == OP_BRAMINZERO)
6701    if (ket != OP_KETRMIN)    if (ket != OP_KETRMIN)
6702      {      {
6703      free_stack(common, 1);      free_stack(common, 1);
6704      braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);      braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6705      }      }
6706    else    else
6707      {      {
# Line 6149  if (bra == OP_BRAMINZERO) Line 6716  if (bra == OP_BRAMINZERO)
6716        if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)        if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6717          {          {
6718          /* When we come from outside, private_data_ptr contains the previous STR_PTR. */          /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6719          braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);          braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6720          }          }
6721        else        else
6722          {          {
6723          /* Except when the whole stack frame must be saved. */          /* Except when the whole stack frame must be saved. */
6724          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6725          braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));          braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6726          }          }
6727        JUMPHERE(skip);        JUMPHERE(skip);
6728        }        }
# Line 6168  if (bra == OP_BRAMINZERO) Line 6735  if (bra == OP_BRAMINZERO)
6735      }      }
6736    }    }
6737    
6738    if (repeat_type != 0)
6739      {
6740      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6741      if (repeat_type == OP_EXACT)
6742        rmax_label = LABEL();
6743      }
6744    
6745  if (ket == OP_KETRMIN)  if (ket == OP_KETRMIN)
6746    BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();    BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6747    
6748  if (ket == OP_KETRMAX)  if (ket == OP_KETRMAX)
6749    {    {
6750    rmaxlabel = LABEL();    rmax_label = LABEL();
6751    if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)    if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6752      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6753    }    }
6754    
6755  /* Handling capturing brackets and alternatives. */  /* Handling capturing brackets and alternatives. */
# Line 6254  if (opcode == OP_ONCE) Line 6828  if (opcode == OP_ONCE)
6828        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6829        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6830        }        }
6831      init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);      init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6832      }      }
6833    }    }
6834  else if (opcode == OP_CBRA || opcode == OP_SCBRA)  else if (opcode == OP_CBRA || opcode == OP_SCBRA)
# Line 6303  if (opcode == OP_COND || opcode == OP_SC Line 6877  if (opcode == OP_COND || opcode == OP_SC
6877        CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));        CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6878      matchingpath += 1 + IMM2_SIZE;      matchingpath += 1 + IMM2_SIZE;
6879      }      }
6880    else if (*matchingpath == OP_NCREF)    else if (*matchingpath == OP_DNCREF)
6881      {      {
6882      SLJIT_ASSERT(has_alternatives);      SLJIT_ASSERT(has_alternatives);
     stacksize = GET2(matchingpath, 1);  
     jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
   
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);  
     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));  
     GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);  
     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);  
     sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));  
     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);  
     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));  
6883    
6884      JUMPHERE(jump);      i = GET2(matchingpath, 1 + IMM2_SIZE);
6885      matchingpath += 1 + IMM2_SIZE;      slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6886        OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6887        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6888        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6889        slot += common->name_entry_size;
6890        i--;
6891        while (i-- > 0)
6892          {
6893          OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6894          OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
6895          slot += common->name_entry_size;
6896          }
6897        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6898        add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
6899        matchingpath += 1 + 2 * IMM2_SIZE;
6900      }      }
6901    else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)    else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
6902      {      {
6903      /* Never has other case. */      /* Never has other case. */
6904      BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;      BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6905        SLJIT_ASSERT(!has_alternatives);
6906    
6907      stacksize = GET2(matchingpath, 1);      if (*matchingpath == OP_RREF)
     if (common->currententry == NULL)  
       stacksize = 0;  
     else if (stacksize == RREF_ANY)  
       stacksize = 1;  
     else if (common->currententry->start == 0)  
       stacksize = stacksize == 0;  
     else  
       stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
   
     if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)  
6908        {        {
6909        SLJIT_ASSERT(!has_alternatives);        stacksize = GET2(matchingpath, 1);
6910          if (common->currententry == NULL)
6911            stacksize = 0;
6912          else if (stacksize == RREF_ANY)
6913            stacksize = 1;
6914          else if (common->currententry->start == 0)
6915            stacksize = stacksize == 0;
6916          else
6917            stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6918    
6919        if (stacksize != 0)        if (stacksize != 0)
6920          matchingpath += 1 + IMM2_SIZE;          matchingpath += 1 + IMM2_SIZE;
6921          }
6922        else
6923          {
6924          if (common->currententry == NULL || common->currententry->start == 0)
6925            stacksize = 0;
6926        else        else
6927          {          {
6928            stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
6929            slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6930            i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6931            while (stacksize > 0)
6932              {
6933              if ((int)GET2(slot, 0) == i)
6934                break;
6935              slot += common->name_entry_size;
6936              stacksize--;
6937              }
6938            }
6939    
6940          if (stacksize != 0)
6941            matchingpath += 1 + 2 * IMM2_SIZE;
6942          }
6943    
6944          /* The stacksize == 0 is a common "else" case. */
6945          if (stacksize == 0)
6946            {
6947          if (*cc == OP_ALT)          if (*cc == OP_ALT)
6948            {            {
6949            matchingpath = cc + 1 + LINK_SIZE;            matchingpath = cc + 1 + LINK_SIZE;
# Line 6352  if (opcode == OP_COND || opcode == OP_SC Line 6952  if (opcode == OP_COND || opcode == OP_SC
6952          else          else
6953            matchingpath = cc;            matchingpath = cc;
6954          }          }
       }  
     else  
       {  
       SLJIT_ASSERT(has_alternatives);  
   
       stacksize = GET2(matchingpath, 1);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));  
       OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);  
       GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);  
       OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);  
       sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));  
       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);  
       add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));  
       matchingpath += 1 + IMM2_SIZE;  
       }  
6955      }      }
6956    else    else
6957      {      {
# Line 6393  if (opcode == OP_ONCE) Line 6975  if (opcode == OP_ONCE)
6975    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6976    
6977  stacksize = 0;  stacksize = 0;
6978    if (repeat_type == OP_MINUPTO)
6979      {
6980      /* We need to preserve the counter. TMP2 will be used below. */
6981      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6982      stacksize++;
6983      }
6984  if (ket != OP_KET || bra != OP_BRA)  if (ket != OP_KET || bra != OP_BRA)
6985    stacksize++;    stacksize++;
6986  if (offset != 0)  if (offset != 0)
# Line 6409  if (stacksize > 0) Line 6997  if (stacksize > 0)
6997    allocate_stack(common, stacksize);    allocate_stack(common, stacksize);
6998    
6999  stacksize = 0;  stacksize = 0;
7000    if (repeat_type == OP_MINUPTO)
7001      {
7002      /* TMP2 was set above. */
7003      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
7004      stacksize++;
7005      }
7006    
7007  if (ket != OP_KET || bra != OP_BRA)  if (ket != OP_KET || bra != OP_BRA)
7008    {    {
7009    if (ket != OP_KET)    if (ket != OP_KET)
# Line 6438  if (offset != 0 && common->optimized_cbr Line 7033  if (offset != 0 && common->optimized_cbr
7033    
7034  if (ket == OP_KETRMAX)  if (ket == OP_KETRMAX)
7035    {    {
7036    if (opcode == OP_ONCE || opcode >= OP_SBRA)    if (repeat_type != 0)
7037        {
7038        if (has_alternatives)
7039          BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7040        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
7041        JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
7042        /* Drop STR_PTR for greedy plus quantifier. */
7043        if (opcode != OP_ONCE)
7044          free_stack(common, 1);
7045        }
7046      else if (opcode == OP_ONCE || opcode >= OP_SBRA)
7047      {      {
7048      if (has_alternatives)      if (has_alternatives)
7049        BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();        BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7050      /* Checking zero-length iteration. */      /* Checking zero-length iteration. */
7051      if (opcode != OP_ONCE)      if (opcode != OP_ONCE)
7052        {        {
7053        CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);        CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
7054        /* Drop STR_PTR for greedy plus quantifier. */        /* Drop STR_PTR for greedy plus quantifier. */
7055        if (bra != OP_BRAZERO)        if (bra != OP_BRAZERO)
7056          free_stack(common, 1);          free_stack(common, 1);
7057        }        }
7058      else      else
7059        /* TMP2 must contain the starting STR_PTR. */        /* TMP2 must contain the starting STR_PTR. */
7060        CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);        CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
7061      }      }
7062    else    else
7063      JUMPTO(SLJIT_JUMP, rmaxlabel);      JUMPTO(SLJIT_JUMP, rmax_label);
7064    BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();    BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7065    }    }
7066    
7067    if (repeat_type == OP_EXACT)
7068      {
7069      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
7070      JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
7071      }
7072    else if (repeat_type == OP_UPTO)
7073      {
7074      /* We need to preserve the counter. */
7075      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
7076      allocate_stack(common, 1);
7077      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7078      }
7079    
7080  if (bra == OP_BRAZERO)  if (bra == OP_BRAZERO)
7081    BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();    BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
7082    
# Line 6466  if (bra == OP_BRAMINZERO) Line 7084  if (bra == OP_BRAMINZERO)
7084    {    {
7085    /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */    /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
7086    JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);    JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
7087    if (braminzerojump != NULL)    if (braminzero != NULL)
7088      {      {
7089      JUMPHERE(braminzerojump);      JUMPHERE(braminzero);
7090      /* We need to release the end pointer to perform the      /* We need to release the end pointer to perform the
7091      backtrack for the zero-length iteration. When      backtrack for the zero-length iteration. When
7092      framesize is < 0, OP_ONCE will do the release itself. */      framesize is < 0, OP_ONCE will do the release itself. */
# Line 6484  if (bra == OP_BRAMINZERO) Line 7102  if (bra == OP_BRAMINZERO)
7102    }    }
7103    
7104  if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)  if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7105    decrease_call_count(common);    count_match(common);
7106    
7107  /* Skip the other alternatives. */  /* Skip the other alternatives. */
7108  while (*cc == OP_ALT)  while (*cc == OP_ALT)
# Line 6494  cc += 1 + LINK_SIZE; Line 7112  cc += 1 + LINK_SIZE;
7112  /* Temporarily encoding the needs_control_head in framesize. */  /* Temporarily encoding the needs_control_head in framesize. */
7113  if (opcode == OP_ONCE)  if (opcode == OP_ONCE)
7114    BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);    BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
7115  return cc;  return cc + repeat_length;
7116  }  }
7117    
7118  static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
# Line 6548  switch(opcode) Line 7166  switch(opcode)
7166    break;    break;
7167    }    }
7168    
7169  framesize = get_framesize(common, cc, FALSE, &needs_control_head);  framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7170  BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;  BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
7171  if (framesize < 0)  if (framesize < 0)
7172    {    {
# Line 6641  else Line 7259  else
7259      stack++;      stack++;
7260      }      }
7261    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
7262    init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);    init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
7263    stack -= 1 + (offset == 0);    stack -= 1 + (offset == 0);
7264    }    }
7265    
# Line 6771  if (!zero) Line 7389  if (!zero)
7389    
7390  /* None of them matched. */  /* None of them matched. */
7391  set_jumps(emptymatch, LABEL());  set_jumps(emptymatch, LABEL());
7392  decrease_call_count(common);  count_match(common);
7393  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
7394  }  }
7395    
7396  static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)  static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7397  {  {
7398  int class_len;  int class_len;
7399    
# Line 6811  else if (*opcode >= OP_TYPESTAR && *opco Line 7429  else if (*opcode >= OP_TYPESTAR && *opco
7429    }    }
7430  else  else
7431    {    {
7432    SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);    SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7433    *type = *opcode;    *type = *opcode;
7434    cc++;    cc++;
7435    class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);    class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
# Line 6822  else Line 7440  else
7440      if (end != NULL)      if (end != NULL)
7441        *end = cc + class_len;        *end = cc + class_len;
7442      }      }
7443      else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7444        {
7445        *opcode -= OP_CRPOSSTAR - OP_POSSTAR;