/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1252 by zherczeg, Fri Feb 22 11:13:38 2013 UTC revision 1422 by zherczeg, Mon Dec 30 19:05:36 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2012                        Copyright (c) 2010-2013
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  system files. */ Line 71  system files. */
71     2 - Enable capture_last_ptr (includes option 1). */     2 - Enable capture_last_ptr (includes option 1). */
72  /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */  /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* 1 - Always have a control head. */
75    /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77  /* Allocate memory for the regex stack on the real machine stack.  /* Allocate memory for the regex stack on the real machine stack.
78  Fast, but limited size. */  Fast, but limited size. */
79  #define MACHINE_STACK_SIZE 32768  #define MACHINE_STACK_SIZE 32768
# Line 165  typedef struct jit_arguments { Line 168  typedef struct jit_arguments {
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169    void *callout_data;    void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171      pcre_uint32 limit_match;
172    int real_offset_count;    int real_offset_count;
173    int offset_count;    int offset_count;
   int call_limit;  
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 179  typedef struct executable_functions { Line 182  typedef struct executable_functions {
182    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
183    void *userdata;    void *userdata;
184    pcre_uint32 top_bracket;    pcre_uint32 top_bracket;
185      pcre_uint32 limit_match;
186    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187  } executable_functions;  } executable_functions;
188    
# Line 193  typedef struct stub_list { Line 197  typedef struct stub_list {
197    struct stub_list *next;    struct stub_list *next;
198  } stub_list;  } stub_list;
199    
200  enum frame_types { no_frame = -1, no_stack = -2 };  enum frame_types {
201      no_frame = -1,
202      no_stack = -2
203    };
204    
205    enum control_types {
206      type_mark = 0,
207      type_then_trap = 1
208    };
209    
210  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211    
212  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
213  code generator. It is allocated by compile_matchingpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
214  the aguments for compile_backtrackingpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
215  of its descendants. */  of its descendants. */
216  typedef struct backtrack_common {  typedef struct backtrack_common {
217    /* Concatenation stack. */    /* Concatenation stack. */
# Line 215  typedef struct backtrack_common { Line 227  typedef struct backtrack_common {
227  typedef struct assert_backtrack {  typedef struct assert_backtrack {
228    backtrack_common common;    backtrack_common common;
229    jump_list *condfailed;    jump_list *condfailed;
230    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
231    int framesize;    int framesize;
232    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
233    int private_data_ptr;    int private_data_ptr;
# Line 236  typedef struct bracket_backtrack { Line 248  typedef struct bracket_backtrack {
248      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
249      jump_list *condfailed;      jump_list *condfailed;
250      assert_backtrack *assert;      assert_backtrack *assert;
251      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
252      int framesize;      int framesize;
253    } u;    } u;
254    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
# Line 271  typedef struct recurse_entry { Line 283  typedef struct recurse_entry {
283    /* Collects the calls until the function is not created. */    /* Collects the calls until the function is not created. */
284    jump_list *calls;    jump_list *calls;
285    /* Points to the starting opcode. */    /* Points to the starting opcode. */
286    int start;    sljit_sw start;
287  } recurse_entry;  } recurse_entry;
288    
289  typedef struct recurse_backtrack {  typedef struct recurse_backtrack {
# Line 279  typedef struct recurse_backtrack { Line 291  typedef struct recurse_backtrack {
291    BOOL inlined_pattern;    BOOL inlined_pattern;
292  } recurse_backtrack;  } recurse_backtrack;
293    
294  #define MAX_RANGE_SIZE 6  #define OP_THEN_TRAP OP_TABLE_LENGTH
295    
296    typedef struct then_trap_backtrack {
297      backtrack_common common;
298      /* If then_trap is not NULL, this structure contains the real
299      then_trap for the backtracking path. */
300      struct then_trap_backtrack *then_trap;
301      /* Points to the starting opcode. */
302      sljit_sw start;
303      /* Exit point for the then opcodes of this alternative. */
304      jump_list *quit;
305      /* Frame size of the current alternative. */
306      int framesize;
307    } then_trap_backtrack;
308    
309    #define MAX_RANGE_SIZE 4
310    
311  typedef struct compiler_common {  typedef struct compiler_common {
312      /* The sljit generic compiler. */
313    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
314      /* First byte code. */
315    pcre_uchar *start;    pcre_uchar *start;
   
316    /* Maps private data offset to each opcode. */    /* Maps private data offset to each opcode. */
317    int *private_data_ptrs;    sljit_si *private_data_ptrs;
318    /* Tells whether the capturing bracket is optimized. */    /* Tells whether the capturing bracket is optimized. */
319    pcre_uint8 *optimized_cbracket;    pcre_uint8 *optimized_cbracket;
320      /* Tells whether the starting offset is a target of then. */
321      pcre_uint8 *then_offsets;
322      /* Current position where a THEN must jump. */
323      then_trap_backtrack *then_trap;
324    /* Starting offset of private data for capturing brackets. */    /* Starting offset of private data for capturing brackets. */
325    int cbraptr;    int cbra_ptr;
326    /* OVector starting point. Must be divisible by 2. */    /* Output vector starting point. Must be divisible by 2. */
327    int ovector_start;    int ovector_start;
328    /* Last known position of the requested byte. */    /* Last known position of the requested byte. */
329    int req_char_ptr;    int req_char_ptr;
# Line 305  typedef struct compiler_common { Line 337  typedef struct compiler_common {
337    int first_line_end;    int first_line_end;
338    /* Points to the marked string. */    /* Points to the marked string. */
339    int mark_ptr;    int mark_ptr;
340      /* Recursive control verb management chain. */
341      int control_head_ptr;
342    /* Points to the last matched capture block index. */    /* Points to the last matched capture block index. */
343    int capture_last_ptr;    int capture_last_ptr;
344      /* Points to the starting position of the current match. */
345      int start_ptr;
346    
347    /* Flipped and lower case tables. */    /* Flipped and lower case tables. */
348    const pcre_uint8 *fcc;    const pcre_uint8 *fcc;
349    sljit_sw lcc;    sljit_sw lcc;
350    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351    int mode;    int mode;
352      /* \K is found in the pattern. */
353      BOOL has_set_som;
354      /* (*SKIP:arg) is found in the pattern. */
355      BOOL has_skip_arg;
356      /* (*THEN) is found in the pattern. */
357      BOOL has_then;
358      /* Needs to know the start position anytime. */
359      BOOL needs_start_ptr;
360      /* Currently in recurse or negative assert. */
361      BOOL local_exit;
362      /* Currently in a positive assert. */
363      BOOL positive_assert;
364    /* Newline control. */    /* Newline control. */
365    int nltype;    int nltype;
366    int newline;    int newline;
367    int bsr_nltype;    int bsr_nltype;
368    /* Dollar endonly. */    /* Dollar endonly. */
369    int endonly;    int endonly;
   BOOL has_set_som;  
370    /* Tables. */    /* Tables. */
371    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
372    /* Named capturing brackets. */    /* Named capturing brackets. */
373    sljit_uw name_table;    pcre_uchar *name_table;
374    sljit_sw name_count;    sljit_sw name_count;
375    sljit_sw name_entry_size;    sljit_sw name_entry_size;
376    
# Line 338  typedef struct compiler_common { Line 384  typedef struct compiler_common {
384    recurse_entry *currententry;    recurse_entry *currententry;
385    jump_list *partialmatch;    jump_list *partialmatch;
386    jump_list *quit;    jump_list *quit;
387      jump_list *positive_assert_quit;
388    jump_list *forced_quit;    jump_list *forced_quit;
389    jump_list *accept;    jump_list *accept;
390    jump_list *calllimit;    jump_list *calllimit;
# Line 349  typedef struct compiler_common { Line 396  typedef struct compiler_common {
396    jump_list *vspace;    jump_list *vspace;
397    jump_list *casefulcmp;    jump_list *casefulcmp;
398    jump_list *caselesscmp;    jump_list *caselesscmp;
399      jump_list *reset_match;
400    BOOL jscript_compat;    BOOL jscript_compat;
401  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
402    BOOL utf;    BOOL utf;
403  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
404    BOOL use_ucp;    BOOL use_ucp;
405  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
406  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
407      jump_list *utfreadchar;
408      jump_list *utfreadchar16;
409    jump_list *utfreadtype8;    jump_list *utfreadtype8;
410  #endif  #endif
411  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 415  typedef struct compare_context { Line 462  typedef struct compare_context {
462  #define STACK_TOP     SLJIT_SCRATCH_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
463  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
464  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
465  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define COUNT_MATCH   SLJIT_SAVED_EREG2
466  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
467    
468  /* Local space layout. */  /* Local space layout. */
# Line 426  typedef struct compare_context { Line 473  typedef struct compare_context {
473  #define POSSESSIVE0      (2 * sizeof(sljit_sw))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
474  #define POSSESSIVE1      (3 * sizeof(sljit_sw))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
475  /* Max limit of recursions. */  /* Max limit of recursions. */
476  #define CALL_LIMIT       (4 * sizeof(sljit_sw))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
477  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
478  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
479  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
480  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
481  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
482  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
483  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_sw))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
484  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
485    
486  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
# Line 484  cc += 1 + LINK_SIZE; Line 531  cc += 1 + LINK_SIZE;
531  return cc;  return cc;
532  }  }
533    
534    static int ones_in_half_byte[16] = {
535      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
536      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
537    };
538    
539  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
540   next_opcode   next_opcode
541   get_private_data_length   check_opcode_types
542   set_private_data_ptrs   set_private_data_ptrs
543   get_framesize   get_framesize
544   init_frame   init_frame
545   get_private_data_length_for_copy   get_private_data_copy_length
546   copy_private_data   copy_private_data
547   compile_matchingpath   compile_matchingpath
548   compile_backtrackingpath   compile_backtrackingpath
# Line 536  switch(*cc) Line 588  switch(*cc)
588    case OP_CRMINQUERY:    case OP_CRMINQUERY:
589    case OP_CRRANGE:    case OP_CRRANGE:
590    case OP_CRMINRANGE:    case OP_CRMINRANGE:
591      case OP_CRPOSSTAR:
592      case OP_CRPOSPLUS:
593      case OP_CRPOSQUERY:
594      case OP_CRPOSRANGE:
595    case OP_CLASS:    case OP_CLASS:
596    case OP_NCLASS:    case OP_NCLASS:
597    case OP_REF:    case OP_REF:
598    case OP_REFI:    case OP_REFI:
599      case OP_DNREF:
600      case OP_DNREFI:
601    case OP_RECURSE:    case OP_RECURSE:
602    case OP_CALLOUT:    case OP_CALLOUT:
603    case OP_ALT:    case OP_ALT:
# Line 565  switch(*cc) Line 623  switch(*cc)
623    case OP_SCBRAPOS:    case OP_SCBRAPOS:
624    case OP_SCOND:    case OP_SCOND:
625    case OP_CREF:    case OP_CREF:
626    case OP_NCREF:    case OP_DNCREF:
627    case OP_RREF:    case OP_RREF:
628    case OP_NRREF:    case OP_DNRREF:
629    case OP_DEF:    case OP_DEF:
630    case OP_BRAZERO:    case OP_BRAZERO:
631    case OP_BRAMINZERO:    case OP_BRAMINZERO:
632    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
633      case OP_PRUNE:
634      case OP_SKIP:
635      case OP_THEN:
636    case OP_COMMIT:    case OP_COMMIT:
637    case OP_FAIL:    case OP_FAIL:
638    case OP_ACCEPT:    case OP_ACCEPT:
# Line 670  switch(*cc) Line 731  switch(*cc)
731  #endif  #endif
732    
733    case OP_MARK:    case OP_MARK:
734      case OP_PRUNE_ARG:
735      case OP_SKIP_ARG:
736      case OP_THEN_ARG:
737    return cc + 1 + 2 + cc[1];    return cc + 1 + 2 + cc[1];
738    
739    default:    default:
740      /* All opcodes are supported now! */
741      SLJIT_ASSERT_STOP();
742    return NULL;    return NULL;
743    }    }
744  }  }
745    
746    static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
747    {
748    int count;
749    pcre_uchar *slot;
750    
751    /* Calculate important variables (like stack size) and check whether all opcodes are supported. */
752    while (cc < ccend)
753      {
754      switch(*cc)
755        {
756        case OP_SET_SOM:
757        common->has_set_som = TRUE;
758        cc += 1;
759        break;
760    
761        case OP_REF:
762        case OP_REFI:
763        common->optimized_cbracket[GET2(cc, 1)] = 0;
764        cc += 1 + IMM2_SIZE;
765        break;
766    
767        case OP_CBRAPOS:
768        case OP_SCBRAPOS:
769        common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
770        cc += 1 + LINK_SIZE + IMM2_SIZE;
771        break;
772    
773        case OP_COND:
774        case OP_SCOND:
775        /* Only AUTO_CALLOUT can insert this opcode. We do
776           not intend to support this case. */
777        if (cc[1 + LINK_SIZE] == OP_CALLOUT)
778          return FALSE;
779        cc += 1 + LINK_SIZE;
780        break;
781    
782        case OP_CREF:
783        common->optimized_cbracket[GET2(cc, 1)] = 0;
784        cc += 1 + IMM2_SIZE;
785        break;
786    
787        case OP_DNREF:
788        case OP_DNREFI:
789        case OP_DNCREF:
790        count = GET2(cc, 1 + IMM2_SIZE);
791        slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
792        while (count-- > 0)
793          {
794          common->optimized_cbracket[GET2(slot, 0)] = 0;
795          slot += common->name_entry_size;
796          }
797        cc += 1 + 2 * IMM2_SIZE;
798        break;
799    
800        case OP_RECURSE:
801        /* Set its value only once. */
802        if (common->recursive_head_ptr == 0)
803          {
804          common->recursive_head_ptr = common->ovector_start;
805          common->ovector_start += sizeof(sljit_sw);
806          }
807        cc += 1 + LINK_SIZE;
808        break;
809    
810        case OP_CALLOUT:
811        if (common->capture_last_ptr == 0)
812          {
813          common->capture_last_ptr = common->ovector_start;
814          common->ovector_start += sizeof(sljit_sw);
815          }
816        cc += 2 + 2 * LINK_SIZE;
817        break;
818    
819        case OP_THEN_ARG:
820        common->has_then = TRUE;
821        common->control_head_ptr = 1;
822        /* Fall through. */
823    
824        case OP_PRUNE_ARG:
825        common->needs_start_ptr = TRUE;
826        /* Fall through. */
827    
828        case OP_MARK:
829        if (common->mark_ptr == 0)
830          {
831          common->mark_ptr = common->ovector_start;
832          common->ovector_start += sizeof(sljit_sw);
833          }
834        cc += 1 + 2 + cc[1];
835        break;
836    
837        case OP_THEN:
838        common->has_then = TRUE;
839        common->control_head_ptr = 1;
840        /* Fall through. */
841    
842        case OP_PRUNE:
843        case OP_SKIP:
844        common->needs_start_ptr = TRUE;
845        cc += 1;
846        break;
847    
848        case OP_SKIP_ARG:
849        common->control_head_ptr = 1;
850        common->has_skip_arg = TRUE;
851        cc += 1 + 2 + cc[1];
852        break;
853    
854        default:
855        cc = next_opcode(common, cc);
856        if (cc == NULL)
857          return FALSE;
858        break;
859        }
860      }
861    return TRUE;
862    }
863    
864    static int get_class_iterator_size(pcre_uchar *cc)
865    {
866    switch(*cc)
867      {
868      case OP_CRSTAR:
869      case OP_CRPLUS:
870      return 2;
871    
872      case OP_CRMINSTAR:
873      case OP_CRMINPLUS:
874      case OP_CRQUERY:
875      case OP_CRMINQUERY:
876      return 1;
877    
878      case OP_CRRANGE:
879      case OP_CRMINRANGE:
880      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
881        return 0;
882      return 2;
883    
884      default:
885      return 0;
886      }
887    }
888    
889    static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
890    {
891    pcre_uchar *end = bracketend(begin);
892    pcre_uchar *next;
893    pcre_uchar *next_end;
894    pcre_uchar *max_end;
895    pcre_uchar type;
896    sljit_sw length = end - begin;
897    int min, max, i;
898    
899    /* Detect fixed iterations first. */
900    if (end[-(1 + LINK_SIZE)] != OP_KET)
901      return FALSE;
902    
903    /* Already detected repeat. */
904    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
905      return TRUE;
906    
907    next = end;
908    min = 1;
909    while (1)
910      {
911      if (*next != *begin)
912        break;
913      next_end = bracketend(next);
914      if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
915        break;
916      next = next_end;
917      min++;
918      }
919    
920    if (min == 2)
921      return FALSE;
922    
923    max = 0;
924    max_end = next;
925    if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
926      {
927      type = *next;
928      while (1)
929        {
930        if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
931          break;
932        next_end = bracketend(next + 2 + LINK_SIZE);
933        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
934          break;
935        next = next_end;
936        max++;
937        }
938    
939      if (next[0] == type && next[1] == *begin && max >= 1)
940        {
941        next_end = bracketend(next + 1);
942        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
943          {
944          for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
945            if (*next_end != OP_KET)
946              break;
947    
948          if (i == max)
949            {
950            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
951            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
952            /* +2 the original and the last. */
953            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
954            if (min == 1)
955              return TRUE;
956            min--;
957            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
958            }
959          }
960        }
961      }
962    
963    if (min >= 3)
964      {
965      common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
966      common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
967      common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
968      return TRUE;
969      }
970    
971    return FALSE;
972    }
973    
974  #define CASE_ITERATOR_PRIVATE_DATA_1 \  #define CASE_ITERATOR_PRIVATE_DATA_1 \
975      case OP_MINSTAR: \      case OP_MINSTAR: \
976      case OP_MINPLUS: \      case OP_MINPLUS: \
# Line 729  switch(*cc) Line 1023  switch(*cc)
1023      case OP_TYPEUPTO: \      case OP_TYPEUPTO: \
1024      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
1025    
1026  static int get_class_iterator_size(pcre_uchar *cc)  static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
 {  
 switch(*cc)  
   {  
   case OP_CRSTAR:  
   case OP_CRPLUS:  
   return 2;  
   
   case OP_CRMINSTAR:  
   case OP_CRMINPLUS:  
   case OP_CRQUERY:  
   case OP_CRMINQUERY:  
   return 1;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))  
     return 0;  
   return 2;  
   
   default:  
   return 0;  
   }  
 }  
   
 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  
1027  {  {
1028  int private_data_length = 0;  pcre_uchar *cc = common->start;
1029  pcre_uchar *alternative;  pcre_uchar *alternative;
 pcre_uchar *name;  
1030  pcre_uchar *end = NULL;  pcre_uchar *end = NULL;
1031  int space, size, i;  int private_data_ptr = *private_data_start;
1032  pcre_uint32 bracketlen;  int space, size, bracketlen;
1033    
 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  
1034  while (cc < ccend)  while (cc < ccend)
1035    {    {
1036    space = 0;    space = 0;
1037    size = 0;    size = 0;
1038    bracketlen = 0;    bracketlen = 0;
1039      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1040        return;
1041    
1042      if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1043        if (detect_repeat(common, cc))
1044          {
1045          /* These brackets are converted to repeats, so no global
1046          based single character repeat is allowed. */
1047          if (cc >= end)
1048            end = bracketend(cc);
1049          }
1050    
1051    switch(*cc)    switch(*cc)
1052      {      {
1053      case OP_SET_SOM:      case OP_KET:
1054      common->has_set_som = TRUE;      if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1055      cc += 1;        {
1056      break;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1057          private_data_ptr += sizeof(sljit_sw);
1058      case OP_REF:        cc += common->private_data_ptrs[cc + 1 - common->start];
1059      case OP_REFI:        }
1060      common->optimized_cbracket[GET2(cc, 1)] = 0;      cc += 1 + LINK_SIZE;
     cc += 1 + IMM2_SIZE;  
1061      break;      break;
1062    
1063      case OP_ASSERT:      case OP_ASSERT:
# Line 791  while (cc < ccend) Line 1069  while (cc < ccend)
1069      case OP_BRAPOS:      case OP_BRAPOS:
1070      case OP_SBRA:      case OP_SBRA:
1071      case OP_SBRAPOS:      case OP_SBRAPOS:
1072      private_data_length += sizeof(sljit_sw);      case OP_SCOND:
1073        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1074        private_data_ptr += sizeof(sljit_sw);
1075      bracketlen = 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1076      break;      break;
1077    
1078      case OP_CBRAPOS:      case OP_CBRAPOS:
1079      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1080      private_data_length += sizeof(sljit_sw);      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1081      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;      private_data_ptr += sizeof(sljit_sw);
1082      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1083      break;      break;
1084    
1085      case OP_COND:      case OP_COND:
1086      case OP_SCOND:      /* Might be a hidden SCOND. */
1087      /* Only AUTO_CALLOUT can insert this opcode. We do      alternative = cc + GET(cc, 1);
1088         not intend to support this case. */      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
     if (cc[1 + LINK_SIZE] == OP_CALLOUT)  
       return -1;  
   
     if (*cc == OP_COND)  
1089        {        {
1090        /* Might be a hidden SCOND. */        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091        alternative = cc + GET(cc, 1);        private_data_ptr += sizeof(sljit_sw);
       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
         private_data_length += sizeof(sljit_sw);  
       }  
     else  
       private_data_length += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CREF:  
     i = GET2(cc, 1);  
     common->optimized_cbracket[i] = 0;  
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_NCREF:  
     bracketlen = GET2(cc, 1);  
     name = (pcre_uchar *)common->name_table;  
     alternative = name;  
     for (i = 0; i < common->name_count; i++)  
       {  
       if (GET2(name, 0) == bracketlen) break;  
       name += common->name_entry_size;  
       }  
     SLJIT_ASSERT(i != common->name_count);  
   
     for (i = 0; i < common->name_count; i++)  
       {  
       if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)  
         common->optimized_cbracket[GET2(alternative, 0)] = 0;  
       alternative += common->name_entry_size;  
       }  
     bracketlen = 0;  
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_BRA:  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRA:  
     case OP_SCBRA:  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_1  
     space = 1;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2A  
     space = 2;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2B  
     space = 2;  
     size = -(2 + IMM2_SIZE);  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_1  
     space = 1;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A  
     if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)  
       space = 2;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B  
     if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)  
       space = 2;  
     size = 1 + IMM2_SIZE;  
     break;  
   
     case OP_CLASS:  
     case OP_NCLASS:  
     size += 1 + 32 / sizeof(pcre_uchar);  
     space = get_class_iterator_size(cc + size);  
     break;  
   
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
     case OP_XCLASS:  
     size = GET(cc, 1);  
     space = get_class_iterator_size(cc + size);  
     break;  
 #endif  
   
     case OP_RECURSE:  
     /* Set its value only once. */  
     if (common->recursive_head_ptr == 0)  
       {  
       common->recursive_head_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 1 + LINK_SIZE;  
     break;  
   
     case OP_CALLOUT:  
     if (common->capture_last_ptr == 0)  
       {  
       common->capture_last_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 2 + 2 * LINK_SIZE;  
     break;  
   
     case OP_MARK:  
     if (common->mark_ptr == 0)  
       {  
       common->mark_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 1 + 2 + cc[1];  
     break;  
   
     default:  
     cc = next_opcode(common, cc);  
     if (cc == NULL)  
       return -1;  
     break;  
     }  
   
   if (space > 0 && cc >= end)  
     private_data_length += sizeof(sljit_sw) * space;  
   
   if (size != 0)  
     {  
     if (size < 0)  
       {  
       cc += -size;  
 #ifdef SUPPORT_UTF  
       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
       }  
     else  
       cc += size;  
     }  
   
   if (bracketlen != 0)  
     {  
     if (cc >= end)  
       {  
       end = bracketend(cc);  
       if (end[-1 - LINK_SIZE] == OP_KET)  
         end = NULL;  
       }  
     cc += bracketlen;  
     }  
   }  
 return private_data_length;  
 }  
   
 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)  
 {  
 pcre_uchar *cc = common->start;  
 pcre_uchar *alternative;  
 pcre_uchar *end = NULL;  
 int space, size, bracketlen;  
   
 while (cc < ccend)  
   {  
   space = 0;  
   size = 0;  
   bracketlen = 0;  
   switch(*cc)  
     {  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
     case OP_SCOND:  
     common->private_data_ptrs[cc - common->start] = private_data_ptr;  
     private_data_ptr += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRAPOS:  
     case OP_SCBRAPOS:  
     common->private_data_ptrs[cc - common->start] = private_data_ptr;  
     private_data_ptr += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     case OP_COND:  
     /* Might be a hidden SCOND. */  
     alternative = cc + GET(cc, 1);  
     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
       {  
       common->private_data_ptrs[cc - common->start] = private_data_ptr;  
       private_data_ptr += sizeof(sljit_sw);  
1092        }        }
1093      bracketlen = 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1094      break;      break;
# Line 1074  while (cc < ccend) Line 1153  while (cc < ccend)
1153      break;      break;
1154      }      }
1155    
1156      /* Character iterators that are not inside a repeated bracket
1157         get a private slot instead of allocating it on the stack. */
1158    if (space > 0 && cc >= end)    if (space > 0 && cc >= end)
1159      {      {
1160      common->private_data_ptrs[cc - common->start] = private_data_ptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
# Line 1104  while (cc < ccend) Line 1185  while (cc < ccend)
1185      cc += bracketlen;      cc += bracketlen;
1186      }      }
1187    }    }
1188    *private_data_start = private_data_ptr;
1189  }  }
1190    
1191  /* Returns with a frame_types (always < 0) if no need for frame. */  /* Returns with a frame_types (always < 0) if no need for frame. */
1192  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1193  {  {
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1194  int length = 0;  int length = 0;
1195  int possessive = 0;  int possessive = 0;
1196  BOOL stack_restore = FALSE;  BOOL stack_restore = FALSE;
# Line 1118  BOOL setmark_found = recursive; Line 1199  BOOL setmark_found = recursive;
1199  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
1200  BOOL capture_last_found = FALSE;  BOOL capture_last_found = FALSE;
1201    
1202  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1203    SLJIT_ASSERT(common->control_head_ptr != 0);
1204    *needs_control_head = TRUE;
1205    #else
1206    *needs_control_head = FALSE;
1207    #endif
1208    
1209    if (ccend == NULL)
1210    {    {
1211    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;    ccend = bracketend(cc) - (1 + LINK_SIZE);
1212    /* This is correct regardless of common->capture_last_ptr. */    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1213    capture_last_found = TRUE;      {
1214        possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1215        /* This is correct regardless of common->capture_last_ptr. */
1216        capture_last_found = TRUE;
1217        }
1218      cc = next_opcode(common, cc);
1219    }    }
1220    
 cc = next_opcode(common, cc);  
1221  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1222  while (cc < ccend)  while (cc < ccend)
1223    switch(*cc)    switch(*cc)
# Line 1142  while (cc < ccend) Line 1234  while (cc < ccend)
1234      break;      break;
1235    
1236      case OP_MARK:      case OP_MARK:
1237        case OP_PRUNE_ARG:
1238        case OP_THEN_ARG:
1239      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1240      stack_restore = TRUE;      stack_restore = TRUE;
1241      if (!setmark_found)      if (!setmark_found)
# Line 1149  while (cc < ccend) Line 1243  while (cc < ccend)
1243        length += 2;        length += 2;
1244        setmark_found = TRUE;        setmark_found = TRUE;
1245        }        }
1246        if (common->control_head_ptr != 0)
1247          *needs_control_head = TRUE;
1248      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
1249      break;      break;
1250    
# Line 1268  if (length > 0) Line 1364  if (length > 0)
1364  return stack_restore ? no_frame : no_stack;  return stack_restore ? no_frame : no_stack;
1365  }  }
1366    
1367  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1368  {  {
1369  DEFINE_COMPILER;  DEFINE_COMPILER;
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1370  BOOL setsom_found = recursive;  BOOL setsom_found = recursive;
1371  BOOL setmark_found = recursive;  BOOL setmark_found = recursive;
1372  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
# Line 1283  SLJIT_UNUSED_ARG(stacktop); Line 1378  SLJIT_UNUSED_ARG(stacktop);
1378  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1379    
1380  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
1381  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (ccend == NULL)
1382    cc = next_opcode(common, cc);    {
1383      ccend = bracketend(cc) - (1 + LINK_SIZE);
1384      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1385        cc = next_opcode(common, cc);
1386      }
1387    
1388  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1389  while (cc < ccend)  while (cc < ccend)
1390    switch(*cc)    switch(*cc)
# Line 1304  while (cc < ccend) Line 1404  while (cc < ccend)
1404      break;      break;
1405    
1406      case OP_MARK:      case OP_MARK:
1407        case OP_PRUNE_ARG:
1408        case OP_THEN_ARG:
1409      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1410      if (!setmark_found)      if (!setmark_found)
1411        {        {
# Line 1384  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), st Line 1486  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), st
1486  SLJIT_ASSERT(stackpos == STACK(stacktop));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1487  }  }
1488    
1489  static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1490  {  {
1491  int private_data_length = 2;  int private_data_length = needs_control_head ? 3 : 2;
1492  int size;  int size;
1493  pcre_uchar *alternative;  pcre_uchar *alternative;
1494  /* Calculate the sum of the private machine words. */  /* Calculate the sum of the private machine words. */
# Line 1395  while (cc < ccend) Line 1497  while (cc < ccend)
1497    size = 0;    size = 0;
1498    switch(*cc)    switch(*cc)
1499      {      {
1500        case OP_KET:
1501        if (PRIVATE_DATA(cc) != 0)
1502          private_data_length++;
1503        cc += 1 + LINK_SIZE;
1504        break;
1505    
1506      case OP_ASSERT:      case OP_ASSERT:
1507      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1508      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 1499  return private_data_length; Line 1607  return private_data_length;
1607  }  }
1608    
1609  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1610    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1611  {  {
1612  DEFINE_COMPILER;  DEFINE_COMPILER;
1613  int srcw[2];  int srcw[2];
# Line 1520  stacktop = STACK(stacktop - 1); Line 1628  stacktop = STACK(stacktop - 1);
1628    
1629  if (!save)  if (!save)
1630    {    {
1631    stackptr += sizeof(sljit_sw);    stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1632    if (stackptr < stacktop)    if (stackptr < stacktop)
1633      {      {
1634      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
# Line 1536  if (!save) Line 1644  if (!save)
1644    /* The tmp1next must be TRUE in either way. */    /* The tmp1next must be TRUE in either way. */
1645    }    }
1646    
1647  while (status != end)  do
1648    {    {
1649    count = 0;    count = 0;
1650    switch(status)    switch(status)
# Line 1545  while (status != end) Line 1653  while (status != end)
1653      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1654      count = 1;      count = 1;
1655      srcw[0] = common->recursive_head_ptr;      srcw[0] = common->recursive_head_ptr;
1656        if (needs_control_head)
1657          {
1658          SLJIT_ASSERT(common->control_head_ptr != 0);
1659          count = 2;
1660          srcw[1] = common->control_head_ptr;
1661          }
1662      status = loop;      status = loop;
1663      break;      break;
1664    
# Line 1557  while (status != end) Line 1671  while (status != end)
1671    
1672      switch(*cc)      switch(*cc)
1673        {        {
1674          case OP_KET:
1675          if (PRIVATE_DATA(cc) != 0)
1676            {
1677            count = 1;
1678            srcw[0] = PRIVATE_DATA(cc);
1679            }
1680          cc += 1 + LINK_SIZE;
1681          break;
1682    
1683        case OP_ASSERT:        case OP_ASSERT:
1684        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
1685        case OP_ASSERTBACK:        case OP_ASSERTBACK:
# Line 1769  while (status != end) Line 1892  while (status != end)
1892        }        }
1893      }      }
1894    }    }
1895    while (status != end);
1896    
1897  if (save)  if (save)
1898    {    {
# Line 1802  if (save) Line 1926  if (save)
1926  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1927  }  }
1928    
1929    static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1930    {
1931    pcre_uchar *end = bracketend(cc);
1932    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1933    
1934    /* Assert captures then. */
1935    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1936      current_offset = NULL;
1937    /* Conditional block does not. */
1938    if (*cc == OP_COND || *cc == OP_SCOND)
1939      has_alternatives = FALSE;
1940    
1941    cc = next_opcode(common, cc);
1942    if (has_alternatives)
1943      current_offset = common->then_offsets + (cc - common->start);
1944    
1945    while (cc < end)
1946      {
1947      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1948        cc = set_then_offsets(common, cc, current_offset);
1949      else
1950        {
1951        if (*cc == OP_ALT && has_alternatives)
1952          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1953        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1954          *current_offset = 1;
1955        cc = next_opcode(common, cc);
1956        }
1957      }
1958    
1959    return end;
1960    }
1961    
1962  #undef CASE_ITERATOR_PRIVATE_DATA_1  #undef CASE_ITERATOR_PRIVATE_DATA_1
1963  #undef CASE_ITERATOR_PRIVATE_DATA_2A  #undef CASE_ITERATOR_PRIVATE_DATA_2A
1964  #undef CASE_ITERATOR_PRIVATE_DATA_2B  #undef CASE_ITERATOR_PRIVATE_DATA_2B
# Line 1865  while (list_item) Line 2022  while (list_item)
2022  common->stubs = NULL;  common->stubs = NULL;
2023  }  }
2024    
2025  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  static SLJIT_INLINE void count_match(compiler_common *common)
2026  {  {
2027  DEFINE_COMPILER;  DEFINE_COMPILER;
2028    
2029  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2030  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2031  }  }
2032    
# Line 1900  static SLJIT_INLINE void reset_ovector(c Line 2057  static SLJIT_INLINE void reset_ovector(c
2057  DEFINE_COMPILER;  DEFINE_COMPILER;
2058  struct sljit_label *loop;  struct sljit_label *loop;
2059  int i;  int i;
2060    
2061  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
2062    SLJIT_ASSERT(length > 1);
2063  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
2064  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2065  if (length < 8)  if (length < 8)
2066    {    {
2067    for (i = 0; i < length; i++)    for (i = 1; i < length; i++)
2068      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2069    }    }
2070  else  else
2071    {    {
2072    GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));    GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2073    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2074    loop = LABEL();    loop = LABEL();
2075    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2076    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
# Line 1919  else Line 2078  else
2078    }    }
2079  }  }
2080    
2081    static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2082    {
2083    DEFINE_COMPILER;
2084    struct sljit_label *loop;
2085    int i;
2086    
2087    SLJIT_ASSERT(length > 1);
2088    /* OVECTOR(1) contains the "string begin - 1" constant. */
2089    if (length > 2)
2090      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2091    if (length < 8)
2092      {
2093      for (i = 2; i < length; i++)
2094        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2095      }
2096    else
2097      {
2098      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2099      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2100      loop = LABEL();
2101      OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2102      OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2103      JUMPTO(SLJIT_C_NOT_ZERO, loop);
2104      }
2105    
2106    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2107    if (common->mark_ptr != 0)
2108      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2109    if (common->control_head_ptr != 0)
2110      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2111    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2112    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2113    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2114    }
2115    
2116    static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2117    {
2118    while (current != NULL)
2119      {
2120      switch (current[-2])
2121        {
2122        case type_then_trap:
2123        break;
2124    
2125        case type_mark:
2126        if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2127          return current[-4];
2128        break;
2129    
2130        default:
2131        SLJIT_ASSERT_STOP();
2132        break;
2133        }
2134      current = (sljit_sw*)current[-1];
2135      }
2136    return -1;
2137    }
2138    
2139  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2140  {  {
2141  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 1975  DEFINE_COMPILER; Line 2192  DEFINE_COMPILER;
2192  struct sljit_jump *jump;  struct sljit_jump *jump;
2193    
2194  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2195  SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));  SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2196      && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2197    
2198  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2199  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
# Line 1987  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJI Line 2205  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJI
2205  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2206    
2207  jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);  jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2208  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), (common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start) + sizeof(sljit_sw), SLJIT_SAVED_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2209  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2210  OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);  OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2211  #endif  #endif
# Line 2155  return (bit < 256) ? ((0 << 8) | bit) : Line 2373  return (bit < 256) ? ((0 << 8) | bit) :
2373    
2374  static void check_partial(compiler_common *common, BOOL force)  static void check_partial(compiler_common *common, BOOL force)
2375  {  {
2376  /* Checks whether a partial matching is occured. Does not modify registers. */  /* Checks whether a partial matching is occurred. Does not modify registers. */
2377  DEFINE_COMPILER;  DEFINE_COMPILER;
2378  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
2379    
# Line 2170  else if (common->mode == JIT_PARTIAL_SOF Line 2388  else if (common->mode == JIT_PARTIAL_SOF
2388    jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);    jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2389    
2390  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2391    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2392  else  else
2393    {    {
2394    if (common->partialmatchlabel != NULL)    if (common->partialmatchlabel != NULL)
# Line 2183  if (jump != NULL) Line 2401  if (jump != NULL)
2401    JUMPHERE(jump);    JUMPHERE(jump);
2402  }  }
2403    
2404  static struct sljit_jump *check_str_end(compiler_common *common)  static void check_str_end(compiler_common *common, jump_list **end_reached)
2405  {  {
2406  /* Does not affect registers. Usually used in a tight spot. */  /* Does not affect registers. Usually used in a tight spot. */
2407  DEFINE_COMPILER;  DEFINE_COMPILER;
2408  struct sljit_jump *jump;  struct sljit_jump *jump;
 struct sljit_jump *nohit;  
 struct sljit_jump *return_value;  
2409    
2410  if (common->mode == JIT_COMPILE)  if (common->mode == JIT_COMPILE)
2411    return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    {
2412      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2413      return;
2414      }
2415    
2416  jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);  jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2417  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2418    {    {
2419    nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2420    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2421    JUMPHERE(nohit);    add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
   return_value = JUMP(SLJIT_JUMP);  
2422    }    }
2423  else  else
2424    {    {
2425    return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2426    if (common->partialmatchlabel != NULL)    if (common->partialmatchlabel != NULL)
2427      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2428    else    else
2429      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2430    }    }
2431  JUMPHERE(jump);  JUMPHERE(jump);
 return return_value;  
2432  }  }
2433    
2434  static void detect_partial_match(compiler_common *common, jump_list **backtracks)  static void detect_partial_match(compiler_common *common, jump_list **backtracks)
# Line 2230  jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR Line 2447  jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR
2447  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2448  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2449    {    {
2450    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2451    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2452    }    }
2453  else  else
# Line 2243  else Line 2460  else
2460  JUMPHERE(jump);  JUMPHERE(jump);
2461  }  }
2462    
2463  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common)
2464  {  {
2465  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2466  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2467  DEFINE_COMPILER;  DEFINE_COMPILER;
2468  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
# Line 2253  struct sljit_jump *jump; Line 2470  struct sljit_jump *jump;
2470  #endif  #endif
2471    
2472  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2473  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2474  if (common->utf)  if (common->utf)
2475    {    {
 #if defined COMPILE_PCRE8  
2476    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2477  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2478    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2479      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2480    JUMPHERE(jump);    JUMPHERE(jump);
2481    }    }
2482  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 }  
2483    
2484  static void peek_char(compiler_common *common)  #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2485  {  if (common->utf)
2486  /* Reads the character into TMP1, keeps STR_PTR.    {
2487  Does not check STR_END. TMP2 Destroyed. */    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2488      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2489      /* TMP2 contains the high surrogate. */
2490      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2491      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2492      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2493      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2494      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2495      JUMPHERE(jump);
2496      }
2497    #endif
2498    }
2499    
2500    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2501    
2502    static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2503    {
2504    /* Tells whether the character codes below 128 are enough
2505    to determine a match. */
2506    const pcre_uint8 value = nclass ? 0xff : 0;
2507    const pcre_uint8* end = bitset + 32;
2508    
2509    bitset += 16;
2510    do
2511      {
2512      if (*bitset++ != value)
2513        return FALSE;
2514      }
2515    while (bitset < end);
2516    return TRUE;
2517    }
2518    
2519    static void read_char7_type(compiler_common *common, BOOL full_read)
2520    {
2521    /* Reads the precise character type of a character into TMP1, if the character
2522    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2523    full_read argument tells whether characters above max are accepted or not. */
2524    DEFINE_COMPILER;
2525    struct sljit_jump *jump;
2526    
2527    SLJIT_ASSERT(common->utf);
2528    
2529    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2530    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2531    
2532    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2533    
2534    if (full_read)
2535      {
2536      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2537      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2538      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2539      JUMPHERE(jump);
2540      }
2541    }
2542    
2543    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2544    
2545    static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read)
2546    {
2547    /* Reads the precise value of a character into TMP1, if the character is
2548    less than or equal to max. Otherwise it returns with a value greater than max.
2549    Does not check STR_END. The full_read argument tells whether characters above
2550    max are accepted or not. */
2551  DEFINE_COMPILER;  DEFINE_COMPILER;
2552  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2553  struct sljit_jump *jump;  struct sljit_jump *jump;
2554  #endif  #endif
2555    
2556    SLJIT_UNUSED_ARG(full_read);
2557    SLJIT_UNUSED_ARG(max);
2558    
2559  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2560  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2561    
2562    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2563  if (common->utf)  if (common->utf)
2564    {    {
2565  #if defined COMPILE_PCRE8    if (max < 128 && !full_read)
2566        return;
2567    
2568    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2569  #elif defined COMPILE_PCRE16    if (max >= 0x800)
2570    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2571  #endif /* COMPILE_PCRE[8|16] */    else if (max < 128)
2572    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));      {
2573    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2574        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2575        }
2576      else
2577        {
2578        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2579        if (!full_read)
2580          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2581        else
2582          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2583        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2584        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2585        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2586        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2587        if (full_read)
2588          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2589        }
2590    JUMPHERE(jump);    JUMPHERE(jump);
2591    }    }
2592  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2593    
2594    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2595    if (common->utf)
2596      {
2597      if (max >= 0x10000)
2598        {
2599        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2600        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2601        /* TMP2 contains the high surrogate. */
2602        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2603        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2604        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2605        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2606        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2607        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2608        JUMPHERE(jump);
2609        return;
2610        }
2611    
2612      if (max < 0xd800 && !full_read)
2613        return;
2614    
2615      /* Skip low surrogate if necessary. */
2616      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2617      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2618      if (full_read)
2619        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2620      if (max >= 0xd800)
2621        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2622      JUMPHERE(jump);
2623      }
2624    #endif
2625    }
2626    
2627    static SLJIT_INLINE void read_char(compiler_common *common)
2628    {
2629    read_char_max(common, 0x7fffffff, TRUE);
2630  }  }
2631    
2632  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common, BOOL full_read)
2633  {  {
2634  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
2635    The full_read argument tells whether characters above max are accepted or not. */
2636  DEFINE_COMPILER;  DEFINE_COMPILER;
2637  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2638  struct sljit_jump *jump;  struct sljit_jump *jump;
2639  #endif  #endif
2640    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2641    struct sljit_jump *jump2;
2642    #endif
2643    
2644  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(full_read);
2645    
2646    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2647    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2648    
2649    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2650  if (common->utf)  if (common->utf)
2651    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2652    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2653    it is needed in most cases. */    it is needed in most cases. */
2654    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2655    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2656    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!full_read)
2657    JUMPHERE(jump);      {
2658  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2659    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2660    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2661    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2662    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2663    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2664    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2665    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2666    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2667    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2668    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2669  #elif defined COMPILE_PCRE32    else
2670    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2671    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2672    return;    return;
2673    }    }
2674  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2675  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2676  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2677  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2678  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2679  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2680  #endif  #endif
2681  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2682  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2683  JUMPHERE(jump);  JUMPHERE(jump);
2684  #endif  #endif
2685    
2686    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2687    if (common->utf && full_read)
2688      {
2689      /* Skip low surrogate if necessary. */
2690      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2691      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2692      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2693      JUMPHERE(jump);
2694      }
2695    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2696  }  }
2697    
2698  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2412  else Line 2761  else
2761  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2762  {  {
2763  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2764  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2765  DEFINE_COMPILER;  DEFINE_COMPILER;
2766  struct sljit_jump *jump;  struct sljit_jump *jump;
2767    
2768  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2769    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2770    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2771    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2772    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2773    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2774    
2775  /* Searching for the first zero. */  /* Searching for the first zero. */
2776  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2777  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2778  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2779  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2780  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2781    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2782    
2783    JUMPHERE(jump);
2784    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2785    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2786  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2787  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2788  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2789    
2790  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2791  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2792  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2793  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2794  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2795  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2796    
2797  /* Four byte sequence. */  /* Four byte sequence. */
2798  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2799  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2800  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2801    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2802    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2803  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2804  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2805  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2806    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2807    }
2808    
2809    static void do_utfreadchar16(compiler_common *common)
2810    {
2811    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2812    of the character (>= 0xc0). Return value in TMP1. */
2813    DEFINE_COMPILER;
2814    struct sljit_jump *jump;
2815    
2816    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2817    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2818    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2819    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2820  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2821  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2822  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2823  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2824    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2825    jump = JUMP(SLJIT_C_NOT_ZERO);
2826    /* Two byte sequence. */
2827    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2829    
2830    JUMPHERE(jump);
2831    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2832    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2833    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2834  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2835  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2836  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2837    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2838  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2839  }  }
2840    
# Line 2483  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2854  jump = JUMP(SLJIT_C_NOT_ZERO);
2854  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2855  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2856  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2857    /* The upper 5 bits are known at this point. */
2858    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2859  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2860  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2861  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2862  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2863  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2864    
2865  JUMPHERE(compare);  JUMPHERE(compare);
2866  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2867  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2868    
2869  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2870  JUMPHERE(jump);  JUMPHERE(jump);
2871  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2872  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2873  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2874  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2875  }  }
2876    
2877  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
2878    
2879  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
2880    
# Line 2680  if (newlinecheck) Line 3025  if (newlinecheck)
3025  return mainloop;  return mainloop;
3026  }  }
3027    
3028  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
3029  {  {
3030  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
3031  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
3032  struct sljit_jump *quit;  pcre_uint32 caseless, chr, mask;
3033  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save;
3034  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any;
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
   
 /* We do not support alternatives now. */  
 if (*(common->start + GET(common->start, 1)) == OP_ALT)  
   return FALSE;  
3035    
3036    repeat = 1;
3037  while (TRUE)  while (TRUE)
3038    {    {
3039      last = TRUE;
3040      any = FALSE;
3041    caseless = 0;    caseless = 0;
3042    must_stop = 1;    switch (*cc)
   switch(*cc)  
3043      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3044      case OP_CHARI:      case OP_CHARI:
3045      caseless = 1;      caseless = 1;
3046      must_stop = 0;      case OP_CHAR:
3047        last = FALSE;
3048      cc++;      cc++;
3049      break;      break;
3050    
# Line 2735  while (TRUE) Line 3069  while (TRUE)
3069      cc++;      cc++;
3070      break;      break;
3071    
3072        case OP_EXACTI:
3073        caseless = 1;
3074      case OP_EXACT:      case OP_EXACT:
3075        repeat = GET2(cc, 1);
3076        last = FALSE;
3077      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3078      break;      break;
3079    
# Line 2746  while (TRUE) Line 3084  while (TRUE)
3084      cc++;      cc++;
3085      break;      break;
3086    
3087      case OP_EXACTI:      case OP_KET:
3088      caseless = 1;      cc += 1 + LINK_SIZE;
3089      cc += 1 + IMM2_SIZE;      continue;
3090    
3091        case OP_ALT:
3092        cc += GET(cc, 1);
3093        continue;
3094    
3095        case OP_ONCE:
3096        case OP_ONCE_NC:
3097        case OP_BRA:
3098        case OP_BRAPOS:
3099        case OP_CBRA:
3100        case OP_CBRAPOS:
3101        alternative = cc + GET(cc, 1);
3102        while (*alternative == OP_ALT)
3103          {
3104          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3105          if (max_chars == 0)
3106            return consumed;
3107          alternative += GET(alternative, 1);
3108          }
3109    
3110        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3111          cc += IMM2_SIZE;
3112        cc += 1 + LINK_SIZE;
3113        continue;
3114    
3115        case OP_CLASS:
3116        case OP_NCLASS:
3117        any = TRUE;
3118        cc += 1 + 32 / sizeof(pcre_uchar);
3119      break;      break;
3120    
3121      default:  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3122      must_stop = 2;      case OP_XCLASS:
3123        any = TRUE;
3124        cc += GET(cc, 1);
3125        break;
3126    #endif
3127    
3128        case OP_NOT_DIGIT:
3129        case OP_DIGIT:
3130        case OP_NOT_WHITESPACE:
3131        case OP_WHITESPACE:
3132        case OP_NOT_WORDCHAR:
3133        case OP_WORDCHAR:
3134        case OP_ANY:
3135        case OP_ALLANY:
3136        any = TRUE;
3137        cc++;
3138      break;      break;
3139    
3140    #ifdef SUPPORT_UCP
3141        case OP_NOTPROP:
3142        case OP_PROP:
3143        any = TRUE;
3144        cc += 1 + 2;
3145        break;
3146    #endif
3147    
3148        case OP_TYPEEXACT:
3149        repeat = GET2(cc, 1);
3150        cc += 1 + IMM2_SIZE;
3151        continue;
3152    
3153        default:
3154        return consumed;
3155      }      }
3156    
3157    if (must_stop == 2)    if (any)
3158        break;      {
3159    #ifdef SUPPORT_UTF
3160        if (common->utf) return consumed;
3161    #endif
3162    #if defined COMPILE_PCRE8
3163        mask = 0xff;
3164    #elif defined COMPILE_PCRE16
3165        mask = 0xffff;
3166    #elif defined COMPILE_PCRE32
3167        mask = 0xffffffff;
3168    #else
3169        SLJIT_ASSERT_STOP();
3170    #endif
3171    
3172        do
3173          {
3174          chars[0] = mask;
3175          chars[1] = mask;
3176    
3177          if (--max_chars == 0)
3178            return consumed;
3179          consumed++;
3180          chars += 2;
3181          }
3182        while (--repeat > 0);
3183    
3184        repeat = 1;
3185        continue;
3186        }
3187    
3188    len = 1;    len = 1;
3189  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3190    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3191  #endif  #endif
3192    
3193    if (caseless && char_has_othercase(common, cc))    if (caseless != 0 && char_has_othercase(common, cc))
3194      {      {
3195      caseless = char_get_othercase_bit(common, cc);      caseless = char_get_othercase_bit(common, cc);
3196      if (caseless == 0)      if (caseless == 0)
3197        return FALSE;        return consumed;
3198  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3199      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3200  #else  #else
# Line 2781  while (TRUE) Line 3207  while (TRUE)
3207    else    else
3208      caseless = 0;      caseless = 0;
3209    
3210    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3211      {    cc_save = cc;
3212      c = *cc;    while (TRUE)
3213      bit = 0;      {
3214      if (len == (caseless & 0xff))      do
3215        {        {
3216        bit = caseless >> 8;        chr = *cc;
3217        c |= bit;  #ifdef COMPILE_PCRE32
3218          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3219            return consumed;
3220    #endif
3221          mask = 0;
3222          if (len == (caseless & 0xff))
3223            {
3224            mask = caseless >> 8;
3225            chr |= mask;
3226            }
3227    
3228          if (chars[0] == NOTACHAR)
3229            {
3230            chars[0] = chr;
3231            chars[1] = mask;
3232            }
3233          else
3234            {
3235            mask |= chars[0] ^ chr;
3236            chr |= mask;
3237            chars[0] = chr;
3238            chars[1] |= mask;
3239            }
3240    
3241          len--;
3242          if (--max_chars == 0)
3243            return consumed;
3244          consumed++;
3245          chars += 2;
3246          cc++;
3247        }        }
3248        while (len > 0);
3249    
3250      chars[location] = c;      if (--repeat == 0)
3251      chars[location + 1] = bit;        break;
3252    
3253      len--;      len = len_save;
3254      location += 2;      cc = cc_save;
     cc++;  
3255      }      }
3256    
3257    if (location >= MAX_N_CHARS * 2 || must_stop != 0)    repeat = 1;
3258      if (last)
3259        return consumed;
3260      }
3261    }
3262    
3263    #define MAX_N_CHARS 16
3264    
3265    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3266    {
3267    DEFINE_COMPILER;
3268    struct sljit_label *start;
3269    struct sljit_jump *quit;
3270    pcre_uint32 chars[MAX_N_CHARS * 2];
3271    pcre_uint8 ones[MAX_N_CHARS];
3272    pcre_uint32 mask;
3273    int i, max;
3274    int offsets[3];
3275    
3276    for (i = 0; i < MAX_N_CHARS; i++)
3277      {
3278      chars[i << 1] = NOTACHAR;
3279      chars[(i << 1) + 1] = 0;
3280      }
3281    
3282    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3283    
3284    if (max <= 1)
3285      return FALSE;
3286    
3287    for (i = 0; i < max; i++)
3288      {
3289      mask = chars[(i << 1) + 1];
3290      ones[i] = ones_in_half_byte[mask & 0xf];
3291      mask >>= 4;
3292      while (mask != 0)
3293        {
3294        ones[i] += ones_in_half_byte[mask & 0xf];
3295        mask >>= 4;
3296        }
3297      }
3298    
3299    offsets[0] = -1;
3300    /* Scan forward. */
3301    for (i = 0; i < max; i++)
3302      if (ones[i] <= 2) {
3303        offsets[0] = i;
3304      break;      break;
3305    }    }
3306    
3307  /* At least two characters are required. */  if (offsets[0] == -1)
3308  if (location < 2 * 2)    return FALSE;
3309      return FALSE;  
3310    /* Scan backward. */
3311    offsets[1] = -1;
3312    for (i = max - 1; i > offsets[0]; i--)
3313      if (ones[i] <= 2) {
3314        offsets[1] = i;
3315        break;
3316      }
3317    
3318    offsets[2] = -1;
3319    if (offsets[1] >= 0)
3320      {
3321      /* Scan from middle. */
3322      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3323        if (ones[i] <= 2)
3324          {
3325          offsets[2] = i;
3326          break;
3327          }
3328    
3329      if (offsets[2] == -1)
3330        {
3331        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3332          if (ones[i] <= 2)
3333            {
3334            offsets[2] = i;
3335            break;
3336            }
3337        }
3338      }
3339    
3340    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3341    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3342    
3343    chars[0] = chars[offsets[0] << 1];
3344    chars[1] = chars[(offsets[0] << 1) + 1];
3345    if (offsets[2] >= 0)
3346      {
3347      chars[2] = chars[offsets[2] << 1];
3348      chars[3] = chars[(offsets[2] << 1) + 1];
3349      }
3350    if (offsets[1] >= 0)
3351      {
3352      chars[4] = chars[offsets[1] << 1];
3353      chars[5] = chars[(offsets[1] << 1) + 1];
3354      }
3355    
3356    max -= 1;
3357  if (firstline)  if (firstline)
3358    {    {
3359    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3360    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3361    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3362    }    }
3363  else  else
3364    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3365    
3366  start = LABEL();  start = LABEL();
3367  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3368    
3369  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3370  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  if (offsets[1] >= 0)
3371      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3372  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3373    
3374  if (chars[1] != 0)  if (chars[1] != 0)
3375    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3376  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3377  if (location > 2 * 2)  if (offsets[2] >= 0)
3378    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3379  if (chars[3] != 0)  
3380    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3381    {    {
3382    if (chars[5] != 0)    if (chars[5] != 0)
3383      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3384    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3385      }
3386    
3387    if (offsets[2] >= 0)
3388      {
3389      if (chars[3] != 0)
3390        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3391      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3392    }    }
3393  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3394    
# Line 2843  JUMPHERE(quit); Line 3397  JUMPHERE(quit);
3397  if (firstline)  if (firstline)
3398    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3399  else  else
3400    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3401  return TRUE;  return TRUE;
3402  }  }
3403    
# Line 2992  if (firstline) Line 3546  if (firstline)
3546    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3547  }  }
3548    
3549  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3550    
3551  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3552  {  {
3553  DEFINE_COMPILER;  DEFINE_COMPILER;
3554  struct sljit_label *start;  struct sljit_label *start;
3555  struct sljit_jump *quit;  struct sljit_jump *quit;
3556  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3557  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3558  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3559  struct sljit_jump *jump;  struct sljit_jump *jump;
3560  #endif  #endif
3561    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3562  if (firstline)  if (firstline)
3563    {    {
3564    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3025  if (common->utf) Line 3574  if (common->utf)
3574    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3575  #endif  #endif
3576    
3577  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3578    {    {
3579  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3580    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3034  if (!check_class_ranges(common, inverted Line 3583  if (!check_class_ranges(common, inverted
3583  #endif  #endif
3584    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3585    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3586    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3587    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3588    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3589    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3178  static void check_wordboundary(compiler_ Line 3727  static void check_wordboundary(compiler_
3727  {  {
3728  DEFINE_COMPILER;  DEFINE_COMPILER;
3729  struct sljit_jump *skipread;  struct sljit_jump *skipread;
3730    jump_list *skipread_list = NULL;
3731  #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF  #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3732  struct sljit_jump *jump;  struct sljit_jump *jump;
3733  #endif  #endif
# Line 3235  else Line 3785  else
3785  JUMPHERE(skipread);  JUMPHERE(skipread);
3786    
3787  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3788  skipread = check_str_end(common);  check_str_end(common, &skipread_list);
3789  peek_char(common);  peek_char(common);
3790    
3791  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
# Line 3276  else Line 3826  else
3826      JUMPHERE(jump);      JUMPHERE(jump);
3827  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
3828    }    }
3829  JUMPHERE(skipread);  set_jumps(skipread_list, LABEL());
3830    
3831  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3832  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3833  }  }
3834    
3835  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
3836  {  {
3837  DEFINE_COMPILER;  DEFINE_COMPILER;
3838  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
3839    pcre_uint8 bit, cbit, all;
3840    int i, byte, length = 0;
3841    
3842    bit = bits[0] & 0x1;
3843    /* All bits will be zero or one (since bit is zero or one). */
3844    all = -bit;
3845    
3846    for (i = 0; i < 256; )
3847      {
3848      byte = i >> 3;
3849      if ((i & 0x7) == 0 && bits[byte] == all)
3850        i += 8;
3851      else
3852        {
3853        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3854        if (cbit != bit)
3855          {
3856          if (length >= MAX_RANGE_SIZE)
3857            return FALSE;
3858          ranges[length] = i;
3859          length++;
3860          bit = cbit;
3861          all = -cbit;
3862          }
3863        i++;
3864        }
3865      }
3866    
3867    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3868      {
3869      if (length >= MAX_RANGE_SIZE)
3870        return FALSE;
3871      ranges[length] = 256;
3872      length++;
3873      }
3874    
3875  if (ranges[0] < 0)  if (length < 0 || length > 4)
3876    return FALSE;    return FALSE;
3877    
3878  switch(ranges[0])  bit = bits[0] & 0x1;
3879    if (invert) bit ^= 0x1;
3880    
3881    /* No character is accepted. */
3882    if (length == 0 && bit == 0)
3883      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3884    
3885    switch(length)
3886    {    {
3887      case 0:
3888      /* When bit != 0, all characters are accepted. */
3889      return TRUE;
3890    
3891    case 1:    case 1:
3892    if (readch)    add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
3893    return TRUE;    return TRUE;
3894    
3895    case 2:    case 2:
3896    if (readch)    if (ranges[0] + 1 != ranges[1])
3897      read_char(common);      {
3898    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3899    add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3900        }
3901      else
3902        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3903    return TRUE;    return TRUE;
3904    
3905    case 4:    case 3:
3906    if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])    if (bit != 0)
3907      {      {
3908      if (readch)      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3909        read_char(common);      if (ranges[0] + 1 != ranges[1])
     if (ranges[1] != 0)  
3910        {        {
3911        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3912        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3913        }        }
3914      else      else
3915        {        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
3916      return TRUE;      return TRUE;
3917      }      }
3918    if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
3919      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3920      if (ranges[1] + 1 != ranges[2])
3921      {      {
3922      if (readch)      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3923        read_char(common);      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
3924      }      }
3925    return FALSE;    else
3926        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3927    default:    return TRUE;
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
3928    
3929  for (i = 1; i < 256; i++)    case 4:
3930    if ((ctypes[i] & flag) != bit)    if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3931          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3932          && is_powerof2(ranges[2] - ranges[0]))
3933      {      {
3934      if (length >= MAX_RANGE_SIZE)      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3935        return;      if (ranges[2] + 1 != ranges[3])
3936      ranges[2 + length] = i;        {
3937      length++;        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3938      bit ^= flag;        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3939          }
3940        else
3941          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3942        return TRUE;
3943      }      }
3944    
3945  if (bit != 0)    if (bit != 0)
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
 pcre_uint8 bit, cbit, all;  
 int i, byte, length = 0;  
   
 bit = bits[0] & 0x1;  
 ranges[1] = bit;  
 /* Can be 0 or 255. */  
 all = -bit;  
   
 for (i = 0; i < 256; )  
   {  
   byte = i >> 3;  
   if ((i & 0x7) == 0 && bits[byte] == all)  
     i += 8;  
   else  
3946      {      {
3947      cbit = (bits[byte] >> (i & 0x7)) & 0x1;      i = 0;
3948      if (cbit != bit)      if (ranges[0] + 1 != ranges[1])
3949        {        {
3950        if (length >= MAX_RANGE_SIZE)        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3951          return FALSE;        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3952        ranges[2 + length] = i;        i = ranges[0];
       length++;  
       bit = cbit;  
       all = -cbit;  
3953        }        }
3954      i++;      else
3955          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3956    
3957        if (ranges[2] + 1 != ranges[3])
3958          {
3959          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3960          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3961          }
3962        else
3963          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3964        return TRUE;
3965      }      }
   }  
3966    
3967  if (((bit == 0) && nclass) || ((bit == 1) && !nclass))    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3968    {    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3969    if (length >= MAX_RANGE_SIZE)    if (ranges[1] + 1 != ranges[2])
3970      return FALSE;      {
3971    ranges[2 + length] = 256;      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3972    length++;      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3973    }      }
3974  ranges[0] = length;    else
3975        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3976      return TRUE;
3977    
3978  return check_ranges(common, ranges, backtracks, FALSE);    default:
3979      SLJIT_ASSERT_STOP();
3980      return FALSE;
3981      }
3982  }  }
3983    
3984  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 3806  static void compile_xclass_matchingpath( Line 4366  static void compile_xclass_matchingpath(
4366  {  {
4367  DEFINE_COMPILER;  DEFINE_COMPILER;
4368  jump_list *found = NULL;  jump_list *found = NULL;
4369  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4370  pcre_int32 c, charoffset;  pcre_int32 c, charoffset;
 const pcre_uint32 *other_cases;  
4371  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4372  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4373  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4374    
4375  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4376  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4377  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4378  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4379    const pcre_uint32 *other_cases;
4380  pcre_int32 typeoffset;  pcre_int32 typeoffset;
4381  #endif  #endif
4382    
# Line 3824  pcre_int32 typeoffset; Line 4385  pcre_int32 typeoffset;
4385  detect_partial_match(common, backtracks);  detect_partial_match(common, backtracks);
4386  read_char(common);  read_char(common);
4387    
4388  if ((*cc++ & XCL_MAP) != 0)  cc++;
4389    if ((cc[-1] & XCL_HASPROP) == 0)
4390    {    {
4391    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    if ((cc[-1] & XCL_MAP) != 0)
4392  #ifndef COMPILE_PCRE8      {
4393    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4394  #elif defined SUPPORT_UTF  #ifdef SUPPORT_UCP
4395    if (common->utf)      charsaved = TRUE;
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
4396  #endif  #endif
4397        if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4398          {
4399          jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4400    
4401          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4402          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4403          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4404          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4405          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4406          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4407          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4408    
4409          JUMPHERE(jump);
4410          }
4411        else
4412          add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4413    
4414    if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4415        cc += 32 / sizeof(pcre_uchar);
4416        }
4417      else
4418        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4419      }
4420    else if ((cc[-1] & XCL_MAP) != 0)
4421      {
4422      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4423    #ifdef SUPPORT_UCP
4424      charsaved = TRUE;
4425    #endif
4426      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4427      {      {
4428    #ifdef COMPILE_PCRE8
4429        SLJIT_ASSERT(common->utf);
4430    #endif
4431        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4432    
4433      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4434      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4435      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4436      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4437      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4438      add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));      add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
     }  
4439    
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
4440      JUMPHERE(jump);      JUMPHERE(jump);
4441  #endif      }
4442    
4443    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4444    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4445    }    }
4446    
# Line 3911  while (*cc != XCL_END) Line 4498  while (*cc != XCL_END)
4498        case PT_SPACE:        case PT_SPACE:
4499        case PT_PXSPACE:        case PT_PXSPACE:
4500        case PT_WORD:        case PT_WORD:
4501          case PT_PXGRAPH:
4502          case PT_PXPRINT:
4503          case PT_PXPUNCT:
4504        needstype = TRUE;        needstype = TRUE;
4505        needschar = TRUE;        needschar = TRUE;
4506        break;        break;
4507    
4508        case PT_CLIST:        case PT_CLIST:
4509          case PT_UCNC:
4510        needschar = TRUE;        needschar = TRUE;
4511        break;        break;
4512    
# Line 4097  while (*cc != XCL_END) Line 4688  while (*cc != XCL_END)
4688    
4689        case PT_SPACE:        case PT_SPACE:
4690        case PT_PXSPACE:        case PT_PXSPACE:
       if (*cc == PT_SPACE)  
         {  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  
         jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);  
         }  
4691        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
4692        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4693        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4694        if (*cc == PT_SPACE)  
4695          JUMPHERE(jump);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4696          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4697    
4698          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4699          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4700    
4701        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
4702        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
# Line 4181  while (*cc != XCL_END) Line 4771  while (*cc != XCL_END)
4771          }          }
4772        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4773        break;        break;
4774    
4775          case PT_UCNC:
4776          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4777          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4778          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4779          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4780          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4781          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4782    
4783          SET_CHAR_OFFSET(0xa0);
4784          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4785          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4786          SET_CHAR_OFFSET(0);
4787          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4788          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4789          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4790          break;
4791    
4792          case PT_PXGRAPH:
4793          /* C and Z groups are the farthest two groups. */
4794          SET_TYPE_OFFSET(ucp_Ll);
4795          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4796          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4797    
4798          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4799    
4800          /* In case of ucp_Cf, we overwrite the result. */
4801          SET_CHAR_OFFSET(0x2066);
4802          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4803          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4804    
4805          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4806          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4807    
4808          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4809          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4810    
4811          JUMPHERE(jump);
4812          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4813          break;
4814    
4815          case PT_PXPRINT:
4816          /* C and Z groups are the farthest two groups. */
4817          SET_TYPE_OFFSET(ucp_Ll);
4818          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4819          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4820    
4821          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4822          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4823    
4824          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4825    
4826          /* In case of ucp_Cf, we overwrite the result. */
4827          SET_CHAR_OFFSET(0x2066);
4828          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4829          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4830    
4831          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4832          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4833    
4834          JUMPHERE(jump);
4835          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4836          break;
4837    
4838          case PT_PXPUNCT:
4839          SET_TYPE_OFFSET(ucp_Sc);
4840          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4841          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4842    
4843          SET_CHAR_OFFSET(0);
4844          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4845          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4846    
4847          SET_TYPE_OFFSET(ucp_Pc);
4848          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4849          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4850          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4851          break;
4852        }        }
4853      cc += 2;      cc += 2;
4854      }      }
# Line 4206  int length; Line 4874  int length;
4874  unsigned int c, oc, bit;  unsigned int c, oc, bit;
4875  compare_context context;  compare_context context;
4876  struct sljit_jump *jump[4];  struct sljit_jump *jump[4];
4877    jump_list *end_list;
4878  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4879  struct sljit_label *label;  struct sljit_label *label;
4880  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4881  pcre_uchar propdata[5];  pcre_uchar propdata[5];
4882  #endif  #endif
4883  #endif  #endif /* SUPPORT_UTF */
4884    
4885  switch(type)  switch(type)
4886    {    {
# Line 4236  switch(type) Line 4905  switch(type)
4905    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
4906    case OP_DIGIT:    case OP_DIGIT:
4907    /* Digits are usually 0-9, so it is worth to optimize them. */    /* Digits are usually 0-9, so it is worth to optimize them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
4908    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4909    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4910    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4911      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
4912    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
4913      {  #endif
4914      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
4915      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
4916      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4917      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
4918    return cc;    return cc;
4919    
4920    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
4921    case OP_WHITESPACE:    case OP_WHITESPACE:
4922    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4923    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4924      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4925        read_char7_type(common, type == OP_NOT_WHITESPACE);
4926      else
4927    #endif
4928        read_char8_type(common, type == OP_NOT_WHITESPACE);
4929    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4930    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4931    return cc;    return cc;
# Line 4263  switch(type) Line 4933  switch(type)
4933    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
4934    case OP_WORDCHAR:    case OP_WORDCHAR:
4935    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4936    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4937      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4938        read_char7_type(common, type == OP_NOT_WORDCHAR);
4939      else
4940    #endif
4941        read_char8_type(common, type == OP_NOT_WORDCHAR);
4942    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4943    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4944    return cc;    return cc;
# Line 4274  switch(type) Line 4949  switch(type)
4949    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4950      {      {
4951      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4952        end_list = NULL;
4953      if (common->mode != JIT_PARTIAL_HARD_COMPILE)      if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4954        jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);        add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4955      else      else
4956        jump[1] = check_str_end(common);        check_str_end(common, &end_list);
4957    
4958      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4959      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4960      if (jump[1] != NULL)      set_jumps(end_list, LABEL());
       JUMPHERE(jump[1]);  
4961      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
4962      }      }
4963    else    else
# Line 4326  switch(type) Line 5001  switch(type)
5001  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5002    case OP_NOTPROP:    case OP_NOTPROP:
5003    case OP_PROP:    case OP_PROP:
5004    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5005    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5006    propdata[2] = cc[0];    propdata[2] = cc[0];
5007    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4341  switch(type) Line 5016  switch(type)
5016    read_char(common);    read_char(common);
5017    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5018    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5019      end_list = NULL;
5020    if (common->mode != JIT_PARTIAL_HARD_COMPILE)    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5021      jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5022    else    else
5023      jump[1] = check_str_end(common);      check_str_end(common, &end_list);
5024    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5025    jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);    jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5026    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5027    jump[3] = JUMP(SLJIT_JUMP);    jump[2] = JUMP(SLJIT_JUMP);
5028    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
5029    check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);    check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5030      set_jumps(end_list, LABEL());
5031    JUMPHERE(jump[1]);    JUMPHERE(jump[1]);
5032    JUMPHERE(jump[2]);    JUMPHERE(jump[2]);
   JUMPHERE(jump[3]);  
5033    return cc;    return cc;
5034    
5035    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
# Line 4651  switch(type) Line 5327  switch(type)
5327  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5328        {        {
5329        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5330        }        }
5331      }      }
5332    else    else
5333  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5334      c = *cc;      c = *cc;
     }  
5335    
5336    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5337        {
5338        read_char_max(common, c, TRUE);
5339      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5340        }
5341    else    else
5342      {      {
5343      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5344        read_char_max(common, c > oc ? c : oc, TRUE);
5345      bit = c ^ oc;      bit = c ^ oc;
5346      if (is_powerof2(bit))      if (is_powerof2(bit))
5347        {        {
# Line 4683  switch(type) Line 5359  switch(type)
5359    case OP_CLASS:    case OP_CLASS:
5360    case OP_NCLASS:    case OP_NCLASS:
5361    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5362    read_char(common);  
5363    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5364      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5365      read_char_max(common, bit, type == OP_NCLASS);
5366    #else
5367      read_char_max(common, 255, type == OP_NCLASS);
5368    #endif
5369    
5370      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5371      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5372    
5373  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5374    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5375    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5376      {      {
5377      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5378      if (type == OP_CLASS)      if (type == OP_CLASS)
5379        {        {
5380        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5381        jump[0] = NULL;        jump[0] = NULL;
5382        }        }
5383      }      }
5384  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5385      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5386      if (type == OP_CLASS)
5387        {
5388        add_jump(compiler, backtracks, jump[0]);
5389        jump[0] = NULL;
5390        }
5391    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5392    
5393    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5394    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5395    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5396    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5397    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5398    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5399    
5400  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5401    if (jump[0] != NULL)    if (jump[0] != NULL)
5402      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5403  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5404    
5405    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5406    
5407  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 4817  if (context.length > 0) Line 5506  if (context.length > 0)
5506  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5507  }  }
5508    
 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  
 {  
 DEFINE_COMPILER;  
 int offset = GET2(cc, 1) << 1;  
   
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
 if (!common->jscript_compat)  
   {  
   if (backtracks == NULL)  
     {  
     /* OVECTOR(1) contains the "string begin - 1" constant. */  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
     return JUMP(SLJIT_C_NOT_ZERO);  
     }  
   add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));  
   }  
 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
 }  
   
5509  /* Forward definitions. */  /* Forward definitions. */
5510  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5511  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
# Line 4871  static void compile_backtrackingpath(com Line 5538  static void compile_backtrackingpath(com
5538    
5539  #define BACKTRACK_AS(type) ((type *)backtrack)  #define BACKTRACK_AS(type) ((type *)backtrack)
5540    
5541  static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)  static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5542    {
5543    /* The OVECTOR offset goes to TMP2. */
5544    DEFINE_COMPILER;
5545    int count = GET2(cc, 1 + IMM2_SIZE);
5546    pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5547    unsigned int offset;
5548    jump_list *found = NULL;
5549    
5550    SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5551    
5552    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5553    
5554    count--;
5555    while (count-- > 0)
5556      {
5557      offset = GET2(slot, 0) << 1;
5558      GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5559      add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5560      slot += common->name_entry_size;
5561      }
5562    
5563    offset = GET2(slot, 0) << 1;
5564    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5565    if (backtracks != NULL && !common->jscript_compat)
5566      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5567    
5568    set_jumps(found, LABEL());
5569    }
5570    
5571    static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5572  {  {
5573  DEFINE_COMPILER;  DEFINE_COMPILER;
5574  int offset = GET2(cc, 1) << 1;  BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5575    int offset = 0;
5576  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
5577  struct sljit_jump *partial;  struct sljit_jump *partial;
5578  struct sljit_jump *nopartial;  struct sljit_jump *nopartial;
5579    
5580  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  if (ref)
5581  /* OVECTOR(1) contains the "string begin - 1" constant. */    {
5582  if (withchecks && !common->jscript_compat)    offset = GET2(cc, 1) << 1;
5583    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5584      /* OVECTOR(1) contains the "string begin - 1" constant. */
5585      if (withchecks && !common->jscript_compat)
5586        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5587      }
5588    else
5589      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5590    
5591  #if defined SUPPORT_UTF && defined SUPPORT_UCP  #if defined SUPPORT_UTF && defined SUPPORT_UCP
5592  if (common->utf && *cc == OP_REFI)  if (common->utf && *cc == OP_REFI)
5593    {    {
5594    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5595    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));    if (ref)
5596        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5597      else
5598        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5599    
5600    if (withchecks)    if (withchecks)
5601      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5602    
# Line 4913  if (common->utf && *cc == OP_REFI) Line 5621  if (common->utf && *cc == OP_REFI)
5621  else  else
5622  #endif /* SUPPORT_UTF && SUPPORT_UCP */  #endif /* SUPPORT_UTF && SUPPORT_UCP */
5623    {    {
5624    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);    if (ref)
5625        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5626      else
5627        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5628    
5629    if (withchecks)    if (withchecks)
5630      jump = JUMP(SLJIT_C_ZERO);      jump = JUMP(SLJIT_C_ZERO);
5631    
# Line 4950  if (jump != NULL) Line 5662  if (jump != NULL)
5662    else    else
5663      JUMPHERE(jump);      JUMPHERE(jump);
5664    }    }
 return cc + 1 + IMM2_SIZE;  
5665  }  }
5666    
5667  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5668  {  {
5669  DEFINE_COMPILER;  DEFINE_COMPILER;
5670    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5671  backtrack_common *backtrack;  backtrack_common *backtrack;
5672  pcre_uchar type;  pcre_uchar type;
5673    int offset = 0;
5674  struct sljit_label *label;  struct sljit_label *label;
5675  struct sljit_jump *zerolength;  struct sljit_jump *zerolength;
5676  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 4967  BOOL minimize; Line 5680  BOOL minimize;
5680    
5681  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5682    
5683    if (ref)
5684      offset = GET2(cc, 1) << 1;
5685    else
5686      cc += IMM2_SIZE;
5687  type = cc[1 + IMM2_SIZE];  type = cc[1 + IMM2_SIZE];
5688    
5689    SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5690  minimize = (type & 0x1) != 0;  minimize = (type & 0x1) != 0;
5691  switch(type)  switch(type)
5692    {    {
# Line 5005  if (!minimize) Line 5724  if (!minimize)
5724    if (min == 0)    if (min == 0)
5725      {      {
5726      allocate_stack(common, 2);      allocate_stack(common, 2);
5727        if (ref)
5728          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5729      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5730      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5731      /* Temporary release of STR_PTR. */      /* Temporary release of STR_PTR. */
5732      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5733      zerolength = compile_ref_checks(common, ccbegin, NULL);      /* Handles both invalid and empty cases. Since the minimum repeat
5734        is zero, the invalid case is basically the same as an empty case. */
5735        if (ref)
5736          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5737        else
5738          {
5739          compile_dnref_search(common, ccbegin, NULL);
5740          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5741          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5742          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5743          }
5744      /* Restore if not zero length. */      /* Restore if not zero length. */
5745      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5746      }      }
5747    else    else
5748      {      {
5749      allocate_stack(common, 1);      allocate_stack(common, 1);
5750        if (ref)
5751          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5752      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5753      zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);      if (ref)
5754          {
5755          add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5756          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5757          }
5758        else
5759          {
5760          compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5761          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5762          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5763          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5764          }
5765      }      }
5766    
5767    if (min > 1 || max > 1)    if (min > 1 || max > 1)
5768      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5769    
5770    label = LABEL();    label = LABEL();
5771      if (!ref)
5772        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5773    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5774    
5775    if (min > 1 || max > 1)    if (min > 1 || max > 1)
# Line 5054  if (!minimize) Line 5800  if (!minimize)
5800    JUMPHERE(zerolength);    JUMPHERE(zerolength);
5801    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5802    
5803    decrease_call_count(common);    count_match(common);
5804    return cc;    return cc;
5805    }    }
5806    
5807  allocate_stack(common, 2);  allocate_stack(common, ref ? 2 : 3);
5808    if (ref)
5809      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5810  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5811  if (type != OP_CRMINSTAR)  if (type != OP_CRMINSTAR)
5812    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5813    
5814  if (min == 0)  if (min == 0)
5815    {    {
5816    zerolength = compile_ref_checks(common, ccbegin, NULL);    /* Handles both invalid and empty cases. Since the minimum repeat
5817      is zero, the invalid case is basically the same as an empty case. */
5818      if (ref)
5819        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5820      else
5821        {
5822        compile_dnref_search(common, ccbegin, NULL);
5823        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5824        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5825        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5826        }
5827      /* Length is non-zero, we can match real repeats. */
5828    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5829    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
5830    }    }
5831  else  else
5832    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);    {
5833      if (ref)
5834        {
5835        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5836        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5837        }
5838      else
5839        {
5840        compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5841        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5842        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5843        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5844        }
5845      }
5846    
5847  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5848  if (max > 0)  if (max > 0)
5849    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5850    
5851    if (!ref)
5852      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5853  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5854  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5855    
# Line 5093  if (jump != NULL) Line 5867  if (jump != NULL)
5867    JUMPHERE(jump);    JUMPHERE(jump);
5868  JUMPHERE(zerolength);  JUMPHERE(zerolength);
5869    
5870  decrease_call_count(common);  count_match(common);
5871  return cc;  return cc;
5872  }  }
5873    
# Line 5103  DEFINE_COMPILER; Line 5877  DEFINE_COMPILER;
5877  backtrack_common *backtrack;  backtrack_common *backtrack;
5878  recurse_entry *entry = common->entries;  recurse_entry *entry = common->entries;
5879  recurse_entry *prev = NULL;  recurse_entry *prev = NULL;
5880  int start = GET(cc, 1);  sljit_sw start = GET(cc, 1);
5881  pcre_uchar *start_cc;  pcre_uchar *start_cc;
5882    BOOL needs_control_head;
5883    
5884  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5885    
5886  /* Inlining simple patterns. */  /* Inlining simple patterns. */
5887  if (get_framesize(common, common->start + start, TRUE) == no_stack)  if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5888    {    {
5889    start_cc = common->start + start;    start_cc = common->start + start;
5890    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
# Line 5228  allocate_stack(common, CALLOUT_ARG_SIZE Line 6003  allocate_stack(common, CALLOUT_ARG_SIZE
6003  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6004  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6005  SLJIT_ASSERT(common->capture_last_ptr != 0);  SLJIT_ASSERT(common->capture_last_ptr != 0);
6006  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6007  OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6008    
6009  /* These pointer sized fields temporarily store internal variables. */  /* These pointer sized fields temporarily store internal variables. */
6010  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
# Line 5238  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CA Line 6013  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CA
6013    
6014  if (common->mark_ptr != 0)  if (common->mark_ptr != 0)
6015    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6016  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6017  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6018  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6019    
6020  /* Needed to save important temporary registers. */  /* Needed to save important temporary registers. */
# Line 5268  static pcre_uchar *compile_assert_matchi Line 6043  static pcre_uchar *compile_assert_matchi
6043  {  {
6044  DEFINE_COMPILER;  DEFINE_COMPILER;
6045  int framesize;  int framesize;
6046    int extrasize;
6047    BOOL needs_control_head;
6048  int private_data_ptr;  int private_data_ptr;
6049  backtrack_common altbacktrack;  backtrack_common altbacktrack;
6050  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
# Line 5277  jump_list *tmp = NULL; Line 6054  jump_list *tmp = NULL;
6054  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6055  jump_list **found;  jump_list **found;
6056  /* Saving previous accept variables. */  /* Saving previous accept variables. */
6057    BOOL save_local_exit = common->local_exit;
6058    BOOL save_positive_assert = common->positive_assert;
6059    then_trap_backtrack *save_then_trap = common->then_trap;
6060  struct sljit_label *save_quit_label = common->quit_label;  struct sljit_label *save_quit_label = common->quit_label;
6061  struct sljit_label *save_accept_label = common->accept_label;  struct sljit_label *save_accept_label = common->accept_label;
6062  jump_list *save_quit = common->quit;  jump_list *save_quit = common->quit;
6063    jump_list *save_positive_assert_quit = common->positive_assert_quit;
6064  jump_list *save_accept = common->accept;  jump_list *save_accept = common->accept;
6065  struct sljit_jump *jump;  struct sljit_jump *jump;
6066  struct sljit_jump *brajump = NULL;  struct sljit_jump *brajump = NULL;
6067    
6068    /* An assertion captures (*THEN), so the enclosing then trap is cleared while compiling it. */
6069    common->then_trap = NULL;
6070    
6071  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6072    {    {
6073    SLJIT_ASSERT(!conditional);    SLJIT_ASSERT(!conditional);
# Line 5292  if (*cc == OP_BRAZERO || *cc == OP_BRAMI Line 6076  if (*cc == OP_BRAZERO || *cc == OP_BRAMI
6076    }    }
6077  private_data_ptr = PRIVATE_DATA(cc);  private_data_ptr = PRIVATE_DATA(cc);
6078  SLJIT_ASSERT(private_data_ptr != 0);  SLJIT_ASSERT(private_data_ptr != 0);
6079  framesize = get_framesize(common, cc, FALSE);  framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6080  backtrack->framesize = framesize;  backtrack->framesize = framesize;
6081  backtrack->private_data_ptr = private_data_ptr;  backtrack->private_data_ptr = private_data_ptr;
6082  opcode = *cc;  opcode = *cc;
# Line 5311  if (bra == OP_BRAMINZERO) Line 6095  if (bra == OP_BRAMINZERO)
6095    
6096  if (framesize < 0)  if (framesize < 0)
6097    {    {
6098    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);    extrasize = needs_control_head ? 2 : 1;
6099    allocate_stack(common, 1);    if (framesize == no_frame)
6100        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6101      allocate_stack(common, extrasize);
6102      if (needs_control_head)
6103        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6104    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6105      if (needs_control_head)
6106        {
6107        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6108        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6109        }
6110    }    }
6111  else  else
6112    {    {
6113    allocate_stack(common, framesize + 2);    extrasize = needs_control_head ? 3 : 2;
6114      allocate_stack(common, framesize + extrasize);
6115    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6116    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6117    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6118      if (needs_control_head)
6119        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6120    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6121    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);    if (needs_control_head)
6122    init_frame(common, ccbegin, framesize + 1, 2, FALSE);      {
6123        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6124        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6125        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6126        }
6127      else
6128        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6129      init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6130    }    }
6131    
6132  memset(&altbacktrack, 0, sizeof(backtrack_common));  memset(&altbacktrack, 0, sizeof(backtrack_common));
6133  common->quit_label = NULL;  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6134  common->quit = NULL;    {
6135      /* Negative assert is stronger than positive assert. */
6136      common->local_exit = TRUE;
6137      common->quit_label = NULL;
6138      common->quit = NULL;
6139      common->positive_assert = FALSE;
6140      }
6141    else
6142      common->positive_assert = TRUE;
6143    common->positive_assert_quit = NULL;
6144    
6145  while (1)  while (1)
6146    {    {
6147    common->accept_label = NULL;    common->accept_label = NULL;
# Line 5343  while (1) Line 6156  while (1)
6156    compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);    compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6157    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6158      {      {
6159      common->quit_label = save_quit_label;      if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6160          {
6161          common->local_exit = save_local_exit;
6162          common->quit_label = save_quit_label;
6163          common->quit = save_quit;
6164          }
6165        common->positive_assert = save_positive_assert;
6166        common->then_trap = save_then_trap;
6167      common->accept_label = save_accept_label;      common->accept_label = save_accept_label;
6168      common->quit = save_quit;      common->positive_assert_quit = save_positive_assert_quit;
6169      common->accept = save_accept;      common->accept = save_accept;
6170      return NULL;      return NULL;
6171      }      }
# Line 5355  while (1) Line 6175  while (1)
6175    
6176    /* Reset stack. */    /* Reset stack. */
6177    if (framesize < 0)    if (framesize < 0)
6178      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);      {
6179    else {      if (framesize == no_frame)
6180          OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6181        else
6182          free_stack(common, extrasize);
6183        if (needs_control_head)
6184          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6185        }
6186      else
6187        {
6188      if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)      if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6189        {        {
6190        /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */        /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6191        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6192          if (needs_control_head)
6193            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6194        }        }
6195      else      else
6196        {        {
6197        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6198          if (needs_control_head)
6199            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6200        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6201        }        }
6202    }      }
6203    
6204    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6205      {      {
6206      /* We know that STR_PTR was stored on the top of the stack. */      /* We know that STR_PTR was stored on the top of the stack. */
6207      if (conditional)      if (conditional)
6208        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6209      else if (bra == OP_BRAZERO)      else if (bra == OP_BRAZERO)
6210        {        {
6211        if (framesize < 0)        if (framesize < 0)
6212          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6213        else        else
6214          {          {
6215          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6216          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6217          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6218          }          }
6219        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
# Line 5398  while (1) Line 6230  while (1)
6230    compile_backtrackingpath(common, altbacktrack.top);    compile_backtrackingpath(common, altbacktrack.top);
6231    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6232      {      {
6233      common->quit_label = save_quit_label;      if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6234          {
6235          common->local_exit = save_local_exit;
6236          common->quit_label = save_quit_label;
6237          common->quit = save_quit;
6238          }
6239        common->positive_assert = save_positive_assert;
6240        common->then_trap = save_then_trap;
6241      common->accept_label = save_accept_label;      common->accept_label = save_accept_label;
6242      common->quit = save_quit;      common->positive_assert_quit = save_positive_assert_quit;
6243      common->accept = save_accept;      common->accept = save_accept;
6244      return NULL;      return NULL;
6245      }      }
# Line 5412  while (1) Line 6251  while (1)
6251    ccbegin = cc;    ccbegin = cc;
6252    cc += GET(cc, 1);    cc += GET(cc, 1);
6253    }    }
6254    
6255    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6256      {
6257      SLJIT_ASSERT(common->positive_assert_quit == NULL);
6258      /* Makes the check less complicated below. */
6259      common->positive_assert_quit = common->quit;
6260      }
6261    
6262  /* None of them matched. */  /* None of them matched. */
6263  if (common->quit != NULL)  if (common->positive_assert_quit != NULL)
6264    set_jumps(common->quit, LABEL());    {
6265      jump = JUMP(SLJIT_JUMP);
6266      set_jumps(common->positive_assert_quit, LABEL());
6267      SLJIT_ASSERT(framesize != no_stack);
6268      if (framesize < 0)
6269        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6270      else
6271        {
6272        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6273        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6274        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6275        }
6276      JUMPHERE(jump);
6277      }
6278    
6279    if (needs_control_head)
6280      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6281    
6282  if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)  if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6283    {    {
# Line 5426  if (opcode == OP_ASSERT || opcode == OP_ Line 6289  if (opcode == OP_ASSERT || opcode == OP_
6289      {      {
6290      /* The topmost item should be 0. */      /* The topmost item should be 0. */
6291      if (bra == OP_BRAZERO)      if (bra == OP_BRAZERO)
6292          {
6293          if (extrasize == 2)
6294            free_stack(common, 1);
6295        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6296          }
6297      else      else
6298        free_stack(common, 1);        free_stack(common, extrasize);
6299      }      }
6300    else    else
6301      {      {
6302      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6303      /* The topmost item should be 0. */      /* The topmost item should be 0. */
6304      if (bra == OP_BRAZERO)      if (bra == OP_BRAZERO)
6305        {        {
6306        free_stack(common, framesize + 1);        free_stack(common, framesize + extrasize - 1);
6307        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6308        }        }
6309      else      else
6310        free_stack(common, framesize + 2);        free_stack(common, framesize + extrasize);
6311      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6312      }      }
6313    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
# Line 5452  if (opcode == OP_ASSERT || opcode == OP_ Line 6319  if (opcode == OP_ASSERT || opcode == OP_
6319    if (framesize < 0)    if (framesize < 0)
6320      {      {
6321      /* We know that STR_PTR was stored on the top of the stack. */      /* We know that STR_PTR was stored on the top of the stack. */
6322      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6323      /* Keep the STR_PTR on the top of the stack. */      /* Keep the STR_PTR on the top of the stack. */
6324      if (bra == OP_BRAZERO)      if (bra == OP_BRAZERO)
6325          {
6326        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6327          if (extrasize == 2)
6328            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6329          }
6330      else if (bra == OP_BRAMINZERO)      else if (bra == OP_BRAMINZERO)
6331        {        {
6332        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
# Line 5468  if (opcode == OP_ASSERT || opcode == OP_ Line 6339  if (opcode == OP_ASSERT || opcode == OP_
6339        {        {
6340        /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */        /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6341        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6342        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6343        }        }
6344      else      else
6345        {        {
6346        /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */        /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6347        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));        OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6348        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));        if (extrasize == 2)
6349        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);          {
6350            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6351            if (bra == OP_BRAMINZERO)
6352              OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6353            }
6354          else
6355            {
6356            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6357            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6358            }
6359        }        }
6360      }      }
6361    
# Line 5504  else Line 6384  else
6384      {      {
6385      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6386      if (bra != OP_BRA)      if (bra != OP_BRA)
6387          {
6388          if (extrasize == 2)
6389            free_stack(common, 1);
6390        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6391          }
6392      else      else
6393        free_stack(common, 1);        free_stack(common, extrasize);
6394      }      }
6395    else    else
6396      {      {
6397      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6398      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6399      /* The topmost item should be 0. */      /* The topmost item should be 0. */
6400      if (bra != OP_BRA)      if (bra != OP_BRA)
6401        {        {
6402        free_stack(common, framesize + 1);        free_stack(common, framesize + extrasize - 1);
6403        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6404        }        }
6405      else      else
6406        free_stack(common, framesize + 2);        free_stack(common, framesize + extrasize);
6407      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6408      }      }
6409    
# Line 5539  else Line 6423  else
6423      }      }
6424    }    }
6425    
6426  common->quit_label = save_quit_label;  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6427      {
6428      common->local_exit = save_local_exit;
6429      common->quit_label = save_quit_label;
6430      common->quit = save_quit;
6431      }
6432    common->positive_assert = save_positive_assert;
6433    common->then_trap = save_then_trap;
6434  common->accept_label = save_accept_label;  common->accept_label = save_accept_label;
6435  common->quit = save_quit;  common->positive_assert_quit = save_positive_assert_quit;
6436  common->accept = save_accept;  common->accept = save_accept;
6437  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
6438  }  }
6439    
6440  static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6441  {  {
6442  int condition = FALSE;  DEFINE_COMPILER;
6443  pcre_uchar *slotA = name_table;  int stacksize;
 pcre_uchar *slotB;  
 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_sw no_capture;  
 int i;  
   
 locals += refno & 0xff;  
 refno >>= 8;  
 no_capture = locals[1];  
6444    
6445  for (i = 0; i < name_count; i++)  if (framesize < 0)
6446    {    {
6447    if (GET2(slotA, 0) == refno) break;    if (framesize == no_frame)
6448    slotA += name_entry_size;      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6449    }    else
6450        {
6451        stacksize = needs_control_head ? 1 : 0;
6452        if (ket != OP_KET || has_alternatives)
6453          stacksize++;
6454        free_stack(common, stacksize);
6455        }
6456    
6457  if (i < name_count)    if (needs_control_head)
6458    {      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
   /* Found a name for the number - there can be only one; duplicate names  
   for different numbers are allowed, but not vice versa. First scan down  
   for duplicates. */  
6459    
6460    slotB = slotA;    /* TMP2 which is set here used by OP_KETRMAX below. */
6461    while (slotB > name_table)    if (ket == OP_KETRMAX)
6462        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6463      else if (ket == OP_KETRMIN)
6464      {      {
6465      slotB -= name_entry_size;      /* Move the STR_PTR to the private_data_ptr. */
6466      if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
       {  
       condition = locals[GET2(slotB, 0) << 1] != no_capture;  
       if (condition) break;  
       }  
     else break;  
6467      }      }
6468      }
6469    else
6470      {
6471      stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6472      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6473      if (needs_control_head)
6474        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6475    
6476    /* Scan up for duplicates */    if (ket == OP_KETRMAX)
   if (!condition)  
6477      {      {
6478      slotB = slotA;      /* TMP2 which is set here used by OP_KETRMAX below. */
6479      for (i++; i < name_count; i++)      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = locals[GET2(slotB, 0) << 1] != no_capture;  
         if (condition) break;  
         }  
       else break;  
       }  
6480      }      }
6481    }    }
6482  return condition;  if (needs_control_head)
6483      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6484  }  }
6485    
6486  static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)  static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6487  {  {
6488  int condition = FALSE;  DEFINE_COMPILER;
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];  
 sljit_uw i;  
6489    
6490  for (i = 0; i < name_count; i++)  if (common->capture_last_ptr != 0)
6491    {    {
6492    if (GET2(slotA, 0) == recno) break;    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6493    slotA += name_entry_size;    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6494      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6495      stacksize++;
6496    }    }
6497    if (common->optimized_cbracket[offset >> 1] == 0)
 if (i < name_count)  
6498    {    {
6499    /* Found a name for the number - there can be only one; duplicate    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6500    names for different numbers are allowed, but not vice versa. First    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6501    scan down for duplicates. */    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6502      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6503    slotB = slotA;    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6504    while (slotB > name_table)    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6505      {    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6506      slotB -= name_entry_size;    stacksize += 2;
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = GET2(slotB, 0) == group_num;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = GET2(slotB, 0) == group_num;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
6507    }    }
6508  return condition;  return stacksize;
6509  }  }
6510    
6511  /*  /*
# Line 5717  backtrack_common *backtrack; Line 6569  backtrack_common *backtrack;
6569  pcre_uchar opcode;  pcre_uchar opcode;
6570  int private_data_ptr = 0;  int private_data_ptr = 0;
6571  int offset = 0;  int offset = 0;
6572  int stacksize;  int i, stacksize;
6573    int repeat_ptr = 0, repeat_length = 0;
6574    int repeat_type = 0, repeat_count = 0;
6575  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
6576  pcre_uchar *matchingpath;  pcre_uchar *matchingpath;
6577    pcre_uchar *slot;
6578  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
6579  pcre_uchar ket;  pcre_uchar ket;
6580  assert_backtrack *assert;  assert_backtrack *assert;
6581  BOOL has_alternatives;  BOOL has_alternatives;
6582    BOOL needs_control_head = FALSE;
6583  struct sljit_jump *jump;  struct sljit_jump *jump;
6584  struct sljit_jump *skip;  struct sljit_jump *skip;
6585  struct sljit_label *rmaxlabel = NULL;  struct sljit_label *rmax_label = NULL;
6586  struct sljit_jump *braminzerojump = NULL;  struct sljit_jump *braminzero = NULL;
6587    
6588  PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6589    
# Line 5740  if (*cc == OP_BRAZERO || *cc == OP_BRAMI Line 6596  if (*cc == OP_BRAZERO || *cc == OP_BRAMI
6596    
6597  opcode = *cc;  opcode = *cc;
6598  ccbegin = cc;  ccbegin = cc;
6599  matchingpath = ccbegin + 1 + LINK_SIZE;  matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6600    ket = *matchingpath;
6601    if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6602      {
6603      repeat_ptr = PRIVATE_DATA(matchingpath);
6604      repeat_length = PRIVATE_DATA(matchingpath + 1);
6605      repeat_type = PRIVATE_DATA(matchingpath + 2);
6606      repeat_count = PRIVATE_DATA(matchingpath + 3);
6607      SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6608      if (repeat_type == OP_UPTO)
6609        ket = OP_KETRMAX;
6610      if (repeat_type == OP_MINUPTO)
6611        ket = OP_KETRMIN;
6612      }
6613    
6614  if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)  if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6615    {    {
6616    /* Drop this bracket_backtrack. */    /* Drop this bracket_backtrack. */
6617    parent->top = backtrack->prev;    parent->top = backtrack->prev;
6618    return bracketend(cc);    return matchingpath + 1 + LINK_SIZE + repeat_length;
6619    }    }
6620    
6621  ket = *(bracketend(cc) - 1 - LINK_SIZE);  matchingpath = ccbegin + 1 + LINK_SIZE;
6622  SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);  SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6623  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6624  cc += GET(cc, 1);  cc += GET(cc, 1);
6625    
6626  has_alternatives = *cc == OP_ALT;  has_alternatives = *cc == OP_ALT;
6627  if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))  if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6628    {    has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
   has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;  
   if (*matchingpath == OP_NRREF)  
     {  
     stacksize = GET2(matchingpath, 1);  
     if (common->currententry == NULL || stacksize == RREF_ANY)  
       has_alternatives = FALSE;  
     else if (common->currententry->start == 0)  
       has_alternatives = stacksize != 0;  
     else  
       has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
     }  
   }  
6629    
6630  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6631    opcode = OP_SCOND;    opcode = OP_SCOND;
# Line 5799  else if (opcode == OP_ONCE || opcode == Line 6656  else if (opcode == OP_ONCE || opcode ==
6656    SLJIT_ASSERT(private_data_ptr != 0);    SLJIT_ASSERT(private_data_ptr != 0);
6657    BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;    BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6658    if (opcode == OP_ONCE)    if (opcode == OP_ONCE)
6659      BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);      BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6660    }    }
6661    
6662  /* Instructions before the first alternative. */  /* Instructions before the first alternative. */
6663  stacksize = 0;  stacksize = 0;
6664  if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))  if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6665    stacksize++;    stacksize++;
6666  if (bra == OP_BRAZERO)  if (bra == OP_BRAZERO)
6667    stacksize++;    stacksize++;
# Line 5813  if (stacksize > 0) Line 6670  if (stacksize > 0)
6670    allocate_stack(common, stacksize);    allocate_stack(common, stacksize);
6671    
6672  stacksize = 0;  stacksize = 0;
6673  if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))  if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6674    {    {
6675    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6676    stacksize++;    stacksize++;
# Line 5829  if (bra == OP_BRAMINZERO) Line 6686  if (bra == OP_BRAMINZERO)
6686    if (ket != OP_KETRMIN)    if (ket != OP_KETRMIN)
6687      {      {
6688      free_stack(common, 1);      free_stack(common, 1);
6689      braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);      braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6690      }      }
6691    else    else
6692      {      {
# Line 5844  if (bra == OP_BRAMINZERO) Line 6701  if (bra == OP_BRAMINZERO)
6701        if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)        if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6702          {          {
6703          /* When we come from outside, private_data_ptr contains the previous STR_PTR. */          /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6704          braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);          braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6705          }          }
6706        else        else
6707          {          {
6708          /* Except when the whole stack frame must be saved. */          /* Except when the whole stack frame must be saved. */
6709          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6710          braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));          braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6711          }          }
6712        JUMPHERE(skip);        JUMPHERE(skip);
6713        }        }
# Line 5863  if (bra == OP_BRAMINZERO) Line 6720  if (bra == OP_BRAMINZERO)
6720      }      }
6721    }    }
6722    
6723    if (repeat_type != 0)
6724      {
6725      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6726      if (repeat_type == OP_EXACT)
6727        rmax_label = LABEL();
6728      }
6729    
6730  if (ket == OP_KETRMIN)  if (ket == OP_KETRMIN)
6731    BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();    BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6732    
6733  if (ket == OP_KETRMAX)  if (ket == OP_KETRMAX)
6734    {    {
6735    rmaxlabel = LABEL();    rmax_label = LABEL();
6736    if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)    if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6737      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6738    }    }
6739    
6740  /* Handling capturing brackets and alternatives. */  /* Handling capturing brackets and alternatives. */
6741  if (opcode == OP_ONCE)  if (opcode == OP_ONCE)
6742    {    {
6743      stacksize = 0;
6744      if (needs_control_head)
6745        {
6746        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6747        stacksize++;
6748        }
6749    
6750    if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)    if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6751      {      {
6752      /* Neither capturing brackets nor recursions are not found in the block. */      /* Neither capturing brackets nor recursions are found in the block. */
6753      if (ket == OP_KETRMIN)      if (ket == OP_KETRMIN)
6754        {        {
6755        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);        stacksize += 2;
6756        allocate_stack(common, 2);        if (!needs_control_head)
6757        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);  
       OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));  
6758        }        }
6759      else if (ket == OP_KETRMAX || has_alternatives)      else
6760        {        {
6761        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);        if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6762        allocate_stack(common, 1);          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6763        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);        if (ket == OP_KETRMAX || has_alternatives)
6764            stacksize++;
6765        }        }
6766      else  
6767        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);      if (stacksize > 0)
6768          allocate_stack(common, stacksize);
6769    
6770        stacksize = 0;
6771        if (needs_control_head)
6772          {
6773          stacksize++;
6774          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6775          }
6776    
6777        if (ket == OP_KETRMIN)
6778          {
6779          if (needs_control_head)