/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Diff of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1269 by zherczeg, Mon Mar 4 10:47:12 2013 UTC revision 1474 by zherczeg, Thu Apr 24 06:43:50 2014 UTC
# Line 71  system files. */ Line 71  system files. */
71     2 - Enable capture_last_ptr (includes option 1). */     2 - Enable capture_last_ptr (includes option 1). */
72  /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */  /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* 1 - Always have a control head. */
75    /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77  /* Allocate memory for the regex stack on the real machine stack.  /* Allocate memory for the regex stack on the real machine stack.
78  Fast, but limited size. */  Fast, but limited size. */
79  #define MACHINE_STACK_SIZE 32768  #define MACHINE_STACK_SIZE 32768
# Line 165  typedef struct jit_arguments { Line 168  typedef struct jit_arguments {
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169    void *callout_data;    void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171      pcre_uint32 limit_match;
172    int real_offset_count;    int real_offset_count;
173    int offset_count;    int offset_count;
   int call_limit;  
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 176  typedef struct jit_arguments { Line 179  typedef struct jit_arguments {
179    
180  typedef struct executable_functions {  typedef struct executable_functions {
181    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182      sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186    pcre_uint32 top_bracket;    pcre_uint32 top_bracket;
187    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    pcre_uint32 limit_match;
188  } executable_functions;  } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
# Line 193  typedef struct stub_list { Line 198  typedef struct stub_list {
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201  enum frame_types { no_frame = -1, no_stack = -2 };  typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207    enum frame_types {
208      no_frame = -1,
209      no_stack = -2
210    };
211    
212    enum control_types {
213      type_mark = 0,
214      type_then_trap = 1
215    };
216    
217  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218    
219  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
220  code generator. It is allocated by compile_matchingpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
221  the aguments for compile_backtrackingpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
222  of its descendants. */  of its descendants. */
223  typedef struct backtrack_common {  typedef struct backtrack_common {
224    /* Concatenation stack. */    /* Concatenation stack. */
# Line 215  typedef struct backtrack_common { Line 234  typedef struct backtrack_common {
234  typedef struct assert_backtrack {  typedef struct assert_backtrack {
235    backtrack_common common;    backtrack_common common;
236    jump_list *condfailed;    jump_list *condfailed;
237    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
238    int framesize;    int framesize;
239    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
240    int private_data_ptr;    int private_data_ptr;
# Line 236  typedef struct bracket_backtrack { Line 255  typedef struct bracket_backtrack {
255      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
256      jump_list *condfailed;      jump_list *condfailed;
257      assert_backtrack *assert;      assert_backtrack *assert;
258      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
259      int framesize;      int framesize;
260    } u;    } u;
261    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
# Line 271  typedef struct recurse_entry { Line 290  typedef struct recurse_entry {
290    /* Collects the calls until the function is not created. */    /* Collects the calls until the function is not created. */
291    jump_list *calls;    jump_list *calls;
292    /* Points to the starting opcode. */    /* Points to the starting opcode. */
293    int start;    sljit_sw start;
294  } recurse_entry;  } recurse_entry;
295    
296  typedef struct recurse_backtrack {  typedef struct recurse_backtrack {
# Line 279  typedef struct recurse_backtrack { Line 298  typedef struct recurse_backtrack {
298    BOOL inlined_pattern;    BOOL inlined_pattern;
299  } recurse_backtrack;  } recurse_backtrack;
300    
301  #define MAX_RANGE_SIZE 6  #define OP_THEN_TRAP OP_TABLE_LENGTH
302    
303    typedef struct then_trap_backtrack {
304      backtrack_common common;
305      /* If then_trap is not NULL, this structure contains the real
306      then_trap for the backtracking path. */
307      struct then_trap_backtrack *then_trap;
308      /* Points to the starting opcode. */
309      sljit_sw start;
310      /* Exit point for the then opcodes of this alternative. */
311      jump_list *quit;
312      /* Frame size of the current alternative. */
313      int framesize;
314    } then_trap_backtrack;
315    
316    #define MAX_RANGE_SIZE 4
317    
318  typedef struct compiler_common {  typedef struct compiler_common {
319      /* The sljit ceneric compiler. */
320    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
321      /* First byte code. */
322    pcre_uchar *start;    pcre_uchar *start;
   
323    /* Maps private data offset to each opcode. */    /* Maps private data offset to each opcode. */
324    int *private_data_ptrs;    sljit_si *private_data_ptrs;
325      /* This read-only data is available during runtime. */
326      sljit_uw *read_only_data;
327      /* The total size of the read-only data. */
328      sljit_uw read_only_data_size;
329      /* The next free entry of the read_only_data. */
330      sljit_uw *read_only_data_ptr;
331    /* Tells whether the capturing bracket is optimized. */    /* Tells whether the capturing bracket is optimized. */
332    pcre_uint8 *optimized_cbracket;    pcre_uint8 *optimized_cbracket;
333      /* Tells whether the starting offset is a target of then. */
334      pcre_uint8 *then_offsets;
335      /* Current position where a THEN must jump. */
336      then_trap_backtrack *then_trap;
337    /* Starting offset of private data for capturing brackets. */    /* Starting offset of private data for capturing brackets. */
338    int cbraptr;    int cbra_ptr;
339    /* OVector starting point. Must be divisible by 2. */    /* Output vector starting point. Must be divisible by 2. */
340    int ovector_start;    int ovector_start;
341    /* Last known position of the requested byte. */    /* Last known position of the requested byte. */
342    int req_char_ptr;    int req_char_ptr;
# Line 305  typedef struct compiler_common { Line 350  typedef struct compiler_common {
350    int first_line_end;    int first_line_end;
351    /* Points to the marked string. */    /* Points to the marked string. */
352    int mark_ptr;    int mark_ptr;
353      /* Recursive control verb management chain. */
354      int control_head_ptr;
355    /* Points to the last matched capture block index. */    /* Points to the last matched capture block index. */
356    int capture_last_ptr;    int capture_last_ptr;
357      /* Points to the starting position of the current match. */
358      int start_ptr;
359    
360    /* Flipped and lower case tables. */    /* Flipped and lower case tables. */
361    const pcre_uint8 *fcc;    const pcre_uint8 *fcc;
362    sljit_sw lcc;    sljit_sw lcc;
363    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364    int mode;    int mode;
365      /* TRUE, when minlength is greater than 0. */
366      BOOL might_be_empty;
367      /* \K is found in the pattern. */
368      BOOL has_set_som;
369      /* (*SKIP:arg) is found in the pattern. */
370      BOOL has_skip_arg;
371      /* (*THEN) is found in the pattern. */
372      BOOL has_then;
373      /* Needs to know the start position anytime. */
374      BOOL needs_start_ptr;
375      /* Currently in recurse or negative assert. */
376      BOOL local_exit;
377      /* Currently in a positive assert. */
378      BOOL positive_assert;
379    /* Newline control. */    /* Newline control. */
380    int nltype;    int nltype;
381      pcre_uint32 nlmax;
382      pcre_uint32 nlmin;
383    int newline;    int newline;
384    int bsr_nltype;    int bsr_nltype;
385      pcre_uint32 bsr_nlmax;
386      pcre_uint32 bsr_nlmin;
387    /* Dollar endonly. */    /* Dollar endonly. */
388    int endonly;    int endonly;
   BOOL has_set_som;  
389    /* Tables. */    /* Tables. */
390    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
391    /* Named capturing brackets. */    /* Named capturing brackets. */
392    sljit_uw name_table;    pcre_uchar *name_table;
393    sljit_sw name_count;    sljit_sw name_count;
394    sljit_sw name_entry_size;    sljit_sw name_entry_size;
395    
# Line 333  typedef struct compiler_common { Line 398  typedef struct compiler_common {
398    struct sljit_label *quit_label;    struct sljit_label *quit_label;
399    struct sljit_label *forced_quit_label;    struct sljit_label *forced_quit_label;
400    struct sljit_label *accept_label;    struct sljit_label *accept_label;
401      struct sljit_label *ff_newline_shortcut;
402    stub_list *stubs;    stub_list *stubs;
403      label_addr_list *label_addrs;
404    recurse_entry *entries;    recurse_entry *entries;
405    recurse_entry *currententry;    recurse_entry *currententry;
406    jump_list *partialmatch;    jump_list *partialmatch;
407    jump_list *quit;    jump_list *quit;
408      jump_list *positive_assert_quit;
409    jump_list *forced_quit;    jump_list *forced_quit;
410    jump_list *accept;    jump_list *accept;
411    jump_list *calllimit;    jump_list *calllimit;
# Line 349  typedef struct compiler_common { Line 417  typedef struct compiler_common {
417    jump_list *vspace;    jump_list *vspace;
418    jump_list *casefulcmp;    jump_list *casefulcmp;
419    jump_list *caselesscmp;    jump_list *caselesscmp;
420      jump_list *reset_match;
421    BOOL jscript_compat;    BOOL jscript_compat;
422  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
423    BOOL utf;    BOOL utf;
424  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
425    BOOL use_ucp;    BOOL use_ucp;
426  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
427  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
428      jump_list *utfreadchar;
429      jump_list *utfreadchar16;
430    jump_list *utfreadtype8;    jump_list *utfreadtype8;
431  #endif  #endif
432  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 415  typedef struct compare_context { Line 483  typedef struct compare_context {
483  #define STACK_TOP     SLJIT_SCRATCH_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
484  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
485  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
486  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define COUNT_MATCH   SLJIT_SAVED_EREG2
487  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
488    
489  /* Local space layout. */  /* Local space layout. */
# Line 426  typedef struct compare_context { Line 494  typedef struct compare_context {
494  #define POSSESSIVE0      (2 * sizeof(sljit_sw))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
495  #define POSSESSIVE1      (3 * sizeof(sljit_sw))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
496  /* Max limit of recursions. */  /* Max limit of recursions. */
497  #define CALL_LIMIT       (4 * sizeof(sljit_sw))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
498  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
499  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
500  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
501  the start pointers when the end of the capturing group has not yet reached. */  the start pointers when the end of the capturing group has not yet reached. */
502  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
503  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
504  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_sw))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
505  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
506    
507  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
# Line 475  the start pointers when the end of the c Line 543  the start pointers when the end of the c
543  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
544    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
545    
546    #define READ_CHAR_MAX 0x7fffffff
547    
548  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
549  {  {
550  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 484  cc += 1 + LINK_SIZE; Line 554  cc += 1 + LINK_SIZE;
554  return cc;  return cc;
555  }  }
556    
557    static int no_alternatives(pcre_uchar* cc)
558    {
559    int count = 0;
560    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
561    do
562      {
563      cc += GET(cc, 1);
564      count++;
565      }
566    while (*cc == OP_ALT);
567    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
568    return count;
569    }
570    
571    static int ones_in_half_byte[16] = {
572      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
573      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
574    };
575    
576  /* Functions which might need modification for all newly supported opcodes:  /* Functions which might need modification for all newly supported opcodes:
577   next_opcode   next_opcode
578   get_private_data_length   check_opcode_types
579   set_private_data_ptrs   set_private_data_ptrs
580   get_framesize   get_framesize
581   init_frame   init_frame
582   get_private_data_length_for_copy   get_private_data_copy_length
583   copy_private_data   copy_private_data
584   compile_matchingpath   compile_matchingpath
585   compile_backtrackingpath   compile_backtrackingpath
# Line 536  switch(*cc) Line 625  switch(*cc)
625    case OP_CRMINQUERY:    case OP_CRMINQUERY:
626    case OP_CRRANGE:    case OP_CRRANGE:
627    case OP_CRMINRANGE:    case OP_CRMINRANGE:
628      case OP_CRPOSSTAR:
629      case OP_CRPOSPLUS:
630      case OP_CRPOSQUERY:
631      case OP_CRPOSRANGE:
632    case OP_CLASS:    case OP_CLASS:
633    case OP_NCLASS:    case OP_NCLASS:
634    case OP_REF:    case OP_REF:
635    case OP_REFI:    case OP_REFI:
636      case OP_DNREF:
637      case OP_DNREFI:
638    case OP_RECURSE:    case OP_RECURSE:
639    case OP_CALLOUT:    case OP_CALLOUT:
640    case OP_ALT:    case OP_ALT:
# Line 565  switch(*cc) Line 660  switch(*cc)
660    case OP_SCBRAPOS:    case OP_SCBRAPOS:
661    case OP_SCOND:    case OP_SCOND:
662    case OP_CREF:    case OP_CREF:
663    case OP_NCREF:    case OP_DNCREF:
664    case OP_RREF:    case OP_RREF:
665    case OP_NRREF:    case OP_DNRREF:
666    case OP_DEF:    case OP_DEF:
667    case OP_BRAZERO:    case OP_BRAZERO:
668    case OP_BRAMINZERO:    case OP_BRAMINZERO:
669    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
670      case OP_PRUNE:
671      case OP_SKIP:
672      case OP_THEN:
673    case OP_COMMIT:    case OP_COMMIT:
674    case OP_FAIL:    case OP_FAIL:
675    case OP_ACCEPT:    case OP_ACCEPT:
# Line 670  switch(*cc) Line 768  switch(*cc)
768  #endif  #endif
769    
770    case OP_MARK:    case OP_MARK:
771      case OP_PRUNE_ARG:
772      case OP_SKIP_ARG:
773      case OP_THEN_ARG:
774    return cc + 1 + 2 + cc[1];    return cc + 1 + 2 + cc[1];
775    
776    default:    default:
777      /* All opcodes are supported now! */
778      SLJIT_ASSERT_STOP();
779    return NULL;    return NULL;
780    }    }
781  }  }
782    
783    static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784    {
785    int count;
786    pcre_uchar *slot;
787    
788    /* Calculate important variables (like stack size) and check whether all opcodes are supported. */
789    while (cc < ccend)
790      {
791      switch(*cc)
792        {
793        case OP_SET_SOM:
794        common->has_set_som = TRUE;
795        common->might_be_empty = TRUE;
796        cc += 1;
797        break;
798    
799        case OP_REF:
800        case OP_REFI:
801        common->optimized_cbracket[GET2(cc, 1)] = 0;
802        cc += 1 + IMM2_SIZE;
803        break;
804    
805        case OP_BRA:
806        case OP_CBRA:
807        case OP_SBRA:
808        case OP_SCBRA:
809        count = no_alternatives(cc);
810        if (count > 4)
811          common->read_only_data_size += count * sizeof(sljit_uw);
812        cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
813        break;
814    
815        case OP_CBRAPOS:
816        case OP_SCBRAPOS:
817        common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818        cc += 1 + LINK_SIZE + IMM2_SIZE;
819        break;
820    
821        case OP_COND:
822        case OP_SCOND:
823        /* Only AUTO_CALLOUT can insert this opcode. We do
824           not intend to support this case. */
825        if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826          return FALSE;
827        cc += 1 + LINK_SIZE;
828        break;
829    
830        case OP_CREF:
831        common->optimized_cbracket[GET2(cc, 1)] = 0;
832        cc += 1 + IMM2_SIZE;
833        break;
834    
835        case OP_DNREF:
836        case OP_DNREFI:
837        case OP_DNCREF:
838        count = GET2(cc, 1 + IMM2_SIZE);
839        slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840        while (count-- > 0)
841          {
842          common->optimized_cbracket[GET2(slot, 0)] = 0;
843          slot += common->name_entry_size;
844          }
845        cc += 1 + 2 * IMM2_SIZE;
846        break;
847    
848        case OP_RECURSE:
849        /* Set its value only once. */
850        if (common->recursive_head_ptr == 0)
851          {
852          common->recursive_head_ptr = common->ovector_start;
853          common->ovector_start += sizeof(sljit_sw);
854          }
855        cc += 1 + LINK_SIZE;
856        break;
857    
858        case OP_CALLOUT:
859        if (common->capture_last_ptr == 0)
860          {
861          common->capture_last_ptr = common->ovector_start;
862          common->ovector_start += sizeof(sljit_sw);
863          }
864        cc += 2 + 2 * LINK_SIZE;
865        break;
866    
867        case OP_THEN_ARG:
868        common->has_then = TRUE;
869        common->control_head_ptr = 1;
870        /* Fall through. */
871    
872        case OP_PRUNE_ARG:
873        common->needs_start_ptr = TRUE;
874        /* Fall through. */
875    
876        case OP_MARK:
877        if (common->mark_ptr == 0)
878          {
879          common->mark_ptr = common->ovector_start;
880          common->ovector_start += sizeof(sljit_sw);
881          }
882        cc += 1 + 2 + cc[1];
883        break;
884    
885        case OP_THEN:
886        common->has_then = TRUE;
887        common->control_head_ptr = 1;
888        /* Fall through. */
889    
890        case OP_PRUNE:
891        case OP_SKIP:
892        common->needs_start_ptr = TRUE;
893        cc += 1;
894        break;
895    
896        case OP_SKIP_ARG:
897        common->control_head_ptr = 1;
898        common->has_skip_arg = TRUE;
899        cc += 1 + 2 + cc[1];
900        break;
901    
902        default:
903        cc = next_opcode(common, cc);
904        if (cc == NULL)
905          return FALSE;
906        break;
907        }
908      }
909    return TRUE;
910    }
911    
912    static int get_class_iterator_size(pcre_uchar *cc)
913    {
914    switch(*cc)
915      {
916      case OP_CRSTAR:
917      case OP_CRPLUS:
918      return 2;
919    
920      case OP_CRMINSTAR:
921      case OP_CRMINPLUS:
922      case OP_CRQUERY:
923      case OP_CRMINQUERY:
924      return 1;
925    
926      case OP_CRRANGE:
927      case OP_CRMINRANGE:
928      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
929        return 0;
930      return 2;
931    
932      default:
933      return 0;
934      }
935    }
936    
937    static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
938    {
939    pcre_uchar *end = bracketend(begin);
940    pcre_uchar *next;
941    pcre_uchar *next_end;
942    pcre_uchar *max_end;
943    pcre_uchar type;
944    sljit_sw length = end - begin;
945    int min, max, i;
946    
947    /* Detect fixed iterations first. */
948    if (end[-(1 + LINK_SIZE)] != OP_KET)
949      return FALSE;
950    
951    /* Already detected repeat. */
952    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
953      return TRUE;
954    
955    next = end;
956    min = 1;
957    while (1)
958      {
959      if (*next != *begin)
960        break;
961      next_end = bracketend(next);
962      if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
963        break;
964      next = next_end;
965      min++;
966      }
967    
968    if (min == 2)
969      return FALSE;
970    
971    max = 0;
972    max_end = next;
973    if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
974      {
975      type = *next;
976      while (1)
977        {
978        if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
979          break;
980        next_end = bracketend(next + 2 + LINK_SIZE);
981        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
982          break;
983        next = next_end;
984        max++;
985        }
986    
987      if (next[0] == type && next[1] == *begin && max >= 1)
988        {
989        next_end = bracketend(next + 1);
990        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
991          {
992          for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
993            if (*next_end != OP_KET)
994              break;
995    
996          if (i == max)
997            {
998            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
999            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1000            /* +2 the original and the last. */
1001            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1002            if (min == 1)
1003              return TRUE;
1004            min--;
1005            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1006            }
1007          }
1008        }
1009      }
1010    
1011    if (min >= 3)
1012      {
1013      common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1014      common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1015      common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1016      return TRUE;
1017      }
1018    
1019    return FALSE;
1020    }
1021    
1022  #define CASE_ITERATOR_PRIVATE_DATA_1 \  #define CASE_ITERATOR_PRIVATE_DATA_1 \
1023      case OP_MINSTAR: \      case OP_MINSTAR: \
1024      case OP_MINPLUS: \      case OP_MINPLUS: \
# Line 729  switch(*cc) Line 1071  switch(*cc)
1071      case OP_TYPEUPTO: \      case OP_TYPEUPTO: \
1072      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
1073    
1074  static int get_class_iterator_size(pcre_uchar *cc)  static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
 {  
 switch(*cc)  
   {  
   case OP_CRSTAR:  
   case OP_CRPLUS:  
   return 2;  
   
   case OP_CRMINSTAR:  
   case OP_CRMINPLUS:  
   case OP_CRQUERY:  
   case OP_CRMINQUERY:  
   return 1;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))  
     return 0;  
   return 2;  
   
   default:  
   return 0;  
   }  
 }  
   
 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  
1075  {  {
1076  int private_data_length = 0;  pcre_uchar *cc = common->start;
1077  pcre_uchar *alternative;  pcre_uchar *alternative;
 pcre_uchar *name;  
1078  pcre_uchar *end = NULL;  pcre_uchar *end = NULL;
1079  int space, size, i;  int private_data_ptr = *private_data_start;
1080  pcre_uint32 bracketlen;  int space, size, bracketlen;
1081    
 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  
1082  while (cc < ccend)  while (cc < ccend)
1083    {    {
1084    space = 0;    space = 0;
1085    size = 0;    size = 0;
1086    bracketlen = 0;    bracketlen = 0;
1087      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1088        return;
1089    
1090      if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1091        if (detect_repeat(common, cc))
1092          {
1093          /* These brackets are converted to repeats, so no global
1094          based single character repeat is allowed. */
1095          if (cc >= end)
1096            end = bracketend(cc);
1097          }
1098    
1099    switch(*cc)    switch(*cc)
1100      {      {
1101      case OP_SET_SOM:      case OP_KET:
1102      common->has_set_som = TRUE;      if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1103      cc += 1;        {
1104      break;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1105          private_data_ptr += sizeof(sljit_sw);
1106      case OP_REF:        cc += common->private_data_ptrs[cc + 1 - common->start];
1107      case OP_REFI:        }
1108      common->optimized_cbracket[GET2(cc, 1)] = 0;      cc += 1 + LINK_SIZE;
     cc += 1 + IMM2_SIZE;  
1109      break;      break;
1110    
1111      case OP_ASSERT:      case OP_ASSERT:
# Line 791  while (cc < ccend) Line 1117  while (cc < ccend)
1117      case OP_BRAPOS:      case OP_BRAPOS:
1118      case OP_SBRA:      case OP_SBRA:
1119      case OP_SBRAPOS:      case OP_SBRAPOS:
1120      private_data_length += sizeof(sljit_sw);      case OP_SCOND:
1121        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1122        private_data_ptr += sizeof(sljit_sw);
1123      bracketlen = 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1124      break;      break;
1125    
1126      case OP_CBRAPOS:      case OP_CBRAPOS:
1127      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1128      private_data_length += sizeof(sljit_sw);      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;      private_data_ptr += sizeof(sljit_sw);
1130      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1131      break;      break;
1132    
1133      case OP_COND:      case OP_COND:
1134      case OP_SCOND:      /* Might be a hidden SCOND. */
1135      /* Only AUTO_CALLOUT can insert this opcode. We do      alternative = cc + GET(cc, 1);
1136         not intend to support this case. */      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
     if (cc[1 + LINK_SIZE] == OP_CALLOUT)  
       return -1;  
   
     if (*cc == OP_COND)  
1137        {        {
1138        /* Might be a hidden SCOND. */        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1139        alternative = cc + GET(cc, 1);        private_data_ptr += sizeof(sljit_sw);
       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
         private_data_length += sizeof(sljit_sw);  
       }  
     else  
       private_data_length += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CREF:  
     i = GET2(cc, 1);  
     common->optimized_cbracket[i] = 0;  
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_NCREF:  
     bracketlen = GET2(cc, 1);  
     name = (pcre_uchar *)common->name_table;  
     alternative = name;  
     for (i = 0; i < common->name_count; i++)  
       {  
       if (GET2(name, 0) == bracketlen) break;  
       name += common->name_entry_size;  
       }  
     SLJIT_ASSERT(i != common->name_count);  
   
     for (i = 0; i < common->name_count; i++)  
       {  
       if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)  
         common->optimized_cbracket[GET2(alternative, 0)] = 0;  
       alternative += common->name_entry_size;  
       }  
     bracketlen = 0;  
     cc += 1 + IMM2_SIZE;  
     break;  
   
     case OP_BRA:  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRA:  
     case OP_SCBRA:  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_1  
     space = 1;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2A  
     space = 2;  
     size = -2;  
     break;  
   
     CASE_ITERATOR_PRIVATE_DATA_2B  
     space = 2;  
     size = -(2 + IMM2_SIZE);  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_1  
     space = 1;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2A  
     if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)  
       space = 2;  
     size = 1;  
     break;  
   
     CASE_ITERATOR_TYPE_PRIVATE_DATA_2B  
     if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)  
       space = 2;  
     size = 1 + IMM2_SIZE;  
     break;  
   
     case OP_CLASS:  
     case OP_NCLASS:  
     size += 1 + 32 / sizeof(pcre_uchar);  
     space = get_class_iterator_size(cc + size);  
     break;  
   
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
     case OP_XCLASS:  
     size = GET(cc, 1);  
     space = get_class_iterator_size(cc + size);  
     break;  
 #endif  
   
     case OP_RECURSE:  
     /* Set its value only once. */  
     if (common->recursive_head_ptr == 0)  
       {  
       common->recursive_head_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 1 + LINK_SIZE;  
     break;  
   
     case OP_CALLOUT:  
     if (common->capture_last_ptr == 0)  
       {  
       common->capture_last_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 2 + 2 * LINK_SIZE;  
     break;  
   
     case OP_MARK:  
     if (common->mark_ptr == 0)  
       {  
       common->mark_ptr = common->ovector_start;  
       common->ovector_start += sizeof(sljit_sw);  
       }  
     cc += 1 + 2 + cc[1];  
     break;  
   
     default:  
     cc = next_opcode(common, cc);  
     if (cc == NULL)  
       return -1;  
     break;  
     }  
   
   if (space > 0 && cc >= end)  
     private_data_length += sizeof(sljit_sw) * space;  
   
   if (size != 0)  
     {  
     if (size < 0)  
       {  
       cc += -size;  
 #ifdef SUPPORT_UTF  
       if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
       }  
     else  
       cc += size;  
     }  
   
   if (bracketlen != 0)  
     {  
     if (cc >= end)  
       {  
       end = bracketend(cc);  
       if (end[-1 - LINK_SIZE] == OP_KET)  
         end = NULL;  
       }  
     cc += bracketlen;  
     }  
   }  
 return private_data_length;  
 }  
   
 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)  
 {  
 pcre_uchar *cc = common->start;  
 pcre_uchar *alternative;  
 pcre_uchar *end = NULL;  
 int space, size, bracketlen;  
   
 while (cc < ccend)  
   {  
   space = 0;  
   size = 0;  
   bracketlen = 0;  
   switch(*cc)  
     {  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
     case OP_SCOND:  
     common->private_data_ptrs[cc - common->start] = private_data_ptr;  
     private_data_ptr += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE;  
     break;  
   
     case OP_CBRAPOS:  
     case OP_SCBRAPOS:  
     common->private_data_ptrs[cc - common->start] = private_data_ptr;  
     private_data_ptr += sizeof(sljit_sw);  
     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
   
     case OP_COND:  
     /* Might be a hidden SCOND. */  
     alternative = cc + GET(cc, 1);  
     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
       {  
       common->private_data_ptrs[cc - common->start] = private_data_ptr;  
       private_data_ptr += sizeof(sljit_sw);  
1140        }        }
1141      bracketlen = 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1142      break;      break;
# Line 1074  while (cc < ccend) Line 1201  while (cc < ccend)
1201      break;      break;
1202      }      }
1203    
1204      /* Character iterators, which are not inside a repeated bracket,
1205         gets a private slot instead of allocating it on the stack. */
1206    if (space > 0 && cc >= end)    if (space > 0 && cc >= end)
1207      {      {
1208      common->private_data_ptrs[cc - common->start] = private_data_ptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
# Line 1104  while (cc < ccend) Line 1233  while (cc < ccend)
1233      cc += bracketlen;      cc += bracketlen;
1234      }      }
1235    }    }
1236    *private_data_start = private_data_ptr;
1237  }  }
1238    
1239  /* Returns with a frame_types (always < 0) if no need for frame. */  /* Returns with a frame_types (always < 0) if no need for frame. */
1240  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1241  {  {
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1242  int length = 0;  int length = 0;
1243  int possessive = 0;  int possessive = 0;
1244  BOOL stack_restore = FALSE;  BOOL stack_restore = FALSE;
# Line 1118  BOOL setmark_found = recursive; Line 1247  BOOL setmark_found = recursive;
1247  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
1248  BOOL capture_last_found = FALSE;  BOOL capture_last_found = FALSE;
1249    
1250  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1251    SLJIT_ASSERT(common->control_head_ptr != 0);
1252    *needs_control_head = TRUE;
1253    #else
1254    *needs_control_head = FALSE;
1255    #endif
1256    
1257    if (ccend == NULL)
1258    {    {
1259    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;    ccend = bracketend(cc) - (1 + LINK_SIZE);
1260    /* This is correct regardless of common->capture_last_ptr. */    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1261    capture_last_found = TRUE;      {
1262        possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1263        /* This is correct regardless of common->capture_last_ptr. */
1264        capture_last_found = TRUE;
1265        }
1266      cc = next_opcode(common, cc);
1267    }    }
1268    
 cc = next_opcode(common, cc);  
1269  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1270  while (cc < ccend)  while (cc < ccend)
1271    switch(*cc)    switch(*cc)
# Line 1142  while (cc < ccend) Line 1282  while (cc < ccend)
1282      break;      break;
1283    
1284      case OP_MARK:      case OP_MARK:
1285        case OP_PRUNE_ARG:
1286        case OP_THEN_ARG:
1287      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1288      stack_restore = TRUE;      stack_restore = TRUE;
1289      if (!setmark_found)      if (!setmark_found)
# Line 1149  while (cc < ccend) Line 1291  while (cc < ccend)
1291        length += 2;        length += 2;
1292        setmark_found = TRUE;        setmark_found = TRUE;
1293        }        }
1294        if (common->control_head_ptr != 0)
1295          *needs_control_head = TRUE;
1296      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
1297      break;      break;
1298    
# Line 1268  if (length > 0) Line 1412  if (length > 0)
1412  return stack_restore ? no_frame : no_stack;  return stack_restore ? no_frame : no_stack;
1413  }  }
1414    
1415  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1416  {  {
1417  DEFINE_COMPILER;  DEFINE_COMPILER;
 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);  
1418  BOOL setsom_found = recursive;  BOOL setsom_found = recursive;
1419  BOOL setmark_found = recursive;  BOOL setmark_found = recursive;
1420  /* The last capture is a local variable even for recursions. */  /* The last capture is a local variable even for recursions. */
# Line 1283  SLJIT_UNUSED_ARG(stacktop); Line 1426  SLJIT_UNUSED_ARG(stacktop);
1426  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1427    
1428  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
1429  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (ccend == NULL)
1430    cc = next_opcode(common, cc);    {
1431      ccend = bracketend(cc) - (1 + LINK_SIZE);
1432      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1433        cc = next_opcode(common, cc);
1434      }
1435    
1436  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1437  while (cc < ccend)  while (cc < ccend)
1438    switch(*cc)    switch(*cc)
# Line 1304  while (cc < ccend) Line 1452  while (cc < ccend)
1452      break;      break;
1453    
1454      case OP_MARK:      case OP_MARK:
1455        case OP_PRUNE_ARG:
1456        case OP_THEN_ARG:
1457      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1458      if (!setmark_found)      if (!setmark_found)
1459        {        {
# Line 1384  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), st Line 1534  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), st
1534  SLJIT_ASSERT(stackpos == STACK(stacktop));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1535  }  }
1536    
1537  static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1538  {  {
1539  int private_data_length = 2;  int private_data_length = needs_control_head ? 3 : 2;
1540  int size;  int size;
1541  pcre_uchar *alternative;  pcre_uchar *alternative;
1542  /* Calculate the sum of the private machine words. */  /* Calculate the sum of the private machine words. */
# Line 1395  while (cc < ccend) Line 1545  while (cc < ccend)
1545    size = 0;    size = 0;
1546    switch(*cc)    switch(*cc)
1547      {      {
1548        case OP_KET:
1549        if (PRIVATE_DATA(cc) != 0)
1550          private_data_length++;
1551        cc += 1 + LINK_SIZE;
1552        break;
1553    
1554      case OP_ASSERT:      case OP_ASSERT:
1555      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1556      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 1499  return private_data_length; Line 1655  return private_data_length;
1655  }  }
1656    
1657  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1658    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1659  {  {
1660  DEFINE_COMPILER;  DEFINE_COMPILER;
1661  int srcw[2];  int srcw[2];
# Line 1520  stacktop = STACK(stacktop - 1); Line 1676  stacktop = STACK(stacktop - 1);
1676    
1677  if (!save)  if (!save)
1678    {    {
1679    stackptr += sizeof(sljit_sw);    stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1680    if (stackptr < stacktop)    if (stackptr < stacktop)
1681      {      {
1682      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
# Line 1536  if (!save) Line 1692  if (!save)
1692    /* The tmp1next must be TRUE in either way. */    /* The tmp1next must be TRUE in either way. */
1693    }    }
1694    
1695  while (status != end)  do
1696    {    {
1697    count = 0;    count = 0;
1698    switch(status)    switch(status)
# Line 1545  while (status != end) Line 1701  while (status != end)
1701      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1702      count = 1;      count = 1;
1703      srcw[0] = common->recursive_head_ptr;      srcw[0] = common->recursive_head_ptr;
1704        if (needs_control_head)
1705          {
1706          SLJIT_ASSERT(common->control_head_ptr != 0);
1707          count = 2;
1708          srcw[1] = common->control_head_ptr;
1709          }
1710      status = loop;      status = loop;
1711      break;      break;
1712    
# Line 1557  while (status != end) Line 1719  while (status != end)
1719    
1720      switch(*cc)      switch(*cc)
1721        {        {
1722          case OP_KET:
1723          if (PRIVATE_DATA(cc) != 0)
1724            {
1725            count = 1;
1726            srcw[0] = PRIVATE_DATA(cc);
1727            }
1728          cc += 1 + LINK_SIZE;
1729          break;
1730    
1731        case OP_ASSERT:        case OP_ASSERT:
1732        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
1733        case OP_ASSERTBACK:        case OP_ASSERTBACK:
# Line 1769  while (status != end) Line 1940  while (status != end)
1940        }        }
1941      }      }
1942    }    }
1943    while (status != end);
1944    
1945  if (save)  if (save)
1946    {    {
# Line 1802  if (save) Line 1974  if (save)
1974  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1975  }  }
1976    
1977    static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1978    {
1979    pcre_uchar *end = bracketend(cc);
1980    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1981    
1982    /* Assert captures then. */
1983    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1984      current_offset = NULL;
1985    /* Conditional block does not. */
1986    if (*cc == OP_COND || *cc == OP_SCOND)
1987      has_alternatives = FALSE;
1988    
1989    cc = next_opcode(common, cc);
1990    if (has_alternatives)
1991      current_offset = common->then_offsets + (cc - common->start);
1992    
1993    while (cc < end)
1994      {
1995      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1996        cc = set_then_offsets(common, cc, current_offset);
1997      else
1998        {
1999        if (*cc == OP_ALT && has_alternatives)
2000          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2001        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2002          *current_offset = 1;
2003        cc = next_opcode(common, cc);
2004        }
2005      }
2006    
2007    return end;
2008    }
2009    
2010  #undef CASE_ITERATOR_PRIVATE_DATA_1  #undef CASE_ITERATOR_PRIVATE_DATA_1
2011  #undef CASE_ITERATOR_PRIVATE_DATA_2A  #undef CASE_ITERATOR_PRIVATE_DATA_2A
2012  #undef CASE_ITERATOR_PRIVATE_DATA_2B  #undef CASE_ITERATOR_PRIVATE_DATA_2B
# Line 1865  while (list_item) Line 2070  while (list_item)
2070  common->stubs = NULL;  common->stubs = NULL;
2071  }  }
2072    
2073  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  static void add_label_addr(compiler_common *common)
2074  {  {
2075  DEFINE_COMPILER;  DEFINE_COMPILER;
2076    label_addr_list *label_addr;
2077    
2078  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2079    if (label_addr == NULL)
2080      return;
2081    label_addr->label = LABEL();
2082    label_addr->addr = common->read_only_data_ptr;
2083    label_addr->next = common->label_addrs;
2084    common->label_addrs = label_addr;
2085    common->read_only_data_ptr++;
2086    }
2087    
2088    static SLJIT_INLINE void count_match(compiler_common *common)
2089    {
2090    DEFINE_COMPILER;
2091    
2092    OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2093  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2094  }  }
2095    
# Line 1900  static SLJIT_INLINE void reset_ovector(c Line 2120  static SLJIT_INLINE void reset_ovector(c
2120  DEFINE_COMPILER;  DEFINE_COMPILER;
2121  struct sljit_label *loop;  struct sljit_label *loop;
2122  int i;  int i;
2123    
2124  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
2125    SLJIT_ASSERT(length > 1);
2126  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
2127  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2128  if (length < 8)  if (length < 8)
2129    {    {
2130    for (i = 0; i < length; i++)    for (i = 1; i < length; i++)
2131      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2132    }    }
2133  else  else
2134    {    {
2135    GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));    GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2136    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2137    loop = LABEL();    loop = LABEL();
2138    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2139    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
# Line 1919  else Line 2141  else
2141    }    }
2142  }  }
2143    
2144    static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2145    {
2146    DEFINE_COMPILER;
2147    struct sljit_label *loop;
2148    int i;
2149    
2150    SLJIT_ASSERT(length > 1);
2151    /* OVECTOR(1) contains the "string begin - 1" constant. */
2152    if (length > 2)
2153      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2154    if (length < 8)
2155      {
2156      for (i = 2; i < length; i++)
2157        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2158      }
2159    else
2160      {
2161      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2162      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2163      loop = LABEL();
2164      OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2165      OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2166      JUMPTO(SLJIT_C_NOT_ZERO, loop);
2167      }
2168    
2169    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2170    if (common->mark_ptr != 0)
2171      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2172    if (common->control_head_ptr != 0)
2173      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2174    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2175    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2176    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2177    }
2178    
2179    static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2180    {
2181    while (current != NULL)
2182      {
2183      switch (current[-2])
2184        {
2185        case type_then_trap:
2186        break;
2187    
2188        case type_mark:
2189        if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2190          return current[-4];
2191        break;
2192    
2193        default:
2194        SLJIT_ASSERT_STOP();
2195        break;
2196        }
2197      current = (sljit_sw*)current[-1];
2198      }
2199    return -1;
2200    }
2201    
2202  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2203  {  {
2204  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 1975  DEFINE_COMPILER; Line 2255  DEFINE_COMPILER;
2255  struct sljit_jump *jump;  struct sljit_jump *jump;
2256    
2257  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2258  SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));  SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2259      && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2260    
2261  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2262  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
# Line 1987  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJI Line 2268  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJI
2268  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2269    
2270  jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);  jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2271  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr + sizeof(sljit_sw), SLJIT_SAVED_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2272  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2273  OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);  OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2274  #endif  #endif
# Line 2155  return (bit < 256) ? ((0 << 8) | bit) : Line 2436  return (bit < 256) ? ((0 << 8) | bit) :
2436    
2437  static void check_partial(compiler_common *common, BOOL force)  static void check_partial(compiler_common *common, BOOL force)
2438  {  {
2439  /* Checks whether a partial matching is occured. Does not modify registers. */  /* Checks whether a partial matching is occurred. Does not modify registers. */
2440  DEFINE_COMPILER;  DEFINE_COMPILER;
2441  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
2442    
# Line 2242  else Line 2523  else
2523  JUMPHERE(jump);  JUMPHERE(jump);
2524  }  }
2525    
2526  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common, pcre_uint32 max)
2527  {  {
2528  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2529  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2530  DEFINE_COMPILER;  DEFINE_COMPILER;
2531  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2532  struct sljit_jump *jump;  struct sljit_jump *jump;
2533  #endif  #endif
2534    
2535    SLJIT_UNUSED_ARG(max);
2536    
2537  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2538  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2539  if (common->utf)  if (common->utf)
2540    {    {
2541  #if defined COMPILE_PCRE8    if (max < 128) return;
2542    
2543    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2544  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2545    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2546      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2547    JUMPHERE(jump);    JUMPHERE(jump);
2548    }    }
2549  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2550    
2551    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2552    if (common->utf)
2553      {
2554      if (max < 0xd800) return;
2555    
2556      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2557      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2558      /* TMP2 contains the high surrogate. */
2559      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2560      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2561      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2562      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2563      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2564      JUMPHERE(jump);
2565      }
2566    #endif
2567    }
2568    
2569    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2570    
2571    static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2572    {
2573    /* Tells whether the character codes below 128 are enough
2574    to determine a match. */
2575    const pcre_uint8 value = nclass ? 0xff : 0;
2576    const pcre_uint8* end = bitset + 32;
2577    
2578    bitset += 16;
2579    do
2580      {
2581      if (*bitset++ != value)
2582        return FALSE;
2583      }
2584    while (bitset < end);
2585    return TRUE;
2586    }
2587    
2588    static void read_char7_type(compiler_common *common, BOOL full_read)
2589    {
2590    /* Reads the precise character type of a character into TMP1, if the character
2591    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2592    full_read argument tells whether characters above max are accepted or not. */
2593    DEFINE_COMPILER;
2594    struct sljit_jump *jump;
2595    
2596    SLJIT_ASSERT(common->utf);
2597    
2598    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2599  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600    
2601    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2602    
2603    if (full_read)
2604      {
2605      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2606      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2607      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2608      JUMPHERE(jump);
2609      }
2610  }  }
2611    
2612  static void peek_char(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2613    
2614    static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2615  {  {
2616  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the precise value of a character into TMP1, if the character is
2617  Does not check STR_END. TMP2 Destroyed. */  between min and max (c >= min && c <= max). Otherwise it returns with a value
2618    outside the range. Does not check STR_END. */
2619  DEFINE_COMPILER;  DEFINE_COMPILER;
2620  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2621  struct sljit_jump *jump;  struct sljit_jump *jump;
2622  #endif  #endif
2623    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2624    struct sljit_jump *jump2;
2625    #endif
2626    
2627  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  SLJIT_UNUSED_ARG(update_str_ptr);
2628  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  SLJIT_UNUSED_ARG(min);
2629    SLJIT_UNUSED_ARG(max);
2630    SLJIT_ASSERT(min <= max);
2631    
2632    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2633    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2634    
2635    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2636  if (common->utf)  if (common->utf)
2637    {    {
2638  #if defined COMPILE_PCRE8    if (max < 128 && !update_str_ptr) return;
2639    
2640    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2641  #elif defined COMPILE_PCRE16    if (min >= 0x10000)
2642    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      {
2643  #endif /* COMPILE_PCRE[8|16] */      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2644    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));      if (update_str_ptr)
2645    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);        OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2646        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2647        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2648        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2649        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2650        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2651        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2652        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2653        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2654        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2655        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2656        if (!update_str_ptr)
2657          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2658        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2659        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661        JUMPHERE(jump2);
2662        if (update_str_ptr)
2663          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2664        }
2665      else if (min >= 0x800 && max <= 0xffff)
2666        {
2667        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2668        if (update_str_ptr)
2669          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2670        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2671        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2672        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2673        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2674        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2675        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2676        if (!update_str_ptr)
2677          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2678        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2679        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2680        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681        JUMPHERE(jump2);
2682        if (update_str_ptr)
2683          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2684        }
2685      else if (max >= 0x800)
2686        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2687      else if (max < 128)
2688        {
2689        OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2690        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2691        }
2692      else
2693        {
2694        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2695        if (!update_str_ptr)
2696          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697        else
2698          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2699        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2701        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2702        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2703        if (update_str_ptr)
2704          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2705        }
2706    JUMPHERE(jump);    JUMPHERE(jump);
2707    }    }
2708  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2709    
2710    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2711    if (common->utf)
2712      {
2713      if (max >= 0x10000)
2714        {
2715        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2716        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2717        /* TMP2 contains the high surrogate. */
2718        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2719        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2720        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2721        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2722        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2723        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2724        JUMPHERE(jump);
2725        return;
2726        }
2727    
2728      if (max < 0xd800 && !update_str_ptr) return;
2729    
2730      /* Skip low surrogate if necessary. */
2731      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2732      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2733      if (update_str_ptr)
2734        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735      if (max >= 0xd800)
2736        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2737      JUMPHERE(jump);
2738      }
2739    #endif
2740    }
2741    
2742    static SLJIT_INLINE void read_char(compiler_common *common)
2743    {
2744    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2745  }  }
2746    
2747  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2748  {  {
2749  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2750  DEFINE_COMPILER;  DEFINE_COMPILER;
2751  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2752  struct sljit_jump *jump;  struct sljit_jump *jump;
2753  #endif  #endif
2754    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2755    struct sljit_jump *jump2;
2756    #endif
2757    
2758  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(update_str_ptr);
2759    
2760    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2761    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2762    
2763    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2764  if (common->utf)  if (common->utf)
2765    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2766    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2767    it is needed in most cases. */    it is needed in most cases. */
2768    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2769    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2770    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!update_str_ptr)
2771    JUMPHERE(jump);      {
2772  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2773    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2774    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2775    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2776    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2777    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2778    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2779    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2780    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2781    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2782    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2783  #elif defined COMPILE_PCRE32    else
2784    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2785    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2786    return;    return;
2787    }    }
2788  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2789  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2790  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2791  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2792  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2793  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2794  #endif  #endif
2795  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2796  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2797  JUMPHERE(jump);  JUMPHERE(jump);
2798  #endif  #endif
2799    
2800    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2801    if (common->utf && update_str_ptr)
2802      {
2803      /* Skip low surrogate if necessary. */
2804      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2805      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2806      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2807      JUMPHERE(jump);
2808      }
2809    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2810  }  }
2811    
2812  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2380  if (common->utf) Line 2844  if (common->utf)
2844  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2845  }  }
2846    
2847  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2848  {  {
2849  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2850  DEFINE_COMPILER;  DEFINE_COMPILER;
2851    struct sljit_jump *jump;
2852    
2853  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2854    {    {
2855    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2856    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2857    }    }
2858  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2859    {    {
2860    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2861    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2862    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2863    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2864    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2865      else
2866        {
2867        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2868        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2869        JUMPHERE(jump);
2870        }
2871    }    }
2872  else  else
2873    {    {
2874    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2875    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2876    }    }
2877  }  }
2878    
# Line 2411  else Line 2882  else
2882  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2883  {  {
2884  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2885  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2886  DEFINE_COMPILER;  DEFINE_COMPILER;
2887  struct sljit_jump *jump;  struct sljit_jump *jump;
2888    
2889  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2890    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2891    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2892    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2893    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2894    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2895    
2896  /* Searching for the first zero. */  /* Searching for the first zero. */
2897  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2898  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2899  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2900  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2901  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2902    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2903    
2904    JUMPHERE(jump);
2905    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2906    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2907  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2908  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2909  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2910    
2911  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2912  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2913  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2914  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2915  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2916  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2917    
2918  /* Four byte sequence. */  /* Four byte sequence. */
2919  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2920  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2921  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2922    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2923    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2924  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2925  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2926  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2927    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2928    }
2929    
2930    static void do_utfreadchar16(compiler_common *common)
2931    {
2932    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2933    of the character (>= 0xc0). Return value in TMP1. */
2934    DEFINE_COMPILER;
2935    struct sljit_jump *jump;
2936    
2937    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2938    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2939    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2940    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2941  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2942  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2943  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2944  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2945    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2946    jump = JUMP(SLJIT_C_NOT_ZERO);
2947    /* Two byte sequence. */
2948    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2949    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2950    
2951    JUMPHERE(jump);
2952    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2953    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2954    /* This code runs only in 8 bit mode. No need to shift the value. */
2955    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2956    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2957    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2958    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2959  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2960  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2961  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2962    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2963  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2964  }  }
2965    
# Line 2482  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2979  jump = JUMP(SLJIT_C_NOT_ZERO);
2979  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2980  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2981  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2982    /* The upper 5 bits are known at this point. */
2983    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2984  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2985  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2986  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2987  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2988  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2989    
2990  JUMPHERE(compare);  JUMPHERE(compare);
2991  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2992  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2993    
2994  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2995  JUMPHERE(jump);  JUMPHERE(jump);
2996  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2997  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2998  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2999  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3000  }  }
3001    
3002  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
3003    
3004  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
3005    
# Line 2603  if (firstline) Line 3074  if (firstline)
3074      mainloop = LABEL();      mainloop = LABEL();
3075      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
3076      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3077      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
3078      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
3079      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3080      JUMPHERE(end);      JUMPHERE(end);
# Line 2679  if (newlinecheck) Line 3150  if (newlinecheck)
3150  return mainloop;  return mainloop;
3151  }  }
3152    
3153  #define MAX_N_CHARS 3  #define MAX_N_CHARS 16
3154    #define MAX_N_BYTES 8
3155    
3156  static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3157  {  {
3158  DEFINE_COMPILER;  pcre_uint8 len = bytes[0];
3159  struct sljit_label *start;  int i;
 struct sljit_jump *quit;  
 pcre_uint32 chars[MAX_N_CHARS * 2];  
 pcre_uchar *cc = common->start + 1 + LINK_SIZE;  
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
3160    
3161  /* We do not support alternatives now. */  if (len == 255)
3162  if (*(common->start + GET(common->start, 1)) == OP_ALT)    return;
   return FALSE;  
3163    
3164    if (len == 0)
3165      {
3166      bytes[0] = 1;
3167      bytes[1] = byte;
3168      return;
3169      }
3170    
3171    for (i = len; i > 0; i--)
3172      if (bytes[i] == byte)
3173        return;
3174    
3175    if (len >= MAX_N_BYTES - 1)
3176      {
3177      bytes[0] = 255;
3178      return;
3179      }
3180    
3181    len++;
3182    bytes[len] = byte;
3183    bytes[0] = len;
3184    }
3185    
3186    static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3187    {
3188    /* Recursive function, which scans prefix literals. */
3189    BOOL last, any, caseless;
3190    int len, repeat, len_save, consumed = 0;
3191    pcre_uint32 chr, mask;
3192    pcre_uchar *alternative, *cc_save, *oc;
3193    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3194    pcre_uchar othercase[8];
3195    #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3196    pcre_uchar othercase[2];
3197    #else
3198    pcre_uchar othercase[1];
3199    #endif
3200    
3201    repeat = 1;
3202  while (TRUE)  while (TRUE)
3203    {    {
3204    caseless = 0;    last = TRUE;
3205    must_stop = 1;    any = FALSE;
3206    switch(*cc)    caseless = FALSE;
3207      switch (*cc)
3208      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3209      case OP_CHARI:      case OP_CHARI:
3210      caseless = 1;      caseless = TRUE;
3211      must_stop = 0;      case OP_CHAR:
3212        last = FALSE;
3213      cc++;      cc++;
3214      break;      break;
3215    
# Line 2728  while (TRUE) Line 3228  while (TRUE)
3228      cc++;      cc++;
3229      continue;      continue;
3230    
3231        case OP_ASSERT:
3232        case OP_ASSERT_NOT:
3233        case OP_ASSERTBACK:
3234        case OP_ASSERTBACK_NOT:
3235        cc = bracketend(cc);
3236        continue;
3237    
3238        case OP_PLUSI:
3239        case OP_MINPLUSI:
3240        case OP_POSPLUSI:
3241        caseless = TRUE;
3242      case OP_PLUS:      case OP_PLUS:
3243      case OP_MINPLUS:      case OP_MINPLUS:
3244      case OP_POSPLUS:      case OP_POSPLUS:
3245      cc++;      cc++;
3246      break;      break;
3247    
3248        case OP_EXACTI:
3249        caseless = TRUE;
3250      case OP_EXACT:      case OP_EXACT:
3251        repeat = GET2(cc, 1);
3252        last = FALSE;
3253      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3254      break;      break;
3255    
3256      case OP_PLUSI:      case OP_QUERYI:
3257      case OP_MINPLUSI:      case OP_MINQUERYI:
3258      case OP_POSPLUSI:      case OP_POSQUERYI:
3259      caseless = 1;      caseless = TRUE;
3260        case OP_QUERY:
3261        case OP_MINQUERY:
3262        case OP_POSQUERY:
3263        len = 1;
3264      cc++;      cc++;
3265    #ifdef SUPPORT_UTF
3266        if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3267    #endif
3268        max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3269        if (max_chars == 0)
3270          return consumed;
3271        last = FALSE;
3272      break;      break;
3273    
3274      case OP_EXACTI:      case OP_KET:
3275      caseless = 1;      cc += 1 + LINK_SIZE;
3276        continue;
3277    
3278        case OP_ALT:
3279        cc += GET(cc, 1);
3280        continue;
3281    
3282        case OP_ONCE:
3283        case OP_ONCE_NC:
3284        case OP_BRA:
3285        case OP_BRAPOS:
3286        case OP_CBRA:
3287        case OP_CBRAPOS:
3288        alternative = cc + GET(cc, 1);
3289        while (*alternative == OP_ALT)
3290          {
3291          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3292          if (max_chars == 0)
3293            return consumed;
3294          alternative += GET(alternative, 1);
3295          }
3296    
3297        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3298          cc += IMM2_SIZE;
3299        cc += 1 + LINK_SIZE;
3300        continue;
3301    
3302        case OP_CLASS:
3303    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3304        if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3305    #endif
3306        any = TRUE;
3307        cc += 1 + 32 / sizeof(pcre_uchar);
3308        break;
3309    
3310        case OP_NCLASS:
3311    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3312        if (common->utf) return consumed;
3313    #endif
3314        any = TRUE;
3315        cc += 1 + 32 / sizeof(pcre_uchar);
3316        break;
3317    
3318    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3319        case OP_XCLASS:
3320    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3321        if (common->utf) return consumed;
3322    #endif
3323        any = TRUE;
3324        cc += GET(cc, 1);
3325        break;
3326    #endif
3327    
3328        case OP_DIGIT:
3329    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3330        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3331          return consumed;
3332    #endif
3333        any = TRUE;
3334        cc++;
3335        break;
3336    
3337        case OP_WHITESPACE:
3338    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3339        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3340          return consumed;
3341    #endif
3342        any = TRUE;
3343        cc++;
3344        break;
3345    
3346        case OP_WORDCHAR:
3347    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3348        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3349          return consumed;
3350    #endif
3351        any = TRUE;
3352        cc++;
3353        break;
3354    
3355        case OP_NOT:
3356        case OP_NOTI:
3357        cc++;
3358        /* Fall through. */
3359        case OP_NOT_DIGIT:
3360        case OP_NOT_WHITESPACE:
3361        case OP_NOT_WORDCHAR:
3362        case OP_ANY:
3363        case OP_ALLANY:
3364    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3365        if (common->utf) return consumed;
3366    #endif
3367        any = TRUE;
3368        cc++;
3369        break;
3370    
3371    #ifdef SUPPORT_UCP
3372        case OP_NOTPROP:
3373        case OP_PROP:
3374    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375        if (common->utf) return consumed;
3376    #endif
3377        any = TRUE;
3378        cc += 1 + 2;
3379        break;
3380    #endif
3381    
3382        case OP_TYPEEXACT:
3383        repeat = GET2(cc, 1);
3384      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3385        continue;
3386    
3387        case OP_NOTEXACT:
3388        case OP_NOTEXACTI:
3389    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3390        if (common->utf) return consumed;
3391    #endif
3392        any = TRUE;
3393        repeat = GET2(cc, 1);
3394        cc += 1 + IMM2_SIZE + 1;
3395      break;      break;
3396    
3397      default:      default:
3398      must_stop = 2;      return consumed;
     break;  
3399      }      }
3400    
3401    if (must_stop == 2)    if (any)
3402        break;      {
3403    #if defined COMPILE_PCRE8
3404        mask = 0xff;
3405    #elif defined COMPILE_PCRE16
3406        mask = 0xffff;
3407    #elif defined COMPILE_PCRE32
3408        mask = 0xffffffff;
3409    #else
3410        SLJIT_ASSERT_STOP();
3411    #endif
3412    
3413        do
3414          {
3415          chars[0] = mask;
3416          chars[1] = mask;
3417          bytes[0] = 255;
3418    
3419          consumed++;
3420          if (--max_chars == 0)
3421            return consumed;
3422          chars += 2;
3423          bytes += MAX_N_BYTES;
3424          }
3425        while (--repeat > 0);
3426    
3427        repeat = 1;
3428        continue;
3429        }
3430    
3431    len = 1;    len = 1;
3432  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3433    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3434  #endif  #endif
3435    
3436    if (caseless && char_has_othercase(common, cc))    if (caseless && char_has_othercase(common, cc))
3437      {      {
3438      caseless = char_get_othercase_bit(common, cc);  #ifdef SUPPORT_UTF
3439      if (caseless == 0)      if (common->utf)
3440        return FALSE;        {
3441  #ifdef COMPILE_PCRE8        GETCHAR(chr, cc);
3442      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));        if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3443  #else          return consumed;
3444      if ((caseless & 0x100) != 0)        }
       caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));  
3445      else      else
       caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));  
3446  #endif  #endif
3447          {
3448          chr = *cc;
3449          othercase[0] = TABLE_GET(chr, common->fcc, chr);
3450          }
3451      }      }
3452    else    else
3453      caseless = 0;      caseless = FALSE;
3454    
3455    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3456      cc_save = cc;
3457      while (TRUE)
3458      {      {
3459      c = *cc;      oc = othercase;
3460      bit = 0;      do
     if (len == (caseless & 0xff))  
3461        {        {
3462        bit = caseless >> 8;        chr = *cc;
3463        c |= bit;  #ifdef COMPILE_PCRE32
3464          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3465            return consumed;
3466    #endif
3467          add_prefix_byte((pcre_uint8)chr, bytes);
3468    
3469          mask = 0;
3470          if (caseless)
3471            {
3472            add_prefix_byte((pcre_uint8)*oc, bytes);
3473            mask = *cc ^ *oc;
3474            chr |= mask;
3475            }
3476    
3477    #ifdef COMPILE_PCRE32
3478          if (chars[0] == NOTACHAR && chars[1] == 0)
3479    #else
3480          if (chars[0] == NOTACHAR)
3481    #endif
3482            {
3483            chars[0] = chr;
3484            chars[1] = mask;
3485            }
3486          else
3487            {
3488            mask |= chars[0] ^ chr;
3489            chr |= mask;
3490            chars[0] = chr;
3491            chars[1] |= mask;
3492            }
3493    
3494          len--;
3495          consumed++;
3496          if (--max_chars == 0)
3497            return consumed;
3498          chars += 2;
3499          bytes += MAX_N_BYTES;
3500          cc++;
3501          oc++;
3502        }        }
3503        while (len > 0);
3504    
3505      chars[location] = c;      if (--repeat == 0)
3506      chars[location + 1] = bit;        break;
3507    
3508      len--;      len = len_save;
3509      location += 2;      cc = cc_save;
3510      cc++;      }
3511    
3512      repeat = 1;
3513      if (last)
3514        return consumed;
3515      }
3516    }
3517    
3518    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3519    {
3520    DEFINE_COMPILER;
3521    struct sljit_label *start;
3522    struct sljit_jump *quit;
3523    pcre_uint32 chars[MAX_N_CHARS * 2];
3524    pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3525    pcre_uint8 ones[MAX_N_CHARS];
3526    int offsets[3];
3527    pcre_uint32 mask;
3528    pcre_uint8 *byte_set, *byte_set_end;
3529    int i, max, from;
3530    int range_right = -1, range_len = 3 - 1;
3531    sljit_ub *update_table = NULL;
3532    BOOL in_range;
3533    
3534    /* This is even TRUE, if both are NULL. */
3535    SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3536    
3537    for (i = 0; i < MAX_N_CHARS; i++)
3538      {
3539      chars[i << 1] = NOTACHAR;
3540      chars[(i << 1) + 1] = 0;
3541      bytes[i * MAX_N_BYTES] = 0;
3542      }
3543    
3544    max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3545    
3546    if (max <= 1)
3547      return FALSE;
3548    
3549    for (i = 0; i < max; i++)
3550      {
3551      mask = chars[(i << 1) + 1];
3552      ones[i] = ones_in_half_byte[mask & 0xf];
3553      mask >>= 4;
3554      while (mask != 0)
3555        {
3556        ones[i] += ones_in_half_byte[mask & 0xf];
3557        mask >>= 4;
3558      }      }
3559      }
3560    
3561    if (location >= MAX_N_CHARS * 2 || must_stop != 0)  in_range = FALSE;
3562    from = 0;   /* Prevent compiler "uninitialized" warning */
3563    for (i = 0; i <= max; i++)
3564      {
3565      if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3566        {
3567        range_len = i - from;
3568        range_right = i - 1;
3569        }
3570    
3571      if (i < max && bytes[i * MAX_N_BYTES] < 255)
3572        {
3573        if (!in_range)
3574          {
3575          in_range = TRUE;
3576          from = i;
3577          }
3578        }
3579      else if (in_range)
3580        in_range = FALSE;
3581      }
3582    
3583    if (range_right >= 0)
3584      {
3585      /* Since no data is consumed (see the assert in the beginning
3586      of this function), this space can be reallocated. */
3587      if (common->read_only_data)
3588        SLJIT_FREE(common->read_only_data);
3589    
3590      common->read_only_data_size += 256;
3591      common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3592      if (common->read_only_data == NULL)
3593        return TRUE;
3594    
3595      update_table = (sljit_ub *)common->read_only_data;
3596      common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3597      memset(update_table, IN_UCHARS(range_len), 256);
3598    
3599      for (i = 0; i < range_len; i++)
3600        {
3601        byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3602        SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3603        byte_set_end = byte_set + byte_set[0];
3604        byte_set++;
3605        while (byte_set <= byte_set_end)
3606          {
3607          if (update_table[*byte_set] > IN_UCHARS(i))
3608            update_table[*byte_set] = IN_UCHARS(i);
3609          byte_set++;
3610          }
3611        }
3612      }
3613    
3614    offsets[0] = -1;
3615    /* Scan forward. */
3616    for (i = 0; i < max; i++)
3617      if (ones[i] <= 2) {
3618        offsets[0] = i;
3619      break;      break;
3620    }    }
3621    
3622  /* At least two characters are required. */  if (offsets[0] < 0 && range_right < 0)
3623  if (location < 2 * 2)    return FALSE;
3624    
3625    if (offsets[0] >= 0)
3626      {
3627      /* Scan backward. */
3628      offsets[1] = -1;
3629      for (i = max - 1; i > offsets[0]; i--)
3630        if (ones[i] <= 2 && i != range_right)
3631          {
3632          offsets[1] = i;
3633          break;
3634          }
3635    
3636      /* This case is handled better by fast_forward_first_char. */
3637      if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3638      return FALSE;      return FALSE;
3639    
3640      offsets[2] = -1;
3641      /* We only search for a middle character if there is no range check. */
3642      if (offsets[1] >= 0 && range_right == -1)
3643        {
3644        /* Scan from middle. */
3645        for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3646          if (ones[i] <= 2)
3647            {
3648            offsets[2] = i;
3649            break;
3650            }
3651    
3652        if (offsets[2] == -1)
3653          {
3654          for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3655            if (ones[i] <= 2)
3656              {
3657              offsets[2] = i;
3658              break;
3659              }
3660          }
3661        }
3662    
3663      SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3664      SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3665    
3666      chars[0] = chars[offsets[0] << 1];
3667      chars[1] = chars[(offsets[0] << 1) + 1];
3668      if (offsets[2] >= 0)
3669        {
3670        chars[2] = chars[offsets[2] << 1];
3671        chars[3] = chars[(offsets[2] << 1) + 1];
3672        }
3673      if (offsets[1] >= 0)
3674        {
3675        chars[4] = chars[offsets[1] << 1];
3676        chars[5] = chars[(offsets[1] << 1) + 1];
3677        }
3678      }
3679    
3680    max -= 1;
3681  if (firstline)  if (firstline)
3682    {    {
3683    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3684      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3685    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3686    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3687      quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3688      OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3689      JUMPHERE(quit);
3690    }    }
3691  else  else
3692    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3693    
3694    #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3695    if (range_right >= 0)
3696      OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3697    #endif
3698    
3699  start = LABEL();  start = LABEL();
3700  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3701    
3702  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3703  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
3704  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  if (range_right >= 0)
3705  if (chars[1] != 0)    {
3706    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);  #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3707  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3708  if (location > 2 * 2)  #else
3709    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3710  if (chars[3] != 0)  #endif
3711    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  
3712  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3713  if (location > 2 * 2)    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3714    {  #else
3715    if (chars[5] != 0)    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3716      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);  #endif
3717    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3718      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3719      }
3720    
3721    if (offsets[0] >= 0)
3722      {
3723      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3724      if (offsets[1] >= 0)
3725        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3726      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3727    
3728      if (chars[1] != 0)
3729        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3730      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3731      if (offsets[2] >= 0)
3732        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3733    
3734      if (offsets[1] >= 0)
3735        {
3736        if (chars[5] != 0)
3737          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3738        CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3739        }
3740    
3741      if (offsets[2] >= 0)
3742        {
3743        if (chars[3] != 0)
3744          OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3745        CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3746        }
3747      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3748    }    }
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
3749    
3750  JUMPHERE(quit);  JUMPHERE(quit);
3751    
3752  if (firstline)  if (firstline)
3753      {
3754      if (range_right >= 0)
3755        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3756    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3757      if (range_right >= 0)
3758        {
3759        quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3760        OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3761        JUMPHERE(quit);
3762        }
3763      }
3764  else  else
3765    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3766  return TRUE;  return TRUE;
3767  }  }
3768    
3769  #undef MAX_N_CHARS  #undef MAX_N_CHARS
3770    #undef MAX_N_BYTES
3771    
3772  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3773  {  {
# Line 2952  if (common->nltype == NLTYPE_FIXED && co Line 3873  if (common->nltype == NLTYPE_FIXED && co
3873    JUMPHERE(lastchar);    JUMPHERE(lastchar);
3874    
3875    if (firstline)    if (firstline)
3876      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);      OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3877    return;    return;
3878    }    }
3879    
# Line 2962  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3883  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3883  skip_char_back(common);  skip_char_back(common);
3884    
3885  loop = LABEL();  loop = LABEL();
3886  read_char(common);  common->ff_newline_shortcut = loop;
3887    
3888    read_char_range(common, common->nlmin, common->nlmax, TRUE);
3889  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3890  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3891    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 2991  if (firstline) Line 3914  if (firstline)
3914    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3915  }  }
3916    
3917  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3918    
3919  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3920  {  {
3921  DEFINE_COMPILER;  DEFINE_COMPILER;
3922  struct sljit_label *start;  struct sljit_label *start;
3923  struct sljit_jump *quit;  struct sljit_jump *quit;
3924  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3925  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3926  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3927  struct sljit_jump *jump;  struct sljit_jump *jump;
3928  #endif  #endif
3929    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3930  if (firstline)  if (firstline)
3931    {    {
3932    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3024  if (common->utf) Line 3942  if (common->utf)
3942    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3943  #endif  #endif
3944    
3945  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3946    {    {
3947  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3948    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3033  if (!check_class_ranges(common, inverted Line 3951  if (!check_class_ranges(common, inverted
3951  #endif  #endif
3952    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3953    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3954    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3955    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3956    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3957    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3236  JUMPHERE(skipread); Line 4154  JUMPHERE(skipread);
4154    
4155  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4156  check_str_end(common, &skipread_list);  check_str_end(common, &skipread_list);
4157  peek_char(common);  peek_char(common, READ_CHAR_MAX);
4158    
4159  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
4160  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 3282  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 4200  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
4200  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4201  }  }
4202    
4203  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
4204  {  {
4205  DEFINE_COMPILER;  DEFINE_COMPILER;
4206  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
   
 if (ranges[0] < 0)  
   return FALSE;  
   
 switch(ranges[0])  
   {  
   case 1:  
   if (readch)  
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
   return TRUE;  
   
   case 2:  
   if (readch)  
     read_char(common);  
   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);  
   add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));  
   return TRUE;  
   
   case 4:  
   if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])  
     {  
     if (readch)  
       read_char(common);  
     if (ranges[1] != 0)  
       {  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       }  
     else  
       {  
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
     return TRUE;  
     }  
   if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
     {  
     if (readch)  
       read_char(common);  
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
     }  
   return FALSE;  
   
   default:  
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
   
 for (i = 1; i < 256; i++)  
   if ((ctypes[i] & flag) != bit)  
     {  
     if (length >= MAX_RANGE_SIZE)  
       return;  
     ranges[2 + length] = i;  
     length++;  
     bit ^= flag;  
     }  
   
 if (bit != 0)  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
4207  pcre_uint8 bit, cbit, all;  pcre_uint8 bit, cbit, all;
4208  int i, byte, length = 0;  int i, byte, length = 0;
4209    
4210  bit = bits[0] & 0x1;  bit = bits[0] & 0x1;
4211  ranges[1] = bit;  /* All bits will be zero or one (since bit is zero or one). */
 /* Can be 0 or 255. */  
4212  all = -bit;  all = -bit;
4213    
4214  for (i = 0; i < 256; )  for (i = 0; i < 256; )
# Line 3400  for (i = 0; i < 256; ) Line 4223  for (i = 0; i < 256; )
4223        {        {
4224        if (length >= MAX_RANGE_SIZE)        if (length >= MAX_RANGE_SIZE)
4225          return FALSE;          return FALSE;
4226        ranges[2 + length] = i;        ranges[length] = i;
4227        length++;        length++;
4228        bit = cbit;        bit = cbit;
4229        all = -cbit;        all = -cbit;
# Line 3413  if (((bit == 0) && nclass) || ((bit == 1 Line 4236  if (((bit == 0) && nclass) || ((bit == 1
4236    {    {
4237    if (length >= MAX_RANGE_SIZE)    if (length >= MAX_RANGE_SIZE)
4238      return FALSE;      return FALSE;
4239    ranges[2 + length] = 256;    ranges[length] = 256;
4240    length++;    length++;
4241    }    }
 ranges[0] = length;  
4242    
4243  return check_ranges(common, ranges, backtracks, FALSE);  if (length < 0 || length > 4)
4244      return FALSE;
4245    
4246    bit = bits[0] & 0x1;
4247    if (invert) bit ^= 0x1;
4248    
4249    /* No character is accepted. */
4250    if (length == 0 && bit == 0)
4251      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4252    
4253    switch(length)
4254      {
4255      case 0:
4256      /* When bit != 0, all characters are accepted. */
4257      return TRUE;
4258    
4259      case 1:
4260      add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4261      return TRUE;
4262    
4263      case 2:
4264      if (ranges[0] + 1 != ranges[1])
4265        {
4266        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4267        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4268        }
4269      else
4270        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4271      return TRUE;
4272    
4273      case 3:
4274      if (bit != 0)
4275        {
4276        add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4277        if (ranges[0] + 1 != ranges[1])
4278          {
4279          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4280          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4281          }
4282        else
4283          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4284        return TRUE;
4285        }
4286    
4287      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4288      if (ranges[1] + 1 != ranges[2])
4289        {
4290        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4291        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4292        }
4293      else
4294        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4295      return TRUE;
4296    
4297      case 4:
4298      if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4299          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4300          && is_powerof2(ranges[2] - ranges[0]))
4301        {
4302        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4303        if (ranges[2] + 1 != ranges[3])
4304          {
4305          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4306          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4307          }
4308        else
4309          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4310        return TRUE;
4311        }
4312    
4313      if (bit != 0)
4314        {
4315        i = 0;
4316        if (ranges[0] + 1 != ranges[1])
4317          {
4318          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4319          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4320          i = ranges[0];
4321          }
4322        else
4323          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4324    
4325        if (ranges[2] + 1 != ranges[3])
4326          {
4327          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4328          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4329          }
4330        else
4331          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4332        return TRUE;
4333        }
4334    
4335      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4336      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4337      if (ranges[1] + 1 != ranges[2])
4338        {
4339        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4340        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4341        }
4342      else
4343        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4344      return TRUE;
4345    
4346      default:
4347      SLJIT_ASSERT_STOP();
4348      return FALSE;
4349      }
4350  }  }
4351    
4352  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 3785  return cc; Line 4713  return cc;
4713  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
4714    if ((value) != typeoffset) \    if ((value) != typeoffset) \
4715      { \      { \
4716      if ((value) > typeoffset) \      if ((value) < typeoffset) \
       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \  
     else \  
4717        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4718        else \
4719          OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4720      } \      } \
4721    typeoffset = (value);    typeoffset = (value);
4722    
4723  #define SET_CHAR_OFFSET(value) \  #define SET_CHAR_OFFSET(value) \
4724    if ((value) != charoffset) \    if ((value) != charoffset) \
4725      { \      { \
4726      if ((value) > charoffset) \      if ((value) < charoffset) \
4727        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4728      else \      else \
4729        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4730      } \      } \
4731    charoffset = (value);    charoffset = (value);
4732    
# Line 3806  static void compile_xclass_matchingpath( Line 4734  static void compile_xclass_matchingpath(
4734  {  {
4735  DEFINE_COMPILER;  DEFINE_COMPILER;
4736  jump_list *found = NULL;  jump_list *found = NULL;
4737  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4738  pcre_int32 c, charoffset;  sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
 const pcre_uint32 *other_cases;  
4739  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4740  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4741  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4742    #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4743    BOOL utf = common->utf;
4744    #endif
4745    
4746  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4747  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4748  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4749  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4750  pcre_int32 typeoffset;  const pcre_uint32 *other_cases;
4751    sljit_uw typeoffset;
4752  #endif  #endif
4753    
4754  /* Although SUPPORT_UTF must be defined, we are  /* Scanning the necessary info. */
4755     not necessary in utf mode even in 8 bit mode. */  cc++;
4756  detect_partial_match(common, backtracks);  ccbegin = cc;
4757  read_char(common);  compares = 0;
4758    if (cc[-1] & XCL_MAP)
 if ((*cc++ & XCL_MAP) != 0)  
4759    {    {
4760    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    min = 0;
 #ifndef COMPILE_PCRE8  
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #endif  
   
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))  
     {  
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));  
     }  
   
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     JUMPHERE(jump);  
 #endif  
   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4761    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4762    }    }
4763    
 /* Scanning the necessary info. */  
 ccbegin = cc;  
 compares = 0;  
4764  while (*cc != XCL_END)  while (*cc != XCL_END)
4765    {    {
4766    compares++;    compares++;
4767    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4768      {      {
4769      cc += 2;      cc ++;
4770  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4771      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c > max) max = c;
4772  #endif      if (c < min) min = c;
4773  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4774      needschar = TRUE;      needschar = TRUE;
4775  #endif  #endif
4776      }      }
4777    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4778      {      {
4779      cc += 2;      cc ++;
4780  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4781      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c < min) min = c;
4782  #endif      GETCHARINCTEST(c, cc);
4783      cc++;      if (c > max) max = c;
 #ifdef SUPPORT_UTF  
     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
4784  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4785      needschar = TRUE;      needschar = TRUE;
4786  #endif  #endif
# Line 3892  while (*cc != XCL_END) Line 4790  while (*cc != XCL_END)
4790      {      {
4791      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4792      cc++;      cc++;
4793        if (*cc == PT_CLIST)
4794          {
4795          other_cases = PRIV(ucd_caseless_sets) + cc[1];
4796          while (*other_cases != NOTACHAR)
4797            {
4798            if (*other_cases > max) max = *other_cases;
4799            if (*other_cases < min) min = *other_cases;
4800            other_cases++;
4801            }
4802          }
4803        else
4804          {
4805          max = READ_CHAR_MAX;
4806          min = 0;
4807          }
4808    
4809      switch(*cc)      switch(*cc)
4810        {        {
4811        case PT_ANY:        case PT_ANY:
# Line 3911  while (*cc != XCL_END) Line 4825  while (*cc != XCL_END)
4825        case PT_SPACE:        case PT_SPACE:
4826        case PT_PXSPACE:        case PT_PXSPACE:
4827        case PT_WORD:        case PT_WORD:
4828          case PT_PXGRAPH:
4829          case PT_PXPRINT:
4830          case PT_PXPUNCT:
4831        needstype = TRUE;        needstype = TRUE;
4832        needschar = TRUE;        needschar = TRUE;
4833        break;        break;
# Line 3929  while (*cc != XCL_END) Line 4846  while (*cc != XCL_END)
4846  #endif  #endif
4847    }    }
4848    
4849    /* We are not necessary in utf mode even in 8 bit mode. */
4850    cc = ccbegin;
4851    detect_partial_match(common, backtracks);
4852    read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4853    
4854    if ((cc[-1] & XCL_HASPROP) == 0)
4855      {
4856      if ((cc[-1] & XCL_MAP) != 0)
4857        {
4858        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4859        if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4860          {
4861          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4862          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4863          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4864          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4865          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4866          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4867          }
4868    
4869        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4870        JUMPHERE(jump);
4871    
4872        cc += 32 / sizeof(pcre_uchar);
4873        }
4874      else
4875        {
4876        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4877        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4878        }
4879      }
4880    else if ((cc[-1] & XCL_MAP) != 0)
4881      {
4882      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4883    #ifdef SUPPORT_UCP
4884      charsaved = TRUE;
4885    #endif
4886      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4887        {
4888    #ifdef COMPILE_PCRE8
4889        SLJIT_ASSERT(common->utf);
4890    #endif
4891        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4892    
4893        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4894        OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4895        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4896        OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4897        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4898        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4899    
4900        JUMPHERE(jump);
4901        }
4902    
4903      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4904      cc += 32 / sizeof(pcre_uchar);
4905      }
4906    
4907  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4908  /* Simple register allocation. TMP1 is preferred if possible. */  /* Simple register allocation. TMP1 is preferred if possible. */
4909  if (needstype || needsscript)  if (needstype || needsscript)
# Line 3970  if (needstype || needsscript) Line 4945  if (needstype || needsscript)
4945  #endif  #endif
4946    
4947  /* Generating code. */  /* Generating code. */
 cc = ccbegin;  
4948  charoffset = 0;  charoffset = 0;
4949  numberofcmps = 0;  numberofcmps = 0;
4950  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 3986  while (*cc != XCL_END) Line 4960  while (*cc != XCL_END)
4960    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4961      {      {
4962      cc ++;      cc ++;
4963  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4964    
4965      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4966        {        {
4967        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4968        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4969        numberofcmps++;        numberofcmps++;
4970        }        }
4971      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4972        {        {
4973        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4974        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4975        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4976        numberofcmps = 0;        numberofcmps = 0;
4977        }        }
4978      else      else
4979        {        {
4980        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4981        numberofcmps = 0;        numberofcmps = 0;
       }  
     }  
   else if (*cc == XCL_RANGE)  
     {  
     cc ++;  
 #ifdef SUPPORT_UTF  
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
     SET_CHAR_OFFSET(c);  
 #ifdef SUPPORT_UTF  
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
4982        }        }
4983      else      }
4984  #endif    else if (*cc == XCL_RANGE)
4985        c = *cc++;      {
4986        cc ++;
4987        GETCHARINCTEST(c, cc);
4988        SET_CHAR_OFFSET(c);
4989        GETCHARINCTEST(c, cc);
4990    
4991      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4992        {        {
4993        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4994        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4995        numberofcmps++;        numberofcmps++;
4996        }        }
4997      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4998        {        {
4999        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5000        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5001        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5002        numberofcmps = 0;        numberofcmps = 0;
5003        }        }
5004      else      else
5005        {        {
5006        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5007        numberofcmps = 0;        numberofcmps = 0;
5008        }        }
5009      }      }
# Line 4098  while (*cc != XCL_END) Line 5052  while (*cc != XCL_END)
5052    
5053        case PT_SPACE:        case PT_SPACE:
5054        case PT_PXSPACE:        case PT_PXSPACE:
       if (*cc == PT_SPACE)  
         {  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  
         jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);  
         }  
5055        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
5056        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5057        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5058        if (*cc == PT_SPACE)  
5059          JUMPHERE(jump);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5060          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5061    
5062          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5063          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5064    
5065        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
5066        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
# Line 4116  while (*cc != XCL_END) Line 5069  while (*cc != XCL_END)
5069        break;        break;
5070    
5071        case PT_WORD:        case PT_WORD:
5072        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5073        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5074        /* Fall through. */        /* Fall through. */
5075    
# Line 4164  while (*cc != XCL_END) Line 5117  while (*cc != XCL_END)
5117          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5118          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5119    
5120          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5121          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5122    
5123          other_cases += 3;          other_cases += 3;
5124          }          }
5125        else        else
5126          {          {
5127          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5128          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5129          }          }
5130    
5131        while (*other_cases != NOTACHAR)        while (*other_cases != NOTACHAR)
5132          {          {
5133          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5134          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5135          }          }
5136        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5137        break;        break;
5138    
5139        case PT_UCNC:        case PT_UCNC:
5140        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5141        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5142        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5143        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5144        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5145        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5146    
5147        SET_CHAR_OFFSET(0xa0);        SET_CHAR_OFFSET(0xa0);
5148        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5149        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5150        SET_CHAR_OFFSET(0);        SET_CHAR_OFFSET(0);
5151        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5152        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5153        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5154        break;        break;
5155    
5156          case PT_PXGRAPH:
5157          /* C and Z groups are the farthest two groups. */
5158          SET_TYPE_OFFSET(ucp_Ll);
5159          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5160          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5161    
5162          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5163    
5164          /* In case of ucp_Cf, we overwrite the result. */
5165          SET_CHAR_OFFSET(0x2066);
5166          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5167          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5168    
5169          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5170          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5171    
5172          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5173          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5174    
5175          JUMPHERE(jump);
5176          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5177          break;
5178    
5179          case PT_PXPRINT:
5180          /* C and Z groups are the farthest two groups. */
5181          SET_TYPE_OFFSET(ucp_Ll);
5182          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5183          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5184    
5185          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5186          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5187    
5188          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5189    
5190          /* In case of ucp_Cf, we overwrite the result. */
5191          SET_CHAR_OFFSET(0x2066);
5192          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5193          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5194    
5195          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5196          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5197    
5198          JUMPHERE(jump);
5199          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5200          break;
5201    
5202          case PT_PXPUNCT:
5203          SET_TYPE_OFFSET(ucp_Sc);
5204          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5205          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5206    
5207          SET_CHAR_OFFSET(0);
5208          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5209          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5210    
5211          SET_TYPE_OFFSET(ucp_Pc);
5212          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5213          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5214          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5215          break;
5216        }        }
5217      cc += 2;      cc += 2;
5218      }      }
# Line 4230  struct sljit_label *label; Line 5244  struct sljit_label *label;
5244  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5245  pcre_uchar propdata[5];  pcre_uchar propdata[5];
5246  #endif  #endif
5247  #endif  #endif /* SUPPORT_UTF */
5248    
5249  switch(type)  switch(type)
5250    {    {
# Line 4255  switch(type) Line 5269  switch(type)
5269    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
5270    case OP_DIGIT:    case OP_DIGIT:
5271    /* Digits are usually 0-9, so it is worth to optimize them. */    /* Digits are usually 0-9, so it is worth to optimize them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
5272    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5273    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5274    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5275      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
5276    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
5277      {  #endif
5278      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
5279      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
5280      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5281      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
5282    return cc;    return cc;
5283    
5284    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
5285    case OP_WHITESPACE:    case OP_WHITESPACE:
5286    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5287    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5288      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5289        read_char7_type(common, type == OP_NOT_WHITESPACE);
5290      else
5291    #endif
5292        read_char8_type(common, type == OP_NOT_WHITESPACE);
5293    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5294    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5295    return cc;    return cc;
# Line 4282  switch(type) Line 5297  switch(type)
5297    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
5298    case OP_WORDCHAR:    case OP_WORDCHAR:
5299    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5300    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5301      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5302        read_char7_type(common, type == OP_NOT_WORDCHAR);
5303      else
5304    #endif
5305        read_char8_type(common, type == OP_NOT_WORDCHAR);
5306    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5307    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5308    return cc;    return cc;
5309    
5310    case OP_ANY:    case OP_ANY:
5311    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5312    read_char(common);    read_char_range(common, common->nlmin, common->nlmax, TRUE);
5313    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5314      {      {
5315      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4345  switch(type) Line 5365  switch(type)
5365  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5366    case OP_NOTPROP:    case OP_NOTPROP:
5367    case OP_PROP:    case OP_PROP:
5368    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5369    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5370    propdata[2] = cc[0];    propdata[2] = cc[0];
5371    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4357  switch(type) Line 5377  switch(type)
5377    
5378    case OP_ANYNL:    case OP_ANYNL:
5379    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5380    read_char(common);    read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5381    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5382    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5383    end_list = NULL;    end_list = NULL;
# Line 4379  switch(type) Line 5399  switch(type)
5399    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5400    case OP_HSPACE:    case OP_HSPACE:
5401    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5402    read_char(common);    read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5403    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5404    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5405    return cc;    return cc;
# Line 4387  switch(type) Line 5407  switch(type)
5407    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5408    case OP_VSPACE:    case OP_VSPACE:
5409    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5410    read_char(common);    read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5411    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5412    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5413    return cc;    return cc;
# Line 4486  switch(type) Line 5506  switch(type)
5506      else      else
5507        {        {
5508        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5509        read_char(common);        read_char_range(common, common->nlmin, common->nlmax, TRUE);
5510        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5511        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5512        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 4534  switch(type) Line 5554  switch(type)
5554    else    else
5555      {      {
5556      skip_char_back(common);      skip_char_back(common);
5557      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
5558      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5559      }      }
5560    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4585  switch(type) Line 5605  switch(type)
5605      }      }
5606    else    else
5607      {      {
5608      peek_char(common);      peek_char(common, common->nlmax);
5609      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5610      }      }
5611    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4609  switch(type) Line 5629  switch(type)
5629  #endif  #endif
5630      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5631      }      }
5632    
5633    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5634  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5635    if (common->utf)    if (common->utf)
5636      {      {
# Line 4619  switch(type) Line 5639  switch(type)
5639    else    else
5640  #endif  #endif
5641      c = *cc;      c = *cc;
5642    
5643    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5644      {      {
5645        read_char_range(common, c, c, FALSE);
5646      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5647      return cc + length;      return cc + length;
5648      }      }
5649    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5650      read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5651    bit = c ^ oc;    bit = c ^ oc;
5652    if (is_powerof2(bit))    if (is_powerof2(bit))
5653      {      {
# Line 4632  switch(type) Line 5655  switch(type)
5655      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5656      return cc + length;      return cc + length;
5657      }      }
5658    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5659    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5660    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5661    return cc + length;    return cc + length;
5662    
5663    case OP_NOT:    case OP_NOT:
# Line 4671  switch(type) Line 5692  switch(type)
5692  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5693        {        {
5694        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5695        }        }
5696      }      }
5697    else    else
5698  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5699      c = *cc;      c = *cc;
     }  
5700    
5701    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5702        {
5703        read_char_range(common, c, c, TRUE);
5704      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5705        }
5706    else    else
5707      {      {
5708      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5709        read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5710      bit = c ^ oc;      bit = c ^ oc;
5711      if (is_powerof2(bit))      if (is_powerof2(bit))
5712        {        {
# Line 4703  switch(type) Line 5724  switch(type)
5724    case OP_CLASS:    case OP_CLASS:
5725    case OP_NCLASS:    case OP_NCLASS:
5726    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5727    read_char(common);  
5728    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5729      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5730      read_char_range(common, 0, bit, type == OP_NCLASS);
5731    #else
5732      read_char_range(common, 0, 255, type == OP_NCLASS);
5733    #endif
5734    
5735      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5736      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5737    
5738  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5739    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5740    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5741      {      {
5742      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5743      if (type == OP_CLASS)      if (type == OP_CLASS)
5744        {        {
5745        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5746        jump[0] = NULL;        jump[0] = NULL;
5747        }        }
5748      }      }
5749  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5750      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5751      if (type == OP_CLASS)
5752        {
5753        add_jump(compiler, backtracks, jump[0]);
5754        jump[0] = NULL;
5755        }
5756    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5757    
5758    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5764    
5765  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5766    if (jump[0] != NULL)    if (jump[0] != NULL)
5767      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5768  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5769    
5770    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5771    
5772  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 4837  if (context.length > 0) Line 5871  if (context.length > 0)
5871  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5872  }  }
5873    
 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  
 {  
 DEFINE_COMPILER;  
 int offset = GET2(cc, 1) << 1;  
   
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
 if (!common->jscript_compat)  
   {  
   if (backtracks == NULL)  
     {  
     /* OVECTOR(1) contains the "string begin - 1" constant. */  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
     return JUMP(SLJIT_C_NOT_ZERO);  
     }  
   add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));  
   }  
 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
 }  
   
5874  /* Forward definitions. */  /* Forward definitions. */
5875  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5876  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
# Line 4891  static void compile_backtrackingpath(com Line 5903  static void compile_backtrackingpath(com
5903    
5904  #define BACKTRACK_AS(type) ((type *)backtrack)  #define BACKTRACK_AS(type) ((type *)backtrack)
5905    
5906  static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)  static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5907    {
5908    /* The OVECTOR offset goes to TMP2. */
5909    DEFINE_COMPILER;
5910    int count = GET2(cc, 1 + IMM2_SIZE);
5911    pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5912    unsigned int offset;
5913    jump_list *found = NULL;
5914    
5915    SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5916    
5917    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5918    
5919    count--;
5920    while (count-- > 0)
5921      {
5922      offset = GET2(slot, 0) << 1;
5923      GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5924      add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5925      slot += common->name_entry_size;
5926      }
5927    
5928    offset = GET2(slot, 0) << 1;
5929    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5930    if (backtracks != NULL && !common->jscript_compat)
5931      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5932    
5933    set_jumps(found, LABEL());
5934    }
5935    
5936    static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5937  {  {
5938  DEFINE_COMPILER;  DEFINE_COMPILER;
5939  int offset = GET2(cc, 1) << 1;  BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5940    int offset = 0;
5941  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
5942  struct sljit_jump *partial;  struct sljit_jump *partial;
5943  struct sljit_jump *nopartial;  struct sljit_jump *nopartial;
5944    
5945  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  if (ref)
5946  /* OVECTOR(1) contains the "string begin - 1" constant. */    {
5947  if (withchecks && !common->jscript_compat)    offset = GET2(cc, 1) << 1;
5948    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5949      /* OVECTOR(1) contains the "string begin - 1" constant. */
5950      if (withchecks && !common->jscript_compat)
5951        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5952      }
5953    else
5954      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5955    
5956  #if defined SUPPORT_UTF && defined SUPPORT_UCP  #if defined SUPPORT_UTF && defined SUPPORT_UCP
5957  if (common->utf && *cc == OP_REFI)  if (common->utf && *cc == OP_REFI)
5958    {    {
5959    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5960    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));    if (ref)
5961        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5962      else
5963        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5964    
5965    if (withchecks)    if (withchecks)
5966      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5967    
# Line 4933  if (common->utf && *cc == OP_REFI) Line 5986  if (common->utf && *cc == OP_REFI)
5986  else  else
5987  #endif /* SUPPORT_UTF && SUPPORT_UCP */  #endif /* SUPPORT_UTF && SUPPORT_UCP */
5988    {    {
5989    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);    if (ref)
5990        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5991      else
5992        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5993    
5994    if (withchecks)    if (withchecks)
5995      jump = JUMP(SLJIT_C_ZERO);      jump = JUMP(SLJIT_C_ZERO);
5996    
# Line 4970  if (jump != NULL) Line 6027  if (jump != NULL)
6027    else    else
6028      JUMPHERE(jump);      JUMPHERE(jump);
6029    }    }
 return cc + 1 + IMM2_SIZE;  
6030  }  }
6031    
6032  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6033  {  {
6034  DEFINE_COMPILER;  DEFINE_COMPILER;
6035    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6036  backtrack_common *backtrack;  backtrack_common *backtrack;
6037  pcre_uchar type;  pcre_uchar type;
6038    int offset = 0;
6039  struct sljit_label *label;  struct sljit_label *label;
6040  struct sljit_jump *zerolength;  struct sljit_jump *zerolength;
6041  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 4987  BOOL minimize; Line 6045  BOOL minimize;
6045    
6046  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6047    
6048    if (ref)
6049      offset = GET2(cc, 1) << 1;
6050    else
6051      cc += IMM2_SIZE;
6052  type = cc[1 + IMM2_SIZE];  type = cc[1 + IMM2_SIZE];
6053    
6054    SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6055  minimize = (type & 0x1) != 0;  minimize = (type & 0x1) != 0;
6056  switch(type)  switch(type)
6057    {    {
# Line 5025  if (!minimize) Line 6089  if (!minimize)
6089    if (min == 0)    if (min == 0)
6090      {      {
6091      allocate_stack(common, 2);      allocate_stack(common, 2);
6092        if (ref)
6093          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6094      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6095      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6096      /* Temporary release of STR_PTR. */      /* Temporary release of STR_PTR. */
6097      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6098      zerolength = compile_ref_checks(common, ccbegin, NULL);      /* Handles both invalid and empty cases. Since the minimum repeat,
6099        is zero the invalid case is basically the same as an empty case. */
6100        if (ref)
6101          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6102        else
6103          {
6104          compile_dnref_search(common, ccbegin, NULL);
6105          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6106          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6107          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6108          }
6109      /* Restore if not zero length. */      /* Restore if not zero length. */
6110      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6111      }      }
6112    else    else
6113      {      {
6114      allocate_stack(common, 1);      allocate_stack(common, 1);
6115        if (ref)
6116          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6117      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6118      zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);      if (ref)
6119          {
6120          add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6121          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6122          }
6123        else
6124          {
6125          compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6126          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6127          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6128          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6129          }
6130      }      }
6131    
6132    if (min > 1 || max > 1)    if (min > 1 || max > 1)
6133      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6134    
6135    label = LABEL();    label = LABEL();
6136      if (!ref)
6137        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6138    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6139    
6140    if (min > 1 || max > 1)    if (min > 1 || max > 1)
# Line 5074  if (!minimize) Line 6165  if (!minimize)
6165    JUMPHERE(zerolength);    JUMPHERE(zerolength);
6166    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6167    
6168    decrease_call_count(common);    count_match(common);
6169    return cc;    return cc;
6170    }    }
6171    
6172  allocate_stack(common, 2);  allocate_stack(common, ref ? 2 : 3);
6173    if (ref)
6174      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6175  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6176  if (type != OP_CRMINSTAR)  if (type != OP_CRMINSTAR)
6177    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6178    
6179  if (min == 0)  if (min == 0)
6180    {    {
6181    zerolength = compile_ref_checks(common, ccbegin, NULL);    /* Handles both invalid and empty cases. Since the minimum repeat,
6182      is zero the invalid case is basically the same as an empty case. */
6183      if (ref)
6184        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6185      else
6186        {
6187        compile_dnref_search(common, ccbegin, NULL);
6188        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6189        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6190        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6191        }
6192      /* Length is non-zero, we can match real repeats. */
6193    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6194    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
6195    }    }
6196  else  else
6197    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);    {
6198      if (ref)
6199        {
6200        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6201        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6202        }
6203      else
6204        {
6205        compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6206        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6207        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6208        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6209        }
6210      }
6211    
6212  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6213  if (max > 0)  if (max > 0)
6214    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6215    
6216    if (!ref)
6217      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6218  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6219  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6220    
# Line 5113  if (jump != NULL) Line 6232  if (jump != NULL)
6232    JUMPHERE(jump);    JUMPHERE(jump);
6233  JUMPHERE(zerolength);  JUMPHERE(zerolength);
6234    
6235  decrease_call_count(common);  count_match(common);
6236  return cc;  return cc;
6237  }  }
6238    
# Line 5123  DEFINE_COMPILER; Line 6242  DEFINE_COMPILER;
6242  backtrack_common *backtrack;  backtrack_common *backtrack;
6243  recurse_entry *entry = common->entries;  recurse_entry *entry = common->entries;
6244  recurse_entry *prev = NULL;  recurse_entry *prev = NULL;
6245  int start = GET(cc, 1);  sljit_sw start = GET(cc, 1);
6246  pcre_uchar *start_cc;  pcre_uchar *start_cc;
6247    BOOL needs_control_head;
6248    
6249  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6250    
6251  /* Inlining simple patterns. */  /* Inlining simple patterns. */
6252  if (get_framesize(common, common->start + start, TRUE) == no_stack)  if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6253    {    {
6254    start_cc = common->start + start;    start_cc = common->start + start;
6255    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);    compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
# Line 5248  allocate_stack(common, CALLOUT_ARG_SIZE Line 6368  allocate_stack(common, CALLOUT_ARG_SIZE
6368  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6369  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6370  SLJIT_ASSERT(common->capture_last_ptr != 0);  SLJIT_ASSERT(common->capture_last_ptr != 0);
6371  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6372  OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6373    
6374  /* These pointer sized fields temporarly stores internal variables. */  /* These pointer sized fields temporarly stores internal variables. */
6375  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
# Line 5258  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CA Line 6378  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CA
6378    
6379  if (common->mark_ptr != 0)  if (common->mark_ptr != 0)
6380    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6381  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6382  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));  OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6383  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6384    
6385  /* Needed to save important temporary registers. */  /* Needed to save important temporary registers. */
# Line 5288  static pcre_uchar *compile_assert_matchi Line 6408  static pcre_uchar *compile_assert_matchi
6408  {  {
6409  DEFINE_COMPILER;  DEFINE_COMPILER;
6410  int framesize;  int framesize;
6411    int extrasize;
6412    BOOL needs_control_head;
6413  int private_data_ptr;  int private_data_ptr;
6414  backtrack_common altbacktrack;  backtrack_common altbacktrack;
6415  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
# Line 5297  jump_list *tmp = NULL; Line 6419  jump_list *tmp = NULL;
6419  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;  jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6420  jump_list **found;  jump_list **found;