/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 958 by zherczeg, Wed Apr 11 10:19:10 2012 UTC revision 1632 by zherczeg, Fri Feb 12 14:43:22 2016 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2012                        Copyright (c) 2010-2013
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49  #ifdef SUPPORT_JIT  #if defined SUPPORT_JIT
50    
51  /* All-in-one: Since we use the JIT compiler only from here,  /* All-in-one: Since we use the JIT compiler only from here,
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55  #define SLJIT_MALLOC(size) (PUBL(malloc))(size)  #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56  #define SLJIT_FREE(ptr) (PUBL(free))(ptr)  #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58  #define SLJIT_CONFIG_STATIC 1  #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
# Line 65  system files. */ Line 65  system files. */
65  #error Unsupported architecture  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Defines for debugging purposes. */
 #define LOCAL_SPACE_SIZE 32768  
69    
70    /* 1 - Use unoptimized capturing brackets.
71       2 - Enable capture_last_ptr (includes option 1). */
72    /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* 1 - Always have a control head. */
75    /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77    /* Allocate memory for the regex stack on the real machine stack.
78    Fast, but limited size. */
79    #define MACHINE_STACK_SIZE 32768
80    
81    /* Growth rate for stack allocated by the OS. Should be a multiple
82    of page size. */
83  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
84    
85  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 82  The code generator follows the recursive Line 94  The code generator follows the recursive
94  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
95  which execute different commands depending on the result of the condition check.  which execute different commands depending on the result of the condition check.
96  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
97  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
98    
99    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
100    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
101    
102  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
103  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
104  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
105  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106  branches on the hot path.  branches on the matching path.
107    
108   Greedy star operator (*) :   Greedy star operator (*) :
109     Hot path: match happens.     Matching path: match happens.
110     Fallback path: match failed.     Backtrack path: match failed.
111   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
112     Hot path: no need to perform a match.     Matching path: no need to perform a match.
113     Fallback path: match is required.     Backtrack path: match is required.
114    
115  The following example shows how the code generated for a capturing bracket  The following example shows how the code generated for a capturing bracket
116  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 108  we have the following regular expression Line 120  we have the following regular expression
120    
121  The generated code will be the following:  The generated code will be the following:
122    
123   A hot path   A matching path
124   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
125   B hot path   B matching path
126   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
127   D hot path   D matching path
128   return with successful match   return with successful match
129    
130   D fallback path   D backtrack path
131   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132   B fallback path   B backtrack path
133   C expected path   C expected path
134   jump to D hot path   jump to D matching path
135   C fallback path   C backtrack path
136   A fallback path   A backtrack path
137    
138   Notice that the order of fallback code paths is the opposite of the fast   Notice that the order of backtrack code paths is the opposite of the fast
139   code paths. In this way the topmost value on the stack always belongs   code paths. In this way the topmost value on the stack always belongs
140   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
141   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
142   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
143   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
144  */  */
145    
146  /*  /*
147  Saved stack frames:  Saved stack frames:
148    
149  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
150  when the fallback mechanism is performed. Because of OP_RECURSE, the locals  when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
152  mechanism.  mechanism.
153    
154  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
155  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156    
157  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
158  */  */
159    
160  typedef struct jit_arguments {  typedef struct jit_arguments {
# Line 154  typedef struct jit_arguments { Line 166  typedef struct jit_arguments {
166    int *offsets;    int *offsets;
167    pcre_uchar *uchar_ptr;    pcre_uchar *uchar_ptr;
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169      void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171    int offsetcount;    pcre_uint32 limit_match;
172    int calllimit;    int real_offset_count;
173      int offset_count;
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 165  typedef struct jit_arguments { Line 179  typedef struct jit_arguments {
179    
180  typedef struct executable_functions {  typedef struct executable_functions {
181    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182      void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    pcre_uint32 top_bracket;
187      pcre_uint32 limit_match;
188  } executable_functions;  } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
# Line 175  typedef struct jump_list { Line 192  typedef struct jump_list {
192    struct jump_list *next;    struct jump_list *next;
193  } jump_list;  } jump_list;
194    
 enum stub_types { stack_alloc };  
   
195  typedef struct stub_list {  typedef struct stub_list {
   enum stub_types type;  
   int data;  
196    struct sljit_jump *start;    struct sljit_jump *start;
197    struct sljit_label *leave;    struct sljit_label *quit;
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201    typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *update_addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207    enum frame_types {
208      no_frame = -1,
209      no_stack = -2
210    };
211    
212    enum control_types {
213      type_mark = 0,
214      type_then_trap = 1
215    };
216    
217  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218    
219  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
220  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
221  the aguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
222  of its descendants. */  of its descendants. */
223  typedef struct fallback_common {  typedef struct backtrack_common {
224    /* Concatenation stack. */    /* Concatenation stack. */
225    struct fallback_common *prev;    struct backtrack_common *prev;
226    jump_list *nextfallbacks;    jump_list *nextbacktracks;
227    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
228    struct fallback_common *top;    struct backtrack_common *top;
229    jump_list *topfallbacks;    jump_list *topbacktracks;
230    /* Opcode pointer. */    /* Opcode pointer. */
231    pcre_uchar *cc;    pcre_uchar *cc;
232  } fallback_common;  } backtrack_common;
233    
234  typedef struct assert_fallback {  typedef struct assert_backtrack {
235    fallback_common common;    backtrack_common common;
236    jump_list *condfailed;    jump_list *condfailed;
237    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
238    int framesize;    int framesize;
239    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
240    int localptr;    int private_data_ptr;
241    /* For iterators. */    /* For iterators. */
242    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
243  } assert_fallback;  } assert_backtrack;
244    
245  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
246    fallback_common common;    backtrack_common common;
247    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
248    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
249    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
250    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
251    /* For greedy ? operator. */    /* For greedy ? operator. */
252    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
253    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
254    union {    union {
255      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
256      jump_list *condfailed;      jump_list *condfailed;
257      assert_fallback *assert;      assert_backtrack *assert;
258      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
259      int framesize;      int framesize;
260    } u;    } u;
261    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
262    int localptr;    int private_data_ptr;
263  } bracket_fallback;  } bracket_backtrack;
264    
265  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
266    fallback_common common;    backtrack_common common;
267    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
268    int localptr;    int private_data_ptr;
269    /* Reverting stack is needed. */    /* Reverting stack is needed. */
270    int framesize;    int framesize;
271    /* Allocated stack size. */    /* Allocated stack size. */
272    int stacksize;    int stacksize;
273  } bracketpos_fallback;  } bracketpos_backtrack;
274    
275  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
276    fallback_common common;    backtrack_common common;
277    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
278  } braminzero_fallback;  } braminzero_backtrack;
279    
280    typedef struct char_iterator_backtrack {
281      backtrack_common common;
282      /* Next iteration. */
283      struct sljit_label *matchingpath;
284      union {
285        jump_list *backtracks;
286        struct {
287          unsigned int othercasebit;
288          pcre_uchar chr;
289          BOOL enabled;
290        } charpos;
291      } u;
292    } char_iterator_backtrack;
293    
294  typedef struct iterator_fallback {  typedef struct ref_iterator_backtrack {
295    fallback_common common;    backtrack_common common;
296    /* Next iteration. */    /* Next iteration. */
297    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
298  } iterator_fallback;  } ref_iterator_backtrack;
299    
300  typedef struct recurse_entry {  typedef struct recurse_entry {
301    struct recurse_entry *next;    struct recurse_entry *next;
# Line 261  typedef struct recurse_entry { Line 304  typedef struct recurse_entry {
304    /* Collects the calls until the function is created. */    /* Collects the calls until the function is created. */
305    jump_list *calls;    jump_list *calls;
306    /* Points to the starting opcode. */    /* Points to the starting opcode. */
307    int start;    sljit_sw start;
308  } recurse_entry;  } recurse_entry;
309    
310  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
311    fallback_common common;    backtrack_common common;
312  } recurse_fallback;    BOOL inlined_pattern;
313    } recurse_backtrack;
314    
315    #define OP_THEN_TRAP OP_TABLE_LENGTH
316    
317    typedef struct then_trap_backtrack {
318      backtrack_common common;
319      /* If then_trap is not NULL, this structure contains the real
320      then_trap for the backtracking path. */
321      struct then_trap_backtrack *then_trap;
322      /* Points to the starting opcode. */
323      sljit_sw start;
324      /* Exit point for the then opcodes of this alternative. */
325      jump_list *quit;
326      /* Frame size of the current alternative. */
327      int framesize;
328    } then_trap_backtrack;
329    
330    #define MAX_RANGE_SIZE 4
331    
332  typedef struct compiler_common {  typedef struct compiler_common {
333      /* The sljit generic compiler. */
334    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
335      /* First byte code. */
336    pcre_uchar *start;    pcre_uchar *start;
337      /* Maps private data offset to each opcode. */
338    /* Opcode local area direct map. */    sljit_si *private_data_ptrs;
339    int *localptrs;    /* Chain list of read-only data ptrs. */
340    int cbraptr;    void *read_only_data_head;
341    /* OVector starting point. Must be divisible by 2. */    /* Tells whether the capturing bracket is optimized. */
342    int ovector_start;    sljit_ub *optimized_cbracket;
343      /* Tells whether the starting offset is a target of then. */
344      sljit_ub *then_offsets;
345      /* Current position where a THEN must jump. */
346      then_trap_backtrack *then_trap;
347      /* Starting offset of private data for capturing brackets. */
348      sljit_si cbra_ptr;
349      /* Output vector starting point. Must be divisible by 2. */
350      sljit_si ovector_start;
351      /* Points to the starting character of the current match. */
352      sljit_si start_ptr;
353    /* Last known position of the requested byte. */    /* Last known position of the requested byte. */
354    int req_char_ptr;    sljit_si req_char_ptr;
355    /* Head of the last recursion. */    /* Head of the last recursion. */
356    int recursive_head;    sljit_si recursive_head_ptr;
357    /* First inspected character for partial matching. */    /* First inspected character for partial matching.
358    int start_used_ptr;       (Needed for avoiding zero length partial matches.) */
359      sljit_si start_used_ptr;
360    /* Starting pointer for partial soft matches. */    /* Starting pointer for partial soft matches. */
361    int hit_start;    sljit_si hit_start;
362    /* End pointer of the first line. */    /* End pointer of the first line. */
363    int first_line_end;    sljit_si first_line_end;
364    /* Points to the marked string. */    /* Points to the marked string. */
365    int mark_ptr;    sljit_si mark_ptr;
366      /* Recursive control verb management chain. */
367    /* Other  */    sljit_si control_head_ptr;
368    const pcre_uint8 *fcc;    /* Points to the last matched capture block index. */
369    sljit_w lcc;    sljit_si capture_last_ptr;
370      /* Fast forward skipping byte code pointer. */
371      pcre_uchar *fast_forward_bc_ptr;
372      /* Locals used by fast fail optimization. */
373      sljit_si fast_fail_start_ptr;
374      sljit_si fast_fail_end_ptr;
375    
376      /* Flipped and lower case tables. */
377      const sljit_ub *fcc;
378      sljit_sw lcc;
379      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380    int mode;    int mode;
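381      /* TRUE when the pattern might match an empty string (presumably minlength == 0; the earlier wording "greater than 0" looks inverted relative to the field name - confirm). */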
382      BOOL might_be_empty;
383      /* \K is found in the pattern. */
384      BOOL has_set_som;
385      /* (*SKIP:arg) is found in the pattern. */
386      BOOL has_skip_arg;
387      /* (*THEN) is found in the pattern. */
388      BOOL has_then;
389      /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390      BOOL has_skip_in_assert_back;
391      /* Currently in recurse or negative assert. */
392      BOOL local_exit;
393      /* Currently in a positive assert. */
394      BOOL positive_assert;
395      /* Newline control. */
396    int nltype;    int nltype;
397      sljit_ui nlmax;
398      sljit_ui nlmin;
399    int newline;    int newline;
400    int bsr_nltype;    int bsr_nltype;
401      sljit_ui bsr_nlmax;
402      sljit_ui bsr_nlmin;
403      /* Dollar endonly. */
404    int endonly;    int endonly;
405    BOOL has_set_som;    /* Tables. */
406    sljit_w ctypes;    sljit_sw ctypes;
407    sljit_uw name_table;    /* Named capturing brackets. */
408    sljit_w name_count;    pcre_uchar *name_table;
409    sljit_w name_entry_size;    sljit_sw name_count;
410      sljit_sw name_entry_size;
411    
412    /* Labels and jump lists. */    /* Labels and jump lists. */
413    struct sljit_label *partialmatchlabel;    struct sljit_label *partialmatchlabel;
414    struct sljit_label *leavelabel;    struct sljit_label *quit_label;
415    struct sljit_label *acceptlabel;    struct sljit_label *forced_quit_label;
416      struct sljit_label *accept_label;
417      struct sljit_label *ff_newline_shortcut;
418    stub_list *stubs;    stub_list *stubs;
419      label_addr_list *label_addrs;
420    recurse_entry *entries;    recurse_entry *entries;
421    recurse_entry *currententry;    recurse_entry *currententry;
422    jump_list *partialmatch;    jump_list *partialmatch;
423    jump_list *leave;    jump_list *quit;
424      jump_list *positive_assert_quit;
425      jump_list *forced_quit;
426    jump_list *accept;    jump_list *accept;
427    jump_list *calllimit;    jump_list *calllimit;
428    jump_list *stackalloc;    jump_list *stackalloc;
# Line 323  typedef struct compiler_common { Line 433  typedef struct compiler_common {
433    jump_list *vspace;    jump_list *vspace;
434    jump_list *casefulcmp;    jump_list *casefulcmp;
435    jump_list *caselesscmp;    jump_list *caselesscmp;
436      jump_list *reset_match;
437    BOOL jscript_compat;    BOOL jscript_compat;
438  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
439    BOOL utf;    BOOL utf;
440  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
441    BOOL use_ucp;    BOOL use_ucp;
442      jump_list *getucd;
443  #endif  #endif
   jump_list *utfreadchar;  
444  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
445      jump_list *utfreadchar;
446      jump_list *utfreadchar16;
447    jump_list *utfreadtype8;    jump_list *utfreadtype8;
448  #endif  #endif
449  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
 #ifdef SUPPORT_UCP  
   jump_list *getucd;  
 #endif  
450  } compiler_common;  } compiler_common;
451    
452  /* For byte_sequence_compare. */  /* For byte_sequence_compare. */
# Line 347  typedef struct compare_context { Line 457  typedef struct compare_context {
457  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
458    int ucharptr;    int ucharptr;
459    union {    union {
460      sljit_i asint;      sljit_si asint;
461      sljit_uh asushort;      sljit_uh asushort;
462  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
463      sljit_ub asbyte;      sljit_ub asbyte;
464      sljit_ub asuchars[4];      sljit_ub asuchars[4];
465  #else  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
466      sljit_uh asuchars[2];      sljit_uh asuchars[2];
467  #endif  #elif defined COMPILE_PCRE32
468        sljit_ui asuchars[1];
469  #endif  #endif
470    } c;    } c;
471    union {    union {
472      sljit_i asint;      sljit_si asint;
473      sljit_uh asushort;      sljit_uh asushort;
474  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
475      sljit_ub asbyte;      sljit_ub asbyte;
476      sljit_ub asuchars[4];      sljit_ub asuchars[4];
477  #else  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
478      sljit_uh asuchars[2];      sljit_uh asuchars[2];
479  #endif  #elif defined COMPILE_PCRE32
480        sljit_ui asuchars[1];
481  #endif  #endif
482    } oc;    } oc;
483  #endif  #endif
484  } compare_context;  } compare_context;
485    
 enum {  
   frame_end = 0,  
   frame_setstrbegin = -1,  
   frame_setmark = -2  
 };  
   
486  /* Undefine sljit macros. */  /* Undefine sljit macros. */
487  #undef CMP  #undef CMP
488    
489  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
490  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
491    
492  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_R0
493  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_R2
494  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_R3
495  #define STR_PTR       SLJIT_SAVED_REG1  #define STR_PTR       SLJIT_S0
496  #define STR_END       SLJIT_SAVED_REG2  #define STR_END       SLJIT_S1
497  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_R1
498  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_S2
499  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define COUNT_MATCH   SLJIT_S3
500  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define ARGUMENTS     SLJIT_S4
501  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_R4
502    
503  /* Locals layout. */  /* Local space layout. */
504  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
505  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_sw))
506  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_sw))
507  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
508  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
509  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
510  /* Max limit of recursions. */  /* Max limit of recursions. */
511  #define CALL_LIMIT       (4 * sizeof(sljit_w))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
512  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
513  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
514  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
515  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
516  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
517  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
518  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
519  #define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520    
521  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
522  #define MOV_UCHAR  SLJIT_MOV_UB  #define MOV_UCHAR  SLJIT_MOV_UB
523  #define MOVU_UCHAR SLJIT_MOVU_UB  #define MOVU_UCHAR SLJIT_MOVU_UB
524  #else  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
525  #define MOV_UCHAR  SLJIT_MOV_UH  #define MOV_UCHAR  SLJIT_MOV_UH
526  #define MOVU_UCHAR SLJIT_MOVU_UH  #define MOVU_UCHAR SLJIT_MOVU_UH
527    #elif defined COMPILE_PCRE32
528    #define MOV_UCHAR  SLJIT_MOV_UI
529    #define MOVU_UCHAR SLJIT_MOVU_UI
530  #else  #else
531  #error Unsupported compiling mode  #error Unsupported compiling mode
532  #endif  #endif
 #endif  
533    
534  /* Shortcuts. */  /* Shortcuts. */
535  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 441  the start pointers when the end of the c Line 546  the start pointers when the end of the c
546    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
547  #define JUMPHERE(jump) \  #define JUMPHERE(jump) \
548    sljit_set_label((jump), sljit_emit_label(compiler))    sljit_set_label((jump), sljit_emit_label(compiler))
549    #define SET_LABEL(jump, label) \
550      sljit_set_label((jump), (label))
551  #define CMP(type, src1, src1w, src2, src2w) \  #define CMP(type, src1, src1w, src2, src2w) \
552    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
553  #define CMPTO(type, src1, src1w, src2, src2w, label) \  #define CMPTO(type, src1, src1w, src2, src2w, label) \
554    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
555  #define COND_VALUE(op, dst, dstw, type) \  #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
556    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
557  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
558    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
559    
560  static pcre_uchar* bracketend(pcre_uchar* cc)  #define READ_CHAR_MAX 0x7fffffff
561    
562    static pcre_uchar *bracketend(pcre_uchar *cc)
563  {  {
564  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
565  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 459  cc += 1 + LINK_SIZE; Line 568  cc += 1 + LINK_SIZE;
568  return cc;  return cc;
569  }  }
570    
571    static int no_alternatives(pcre_uchar *cc)
572    {
573    int count = 0;
574    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
575    do
576      {
577      cc += GET(cc, 1);
578      count++;
579      }
580    while (*cc == OP_ALT);
581    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
582    return count;
583    }
584    
585    static int ones_in_half_byte[16] = {
586      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
587      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
588    };
589    
590  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
591   next_opcode   next_opcode
592   get_localspace   check_opcode_types
593   set_localptrs   set_private_data_ptrs
594   get_framesize   get_framesize
595   init_frame   init_frame
596   get_localsize   get_private_data_copy_length
597   copy_locals   copy_private_data
598   compile_hotpath   compile_matchingpath
599   compile_fallbackpath   compile_backtrackingpath
600  */  */
601    
602  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
# Line 489  switch(*cc) Line 617  switch(*cc)
617    case OP_WORDCHAR:    case OP_WORDCHAR:
618    case OP_ANY:    case OP_ANY:
619    case OP_ALLANY:    case OP_ALLANY:
620      case OP_NOTPROP:
621      case OP_PROP:
622    case OP_ANYNL:    case OP_ANYNL:
623    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
624    case OP_HSPACE:    case OP_HSPACE:
# Line 501  switch(*cc) Line 631  switch(*cc)
631    case OP_CIRCM:    case OP_CIRCM:
632    case OP_DOLL:    case OP_DOLL:
633    case OP_DOLLM:    case OP_DOLLM:
   case OP_TYPESTAR:  
   case OP_TYPEMINSTAR:  
   case OP_TYPEPLUS:  
   case OP_TYPEMINPLUS:  
   case OP_TYPEQUERY:  
   case OP_TYPEMINQUERY:  
   case OP_TYPEPOSSTAR:  
   case OP_TYPEPOSPLUS:  
   case OP_TYPEPOSQUERY:  
634    case OP_CRSTAR:    case OP_CRSTAR:
635    case OP_CRMINSTAR:    case OP_CRMINSTAR:
636    case OP_CRPLUS:    case OP_CRPLUS:
637    case OP_CRMINPLUS:    case OP_CRMINPLUS:
638    case OP_CRQUERY:    case OP_CRQUERY:
639    case OP_CRMINQUERY:    case OP_CRMINQUERY:
640      case OP_CRRANGE:
641      case OP_CRMINRANGE:
642      case OP_CRPOSSTAR:
643      case OP_CRPOSPLUS:
644      case OP_CRPOSQUERY:
645      case OP_CRPOSRANGE:
646      case OP_CLASS:
647      case OP_NCLASS:
648      case OP_REF:
649      case OP_REFI:
650      case OP_DNREF:
651      case OP_DNREFI:
652      case OP_RECURSE:
653      case OP_CALLOUT:
654      case OP_ALT:
655      case OP_KET:
656      case OP_KETRMAX:
657      case OP_KETRMIN:
658      case OP_KETRPOS:
659      case OP_REVERSE:
660      case OP_ASSERT:
661      case OP_ASSERT_NOT:
662      case OP_ASSERTBACK:
663      case OP_ASSERTBACK_NOT:
664      case OP_ONCE:
665      case OP_ONCE_NC:
666      case OP_BRA:
667      case OP_BRAPOS:
668      case OP_CBRA:
669      case OP_CBRAPOS:
670      case OP_COND:
671      case OP_SBRA:
672      case OP_SBRAPOS:
673      case OP_SCBRA:
674      case OP_SCBRAPOS:
675      case OP_SCOND:
676      case OP_CREF:
677      case OP_DNCREF:
678      case OP_RREF:
679      case OP_DNRREF:
680    case OP_DEF:    case OP_DEF:
681    case OP_BRAZERO:    case OP_BRAZERO:
682    case OP_BRAMINZERO:    case OP_BRAMINZERO:
683    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
684      case OP_PRUNE:
685      case OP_SKIP:
686      case OP_THEN:
687    case OP_COMMIT:    case OP_COMMIT:
688    case OP_FAIL:    case OP_FAIL:
689    case OP_ACCEPT:    case OP_ACCEPT:
690    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
691      case OP_CLOSE:
692    case OP_SKIPZERO:    case OP_SKIPZERO:
693    return cc + 1;    return cc + PRIV(OP_lengths)[*cc];
   
   case OP_ANYBYTE:  
 #ifdef SUPPORT_UTF  
   if (common->utf) return NULL;  
 #endif  
   return cc + 1;  
694    
695    case OP_CHAR:    case OP_CHAR:
696    case OP_CHARI:    case OP_CHARI:
# Line 543  switch(*cc) Line 702  switch(*cc)
702    case OP_MINPLUS:    case OP_MINPLUS:
703    case OP_QUERY:    case OP_QUERY:
704    case OP_MINQUERY:    case OP_MINQUERY:
705      case OP_UPTO:
706      case OP_MINUPTO:
707      case OP_EXACT:
708    case OP_POSSTAR:    case OP_POSSTAR:
709    case OP_POSPLUS:    case OP_POSPLUS:
710    case OP_POSQUERY:    case OP_POSQUERY:
711      case OP_POSUPTO:
712    case OP_STARI:    case OP_STARI:
713    case OP_MINSTARI:    case OP_MINSTARI:
714    case OP_PLUSI:    case OP_PLUSI:
715    case OP_MINPLUSI:    case OP_MINPLUSI:
716    case OP_QUERYI:    case OP_QUERYI:
717    case OP_MINQUERYI:    case OP_MINQUERYI:
718      case OP_UPTOI:
719      case OP_MINUPTOI:
720      case OP_EXACTI:
721    case OP_POSSTARI:    case OP_POSSTARI:
722    case OP_POSPLUSI:    case OP_POSPLUSI:
723    case OP_POSQUERYI:    case OP_POSQUERYI:
724      case OP_POSUPTOI:
725    case OP_NOTSTAR:    case OP_NOTSTAR:
726    case OP_NOTMINSTAR:    case OP_NOTMINSTAR:
727    case OP_NOTPLUS:    case OP_NOTPLUS:
728    case OP_NOTMINPLUS:    case OP_NOTMINPLUS:
729    case OP_NOTQUERY:    case OP_NOTQUERY:
730    case OP_NOTMINQUERY:    case OP_NOTMINQUERY:
731      case OP_NOTUPTO:
732      case OP_NOTMINUPTO:
733      case OP_NOTEXACT:
734    case OP_NOTPOSSTAR:    case OP_NOTPOSSTAR:
735    case OP_NOTPOSPLUS:    case OP_NOTPOSPLUS:
736    case OP_NOTPOSQUERY:    case OP_NOTPOSQUERY:
737      case OP_NOTPOSUPTO:
738    case OP_NOTSTARI:    case OP_NOTSTARI:
739    case OP_NOTMINSTARI:    case OP_NOTMINSTARI:
740    case OP_NOTPLUSI:    case OP_NOTPLUSI:
741    case OP_NOTMINPLUSI:    case OP_NOTMINPLUSI:
742    case OP_NOTQUERYI:    case OP_NOTQUERYI:
743    case OP_NOTMINQUERYI:    case OP_NOTMINQUERYI:
   case OP_NOTPOSSTARI:  
   case OP_NOTPOSPLUSI:  
   case OP_NOTPOSQUERYI:  
   cc += 2;  
 #ifdef SUPPORT_UTF  
   if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
   return cc;  
   
   case OP_UPTO:  
   case OP_MINUPTO:  
   case OP_EXACT:  
   case OP_POSUPTO:  
   case OP_UPTOI:  
   case OP_MINUPTOI:  
   case OP_EXACTI:  
   case OP_POSUPTOI:  
   case OP_NOTUPTO:  
   case OP_NOTMINUPTO:  
   case OP_NOTEXACT:  
   case OP_NOTPOSUPTO:  
744    case OP_NOTUPTOI:    case OP_NOTUPTOI:
745    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
746    case OP_NOTEXACTI:    case OP_NOTEXACTI:
747      case OP_NOTPOSSTARI:
748      case OP_NOTPOSPLUSI:
749      case OP_NOTPOSQUERYI:
750    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
751    cc += 2 + IMM2_SIZE;    cc += PRIV(OP_lengths)[*cc];
752  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
753    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
754  #endif  #endif
755    return cc;    return cc;
756    
757    case OP_NOTPROP:    /* Special cases. */
758    case OP_PROP:    case OP_TYPESTAR:
759    return cc + 1 + 2;    case OP_TYPEMINSTAR:
760      case OP_TYPEPLUS:
761      case OP_TYPEMINPLUS:
762      case OP_TYPEQUERY:
763      case OP_TYPEMINQUERY:
764    case OP_TYPEUPTO:    case OP_TYPEUPTO:
765    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
766    case OP_TYPEEXACT:    case OP_TYPEEXACT:
767      case OP_TYPEPOSSTAR:
768      case OP_TYPEPOSPLUS:
769      case OP_TYPEPOSQUERY:
770    case OP_TYPEPOSUPTO:    case OP_TYPEPOSUPTO:
771    case OP_REF:    return cc + PRIV(OP_lengths)[*cc] - 1;
   case OP_REFI:  
   case OP_CREF:  
   case OP_NCREF:  
   case OP_RREF:  
   case OP_NRREF:  
   case OP_CLOSE:  
   cc += 1 + IMM2_SIZE;  
   return cc;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   return cc + 1 + 2 * IMM2_SIZE;  
772    
773    case OP_CLASS:    case OP_ANYBYTE:
774    case OP_NCLASS:  #ifdef SUPPORT_UTF
775    return cc + 1 + 32 / sizeof(pcre_uchar);    if (common->utf) return NULL;
776    #endif
777      return cc + 1;
778    
779  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
780    case OP_XCLASS:    case OP_XCLASS:
781    return cc + GET(cc, 1);    return cc + GET(cc, 1);
782  #endif  #endif
783    
   case OP_RECURSE:  
   case OP_ASSERT:  
   case OP_ASSERT_NOT:  
   case OP_ASSERTBACK:  
   case OP_ASSERTBACK_NOT:  
   case OP_REVERSE:  
   case OP_ONCE:  
   case OP_ONCE_NC:  
   case OP_BRA:  
   case OP_BRAPOS:  
   case OP_COND:  
   case OP_SBRA:  
   case OP_SBRAPOS:  
   case OP_SCOND:  
   case OP_ALT:  
   case OP_KET:  
   case OP_KETRMAX:  
   case OP_KETRMIN:  
   case OP_KETRPOS:  
   return cc + 1 + LINK_SIZE;  
   
   case OP_CBRA:  
   case OP_CBRAPOS:  
   case OP_SCBRA:  
   case OP_SCBRAPOS:  
   return cc + 1 + LINK_SIZE + IMM2_SIZE;  
   
784    case OP_MARK:    case OP_MARK:
785      case OP_PRUNE_ARG:
786      case OP_SKIP_ARG:
787      case OP_THEN_ARG:
788    return cc + 1 + 2 + cc[1];    return cc + 1 + 2 + cc[1];
789    
790    default:    default:
791      /* All opcodes are supported now! */
792      SLJIT_ASSERT_STOP();
793    return NULL;    return NULL;
794    }    }
795  }  }
796    
797  static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
798  {  {
799  int localspace = 0;  int count;
800  pcre_uchar *alternative;  pcre_uchar *slot;
801    pcre_uchar *assert_back_end = cc - 1;
802    
803  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
804  while (cc < ccend)  while (cc < ccend)
805    {    {
# Line 678  while (cc < ccend) Line 807  while (cc < ccend)
807      {      {
808      case OP_SET_SOM:      case OP_SET_SOM:
809      common->has_set_som = TRUE;      common->has_set_som = TRUE;
810        common->might_be_empty = TRUE;
811      cc += 1;      cc += 1;
812      break;      break;
813    
814      case OP_ASSERT:      case OP_REF:
815      case OP_ASSERT_NOT:      case OP_REFI:
816      case OP_ASSERTBACK:      common->optimized_cbracket[GET2(cc, 1)] = 0;
817      case OP_ASSERTBACK_NOT:      cc += 1 + IMM2_SIZE;
     case OP_ONCE:  
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
     case OP_SCOND:  
     localspace += sizeof(sljit_w);  
     cc += 1 + LINK_SIZE;  
818      break;      break;
819    
820      case OP_CBRAPOS:      case OP_CBRAPOS:
821      case OP_SCBRAPOS:      case OP_SCBRAPOS:
822      localspace += sizeof(sljit_w);      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
823      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
824      break;      break;
825    
826      case OP_COND:      case OP_COND:
827      /* Might be a hidden SCOND. */      case OP_SCOND:
828      alternative = cc + GET(cc, 1);      /* Only AUTO_CALLOUT can insert this opcode. We do
829      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)         not intend to support this case. */
830        localspace += sizeof(sljit_w);      if (cc[1 + LINK_SIZE] == OP_CALLOUT)
831          return FALSE;
832      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
833      break;      break;
834    
835        case OP_CREF:
836        common->optimized_cbracket[GET2(cc, 1)] = 0;
837        cc += 1 + IMM2_SIZE;
838        break;
839    
840        case OP_DNREF:
841        case OP_DNREFI:
842        case OP_DNCREF:
843        count = GET2(cc, 1 + IMM2_SIZE);
844        slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
845        while (count-- > 0)
846          {
847          common->optimized_cbracket[GET2(slot, 0)] = 0;
848          slot += common->name_entry_size;
849          }
850        cc += 1 + 2 * IMM2_SIZE;
851        break;
852    
853      case OP_RECURSE:      case OP_RECURSE:
854      /* Set its value only once. */      /* Set its value only once. */
855      if (common->recursive_head == 0)      if (common->recursive_head_ptr == 0)
856          {
857          common->recursive_head_ptr = common->ovector_start;
858          common->ovector_start += sizeof(sljit_sw);
859          }
860        cc += 1 + LINK_SIZE;
861        break;
862    
863        case OP_CALLOUT:
864        if (common->capture_last_ptr == 0)
865        {        {
866        common->recursive_head = common->ovector_start;        common->capture_last_ptr = common->ovector_start;
867        common->ovector_start += sizeof(sljit_w);        common->ovector_start += sizeof(sljit_sw);
868        }        }
869        cc += 2 + 2 * LINK_SIZE;
870        break;
871    
872        case OP_ASSERTBACK:
873        slot = bracketend(cc);
874        if (slot > assert_back_end)
875          assert_back_end = slot;
876      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
877      break;      break;
878    
879        case OP_THEN_ARG:
880        common->has_then = TRUE;
881        common->control_head_ptr = 1;
882        /* Fall through. */
883    
884        case OP_PRUNE_ARG:
885      case OP_MARK:      case OP_MARK:
886      if (common->mark_ptr == 0)      if (common->mark_ptr == 0)
887        {        {
888        common->mark_ptr = common->ovector_start;        common->mark_ptr = common->ovector_start;
889        common->ovector_start += sizeof(sljit_w);        common->ovector_start += sizeof(sljit_sw);
890        }        }
891      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
892      break;      break;
893    
894        case OP_THEN:
895        common->has_then = TRUE;
896        common->control_head_ptr = 1;
897        cc += 1;
898        break;
899    
900        case OP_SKIP:
901        if (cc < assert_back_end)
902          common->has_skip_in_assert_back = TRUE;
903        cc += 1;
904        break;
905    
906        case OP_SKIP_ARG:
907        common->control_head_ptr = 1;
908        common->has_skip_arg = TRUE;
909        if (cc < assert_back_end)
910          common->has_skip_in_assert_back = TRUE;
911        cc += 1 + 2 + cc[1];
912        break;
913    
914      default:      default:
915      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
916      if (cc == NULL)      if (cc == NULL)
917        return -1;        return FALSE;
918      break;      break;
919      }      }
920    }    }
921  return localspace;  return TRUE;
922  }  }
923    
924  static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)  static BOOL is_accelerated_repeat(pcre_uchar *cc)
925  {  {
926  pcre_uchar *cc = common->start;  switch(*cc)
 pcre_uchar *alternative;  
 while (cc < ccend)  
927    {    {
928    switch(*cc)    case OP_TYPESTAR:
929      {    case OP_TYPEMINSTAR:
930      case OP_ASSERT:    case OP_TYPEPLUS:
931      case OP_ASSERT_NOT:    case OP_TYPEMINPLUS:
932      case OP_ASSERTBACK:    case OP_TYPEPOSSTAR:
933      case OP_ASSERTBACK_NOT:    case OP_TYPEPOSPLUS:
934      case OP_ONCE:    return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
935      case OP_ONCE_NC:  
936      case OP_BRAPOS:    case OP_STAR:
937      case OP_SBRA:    case OP_MINSTAR:
938      case OP_SBRAPOS:    case OP_PLUS:
939      case OP_SCOND:    case OP_MINPLUS:
940      common->localptrs[cc - common->start] = localptr;    case OP_POSSTAR:
941      localptr += sizeof(sljit_w);    case OP_POSPLUS:
942      cc += 1 + LINK_SIZE;  
943      break;    case OP_STARI:
944      case OP_MINSTARI:
945      case OP_CBRAPOS:    case OP_PLUSI:
946      case OP_SCBRAPOS:    case OP_MINPLUSI:
947      common->localptrs[cc - common->start] = localptr;    case OP_POSSTARI:
948      localptr += sizeof(sljit_w);    case OP_POSPLUSI:
949      cc += 1 + LINK_SIZE + IMM2_SIZE;  
950      break;    case OP_NOTSTAR:
951      case OP_NOTMINSTAR:
952      case OP_NOTPLUS:
953      case OP_NOTMINPLUS:
954      case OP_NOTPOSSTAR:
955      case OP_NOTPOSPLUS:
956    
957      case OP_NOTSTARI:
958      case OP_NOTMINSTARI:
959      case OP_NOTPLUSI:
960      case OP_NOTMINPLUSI:
961      case OP_NOTPOSSTARI:
962      case OP_NOTPOSPLUSI:
963      return TRUE;
964    
965      case OP_CLASS:
966      case OP_NCLASS:
967    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
968      case OP_XCLASS:
969      cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
970    #else
971      cc += (1 + (32 / sizeof(pcre_uchar)));
972    #endif
973    
974      switch(*cc)
975        {
976        case OP_CRSTAR:
977        case OP_CRMINSTAR:
978        case OP_CRPLUS:
979        case OP_CRMINPLUS:
980        case OP_CRPOSSTAR:
981        case OP_CRPOSPLUS:
982        return TRUE;
983        }
984      break;
985      }
986    return FALSE;
987    }
988    
989    static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_si depth)
990    {
991      pcre_uchar *next_alt;
992    
993      SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
994    
995      if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
996        return;
997    
998      next_alt = bracketend(cc) - (1 + LINK_SIZE);
999      if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1000        return;
1001    
1002      do
1003        {
1004        next_alt = cc + GET(cc, 1);
1005    
1006        cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1007    
1008        while (TRUE)
1009          {
1010          switch(*cc)
1011            {
1012            case OP_SOD:
1013            case OP_SOM:
1014            case OP_SET_SOM:
1015            case OP_NOT_WORD_BOUNDARY:
1016            case OP_WORD_BOUNDARY:
1017            case OP_EODN:
1018            case OP_EOD:
1019            case OP_CIRC:
1020            case OP_CIRCM:
1021            case OP_DOLL:
1022            case OP_DOLLM:
1023            /* Zero width assertions. */
1024            cc++;
1025            continue;
1026            }
1027          break;
1028          }
1029    
1030        if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1031          detect_fast_fail(common, cc, private_data_start, depth - 1);
1032    
1033        if (is_accelerated_repeat(cc))
1034          {
1035          common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1036    
1037          if (common->fast_fail_start_ptr == 0)
1038            common->fast_fail_start_ptr = *private_data_start;
1039    
1040          *private_data_start += sizeof(sljit_sw);
1041          common->fast_fail_end_ptr = *private_data_start;
1042    
1043          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1044            return;
1045          }
1046    
1047        cc = next_alt;
1048        }
1049      while (*cc == OP_ALT);
1050    }
1051    
1052    static int get_class_iterator_size(pcre_uchar *cc)
1053    {
1054    sljit_ui min;
1055    sljit_ui max;
1056    switch(*cc)
1057      {
1058      case OP_CRSTAR:
1059      case OP_CRPLUS:
1060      return 2;
1061    
1062      case OP_CRMINSTAR:
1063      case OP_CRMINPLUS:
1064      case OP_CRQUERY:
1065      case OP_CRMINQUERY:
1066      return 1;
1067    
1068      case OP_CRRANGE:
1069      case OP_CRMINRANGE:
1070      min = GET2(cc, 1);
1071      max = GET2(cc, 1 + IMM2_SIZE);
1072      if (max == 0)
1073        return (*cc == OP_CRRANGE) ? 2 : 1;
1074      max -= min;
1075      if (max > 2)
1076        max = 2;
1077      return max;
1078    
1079      default:
1080      return 0;
1081      }
1082    }
1083    
1084    static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1085    {
1086    pcre_uchar *end = bracketend(begin);
1087    pcre_uchar *next;
1088    pcre_uchar *next_end;
1089    pcre_uchar *max_end;
1090    pcre_uchar type;
1091    sljit_sw length = end - begin;
1092    int min, max, i;
1093    
1094    /* Detect fixed iterations first. */
1095    if (end[-(1 + LINK_SIZE)] != OP_KET)
1096      return FALSE;
1097    
1098    /* Already detected repeat. */
1099    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1100      return TRUE;
1101    
1102    next = end;
1103    min = 1;
1104    while (1)
1105      {
1106      if (*next != *begin)
1107        break;
1108      next_end = bracketend(next);
1109      if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1110        break;
1111      next = next_end;
1112      min++;
1113      }
1114    
1115    if (min == 2)
1116      return FALSE;
1117    
1118    max = 0;
1119    max_end = next;
1120    if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1121      {
1122      type = *next;
1123      while (1)
1124        {
1125        if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1126          break;
1127        next_end = bracketend(next + 2 + LINK_SIZE);
1128        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1129          break;
1130        next = next_end;
1131        max++;
1132        }
1133    
1134      if (next[0] == type && next[1] == *begin && max >= 1)
1135        {
1136        next_end = bracketend(next + 1);
1137        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1138          {
1139          for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1140            if (*next_end != OP_KET)
1141              break;
1142    
1143          if (i == max)
1144            {
1145            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1146            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1147            /* +2 the original and the last. */
1148            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1149            if (min == 1)
1150              return TRUE;
1151            min--;
1152            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1153            }
1154          }
1155        }
1156      }
1157    
1158    if (min >= 3)
1159      {
1160      common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1161      common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1162      common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1163      return TRUE;
1164      }
1165    
1166    return FALSE;
1167    }
1168    
/* Case label lists for the single character and type iterators. In
set_private_data_ptrs the _1 lists reserve one private data slot and the
_2A / _2B lists reserve two (the type _2A list only when the repeated type is
neither OP_ANYNL nor OP_EXTUNI). The _2B lists hold the bounded (UPTO)
forms. The order of the labels inside a list has no significance. */

/* Minimizing star / plus and both query forms of character iterators. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

/* Greedy star / plus forms of character iterators. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

/* Bounded (upto) forms of character iterators. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* Minimizing star / plus and both query forms of type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Greedy star / plus forms of type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Bounded (upto) forms of type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:

1220    
1221    static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1222    {
1223    pcre_uchar *cc = common->start;
1224    pcre_uchar *alternative;
1225    pcre_uchar *end = NULL;
1226    int private_data_ptr = *private_data_start;
1227    int space, size, bracketlen;
1228    BOOL repeat_check = TRUE;
1229    
1230    while (cc < ccend)
1231      {
1232      space = 0;
1233      size = 0;
1234      bracketlen = 0;
1235      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1236        break;
1237    
1238      if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1239        {
1240        if (detect_repeat(common, cc))
1241          {
1242          /* These brackets are converted to repeats, so no global
1243          based single character repeat is allowed. */
1244          if (cc >= end)
1245            end = bracketend(cc);
1246          }
1247        }
1248      repeat_check = TRUE;
1249    
1250      switch(*cc)
1251        {
1252        case OP_KET:
1253        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1254          {
1255          common->private_data_ptrs[cc - common->start] = private_data_ptr;
1256          private_data_ptr += sizeof(sljit_sw);
1257          cc += common->private_data_ptrs[cc + 1 - common->start];
1258          }
1259        cc += 1 + LINK_SIZE;
1260        break;
1261    
1262        case OP_ASSERT:
1263        case OP_ASSERT_NOT:
1264        case OP_ASSERTBACK:
1265        case OP_ASSERTBACK_NOT:
1266        case OP_ONCE:
1267        case OP_ONCE_NC:
1268        case OP_BRAPOS:
1269        case OP_SBRA:
1270        case OP_SBRAPOS:
1271        case OP_SCOND:
1272        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1273        private_data_ptr += sizeof(sljit_sw);
1274        bracketlen = 1 + LINK_SIZE;
1275        break;
1276    
1277        case OP_CBRAPOS:
1278        case OP_SCBRAPOS:
1279        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1280        private_data_ptr += sizeof(sljit_sw);
1281        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1282        break;
1283    
1284      case OP_COND:      case OP_COND:
1285      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1286      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1287      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1288        {        {
1289        common->localptrs[cc - common->start] = localptr;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1290        localptr += sizeof(sljit_w);        private_data_ptr += sizeof(sljit_sw);
1291        }        }
1292      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1293        break;
1294    
1295        case OP_BRA:
1296        bracketlen = 1 + LINK_SIZE;
1297        break;
1298    
1299        case OP_CBRA:
1300        case OP_SCBRA:
1301        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1302        break;
1303    
1304        case OP_BRAZERO:
1305        case OP_BRAMINZERO:
1306        case OP_BRAPOSZERO:
1307        repeat_check = FALSE;
1308        size = 1;
1309        break;
1310    
1311        CASE_ITERATOR_PRIVATE_DATA_1
1312        space = 1;
1313        size = -2;
1314        break;
1315    
1316        CASE_ITERATOR_PRIVATE_DATA_2A
1317        space = 2;
1318        size = -2;
1319        break;
1320    
1321        CASE_ITERATOR_PRIVATE_DATA_2B
1322        space = 2;
1323        size = -(2 + IMM2_SIZE);
1324        break;
1325    
1326        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1327        space = 1;
1328        size = 1;
1329        break;
1330    
1331        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1332        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1333          space = 2;
1334        size = 1;
1335        break;
1336    
1337        case OP_TYPEUPTO:
1338        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1339          space = 2;
1340        size = 1 + IMM2_SIZE;
1341        break;
1342    
1343        case OP_TYPEMINUPTO:
1344        space = 2;
1345        size = 1 + IMM2_SIZE;
1346        break;
1347    
1348        case OP_CLASS:
1349        case OP_NCLASS:
1350        size += 1 + 32 / sizeof(pcre_uchar);
1351        space = get_class_iterator_size(cc + size);
1352        break;
1353    
1354    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1355        case OP_XCLASS:
1356        size = GET(cc, 1);
1357        space = get_class_iterator_size(cc + size);
1358      break;      break;
1359    #endif
1360    
1361      default:      default:
1362      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1363      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1364      break;      break;
1365      }      }
1366    
1367      /* Character iterators, which are not inside a repeated bracket,
1368         gets a private slot instead of allocating it on the stack. */
1369      if (space > 0 && cc >= end)
1370        {
1371        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1372        private_data_ptr += sizeof(sljit_sw) * space;
1373        }
1374    
1375      if (size != 0)
1376        {
1377        if (size < 0)
1378          {
1379          cc += -size;
1380    #ifdef SUPPORT_UTF
1381          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1382    #endif
1383          }
1384        else
1385          cc += size;
1386        }
1387    
1388      if (bracketlen > 0)
1389        {
1390        if (cc >= end)
1391          {
1392          end = bracketend(cc);
1393          if (end[-1 - LINK_SIZE] == OP_KET)
1394            end = NULL;
1395          }
1396        cc += bracketlen;
1397        }
1398    }    }
1399    *private_data_start = private_data_ptr;
1400  }  }
1401    
1402  /* Returns with -1 if no need for frame. */  /* Returns with a frame_types (always < 0) if no need for frame. */
1403  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1404  {  {
 pcre_uchar *ccend = bracketend(cc);  
1405  int length = 0;  int length = 0;
1406  BOOL possessive = FALSE;  int possessive = 0;
1407    BOOL stack_restore = FALSE;
1408  BOOL setsom_found = recursive;  BOOL setsom_found = recursive;
1409  BOOL setmark_found = recursive;  BOOL setmark_found = recursive;
1410    /* The last capture is a local variable even for recursions. */
1411    BOOL capture_last_found = FALSE;
1412    
1413    #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1414    SLJIT_ASSERT(common->control_head_ptr != 0);
1415    *needs_control_head = TRUE;
1416    #else
1417    *needs_control_head = FALSE;
1418    #endif
1419    
1420  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  if (ccend == NULL)
1421    {    {
1422    length = 3;    ccend = bracketend(cc) - (1 + LINK_SIZE);
1423    possessive = TRUE;    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1424        {
1425        possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1426        /* This is correct regardless of common->capture_last_ptr. */
1427        capture_last_found = TRUE;
1428        }
1429      cc = next_opcode(common, cc);
1430    }    }
1431    
 cc = next_opcode(common, cc);  
1432  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1433  while (cc < ccend)  while (cc < ccend)
1434    switch(*cc)    switch(*cc)
1435      {      {
1436      case OP_SET_SOM:      case OP_SET_SOM:
1437      SLJIT_ASSERT(common->has_set_som);      SLJIT_ASSERT(common->has_set_som);
1438        stack_restore = TRUE;
1439      if (!setsom_found)      if (!setsom_found)
1440        {        {
1441        length += 2;        length += 2;
# Line 818  while (cc < ccend) Line 1445  while (cc < ccend)
1445      break;      break;
1446    
1447      case OP_MARK:      case OP_MARK:
1448        case OP_PRUNE_ARG:
1449        case OP_THEN_ARG:
1450      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1451        stack_restore = TRUE;
1452      if (!setmark_found)      if (!setmark_found)
1453        {        {
1454        length += 2;        length += 2;
1455        setmark_found = TRUE;        setmark_found = TRUE;
1456        }        }
1457        if (common->control_head_ptr != 0)
1458          *needs_control_head = TRUE;
1459      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
1460      break;      break;
1461    
1462      case OP_RECURSE:      case OP_RECURSE:
1463        stack_restore = TRUE;
1464      if (common->has_set_som && !setsom_found)      if (common->has_set_som && !setsom_found)
1465        {        {
1466        length += 2;        length += 2;
# Line 838  while (cc < ccend) Line 1471  while (cc < ccend)
1471        length += 2;        length += 2;
1472        setmark_found = TRUE;        setmark_found = TRUE;
1473        }        }
1474        if (common->capture_last_ptr != 0 && !capture_last_found)
1475          {
1476          length += 2;
1477          capture_last_found = TRUE;
1478          }
1479      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1480      break;      break;
1481    
# Line 845  while (cc < ccend) Line 1483  while (cc < ccend)
1483      case OP_CBRAPOS:      case OP_CBRAPOS:
1484      case OP_SCBRA:      case OP_SCBRA:
1485      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1486        stack_restore = TRUE;
1487        if (common->capture_last_ptr != 0 && !capture_last_found)
1488          {
1489          length += 2;
1490          capture_last_found = TRUE;
1491          }
1492      length += 3;      length += 3;
1493      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1494      break;      break;
1495    
1496      default:      case OP_THEN:
1497      cc = next_opcode(common, cc);      stack_restore = TRUE;
1498      SLJIT_ASSERT(cc != NULL);      if (common->control_head_ptr != 0)
1499          *needs_control_head = TRUE;
1500        cc ++;
1501      break;      break;
     }  
1502    
1503  /* Possessive quantifiers can use a special case. */      default:
1504  if (SLJIT_UNLIKELY(possessive) && length == 3)      stack_restore = TRUE;
1505    return -1;      /* Fall through. */
   
 if (length > 0)  
   return length + 1;  
 return -1;  
 }  
1506    
1507  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)      case OP_NOT_WORD_BOUNDARY:
1508  {      case OP_WORD_BOUNDARY:
1509        case OP_NOT_DIGIT:
1510        case OP_DIGIT:
1511        case OP_NOT_WHITESPACE:
1512        case OP_WHITESPACE:
1513        case OP_NOT_WORDCHAR:
1514        case OP_WORDCHAR:
1515        case OP_ANY:
1516        case OP_ALLANY:
1517        case OP_ANYBYTE:
1518        case OP_NOTPROP:
1519        case OP_PROP:
1520        case OP_ANYNL:
1521        case OP_NOT_HSPACE:
1522        case OP_HSPACE:
1523        case OP_NOT_VSPACE:
1524        case OP_VSPACE:
1525        case OP_EXTUNI:
1526        case OP_EODN:
1527        case OP_EOD:
1528        case OP_CIRC:
1529        case OP_CIRCM:
1530        case OP_DOLL:
1531        case OP_DOLLM:
1532        case OP_CHAR:
1533        case OP_CHARI:
1534        case OP_NOT:
1535        case OP_NOTI:
1536    
1537        case OP_EXACT:
1538        case OP_POSSTAR:
1539        case OP_POSPLUS:
1540        case OP_POSQUERY:
1541        case OP_POSUPTO:
1542    
1543        case OP_EXACTI:
1544        case OP_POSSTARI:
1545        case OP_POSPLUSI:
1546        case OP_POSQUERYI:
1547        case OP_POSUPTOI:
1548    
1549        case OP_NOTEXACT:
1550        case OP_NOTPOSSTAR:
1551        case OP_NOTPOSPLUS:
1552        case OP_NOTPOSQUERY:
1553        case OP_NOTPOSUPTO:
1554    
1555        case OP_NOTEXACTI:
1556        case OP_NOTPOSSTARI:
1557        case OP_NOTPOSPLUSI:
1558        case OP_NOTPOSQUERYI:
1559        case OP_NOTPOSUPTOI:
1560    
1561        case OP_TYPEEXACT:
1562        case OP_TYPEPOSSTAR:
1563        case OP_TYPEPOSPLUS:
1564        case OP_TYPEPOSQUERY:
1565        case OP_TYPEPOSUPTO:
1566    
1567        case OP_CLASS:
1568        case OP_NCLASS:
1569        case OP_XCLASS:
1570        case OP_CALLOUT:
1571    
1572        cc = next_opcode(common, cc);
1573        SLJIT_ASSERT(cc != NULL);
1574        break;
1575        }
1576    
1577    /* Possessive quantifiers can use a special case. */
1578    if (SLJIT_UNLIKELY(possessive == length))
1579      return stack_restore ? no_frame : no_stack;
1580    
1581    if (length > 0)
1582      return length + 1;
1583    return stack_restore ? no_frame : no_stack;
1584    }
1585    
1586    static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1587    {
1588  DEFINE_COMPILER;  DEFINE_COMPILER;
 pcre_uchar *ccend = bracketend(cc);  
1589  BOOL setsom_found = recursive;  BOOL setsom_found = recursive;
1590  BOOL setmark_found = recursive;  BOOL setmark_found = recursive;
1591    /* The last capture is a local variable even for recursions. */
1592    BOOL capture_last_found = FALSE;
1593  int offset;  int offset;
1594    
1595  /* >= 1 + shortest item size (2) */  /* >= 1 + shortest item size (2) */
# Line 877  SLJIT_UNUSED_ARG(stacktop); Line 1597  SLJIT_UNUSED_ARG(stacktop);
1597  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1598    
1599  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
1600  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (ccend == NULL)
1601    cc = next_opcode(common, cc);    {
1602      ccend = bracketend(cc) - (1 + LINK_SIZE);
1603      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1604        cc = next_opcode(common, cc);
1605      }
1606    
1607  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1608  while (cc < ccend)  while (cc < ccend)
1609    switch(*cc)    switch(*cc)
# Line 887  while (cc < ccend) Line 1612  while (cc < ccend)
1612      SLJIT_ASSERT(common->has_set_som);      SLJIT_ASSERT(common->has_set_som);
1613      if (!setsom_found)      if (!setsom_found)
1614        {        {
1615        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1616        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1617        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1618        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1619        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1620        setsom_found = TRUE;        setsom_found = TRUE;
1621        }        }
1622      cc += 1;      cc += 1;
1623      break;      break;
1624    
1625      case OP_MARK:      case OP_MARK:
1626        case OP_PRUNE_ARG:
1627        case OP_THEN_ARG:
1628      SLJIT_ASSERT(common->mark_ptr != 0);      SLJIT_ASSERT(common->mark_ptr != 0);
1629      if (!setmark_found)      if (!setmark_found)
1630        {        {
1631        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1632        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1633        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1634        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1635        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1636        setmark_found = TRUE;        setmark_found = TRUE;
1637        }        }
1638      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
# Line 914  while (cc < ccend) Line 1641  while (cc < ccend)
1641      case OP_RECURSE:      case OP_RECURSE:
1642      if (common->has_set_som && !setsom_found)      if (common->has_set_som && !setsom_found)
1643        {        {
1644        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1645        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1646        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1647        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1648        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1649        setsom_found = TRUE;        setsom_found = TRUE;
1650        }        }
1651      if (common->mark_ptr != 0 && !setmark_found)      if (common->mark_ptr != 0 && !setmark_found)
1652        {        {
1653        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1654        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1655        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1656        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1657        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1658        setmark_found = TRUE;        setmark_found = TRUE;
1659        }        }
1660        if (common->capture_last_ptr != 0 && !capture_last_found)
1661          {
1662          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1663          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1664          stackpos += (int)sizeof(sljit_sw);
1665          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1666          stackpos += (int)sizeof(sljit_sw);
1667          capture_last_found = TRUE;
1668          }
1669      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1670      break;      break;
1671    
# Line 937  while (cc < ccend) Line 1673  while (cc < ccend)
1673      case OP_CBRAPOS:      case OP_CBRAPOS:
1674      case OP_SCBRA:      case OP_SCBRA:
1675      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1676        if (common->capture_last_ptr != 0 && !capture_last_found)
1677          {
1678          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1679          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1680          stackpos += (int)sizeof(sljit_sw);
1681          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1682          stackpos += (int)sizeof(sljit_sw);
1683          capture_last_found = TRUE;
1684          }
1685      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1686      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1687      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1688      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1689      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1690      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1691      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1692      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1693      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1694    
1695      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1696      break;      break;
# Line 956  while (cc < ccend) Line 1701  while (cc < ccend)
1701      break;      break;
1702      }      }
1703    
1704  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1705  SLJIT_ASSERT(stackpos == STACK(stacktop));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1706  }  }
1707    
1708  static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1709  {  {
1710  int localsize = 2;  int private_data_length = needs_control_head ? 3 : 2;
1711    int size;
1712  pcre_uchar *alternative;  pcre_uchar *alternative;
1713  /* Calculate the sum of the local variables. */  /* Calculate the sum of the private machine words. */
1714  while (cc < ccend)  while (cc < ccend)
1715    {    {
1716      size = 0;
1717    switch(*cc)    switch(*cc)
1718      {      {
1719        case OP_KET:
1720        if (PRIVATE_DATA(cc) != 0)
1721          {
1722          private_data_length++;
1723          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1724          cc += PRIVATE_DATA(cc + 1);
1725          }
1726        cc += 1 + LINK_SIZE;
1727        break;
1728    
1729      case OP_ASSERT:      case OP_ASSERT:
1730      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1731      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 979  while (cc < ccend) Line 1736  while (cc < ccend)
1736      case OP_SBRA:      case OP_SBRA:
1737      case OP_SBRAPOS:      case OP_SBRAPOS:
1738      case OP_SCOND:      case OP_SCOND:
1739      localsize++;      private_data_length++;
1740        SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1741      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1742      break;      break;
1743    
1744      case OP_CBRA:      case OP_CBRA:
1745      case OP_SCBRA:      case OP_SCBRA:
1746      localsize++;      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1747          private_data_length++;
1748      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1749      break;      break;
1750    
1751      case OP_CBRAPOS:      case OP_CBRAPOS:
1752      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1753      localsize += 2;      private_data_length += 2;
1754      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1755      break;      break;
1756    
# Line 999  while (cc < ccend) Line 1758  while (cc < ccend)
1758      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1759      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1760      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1761        localsize++;        private_data_length++;
1762      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1763      break;      break;
1764    
1765        CASE_ITERATOR_PRIVATE_DATA_1
1766        if (PRIVATE_DATA(cc))
1767          private_data_length++;
1768        cc += 2;
1769    #ifdef SUPPORT_UTF
1770        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1771    #endif
1772        break;
1773    
1774        CASE_ITERATOR_PRIVATE_DATA_2A
1775        if (PRIVATE_DATA(cc))
1776          private_data_length += 2;
1777        cc += 2;
1778    #ifdef SUPPORT_UTF
1779        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1780    #endif
1781        break;
1782    
1783        CASE_ITERATOR_PRIVATE_DATA_2B
1784        if (PRIVATE_DATA(cc))
1785          private_data_length += 2;
1786        cc += 2 + IMM2_SIZE;
1787    #ifdef SUPPORT_UTF
1788        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1789    #endif
1790        break;
1791    
1792        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1793        if (PRIVATE_DATA(cc))
1794          private_data_length++;
1795        cc += 1;
1796        break;
1797    
1798        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1799        if (PRIVATE_DATA(cc))
1800          private_data_length += 2;
1801        cc += 1;
1802        break;
1803    
1804        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1805        if (PRIVATE_DATA(cc))
1806          private_data_length += 2;
1807        cc += 1 + IMM2_SIZE;
1808        break;
1809    
1810        case OP_CLASS:
1811        case OP_NCLASS:
1812    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1813        case OP_XCLASS:
1814        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1815    #else
1816        size = 1 + 32 / (int)sizeof(pcre_uchar);
1817    #endif
1818        if (PRIVATE_DATA(cc))
1819          private_data_length += get_class_iterator_size(cc + size);
1820        cc += size;
1821        break;
1822    
1823      default:      default:
1824      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1825      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
# Line 1010  while (cc < ccend) Line 1827  while (cc < ccend)
1827      }      }
1828    }    }
1829  SLJIT_ASSERT(cc == ccend);  SLJIT_ASSERT(cc == ccend);
1830  return localsize;  return private_data_length;
1831  }  }
1832    
1833  static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1834    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1835  {  {
1836  DEFINE_COMPILER;  DEFINE_COMPILER;
1837  int srcw[2];  int srcw[2];
1838  int count;  int count, size;
1839  BOOL tmp1next = TRUE;  BOOL tmp1next = TRUE;
1840  BOOL tmp1empty = TRUE;  BOOL tmp1empty = TRUE;
1841  BOOL tmp2empty = TRUE;  BOOL tmp2empty = TRUE;
# Line 1035  stacktop = STACK(stacktop - 1); Line 1852  stacktop = STACK(stacktop - 1);
1852    
1853  if (!save)  if (!save)
1854    {    {
1855    stackptr += sizeof(sljit_w);    stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1856    if (stackptr < stacktop)    if (stackptr < stacktop)
1857      {      {
1858      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1859      stackptr += sizeof(sljit_w);      stackptr += sizeof(sljit_sw);
1860      tmp1empty = FALSE;      tmp1empty = FALSE;
1861      }      }
1862    if (stackptr < stacktop)    if (stackptr < stacktop)
1863      {      {
1864      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1865      stackptr += sizeof(sljit_w);      stackptr += sizeof(sljit_sw);
1866      tmp2empty = FALSE;      tmp2empty = FALSE;
1867      }      }
1868    /* The tmp1next must be TRUE in either way. */    /* The tmp1next must be TRUE in either way. */
1869    }    }
1870    
1871  while (status != end)  do
1872    {    {
1873    count = 0;    count = 0;
1874    switch(status)    switch(status)
1875      {      {
1876      case start:      case start:
1877      SLJIT_ASSERT(save && common->recursive_head != 0);      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1878      count = 1;      count = 1;
1879      srcw[0] = common->recursive_head;      srcw[0] = common->recursive_head_ptr;
1880        if (needs_control_head)
1881          {
1882          SLJIT_ASSERT(common->control_head_ptr != 0);
1883          count = 2;
1884          srcw[1] = common->control_head_ptr;
1885          }
1886      status = loop;      status = loop;
1887      break;      break;
1888    
# Line 1072  while (status != end) Line 1895  while (status != end)
1895    
1896      switch(*cc)      switch(*cc)
1897        {        {
1898          case OP_KET:
1899          if (PRIVATE_DATA(cc) != 0)
1900            {
1901            count = 1;
1902            srcw[0] = PRIVATE_DATA(cc);
1903            SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1904            cc += PRIVATE_DATA(cc + 1);
1905            }
1906          cc += 1 + LINK_SIZE;
1907          break;
1908    
1909        case OP_ASSERT:        case OP_ASSERT:
1910        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
1911        case OP_ASSERTBACK:        case OP_ASSERTBACK:
# Line 1083  while (status != end) Line 1917  while (status != end)
1917        case OP_SBRAPOS:        case OP_SBRAPOS:
1918        case OP_SCOND:        case OP_SCOND:
1919        count = 1;        count = 1;
1920        srcw[0] = PRIV_DATA(cc);        srcw[0] = PRIVATE_DATA(cc);
1921        SLJIT_ASSERT(srcw[0] != 0);        SLJIT_ASSERT(srcw[0] != 0);
1922        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1923        break;        break;
1924    
1925        case OP_CBRA:        case OP_CBRA:
1926        case OP_SCBRA:        case OP_SCBRA:
1927        count = 1;        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1928        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));          {
1929            count = 1;
1930            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1931            }
1932        cc += 1 + LINK_SIZE + IMM2_SIZE;        cc += 1 + LINK_SIZE + IMM2_SIZE;
1933        break;        break;
1934    
1935        case OP_CBRAPOS:        case OP_CBRAPOS:
1936        case OP_SCBRAPOS:        case OP_SCBRAPOS:
1937        count = 2;        count = 2;
1938          srcw[0] = PRIVATE_DATA(cc);
1939        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1940        srcw[0] = PRIV_DATA(cc);        SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
       SLJIT_ASSERT(srcw[0] != 0);  
1941        cc += 1 + LINK_SIZE + IMM2_SIZE;        cc += 1 + LINK_SIZE + IMM2_SIZE;
1942        break;        break;
1943    
# Line 1110  while (status != end) Line 1947  while (status != end)
1947        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1948          {          {
1949          count = 1;          count = 1;
1950          srcw[0] = PRIV_DATA(cc);          srcw[0] = PRIVATE_DATA(cc);
1951          SLJIT_ASSERT(srcw[0] != 0);          SLJIT_ASSERT(srcw[0] != 0);
1952          }          }
1953        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1954        break;        break;
1955    
1956          CASE_ITERATOR_PRIVATE_DATA_1
1957          if (PRIVATE_DATA(cc))
1958            {
1959            count = 1;
1960            srcw[0] = PRIVATE_DATA(cc);
1961            }
1962          cc += 2;
1963    #ifdef SUPPORT_UTF
1964          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1965    #endif
1966          break;
1967    
1968          CASE_ITERATOR_PRIVATE_DATA_2A
1969          if (PRIVATE_DATA(cc))
1970            {
1971            count = 2;
1972            srcw[0] = PRIVATE_DATA(cc);
1973            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1974            }
1975          cc += 2;
1976    #ifdef SUPPORT_UTF
1977          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1978    #endif
1979          break;
1980    
1981          CASE_ITERATOR_PRIVATE_DATA_2B
1982          if (PRIVATE_DATA(cc))
1983            {
1984            count = 2;
1985            srcw[0] = PRIVATE_DATA(cc);
1986            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1987            }
1988          cc += 2 + IMM2_SIZE;
1989    #ifdef SUPPORT_UTF
1990          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1991    #endif
1992          break;
1993    
1994          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1995          if (PRIVATE_DATA(cc))
1996            {
1997            count = 1;
1998            srcw[0] = PRIVATE_DATA(cc);
1999            }
2000          cc += 1;
2001          break;
2002    
2003          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2004          if (PRIVATE_DATA(cc))
2005            {
2006            count = 2;
2007            srcw[0] = PRIVATE_DATA(cc);
2008            srcw[1] = srcw[0] + sizeof(sljit_sw);
2009            }
2010          cc += 1;
2011          break;
2012    
2013          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2014          if (PRIVATE_DATA(cc))
2015            {
2016            count = 2;
2017            srcw[0] = PRIVATE_DATA(cc);
2018            srcw[1] = srcw[0] + sizeof(sljit_sw);
2019            }
2020          cc += 1 + IMM2_SIZE;
2021          break;
2022    
2023          case OP_CLASS:
2024          case OP_NCLASS:
2025    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2026          case OP_XCLASS:
2027          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2028    #else
2029          size = 1 + 32 / (int)sizeof(pcre_uchar);
2030    #endif
2031          if (PRIVATE_DATA(cc))
2032            switch(get_class_iterator_size(cc + size))
2033              {
2034              case 1:
2035              count = 1;
2036              srcw[0] = PRIVATE_DATA(cc);
2037              break;
2038    
2039              case 2:
2040              count = 2;
2041              srcw[0] = PRIVATE_DATA(cc);
2042              srcw[1] = srcw[0] + sizeof(sljit_sw);
2043              break;
2044    
2045              default:
2046              SLJIT_ASSERT_STOP();
2047              break;
2048              }
2049          cc += size;
2050          break;
2051    
2052        default:        default:
2053        cc = next_opcode(common, cc);        cc = next_opcode(common, cc);
2054        SLJIT_ASSERT(cc != NULL);        SLJIT_ASSERT(cc != NULL);
# Line 1138  while (status != end) Line 2071  while (status != end)
2071          if (!tmp1empty)          if (!tmp1empty)
2072            {            {
2073            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2074            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
2075            }            }
2076          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2077          tmp1empty = FALSE;          tmp1empty = FALSE;
2078          tmp1next = FALSE;          tmp1next = FALSE;
2079          }          }
# Line 1149  while (status != end) Line 2082  while (status != end)
2082          if (!tmp2empty)          if (!tmp2empty)
2083            {            {
2084            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2085            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
2086            }            }
2087          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2088          tmp2empty = FALSE;          tmp2empty = FALSE;
2089          tmp1next = TRUE;          tmp1next = TRUE;
2090          }          }
# Line 1161  while (status != end) Line 2094  while (status != end)
2094        if (tmp1next)        if (tmp1next)
2095          {          {
2096          SLJIT_ASSERT(!tmp1empty);          SLJIT_ASSERT(!tmp1empty);
2097          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2098          tmp1empty = stackptr >= stacktop;          tmp1empty = stackptr >= stacktop;
2099          if (!tmp1empty)          if (!tmp1empty)
2100            {            {
2101            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2102            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
2103            }            }
2104          tmp1next = FALSE;          tmp1next = FALSE;
2105          }          }
2106        else        else
2107          {          {
2108          SLJIT_ASSERT(!tmp2empty);          SLJIT_ASSERT(!tmp2empty);
2109          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2110          tmp2empty = stackptr >= stacktop;          tmp2empty = stackptr >= stacktop;
2111          if (!tmp2empty)          if (!tmp2empty)
2112            {            {
2113            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2114            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
2115            }            }
2116          tmp1next = TRUE;          tmp1next = TRUE;
2117          }          }
2118        }        }
2119      }      }
2120    }    }
2121    while (status != end);
2122    
2123  if (save)  if (save)
2124    {    {
# Line 1193  if (save) Line 2127  if (save)
2127      if (!tmp1empty)      if (!tmp1empty)
2128        {        {
2129        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2130        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
2131        }        }
2132      if (!tmp2empty)      if (!tmp2empty)
2133        {        {
2134        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2135        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
2136        }        }
2137      }      }
2138    else    else
# Line 1206  if (save) Line 2140  if (save)
2140      if (!tmp2empty)      if (!tmp2empty)
2141        {        {
2142        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2143        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
2144        }        }
2145      if (!tmp1empty)      if (!tmp1empty)
2146        {        {
2147        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2148        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
2149        }        }
2150      }      }
2151    }    }
2152  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2153  }  }
2154    
2155  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
2156    {
2157    pcre_uchar *end = bracketend(cc);
2158    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2159    
2160    /* Assert captures then. */
2161    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2162      current_offset = NULL;
2163    /* Conditional block does not. */
2164    if (*cc == OP_COND || *cc == OP_SCOND)
2165      has_alternatives = FALSE;
2166    
2167    cc = next_opcode(common, cc);
2168    if (has_alternatives)
2169      current_offset = common->then_offsets + (cc - common->start);
2170    
2171    while (cc < end)
2172      {
2173      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2174        cc = set_then_offsets(common, cc, current_offset);
2175      else
2176        {
2177        if (*cc == OP_ALT && has_alternatives)
2178          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2179        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2180          *current_offset = 1;
2181        cc = next_opcode(common, cc);
2182        }
2183      }
2184    
2185    return end;
2186    }
2187    
2188    #undef CASE_ITERATOR_PRIVATE_DATA_1
2189    #undef CASE_ITERATOR_PRIVATE_DATA_2A
2190    #undef CASE_ITERATOR_PRIVATE_DATA_2B
2191    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2192    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2193    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2194    
2195    static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2196  {  {
2197  return (value & (value - 1)) == 0;  return (value & (value - 1)) == 0;
2198  }  }
# Line 1228  static SLJIT_INLINE void set_jumps(jump_ Line 2202  static SLJIT_INLINE void set_jumps(jump_
2202  while (list)  while (list)
2203    {    {
2204    /* sljit_set_label is clever enough to do nothing    /* sljit_set_label is clever enough to do nothing
2205    if either the jump or the label is NULL */    if either the jump or the label is NULL. */
2206    sljit_set_label(list->jump, label);    SET_LABEL(list->jump, label);
2207    list = list->next;    list = list->next;
2208    }    }
2209  }  }
2210    
2211  static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)  static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2212  {  {
2213  jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));  jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2214  if (list_item)  if (list_item)
# Line 1245  if (list_item) Line 2219  if (list_item)
2219    }    }
2220  }  }
2221    
2222  static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)  static void add_stub(compiler_common *common, struct sljit_jump *start)
2223  {  {
2224  DEFINE_COMPILER;  DEFINE_COMPILER;
2225  stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));  stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2226    
2227  if (list_item)  if (list_item)
2228    {    {
   list_item->type = type;  
   list_item->data = data;  
2229    list_item->start = start;    list_item->start = start;
2230    list_item->leave = LABEL();    list_item->quit = LABEL();
2231    list_item->next = common->stubs;    list_item->next = common->stubs;
2232    common->stubs = list_item;    common->stubs = list_item;
2233    }    }
# Line 1264  if (list_item) Line 2236  if (list_item)
2236  static void flush_stubs(compiler_common *common)  static void flush_stubs(compiler_common *common)
2237  {  {
2238  DEFINE_COMPILER;  DEFINE_COMPILER;
2239  stub_list* list_item = common->stubs;  stub_list *list_item = common->stubs;
2240    
2241  while (list_item)  while (list_item)
2242    {    {
2243    JUMPHERE(list_item->start);    JUMPHERE(list_item->start);
2244    switch(list_item->type)    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2245      {    JUMPTO(SLJIT_JUMP, list_item->quit);
     case stack_alloc:  
     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));  
     break;  
     }  
   JUMPTO(SLJIT_JUMP, list_item->leave);  
2246    list_item = list_item->next;    list_item = list_item->next;
2247    }    }
2248  common->stubs = NULL;  common->stubs = NULL;
2249  }  }
2250    
2251  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2252    {
2253    DEFINE_COMPILER;
2254    label_addr_list *label_addr;
2255    
2256    label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2257    if (label_addr == NULL)
2258      return;
2259    label_addr->label = LABEL();
2260    label_addr->update_addr = update_addr;
2261    label_addr->next = common->label_addrs;
2262    common->label_addrs = label_addr;
2263    }
2264    
2265    static SLJIT_INLINE void count_match(compiler_common *common)
2266  {  {
2267  DEFINE_COMPILER;  DEFINE_COMPILER;
2268    
2269  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2270  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));  add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2271  }  }
2272    
2273  static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)  static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
# Line 1294  static SLJIT_INLINE void allocate_stack( Line 2275  static SLJIT_INLINE void allocate_stack(
2275  /* May destroy all locals and registers except TMP2. */  /* May destroy all locals and registers except TMP2. */
2276  DEFINE_COMPILER;  DEFINE_COMPILER;
2277    
2278  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  SLJIT_ASSERT(size > 0);
2279    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2280  #ifdef DESTROY_REGISTERS  #ifdef DESTROY_REGISTERS
2281  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2282  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2283  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2284  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2285  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2286  #endif  #endif
2287  add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));  add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2288  }  }
2289    
2290  static SLJIT_INLINE void free_stack(compiler_common *common, int size)  static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2291  {  {
2292  DEFINE_COMPILER;  DEFINE_COMPILER;
2293  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  
2294    SLJIT_ASSERT(size > 0);
2295    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2296    }
2297    
2298    static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2299    {
2300    DEFINE_COMPILER;
2301    sljit_uw *result;
2302    
2303    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2304      return NULL;
2305    
2306    result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2307    if (SLJIT_UNLIKELY(result == NULL))
2308      {
2309      sljit_set_compiler_memory_error(compiler);
2310      return NULL;
2311      }
2312    
2313    *(void**)result = common->read_only_data_head;
2314    common->read_only_data_head = (void *)result;
2315    return result + 1;
2316    }
2317    
2318    static void free_read_only_data(void *current, void *allocator_data)
2319    {
2320    void *next;
2321    
2322    SLJIT_UNUSED_ARG(allocator_data);
2323    
2324    while (current != NULL)
2325      {
2326      next = *(void**)current;
2327      SLJIT_FREE(current, allocator_data);
2328      current = next;
2329      }
2330  }  }
2331    
2332  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
# Line 1316  static SLJIT_INLINE void reset_ovector(c Line 2334  static SLJIT_INLINE void reset_ovector(c
2334  DEFINE_COMPILER;  DEFINE_COMPILER;
2335  struct sljit_label *loop;  struct sljit_label *loop;
2336  int i;  int i;
2337    
2338  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
2339    SLJIT_ASSERT(length > 1);
2340  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
2341  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2342  if (length < 8)  if (length < 8)
2343    {    {
2344    for (i = 0; i < length; i++)    for (i = 1; i < length; i++)
2345      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2346    }    }
2347  else  else
2348    {    {
2349    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2350    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2351    loop = LABEL();    loop = LABEL();
2352    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2353    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2354    JUMPTO(SLJIT_C_NOT_ZERO, loop);    JUMPTO(SLJIT_NOT_ZERO, loop);
2355    }    }
2356  }  }
2357    
2358  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2359  {  {
2360  DEFINE_COMPILER;  DEFINE_COMPILER;
2361  struct sljit_label *loop;  sljit_si i;
 struct sljit_jump *earlyexit;  
2362    
2363  /* At this point we can freely use all registers. */  SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);  
2364    
2365  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2366  if (common->mark_ptr != 0)  for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2367    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2368  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));  }
 if (common->mark_ptr != 0)  
   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);  
 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));  
 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));  
 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);  
 /* Unlikely, but possible */  
 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);  
 loop = LABEL();  
 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);  
 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));  
 /* Copy the integer value to the output buffer */  
 #ifdef COMPILE_PCRE16  
 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);  
 #endif  
 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);  
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);  
 JUMPTO(SLJIT_C_NOT_ZERO, loop);  
 JUMPHERE(earlyexit);  
2369    
2370  /* Calculate the return value, which is the maximum ovector value. */  static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2371  if (topbracket > 1)  {
2372    {  DEFINE_COMPILER;
2373    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));  struct sljit_label *loop;
2374    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);  int i;
2375    
2376    /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */  SLJIT_ASSERT(length > 1);
2377    loop = LABEL();  /* OVECTOR(1) contains the "string begin - 1" constant. */
2378    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));  if (length > 2)
2379    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2380    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);  if (length < 8)
2381    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);    {
2382      for (i = 2; i < length; i++)
2383        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2384    }    }
2385  else  else
2386    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);    {
2387      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2388      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2389      loop = LABEL();
2390      OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2391      OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2392      JUMPTO(SLJIT_NOT_ZERO, loop);
2393      }
2394    
2395    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2396    if (common->mark_ptr != 0)
2397      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2398    if (common->control_head_ptr != 0)
2399      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2400    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2401    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2402    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2403  }  }
2404    
2405  static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)  static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2406  {  {
2407  DEFINE_COMPILER;  while (current != NULL)
2408      {
2409      switch (current[-2])
2410        {
2411        case type_then_trap:
2412        break;
2413    
2414        case type_mark:
2415        if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2416          return current[-4];
2417        break;
2418    
2419        default:
2420        SLJIT_ASSERT_STOP();
2421        break;
2422        }
2423      SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2424      current = (sljit_sw*)current[-1];
2425      }
2426    return -1;
2427    }
2428    
2429    static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2430    {
2431    DEFINE_COMPILER;
2432    struct sljit_label *loop;
2433    struct sljit_jump *early_quit;
2434    
2435    /* At this point we can freely use all registers. */
2436    OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2437    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2438    
2439    OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2440    if (common->mark_ptr != 0)
2441      OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2442    OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2443    if (common->mark_ptr != 0)
2444      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2445    OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2446    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2447    GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2448    /* Unlikely, but possible */
2449    early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2450    loop = LABEL();
2451    OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2452    OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2453    /* Copy the integer value to the output buffer */
2454    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2455    OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2456    #endif
2457    OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2458    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2459    JUMPTO(SLJIT_NOT_ZERO, loop);
2460    JUMPHERE(early_quit);
2461    
2462    /* Calculate the return value, which is the maximum ovector value. */
2463    if (topbracket > 1)
2464      {
2465      GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2466      OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2467    
2468      /* OVECTOR(0) is never equal to SLJIT_S2. */
2469      loop = LABEL();
2470      OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2471      OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2472      CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2473      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2474      }
2475    else
2476      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2477    }
2478    
2479    static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2480    {
2481    DEFINE_COMPILER;
2482    struct sljit_jump *jump;
2483    
2484  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2485  SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));  SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2486      && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2487    
2488  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2489  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2490  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));  OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2491  CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);  CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2492    
2493  /* Store match begin and end. */  /* Store match begin and end. */
2494  OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2495  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2496  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);  
2497  OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);  jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2498  #ifdef COMPILE_PCRE16  OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2499  OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2500  #endif  OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2501  OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);  #endif
2502    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2503  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);  JUMPHERE(jump);
2504  #ifdef COMPILE_PCRE16  
2505  OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2506    OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2507    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2508    OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2509    #endif
2510    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2511    
2512    OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2513    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2514    OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2515  #endif  #endif
2516  OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);  OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2517    
2518  JUMPTO(SLJIT_JUMP, leave);  JUMPTO(SLJIT_JUMP, quit);
2519  }  }
2520    
2521  static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)  static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
# Line 1425  struct sljit_jump *jump; Line 2527  struct sljit_jump *jump;
2527  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2528    {    {
2529    /* The value of -1 must be kept for start_used_ptr! */    /* The value of -1 must be kept for start_used_ptr! */
2530    OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);    OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2531    /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting    /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2532    is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */    is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2533    jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);    jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2534    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2535    JUMPHERE(jump);    JUMPHERE(jump);
2536    }    }
2537  else if (common->mode == JIT_PARTIAL_HARD_COMPILE)  else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2538    {    {
2539    jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2540    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2541    JUMPHERE(jump);    JUMPHERE(jump);
2542    }    }
2543  }  }
2544    
2545  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2546  {  {
2547  /* Detects if the character has an othercase. */  /* Detects if the character has an othercase. */
2548  unsigned int c;  unsigned int c;
# Line 1483  if (common->utf && c > 127) Line 2585  if (common->utf && c > 127)
2585  return TABLE_GET(c, common->fcc, c);  return TABLE_GET(c, common->fcc, c);
2586  }  }
2587    
2588  static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)  static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2589  {  {
2590  /* Detects if the character and its othercase has only 1 bit difference. */  /* Detects if the character and its othercase has only 1 bit difference. */
2591  unsigned int c, oc, bit;  unsigned int c, oc, bit;
# Line 1524  if (c <= 127 && bit == 0x20) Line 2626  if (c <= 127 && bit == 0x20)
2626    return (0 << 8) | 0x20;    return (0 << 8) | 0x20;
2627    
2628  /* Since c != oc, they must have at least 1 bit difference. */  /* Since c != oc, they must have at least 1 bit difference. */
2629  if (!ispowerof2(bit))  if (!is_powerof2(bit))
2630    return 0;    return 0;
2631    
2632  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2633    
2634  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2635  if (common->utf && c > 127)  if (common->utf && c > 127)
# Line 1543  if (common->utf && c > 127) Line 2645  if (common->utf && c > 127)
2645  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
2646  return (0 << 8) | bit;  return (0 << 8) | bit;
2647    
2648  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2649    
 #ifdef COMPILE_PCRE16  
2650  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
2651  if (common->utf && c > 65535)  if (common->utf && c > 65535)
2652    {    {
# Line 1556  if (common->utf && c > 65535) Line 2657  if (common->utf && c > 65535)
2657    }    }
2658  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
2659  return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));  return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
 #endif /* COMPILE_PCRE16 */  
2660    
2661  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE[8|16|32] */
2662  }  }
2663    
2664  static void check_partial(compiler_common *common, BOOL force)  static void check_partial(compiler_common *common, BOOL force)
2665  {  {
2666  /* Checks whether a partial matching is occured. Does not modify registers. */  /* Checks whether a partial matching is occurred. Does not modify registers. */
2667  DEFINE_COMPILER;  DEFINE_COMPILER;
2668  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
2669    
# Line 1573  if (common->mode == JIT_COMPILE) Line 2673  if (common->mode == JIT_COMPILE)
2673    return;    return;
2674    
2675  if (!force)  if (!force)
2676    jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2677  else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2678    jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);    jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2679    
2680  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2681    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2682  else  else
2683    {    {
2684    if (common->partialmatchlabel != NULL)    if (common->partialmatchlabel != NULL)
# Line 1591  if (jump != NULL) Line 2691  if (jump != NULL)
2691    JUMPHERE(jump);    JUMPHERE(jump);
2692  }  }
2693    
2694  static struct sljit_jump *check_str_end(compiler_common *common)  static void check_str_end(compiler_common *common, jump_list **end_reached)
2695  {  {
2696  /* Does not affect registers. Usually used in a tight spot. */  /* Does not affect registers. Usually used in a tight spot. */
2697  DEFINE_COMPILER;  DEFINE_COMPILER;
2698  struct sljit_jump *jump;  struct sljit_jump *jump;
 struct sljit_jump *nohit;  
 struct sljit_jump *return_value;  
2699    
2700  if (common->mode == JIT_COMPILE)  if (common->mode == JIT_COMPILE)
2701    return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    {
2702      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2703      return;
2704      }
2705    
2706  jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);  jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2707  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2708    {    {
2709    nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2710    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2711    JUMPHERE(nohit);    add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
   return_value = JUMP(SLJIT_JUMP);  
2712    }    }
2713  else  else
2714    {    {
2715    return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2716    if (common->partialmatchlabel != NULL)    if (common->partialmatchlabel != NULL)
2717      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2718    else    else
2719      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2720    }    }
2721  JUMPHERE(jump);  JUMPHERE(jump);
 return return_value;  
2722  }  }
2723    
2724  static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)  static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2725  {  {
2726  DEFINE_COMPILER;  DEFINE_COMPILER;
2727  struct sljit_jump *jump;  struct sljit_jump *jump;
2728    
2729  if (common->mode == JIT_COMPILE)  if (common->mode == JIT_COMPILE)
2730    {    {
2731    add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2732    return;    return;
2733    }    }
2734    
2735  /* Partial matching mode. */  /* Partial matching mode. */
2736  jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);  jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2737  add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2738  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2739    {    {
2740    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2741    add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2742    }    }
2743  else  else
2744    {    {
# Line 1651  else Line 2750  else
2750  JUMPHERE(jump);  JUMPHERE(jump);
2751  }  }
2752    
2753  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common, sljit_ui max)
2754  {  {
2755  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2756  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2757  DEFINE_COMPILER;  DEFINE_COMPILER;
2758  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2759  struct sljit_jump *jump;  struct sljit_jump *jump;
2760  #endif  #endif
2761    
2762    SLJIT_UNUSED_ARG(max);
2763    
2764  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2765  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2766  if (common->utf)  if (common->utf)
2767    {    {
2768  #ifdef COMPILE_PCRE8    if (max < 128) return;
2769    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);  
2770  #else    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2771  #ifdef COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif  
 #endif /* COMPILE_PCRE8 */  
2772    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2773      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2774      JUMPHERE(jump);
2775      }
2776    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2777    
2778    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2779    if (common->utf)
2780      {
2781      if (max < 0xd800) return;
2782    
2783      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2784      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2785      /* TMP2 contains the high surrogate. */
2786      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2787      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2788      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2789      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2790      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2791    JUMPHERE(jump);    JUMPHERE(jump);
2792    }    }
2793  #endif  #endif
2794    }
2795    
2796    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2797    
2798    static BOOL is_char7_bitset(const sljit_ub *bitset, BOOL nclass)
2799    {
2800    /* Tells whether the character codes below 128 are enough
2801    to determine a match. */
2802    const sljit_ub value = nclass ? 0xff : 0;
2803    const sljit_ub *end = bitset + 32;
2804    
2805    bitset += 16;
2806    do
2807      {
2808      if (*bitset++ != value)
2809        return FALSE;
2810      }
2811    while (bitset < end);
2812    return TRUE;
2813    }
2814    
2815    static void read_char7_type(compiler_common *common, BOOL full_read)
2816    {
2817    /* Reads the precise character type of a character into TMP1, if the character
2818    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2819    full_read argument tells whether characters above max are accepted or not. */
2820    DEFINE_COMPILER;
2821    struct sljit_jump *jump;
2822    
2823    SLJIT_ASSERT(common->utf);
2824    
2825    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2826  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2827    
2828    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2829    
2830    if (full_read)
2831      {
2832      jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2833      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2834      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2835      JUMPHERE(jump);
2836      }
2837  }  }
2838    
2839  static void peek_char(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2840    
2841    static void read_char_range(compiler_common *common, sljit_ui min, sljit_ui max, BOOL update_str_ptr)
2842  {  {
2843  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the precise value of a character into TMP1, if the character is
2844  Does not check STR_END. TMP2 Destroyed. */  between min and max (c >= min && c <= max). Otherwise it returns with a value
2845    outside the range. Does not check STR_END. */
2846  DEFINE_COMPILER;  DEFINE_COMPILER;
2847  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2848  struct sljit_jump *jump;  struct sljit_jump *jump;
2849  #endif  #endif
2850    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2851    struct sljit_jump *jump2;
2852    #endif
2853    
2854  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  SLJIT_UNUSED_ARG(update_str_ptr);
2855  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(min);
2856    SLJIT_UNUSED_ARG(max);
2857    SLJIT_ASSERT(min <= max);
2858    
2859    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2860    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2861    
2862    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2863  if (common->utf)  if (common->utf)
2864    {    {
2865  #ifdef COMPILE_PCRE8    if (max < 128 && !update_str_ptr) return;
2866    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);  
2867  #else    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2868  #ifdef COMPILE_PCRE16    if (min >= 0x10000)
2869    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      {
2870        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2871        if (update_str_ptr)
2872          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2873        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2874        jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2875        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2876        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2877        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2878        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2879        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2880        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2881        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2882        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2883        if (!update_str_ptr)
2884          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2885        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2886        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2887        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2888        JUMPHERE(jump2);
2889        if (update_str_ptr)
2890          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2891        }
2892      else if (min >= 0x800 && max <= 0xffff)
2893        {
2894        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2895        if (update_str_ptr)
2896          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2897        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2898        jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2899        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2900        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2901        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2902        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2903        if (!update_str_ptr)
2904          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2905        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2906        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2907        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2908        JUMPHERE(jump2);
2909        if (update_str_ptr)
2910          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2911        }
2912      else if (max >= 0x800)
2913        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2914      else if (max < 128)
2915        {
2916        OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2917        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2918        }
2919      else
2920        {
2921        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2922        if (!update_str_ptr)
2923          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2924        else
2925          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2926        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2927        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2928        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2929        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2930        if (update_str_ptr)
2931          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2932        }
2933      JUMPHERE(jump);
2934      }
2935  #endif  #endif
2936  #endif /* COMPILE_PCRE8 */  
2937    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));  #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2938    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  if (common->utf)
2939      {
2940      if (max >= 0x10000)
2941        {
2942        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2943        jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2944        /* TMP2 contains the high surrogate. */
2945        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2946        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2947        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2948        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2949        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2950        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2951        JUMPHERE(jump);
2952        return;
2953        }
2954    
2955      if (max < 0xd800 && !update_str_ptr) return;
2956    
2957      /* Skip low surrogate if necessary. */
2958      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2959      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2960      if (update_str_ptr)
2961        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2962      if (max >= 0xd800)
2963        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2964    JUMPHERE(jump);    JUMPHERE(jump);
2965    }    }
2966  #endif  #endif
2967  }  }
2968    
2969  static void read_char8_type(compiler_common *common)  static SLJIT_INLINE void read_char(compiler_common *common)
2970    {
2971    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2972    }
2973    
2974    static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2975  {  {
2976  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2977  DEFINE_COMPILER;  DEFINE_COMPILER;
2978  #if defined SUPPORT_UTF || defined COMPILE_PCRE16  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2979  struct sljit_jump *jump;  struct sljit_jump *jump;
2980  #endif  #endif
2981    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2982    struct sljit_jump *jump2;
2983    #endif
2984    
2985  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(update_str_ptr);
2986    
2987    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2988    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2989    
2990    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2991  if (common->utf)  if (common->utf)
2992    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #ifdef COMPILE_PCRE8  
2993    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2994    it is needed in most cases. */    it is needed in most cases. */
2995    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2996    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2997    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!update_str_ptr)
2998    JUMPHERE(jump);      {
2999  #else      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3000  #ifdef COMPILE_PCRE16      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3001    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3002    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3003    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3004        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3005        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3006        jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3007        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3008        JUMPHERE(jump2);
3009        }
3010      else
3011        add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3012    JUMPHERE(jump);    JUMPHERE(jump);
   /* Skip low surrogate if necessary. */  
   OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);  
   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);  
   COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);  
   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  
 #endif  
 #endif /* COMPILE_PCRE8 */  
3013    return;    return;
3014    }    }
3015  #endif  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3016  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
3017  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #ifdef COMPILE_PCRE16  
3018  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
3019  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3020  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3021  #endif  #endif
3022  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3023  #ifdef COMPILE_PCRE16  #if !defined COMPILE_PCRE8
3024  JUMPHERE(jump);  JUMPHERE(jump);
3025  #endif  #endif
3026    
3027    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3028    if (common->utf && update_str_ptr)
3029      {
3030      /* Skip low surrogate if necessary. */
3031      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3032      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3033      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3034      JUMPHERE(jump);
3035      }
3036    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3037  }  }
3038    
3039  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
3040  {  {
3041  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3042  DEFINE_COMPILER;  DEFINE_COMPILER;
3043  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3044    #if defined COMPILE_PCRE8
3045  struct sljit_label *label;  struct sljit_label *label;
3046    
3047  if (common->utf)  if (common->utf)
# Line 1768  if (common->utf) Line 3050  if (common->utf)
3050    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3051    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3052    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3053    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3054    return;    return;
3055    }    }
3056  #endif  #elif defined COMPILE_PCRE16
 #if defined SUPPORT_UTF && defined COMPILE_PCRE16  
3057  if (common->utf)  if (common->utf)
3058    {    {
3059    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
# Line 1780  if (common->utf) Line 3061  if (common->utf)
3061    /* Skip low surrogate if necessary. */    /* Skip low surrogate if necessary. */
3062    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3063    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3064    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3065    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3066    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3067    return;    return;
3068    }    }
3069  #endif  #endif /* COMPILE_PCRE[8|16] */
3070    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3071  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3072  }  }
3073    
3074  static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3075  {  {
3076  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3077  DEFINE_COMPILER;  DEFINE_COMPILER;
3078    struct sljit_jump *jump;
3079    
3080  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
3081    {    {
3082    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3083    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3084    }    }
3085  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
3086    {    {
3087    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
3088    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);      {
3089    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3090    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3091    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
3092      else
3093        {
3094        jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3095        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3096        JUMPHERE(jump);
3097        }
3098    }    }
3099  else  else
3100    {    {
3101    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3102    add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3103    }    }
3104  }  }
3105    
3106  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3107    
3108  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
3109  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
3110  {  {
3111  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3112  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3113  DEFINE_COMPILER;  DEFINE_COMPILER;
3114  struct sljit_jump *jump;  struct sljit_jump *jump;
3115    
3116  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3117  /* Searching for the first zero. */  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3118  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
 jump = JUMP(SLJIT_C_NOT_ZERO);  
 /* Two byte sequence. */  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  
3119  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3120  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3121  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3122  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
3123    /* Searching for the first zero. */
3124    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3125    jump = JUMP(SLJIT_NOT_ZERO);
3126    /* Two byte sequence. */
3127    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3128    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3129  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
3130    
3131  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  JUMPHERE(jump);
 jump = JUMP(SLJIT_C_NOT_ZERO);  
 /* Three byte sequence. */  
3132  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3133  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3134  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3135  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
3136  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3137  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
3138    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3139    jump = JUMP(SLJIT_NOT_ZERO);
3140    /* Three byte sequence. */
3141  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3142  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
3143  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
3144    
3145  /* Four byte sequence. */  /* Four byte sequence. */
3146  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
3147  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3148  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3149    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3150    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3151  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
3152  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3153  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3154    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3155    }
3156    
3157    static void do_utfreadchar16(compiler_common *common)
3158    {
3159    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3160    of the character (>= 0xc0). Return value in TMP1. */
3161    DEFINE_COMPILER;
3162    struct sljit_jump *jump;
3163    
3164    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3165    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3166    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3167    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3168  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
3169  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3170  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
3171  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
3172    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3173    jump = JUMP(SLJIT_NOT_ZERO);
3174    /* Two byte sequence. */
3175    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3176    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3177    
3178    JUMPHERE(jump);
3179    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3180    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3181    /* This code runs only in 8 bit mode. No need to shift the value. */
3182    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3183    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3184    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3185    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3186  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3187  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3188  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
3189    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3190  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3191  }  }
3192    
# Line 1886  struct sljit_jump *compare; Line 3201  struct sljit_jump *compare;
3201  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3202    
3203  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3204  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_NOT_ZERO);
3205  /* Two byte sequence. */  /* Two byte sequence. */
3206  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3207  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3208  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3209    /* The upper 5 bits are known at this point. */
3210    compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3211  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3212  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3213  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
3214  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3215  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3216    
3217  JUMPHERE(compare);  JUMPHERE(compare);
3218  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3219  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
3220    
3221  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
3222  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);  JUMPHERE(jump);
3223  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3224  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3225    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3226  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3227  }  }
3228    
3229  #else /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
   
 #ifdef COMPILE_PCRE16  
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
 JUMPHERE(jump);  
 /* Combine two 16 bit characters. */  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
 #endif /* COMPILE_PCRE16 */  
   
 #endif /* COMPILE_PCRE8 */  
3230    
3231  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
3232    
# Line 1959  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && si Line 3246  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && si
3246    
3247  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3248  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3249  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3250  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3251  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3252  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3253  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3254  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3255  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3256  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3257  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3258  }  }
# Line 1979  struct sljit_label *newlinelabel = NULL; Line 3266  struct sljit_label *newlinelabel = NULL;
3266  struct sljit_jump *start;  struct sljit_jump *start;
3267  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
3268  struct sljit_jump *nl = NULL;  struct sljit_jump *nl = NULL;
3269  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3270  struct sljit_jump *singlechar;  struct sljit_jump *singlechar;
3271  #endif  #endif
3272  jump_list *newline = NULL;  jump_list *newline = NULL;
# Line 1994  if (firstline) Line 3281  if (firstline)
3281    {    {
3282    /* Search for the end of the first line. */    /* Search for the end of the first line. */
3283    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3284    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);  
3285    
3286    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3287      {      {
3288      mainloop = LABEL();      mainloop = LABEL();
3289      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3290      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3291      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3292      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3293      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3294      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3295      OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));      JUMPHERE(end);
3296        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3297      }      }
3298    else    else
3299      {      {
3300      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3301      mainloop = LABEL();      mainloop = LABEL();
3302      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
3303      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3304      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
3305      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
3306      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3307      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      JUMPHERE(end);
3308        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3309      set_jumps(newline, LABEL());      set_jumps(newline, LABEL());
3310      }      }
3311    
3312    JUMPHERE(end);    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  
3313    }    }
3314    
3315  start = JUMP(SLJIT_JUMP);  start = JUMP(SLJIT_JUMP);
# Line 2031  if (newlinecheck) Line 3318  if (newlinecheck)
3318    {    {
3319    newlinelabel = LABEL();    newlinelabel = LABEL();
3320    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3321    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3322    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3323    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3324    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3325  #ifdef COMPILE_PCRE16  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3326    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3327  #endif  #endif
3328    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3329    nl = JUMP(SLJIT_JUMP);    nl = JUMP(SLJIT_JUMP);
# Line 2054  if (readuchar) Line 3341  if (readuchar)
3341    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3342    
3343  if (newlinecheck)  if (newlinecheck)
3344    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3345    
3346  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3347  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3348    #if defined COMPILE_PCRE8
3349  if (common->utf)  if (common->utf)
3350    {    {
3351    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3352    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3353    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3354    JUMPHERE(singlechar);    JUMPHERE(singlechar);
3355    }    }
3356  #endif  #elif defined COMPILE_PCRE16
 #if defined SUPPORT_UTF && defined COMPILE_PCRE16  
3357  if (common->utf)  if (common->utf)
3358    {    {
3359    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);    singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3360    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3361    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3362    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3363    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3364    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3365    JUMPHERE(singlechar);    JUMPHERE(singlechar);
3366    }    }
3367  #endif  #endif /* COMPILE_PCRE[8|16] */
3368    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3369  JUMPHERE(start);  JUMPHERE(start);
3370    
3371  if (newlinecheck)  if (newlinecheck)
# Line 2089  if (newlinecheck) Line 3377  if (newlinecheck)
3377  return mainloop;  return mainloop;
3378  }  }
3379    
3380  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)  #define MAX_N_CHARS 16
3381  {  #define MAX_N_BYTES 8
3382  DEFINE_COMPILER;  
3383  struct sljit_label *start;  static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3384  struct sljit_jump *leave;  {
3385  struct sljit_jump *found;  pcre_uint8 len = bytes[0];
3386  pcre_uchar oc, bit;  int i;
3387    
3388    if (len == 255)
3389      return;
3390    
3391    if (len == 0)
3392      {
3393      bytes[0] = 1;
3394      bytes[1] = byte;
3395      return;
3396      }
3397    
3398    for (i = len; i > 0; i--)
3399      if (bytes[i] == byte)
3400        return;
3401    
3402    if (len >= MAX_N_BYTES - 1)
3403      {
3404      bytes[0] = 255;
3405      return;
3406      }
3407    
3408    len++;
3409    bytes[len] = byte;
3410    bytes[0] = len;
3411    }
3412    
3413    static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3414    {
3415    /* Recursive function, which scans prefix literals. */
3416    BOOL last, any, caseless;
3417    int len, repeat, len_save, consumed = 0;
3418    pcre_uint32 chr, mask;
3419    pcre_uchar *alternative, *cc_save, *oc;
3420    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3421    pcre_uchar othercase[8];
3422    #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3423    pcre_uchar othercase[2];
3424    #else
3425    pcre_uchar othercase[1];
3426    #endif
3427    
3428    repeat = 1;
3429    while (TRUE)
3430      {
3431      if (*rec_count == 0)
3432        return 0;
3433      (*rec_count)--;
3434    
3435      last = TRUE;
3436      any = FALSE;
3437      caseless = FALSE;
3438    
3439      switch (*cc)
3440        {
3441        case OP_CHARI:
3442        caseless = TRUE;
3443        case OP_CHAR:
3444        last = FALSE;
3445        cc++;
3446        break;
3447    
3448        case OP_SOD:
3449        case OP_SOM:
3450        case OP_SET_SOM:
3451        case OP_NOT_WORD_BOUNDARY:
3452        case OP_WORD_BOUNDARY:
3453        case OP_EODN:
3454        case OP_EOD:
3455        case OP_CIRC:
3456        case OP_CIRCM:
3457        case OP_DOLL:
3458        case OP_DOLLM:
3459        /* Zero width assertions. */
3460        cc++;
3461        continue;
3462    
3463        case OP_ASSERT:
3464        case OP_ASSERT_NOT:
3465        case OP_ASSERTBACK:
3466        case OP_ASSERTBACK_NOT:
3467        cc = bracketend(cc);
3468        continue;
3469    
3470        case OP_PLUSI:
3471        case OP_MINPLUSI:
3472        case OP_POSPLUSI:
3473        caseless = TRUE;
3474        case OP_PLUS:
3475        case OP_MINPLUS:
3476        case OP_POSPLUS:
3477        cc++;
3478        break;
3479    
3480        case OP_EXACTI:
3481        caseless = TRUE;
3482        case OP_EXACT:
3483        repeat = GET2(cc, 1);
3484        last = FALSE;
3485        cc += 1 + IMM2_SIZE;
3486        break;
3487    
3488        case OP_QUERYI:
3489        case OP_MINQUERYI:
3490        case OP_POSQUERYI:
3491        caseless = TRUE;
3492        case OP_QUERY:
3493        case OP_MINQUERY:
3494        case OP_POSQUERY:
3495        len = 1;
3496        cc++;
3497    #ifdef SUPPORT_UTF
3498        if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3499    #endif
3500        max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3501        if (max_chars == 0)
3502          return consumed;
3503        last = FALSE;
3504        break;
3505    
3506        case OP_KET:
3507        cc += 1 + LINK_SIZE;
3508        continue;
3509    
3510        case OP_ALT:
3511        cc += GET(cc, 1);
3512        continue;
3513    
3514        case OP_ONCE:
3515        case OP_ONCE_NC:
3516        case OP_BRA:
3517        case OP_BRAPOS:
3518        case OP_CBRA:
3519        case OP_CBRAPOS:
3520        alternative = cc + GET(cc, 1);
3521        while (*alternative == OP_ALT)
3522          {
3523          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3524          if (max_chars == 0)
3525            return consumed;
3526          alternative += GET(alternative, 1);
3527          }
3528    
3529        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3530          cc += IMM2_SIZE;
3531        cc += 1 + LINK_SIZE;
3532        continue;
3533    
3534        case OP_CLASS:
3535    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3536        if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3537    #endif
3538        any = TRUE;
3539