/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 958 by zherczeg, Wed Apr 11 10:19:10 2012 UTC revision 1640 by zherczeg, Mon Feb 29 09:21:21 2016 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2012                        Copyright (c) 2010-2013
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49  #ifdef SUPPORT_JIT  #if defined SUPPORT_JIT
50    
51  /* All-in-one: Since we use the JIT compiler only from here,  /* All-in-one: Since we use the JIT compiler only from here,
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55  #define SLJIT_MALLOC(size) (PUBL(malloc))(size)  #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56  #define SLJIT_FREE(ptr) (PUBL(free))(ptr)  #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58  #define SLJIT_CONFIG_STATIC 1  #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
# Line 65  system files. */ Line 65  system files. */
65  #error Unsupported architecture  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Defines for debugging purposes. */
 #define LOCAL_SPACE_SIZE 32768  
69    
70    /* 1 - Use unoptimized capturing brackets.
71       2 - Enable capture_last_ptr (includes option 1). */
72    /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* 1 - Always have a control head. */
75    /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77    /* Allocate memory for the regex stack on the real machine stack.
78    Fast, but limited size. */
79    #define MACHINE_STACK_SIZE 32768
80    
81    /* Growth rate for stack allocated by the OS. Should be a multiple
82    of the page size. */
83  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
84    
85  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 82  The code generator follows the recursive Line 94  The code generator follows the recursive
94  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
95  which execute different commands depending on the result of the condition check.  which execute different commands depending on the result of the condition check.
96  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
97  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
98    
99    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
100    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
101    
102  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
103  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
104  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
105  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106  branches on the hot path.  branches on the matching path.
107    
108   Greedy star operator (*) :   Greedy star operator (*) :
109     Hot path: match happens.     Matching path: match happens.
110     Fallback path: match failed.     Backtrack path: match failed.
111   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
112     Hot path: no need to perform a match.     Matching path: no need to perform a match.
113     Fallback path: match is required.     Backtrack path: match is required.
114    
115  The following example shows how the code generated for a capturing bracket  The following example shows how the code generated for a capturing bracket
116  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 108  we have the following regular expression Line 120  we have the following regular expression
120    
121  The generated code will be the following:  The generated code will be the following:
122    
123   A hot path   A matching path
124   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
125   B hot path   B matching path
126   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
127   D hot path   D matching path
128   return with successful match   return with successful match
129    
130   D fallback path   D backtrack path
131   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132   B fallback path   B backtrack path
133   C expected path   C expected path
134   jump to D hot path   jump to D matching path
135   C fallback path   C backtrack path
136   A fallback path   A backtrack path
137    
138   Notice, that the order of fallback code paths are the opposite of the fast   Notice, that the order of backtrack code paths are the opposite of the fast
139   code paths. In this way the topmost value on the stack is always belong   code paths. In this way the topmost value on the stack is always belong
140   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
141   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
142   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
143   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
144  */  */
145    
146  /*  /*
147  Saved stack frames:  Saved stack frames:
148    
149  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
150  when the fallback mechanism performed. Because of OP_RECURSE, the locals  when the backtrack mechanism performed. Because of OP_RECURSE, the data
151  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
152  mechanism.  mechanism.
153    
154  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
155  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156    
157  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
158  */  */
159    
160  typedef struct jit_arguments {  typedef struct jit_arguments {
# Line 154  typedef struct jit_arguments { Line 166  typedef struct jit_arguments {
166    int *offsets;    int *offsets;
167    pcre_uchar *uchar_ptr;    pcre_uchar *uchar_ptr;
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169      void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171    int offsetcount;    pcre_uint32 limit_match;
172    int calllimit;    int real_offset_count;
173      int offset_count;
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 165  typedef struct jit_arguments { Line 179  typedef struct jit_arguments {
179    
180  typedef struct executable_functions {  typedef struct executable_functions {
181    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182      void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    sljit_u32 top_bracket;
187      sljit_u32 limit_match;
188  } executable_functions;  } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
# Line 175  typedef struct jump_list { Line 192  typedef struct jump_list {
192    struct jump_list *next;    struct jump_list *next;
193  } jump_list;  } jump_list;
194    
 enum stub_types { stack_alloc };  
   
195  typedef struct stub_list {  typedef struct stub_list {
   enum stub_types type;  
   int data;  
196    struct sljit_jump *start;    struct sljit_jump *start;
197    struct sljit_label *leave;    struct sljit_label *quit;
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201    typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *update_addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207    enum frame_types {
208      no_frame = -1,
209      no_stack = -2
210    };
211    
212    enum control_types {
213      type_mark = 0,
214      type_then_trap = 1
215    };
216    
217  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218    
219  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
220  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
221  the aguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
222  of its descendants. */  of its descendants. */
223  typedef struct fallback_common {  typedef struct backtrack_common {
224    /* Concatenation stack. */    /* Concatenation stack. */
225    struct fallback_common *prev;    struct backtrack_common *prev;
226    jump_list *nextfallbacks;    jump_list *nextbacktracks;
227    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
228    struct fallback_common *top;    struct backtrack_common *top;
229    jump_list *topfallbacks;    jump_list *topbacktracks;
230    /* Opcode pointer. */    /* Opcode pointer. */
231    pcre_uchar *cc;    pcre_uchar *cc;
232  } fallback_common;  } backtrack_common;
233    
234  typedef struct assert_fallback {  typedef struct assert_backtrack {
235    fallback_common common;    backtrack_common common;
236    jump_list *condfailed;    jump_list *condfailed;
237    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
238    int framesize;    int framesize;
239    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
240    int localptr;    int private_data_ptr;
241    /* For iterators. */    /* For iterators. */
242    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
243  } assert_fallback;  } assert_backtrack;
244    
245  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
246    fallback_common common;    backtrack_common common;
247    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
248    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
249    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
250    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
251    /* For greedy ? operator. */    /* For greedy ? operator. */
252    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
253    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
254    union {    union {
255      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
256      jump_list *condfailed;      jump_list *condfailed;
257      assert_fallback *assert;      assert_backtrack *assert;
258      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
259      int framesize;      int framesize;
260    } u;    } u;
261    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
262    int localptr;    int private_data_ptr;
263  } bracket_fallback;  } bracket_backtrack;
264    
265  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
266    fallback_common common;    backtrack_common common;
267    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
268    int localptr;    int private_data_ptr;
269    /* Reverting stack is needed. */    /* Reverting stack is needed. */
270    int framesize;    int framesize;
271    /* Allocated stack size. */    /* Allocated stack size. */
272    int stacksize;    int stacksize;
273  } bracketpos_fallback;  } bracketpos_backtrack;
274    
275  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
276    fallback_common common;    backtrack_common common;
277    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
278  } braminzero_fallback;  } braminzero_backtrack;
279    
280    typedef struct char_iterator_backtrack {
281      backtrack_common common;
282      /* Next iteration. */
283      struct sljit_label *matchingpath;
284      union {
285        jump_list *backtracks;
286        struct {
287          unsigned int othercasebit;
288          pcre_uchar chr;
289          BOOL enabled;
290        } charpos;
291      } u;
292    } char_iterator_backtrack;
293    
294  typedef struct iterator_fallback {  typedef struct ref_iterator_backtrack {
295    fallback_common common;    backtrack_common common;
296    /* Next iteration. */    /* Next iteration. */
297    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
298  } iterator_fallback;  } ref_iterator_backtrack;
299    
300  typedef struct recurse_entry {  typedef struct recurse_entry {
301    struct recurse_entry *next;    struct recurse_entry *next;
# Line 261  typedef struct recurse_entry { Line 304  typedef struct recurse_entry {
304    /* Collects the calls until the function is not created. */    /* Collects the calls until the function is not created. */
305    jump_list *calls;    jump_list *calls;
306    /* Points to the starting opcode. */    /* Points to the starting opcode. */
307    int start;    sljit_sw start;
308  } recurse_entry;  } recurse_entry;
309    
310  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
311    fallback_common common;    backtrack_common common;
312  } recurse_fallback;    BOOL inlined_pattern;
313    } recurse_backtrack;
314    
315    #define OP_THEN_TRAP OP_TABLE_LENGTH
316    
317    typedef struct then_trap_backtrack {
318      backtrack_common common;
319      /* If then_trap is not NULL, this structure contains the real
320      then_trap for the backtracking path. */
321      struct then_trap_backtrack *then_trap;
322      /* Points to the starting opcode. */
323      sljit_sw start;
324      /* Exit point for the then opcodes of this alternative. */
325      jump_list *quit;
326      /* Frame size of the current alternative. */
327      int framesize;
328    } then_trap_backtrack;
329    
330    #define MAX_RANGE_SIZE 4
331    
332  typedef struct compiler_common {  typedef struct compiler_common {
333      /* The sljit generic compiler. */
334    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
335      /* First byte code. */
336    pcre_uchar *start;    pcre_uchar *start;
337      /* Maps private data offset to each opcode. */
338    /* Opcode local area direct map. */    sljit_s32 *private_data_ptrs;
339    int *localptrs;    /* Chain list of read-only data ptrs. */
340    int cbraptr;    void *read_only_data_head;
341    /* OVector starting point. Must be divisible by 2. */    /* Tells whether the capturing bracket is optimized. */
342    int ovector_start;    sljit_u8 *optimized_cbracket;
343      /* Tells whether the starting offset is a target of then. */
344      sljit_u8 *then_offsets;
345      /* Current position where a THEN must jump. */
346      then_trap_backtrack *then_trap;
347      /* Starting offset of private data for capturing brackets. */
348      sljit_s32 cbra_ptr;
349      /* Output vector starting point. Must be divisible by 2. */
350      sljit_s32 ovector_start;
351      /* Points to the starting character of the current match. */
352      sljit_s32 start_ptr;
353    /* Last known position of the requested byte. */    /* Last known position of the requested byte. */
354    int req_char_ptr;    sljit_s32 req_char_ptr;
355    /* Head of the last recursion. */    /* Head of the last recursion. */
356    int recursive_head;    sljit_s32 recursive_head_ptr;
357    /* First inspected character for partial matching. */    /* First inspected character for partial matching.
358    int start_used_ptr;       (Needed for avoiding zero length partial matches.) */
359      sljit_s32 start_used_ptr;
360    /* Starting pointer for partial soft matches. */    /* Starting pointer for partial soft matches. */
361    int hit_start;    sljit_s32 hit_start;
362    /* End pointer of the first line. */    /* Pointer of the match end position. */
363    int first_line_end;    sljit_s32 match_end_ptr;
364    /* Points to the marked string. */    /* Points to the marked string. */
365    int mark_ptr;    sljit_s32 mark_ptr;
366      /* Recursive control verb management chain. */
367    /* Other  */    sljit_s32 control_head_ptr;
368    const pcre_uint8 *fcc;    /* Points to the last matched capture block index. */
369    sljit_w lcc;    sljit_s32 capture_last_ptr;
370      /* Fast forward skipping byte code pointer. */
371      pcre_uchar *fast_forward_bc_ptr;
372      /* Locals used by fast fail optimization. */
373      sljit_s32 fast_fail_start_ptr;
374      sljit_s32 fast_fail_end_ptr;
375    
376      /* Flipped and lower case tables. */
377      const sljit_u8 *fcc;
378      sljit_sw lcc;
379      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380    int mode;    int mode;
381      /* TRUE, when minlength is greater than 0. */
382      BOOL might_be_empty;
383      /* \K is found in the pattern. */
384      BOOL has_set_som;
385      /* (*SKIP:arg) is found in the pattern. */
386      BOOL has_skip_arg;
387      /* (*THEN) is found in the pattern. */
388      BOOL has_then;
389      /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390      BOOL has_skip_in_assert_back;
391      /* Currently in recurse or negative assert. */
392      BOOL local_exit;
393      /* Currently in a positive assert. */
394      BOOL positive_assert;
395      /* Newline control. */
396    int nltype;    int nltype;
397      sljit_u32 nlmax;
398      sljit_u32 nlmin;
399    int newline;    int newline;
400    int bsr_nltype;    int bsr_nltype;
401      sljit_u32 bsr_nlmax;
402      sljit_u32 bsr_nlmin;
403      /* Dollar endonly. */
404    int endonly;    int endonly;
405    BOOL has_set_som;    /* Tables. */
406    sljit_w ctypes;    sljit_sw ctypes;
407    sljit_uw name_table;    /* Named capturing brackets. */
408    sljit_w name_count;    pcre_uchar *name_table;
409    sljit_w name_entry_size;    sljit_sw name_count;
410      sljit_sw name_entry_size;
411    
412    /* Labels and jump lists. */    /* Labels and jump lists. */
413    struct sljit_label *partialmatchlabel;    struct sljit_label *partialmatchlabel;
414    struct sljit_label *leavelabel;    struct sljit_label *quit_label;
415    struct sljit_label *acceptlabel;    struct sljit_label *forced_quit_label;
416      struct sljit_label *accept_label;
417      struct sljit_label *ff_newline_shortcut;
418    stub_list *stubs;    stub_list *stubs;
419      label_addr_list *label_addrs;
420    recurse_entry *entries;    recurse_entry *entries;
421    recurse_entry *currententry;    recurse_entry *currententry;
422    jump_list *partialmatch;    jump_list *partialmatch;
423    jump_list *leave;    jump_list *quit;
424      jump_list *positive_assert_quit;
425      jump_list *forced_quit;
426    jump_list *accept;    jump_list *accept;
427    jump_list *calllimit;    jump_list *calllimit;
428    jump_list *stackalloc;    jump_list *stackalloc;
# Line 323  typedef struct compiler_common { Line 433  typedef struct compiler_common {
433    jump_list *vspace;    jump_list *vspace;
434    jump_list *casefulcmp;    jump_list *casefulcmp;
435    jump_list *caselesscmp;    jump_list *caselesscmp;
436      jump_list *reset_match;
437    BOOL jscript_compat;    BOOL jscript_compat;
438  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
439    BOOL utf;    BOOL utf;
440  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
441    BOOL use_ucp;    BOOL use_ucp;
442      jump_list *getucd;
443  #endif  #endif
   jump_list *utfreadchar;  
444  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
445      jump_list *utfreadchar;
446      jump_list *utfreadchar16;
447    jump_list *utfreadtype8;    jump_list *utfreadtype8;
448  #endif  #endif
449  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
 #ifdef SUPPORT_UCP  
   jump_list *getucd;  
 #endif  
450  } compiler_common;  } compiler_common;
451    
452  /* For byte_sequence_compare. */  /* For byte_sequence_compare. */
# Line 347  typedef struct compare_context { Line 457  typedef struct compare_context {
457  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
458    int ucharptr;    int ucharptr;
459    union {    union {
460      sljit_i asint;      sljit_s32 asint;
461      sljit_uh asushort;      sljit_u16 asushort;
462  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
463      sljit_ub asbyte;      sljit_u8 asbyte;
464      sljit_ub asuchars[4];      sljit_u8 asuchars[4];
465  #else  #elif defined COMPILE_PCRE16
466  #ifdef COMPILE_PCRE16      sljit_u16 asuchars[2];
467      sljit_uh asuchars[2];  #elif defined COMPILE_PCRE32
468  #endif      sljit_u32 asuchars[1];
469  #endif  #endif
470    } c;    } c;
471    union {    union {
472      sljit_i asint;      sljit_s32 asint;
473      sljit_uh asushort;      sljit_u16 asushort;
474  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
475      sljit_ub asbyte;      sljit_u8 asbyte;
476      sljit_ub asuchars[4];      sljit_u8 asuchars[4];
477  #else  #elif defined COMPILE_PCRE16
478  #ifdef COMPILE_PCRE16      sljit_u16 asuchars[2];
479      sljit_uh asuchars[2];  #elif defined COMPILE_PCRE32
480  #endif      sljit_u32 asuchars[1];
481  #endif  #endif
482    } oc;    } oc;
483  #endif  #endif
484  } compare_context;  } compare_context;
485    
 enum {  
   frame_end = 0,  
   frame_setstrbegin = -1,  
   frame_setmark = -2  
 };  
   
486  /* Undefine sljit macros. */  /* Undefine sljit macros. */
487  #undef CMP  #undef CMP
488    
489  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
490  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
491    
492  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_R0
493  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_R2
494  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_R3
495  #define STR_PTR       SLJIT_SAVED_REG1  #define STR_PTR       SLJIT_S0
496  #define STR_END       SLJIT_SAVED_REG2  #define STR_END       SLJIT_S1
497  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_R1
498  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_S2
499  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define COUNT_MATCH   SLJIT_S3
500  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define ARGUMENTS     SLJIT_S4
501  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_R4
502    
503  /* Locals layout. */  /* Local space layout. */
504  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
505  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_sw))
506  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_sw))
507  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
508  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
509  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
510  /* Max limit of recursions. */  /* Max limit of recursions. */
511  #define CALL_LIMIT       (4 * sizeof(sljit_w))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
512  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
513  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
514  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
515  the start pointers when the end of the capturing group has not yet reached. */  the start pointers when the end of the capturing group has not yet reached. */
516  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
517  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
518  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
519  #define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520    
521  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
522  #define MOV_UCHAR  SLJIT_MOV_UB  #define MOV_UCHAR  SLJIT_MOV_U8
523  #define MOVU_UCHAR SLJIT_MOVU_UB  #define MOVU_UCHAR SLJIT_MOVU_U8
524  #else  #elif defined COMPILE_PCRE16
525  #ifdef COMPILE_PCRE16  #define MOV_UCHAR  SLJIT_MOV_U16
526  #define MOV_UCHAR  SLJIT_MOV_UH  #define MOVU_UCHAR SLJIT_MOVU_U16
527  #define MOVU_UCHAR SLJIT_MOVU_UH  #elif defined COMPILE_PCRE32
528    #define MOV_UCHAR  SLJIT_MOV_U32
529    #define MOVU_UCHAR SLJIT_MOVU_U32
530  #else  #else
531  #error Unsupported compiling mode  #error Unsupported compiling mode
532  #endif  #endif
 #endif  
533    
534  /* Shortcuts. */  /* Shortcuts. */
535  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 441  the start pointers when the end of the c Line 546  the start pointers when the end of the c
546    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
547  #define JUMPHERE(jump) \  #define JUMPHERE(jump) \
548    sljit_set_label((jump), sljit_emit_label(compiler))    sljit_set_label((jump), sljit_emit_label(compiler))
549    #define SET_LABEL(jump, label) \
550      sljit_set_label((jump), (label))
551  #define CMP(type, src1, src1w, src2, src2w) \  #define CMP(type, src1, src1w, src2, src2w) \
552    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
553  #define CMPTO(type, src1, src1w, src2, src2w, label) \  #define CMPTO(type, src1, src1w, src2, src2w, label) \
554    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
555  #define COND_VALUE(op, dst, dstw, type) \  #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
556    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
557  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
558    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
559    
560  static pcre_uchar* bracketend(pcre_uchar* cc)  #define READ_CHAR_MAX 0x7fffffff
561    
562    static pcre_uchar *bracketend(pcre_uchar *cc)
563  {  {
564  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
565  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 459  cc += 1 + LINK_SIZE; Line 568  cc += 1 + LINK_SIZE;
568  return cc;  return cc;
569  }  }
570    
571    static int no_alternatives(pcre_uchar *cc)
572    {
573    int count = 0;
574    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
575    do
576      {
577      cc += GET(cc, 1);
578      count++;
579      }
580    while (*cc == OP_ALT);
581    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
582    return count;
583    }
584    
585  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
586   next_opcode   next_opcode
587   get_localspace   check_opcode_types
588   set_localptrs   set_private_data_ptrs
589   get_framesize   get_framesize
590   init_frame   init_frame
591   get_localsize   get_private_data_copy_length
592   copy_locals   copy_private_data
593   compile_hotpath   compile_matchingpath
594   compile_fallbackpath   compile_backtrackingpath
595  */  */
596    
597  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
# Line 489  switch(*cc) Line 612  switch(*cc)
612    case OP_WORDCHAR:    case OP_WORDCHAR:
613    case OP_ANY:    case OP_ANY:
614    case OP_ALLANY:    case OP_ALLANY:
615      case OP_NOTPROP:
616      case OP_PROP:
617    case OP_ANYNL:    case OP_ANYNL:
618    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
619    case OP_HSPACE:    case OP_HSPACE:
# Line 501  switch(*cc) Line 626  switch(*cc)
626    case OP_CIRCM:    case OP_CIRCM:
627    case OP_DOLL:    case OP_DOLL:
628    case OP_DOLLM:    case OP_DOLLM:
   case OP_TYPESTAR:  
   case OP_TYPEMINSTAR:  
   case OP_TYPEPLUS:  
   case OP_TYPEMINPLUS:  
   case OP_TYPEQUERY:  
   case OP_TYPEMINQUERY:  
   case OP_TYPEPOSSTAR:  
   case OP_TYPEPOSPLUS:  
   case OP_TYPEPOSQUERY:  
629    case OP_CRSTAR:    case OP_CRSTAR:
630    case OP_CRMINSTAR:    case OP_CRMINSTAR:
631    case OP_CRPLUS:    case OP_CRPLUS:
632    case OP_CRMINPLUS:    case OP_CRMINPLUS:
633    case OP_CRQUERY:    case OP_CRQUERY:
634    case OP_CRMINQUERY:    case OP_CRMINQUERY:
635      case OP_CRRANGE:
636      case OP_CRMINRANGE:
637      case OP_CRPOSSTAR:
638      case OP_CRPOSPLUS:
639      case OP_CRPOSQUERY:
640      case OP_CRPOSRANGE:
641      case OP_CLASS:
642      case OP_NCLASS:
643      case OP_REF:
644      case OP_REFI:
645      case OP_DNREF:
646      case OP_DNREFI:
647      case OP_RECURSE:
648      case OP_CALLOUT:
649      case OP_ALT:
650      case OP_KET:
651      case OP_KETRMAX:
652      case OP_KETRMIN:
653      case OP_KETRPOS:
654      case OP_REVERSE:
655      case OP_ASSERT:
656      case OP_ASSERT_NOT:
657      case OP_ASSERTBACK:
658      case OP_ASSERTBACK_NOT:
659      case OP_ONCE:
660      case OP_ONCE_NC:
661      case OP_BRA:
662      case OP_BRAPOS:
663      case OP_CBRA:
664      case OP_CBRAPOS:
665      case OP_COND:
666      case OP_SBRA:
667      case OP_SBRAPOS:
668      case OP_SCBRA:
669      case OP_SCBRAPOS:
670      case OP_SCOND:
671      case OP_CREF:
672      case OP_DNCREF:
673      case OP_RREF:
674      case OP_DNRREF:
675    case OP_DEF:    case OP_DEF:
676    case OP_BRAZERO:    case OP_BRAZERO:
677    case OP_BRAMINZERO:    case OP_BRAMINZERO:
678    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
679      case OP_PRUNE:
680      case OP_SKIP:
681      case OP_THEN:
682    case OP_COMMIT:    case OP_COMMIT:
683    case OP_FAIL:    case OP_FAIL:
684    case OP_ACCEPT:    case OP_ACCEPT:
685    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
686      case OP_CLOSE:
687    case OP_SKIPZERO:    case OP_SKIPZERO:
688    return cc + 1;    return cc + PRIV(OP_lengths)[*cc];
   
   case OP_ANYBYTE:  
 #ifdef SUPPORT_UTF  
   if (common->utf) return NULL;  
 #endif  
   return cc + 1;  
689    
690    case OP_CHAR:    case OP_CHAR:
691    case OP_CHARI:    case OP_CHARI:
# Line 543  switch(*cc) Line 697  switch(*cc)
697    case OP_MINPLUS:    case OP_MINPLUS:
698    case OP_QUERY:    case OP_QUERY:
699    case OP_MINQUERY:    case OP_MINQUERY:
700      case OP_UPTO:
701      case OP_MINUPTO:
702      case OP_EXACT:
703    case OP_POSSTAR:    case OP_POSSTAR:
704    case OP_POSPLUS:    case OP_POSPLUS:
705    case OP_POSQUERY:    case OP_POSQUERY:
706      case OP_POSUPTO:
707    case OP_STARI:    case OP_STARI:
708    case OP_MINSTARI:    case OP_MINSTARI:
709    case OP_PLUSI:    case OP_PLUSI:
710    case OP_MINPLUSI:    case OP_MINPLUSI:
711    case OP_QUERYI:    case OP_QUERYI:
712    case OP_MINQUERYI:    case OP_MINQUERYI:
713      case OP_UPTOI:
714      case OP_MINUPTOI:
715      case OP_EXACTI:
716    case OP_POSSTARI:    case OP_POSSTARI:
717    case OP_POSPLUSI:    case OP_POSPLUSI:
718    case OP_POSQUERYI:    case OP_POSQUERYI:
719      case OP_POSUPTOI:
720    case OP_NOTSTAR:    case OP_NOTSTAR:
721    case OP_NOTMINSTAR:    case OP_NOTMINSTAR:
722    case OP_NOTPLUS:    case OP_NOTPLUS:
723    case OP_NOTMINPLUS:    case OP_NOTMINPLUS:
724    case OP_NOTQUERY:    case OP_NOTQUERY:
725    case OP_NOTMINQUERY:    case OP_NOTMINQUERY:
726      case OP_NOTUPTO:
727      case OP_NOTMINUPTO:
728      case OP_NOTEXACT:
729    case OP_NOTPOSSTAR:    case OP_NOTPOSSTAR:
730    case OP_NOTPOSPLUS:    case OP_NOTPOSPLUS:
731    case OP_NOTPOSQUERY:    case OP_NOTPOSQUERY:
732      case OP_NOTPOSUPTO:
733    case OP_NOTSTARI:    case OP_NOTSTARI:
734    case OP_NOTMINSTARI:    case OP_NOTMINSTARI:
735    case OP_NOTPLUSI:    case OP_NOTPLUSI:
736    case OP_NOTMINPLUSI:    case OP_NOTMINPLUSI:
737    case OP_NOTQUERYI:    case OP_NOTQUERYI:
738    case OP_NOTMINQUERYI:    case OP_NOTMINQUERYI:
   case OP_NOTPOSSTARI:  
   case OP_NOTPOSPLUSI:  
   case OP_NOTPOSQUERYI:  
   cc += 2;  
 #ifdef SUPPORT_UTF  
   if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
   return cc;  
   
   case OP_UPTO:  
   case OP_MINUPTO:  
   case OP_EXACT:  
   case OP_POSUPTO:  
   case OP_UPTOI:  
   case OP_MINUPTOI:  
   case OP_EXACTI:  
   case OP_POSUPTOI:  
   case OP_NOTUPTO:  
   case OP_NOTMINUPTO:  
   case OP_NOTEXACT:  
   case OP_NOTPOSUPTO:  
739    case OP_NOTUPTOI:    case OP_NOTUPTOI:
740    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
741    case OP_NOTEXACTI:    case OP_NOTEXACTI:
742      case OP_NOTPOSSTARI:
743      case OP_NOTPOSPLUSI:
744      case OP_NOTPOSQUERYI:
745    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
746    cc += 2 + IMM2_SIZE;    cc += PRIV(OP_lengths)[*cc];
747  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
748    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
749  #endif  #endif
750    return cc;    return cc;
751    
752    case OP_NOTPROP:    /* Special cases. */
753    case OP_PROP:    case OP_TYPESTAR:
754    return cc + 1 + 2;    case OP_TYPEMINSTAR:
755      case OP_TYPEPLUS:
756      case OP_TYPEMINPLUS:
757      case OP_TYPEQUERY:
758      case OP_TYPEMINQUERY:
759    case OP_TYPEUPTO:    case OP_TYPEUPTO:
760    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
761    case OP_TYPEEXACT:    case OP_TYPEEXACT:
762      case OP_TYPEPOSSTAR:
763      case OP_TYPEPOSPLUS:
764      case OP_TYPEPOSQUERY:
765    case OP_TYPEPOSUPTO:    case OP_TYPEPOSUPTO:
766    case OP_REF:    return cc + PRIV(OP_lengths)[*cc] - 1;
   case OP_REFI:  
   case OP_CREF:  
   case OP_NCREF:  
   case OP_RREF:  
   case OP_NRREF:  
   case OP_CLOSE:  
   cc += 1 + IMM2_SIZE;  
   return cc;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   return cc + 1 + 2 * IMM2_SIZE;  
767    
768    case OP_CLASS:    case OP_ANYBYTE:
769    case OP_NCLASS:  #ifdef SUPPORT_UTF
770    return cc + 1 + 32 / sizeof(pcre_uchar);    if (common->utf) return NULL;
771    #endif
772      return cc + 1;
773    
774  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
775    case OP_XCLASS:    case OP_XCLASS:
776    return cc + GET(cc, 1);    return cc + GET(cc, 1);
777  #endif  #endif
778    
   case OP_RECURSE:  
   case OP_ASSERT:  
   case OP_ASSERT_NOT:  
   case OP_ASSERTBACK:  
   case OP_ASSERTBACK_NOT:  
   case OP_REVERSE:  
   case OP_ONCE:  
   case OP_ONCE_NC:  
   case OP_BRA:  
   case OP_BRAPOS:  
   case OP_COND:  
   case OP_SBRA:  
   case OP_SBRAPOS:  
   case OP_SCOND:  
   case OP_ALT:  
   case OP_KET:  
   case OP_KETRMAX:  
   case OP_KETRMIN:  
   case OP_KETRPOS:  
   return cc + 1 + LINK_SIZE;  
   
   case OP_CBRA:  
   case OP_CBRAPOS:  
   case OP_SCBRA:  
   case OP_SCBRAPOS:  
   return cc + 1 + LINK_SIZE + IMM2_SIZE;  
   
779    case OP_MARK:    case OP_MARK:
780      case OP_PRUNE_ARG:
781      case OP_SKIP_ARG:
782      case OP_THEN_ARG:
783    return cc + 1 + 2 + cc[1];    return cc + 1 + 2 + cc[1];
784    
785    default:    default:
786      /* All opcodes are supported now! */
787      SLJIT_ASSERT_STOP();
788    return NULL;    return NULL;
789    }    }
790  }  }
791    
792  static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
793  {  {
794  int localspace = 0;  int count;
795  pcre_uchar *alternative;  pcre_uchar *slot;
796    pcre_uchar *assert_back_end = cc - 1;
797    
798  /* Calculates important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculates important variables (like stack size) and checks whether all opcodes are supported. */
799  while (cc < ccend)  while (cc < ccend)
800    {    {
# Line 678  while (cc < ccend) Line 802  while (cc < ccend)
802      {      {
803      case OP_SET_SOM:      case OP_SET_SOM:
804      common->has_set_som = TRUE;      common->has_set_som = TRUE;
805        common->might_be_empty = TRUE;
806      cc += 1;      cc += 1;
807      break;      break;
808    
809      case OP_ASSERT:      case OP_REF:
810      case OP_ASSERT_NOT:      case OP_REFI:
811      case OP_ASSERTBACK:      common->optimized_cbracket[GET2(cc, 1)] = 0;
812      case OP_ASSERTBACK_NOT:      cc += 1 + IMM2_SIZE;
     case OP_ONCE:  
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
     case OP_SCOND:  
     localspace += sizeof(sljit_w);  
     cc += 1 + LINK_SIZE;  
813      break;      break;
814    
815      case OP_CBRAPOS:      case OP_CBRAPOS:
816      case OP_SCBRAPOS:      case OP_SCBRAPOS:
817      localspace += sizeof(sljit_w);      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818      cc += 1 + LINK_SIZE + IMM2_SIZE;      cc += 1 + LINK_SIZE + IMM2_SIZE;
819      break;      break;
820    
821      case OP_COND:      case OP_COND:
822      /* Might be a hidden SCOND. */      case OP_SCOND:
823      alternative = cc + GET(cc, 1);      /* Only AUTO_CALLOUT can insert this opcode. We do
824      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)         not intend to support this case. */
825        localspace += sizeof(sljit_w);      if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826          return FALSE;
827      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
828      break;      break;
829    
830        case OP_CREF:
831        common->optimized_cbracket[GET2(cc, 1)] = 0;
832        cc += 1 + IMM2_SIZE;
833        break;
834    
835        case OP_DNREF:
836        case OP_DNREFI:
837        case OP_DNCREF:
838        count = GET2(cc, 1 + IMM2_SIZE);
839        slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840        while (count-- > 0)
841          {
842          common->optimized_cbracket[GET2(slot, 0)] = 0;
843          slot += common->name_entry_size;
844          }
845        cc += 1 + 2 * IMM2_SIZE;
846        break;
847    
848      case OP_RECURSE:      case OP_RECURSE:
849      /* Set its value only once. */      /* Set its value only once. */
850      if (common->recursive_head == 0)      if (common->recursive_head_ptr == 0)
851          {
852          common->recursive_head_ptr = common->ovector_start;
853          common->ovector_start += sizeof(sljit_sw);
854          }
855        cc += 1 + LINK_SIZE;
856        break;
857    
858        case OP_CALLOUT:
859        if (common->capture_last_ptr == 0)
860        {        {
861        common->recursive_head = common->ovector_start;        common->capture_last_ptr = common->ovector_start;
862        common->ovector_start += sizeof(sljit_w);        common->ovector_start += sizeof(sljit_sw);
863        }        }
864        cc += 2 + 2 * LINK_SIZE;
865        break;
866    
867        case OP_ASSERTBACK:
868        slot = bracketend(cc);
869        if (slot > assert_back_end)
870          assert_back_end = slot;
871      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
872      break;      break;
873    
874        case OP_THEN_ARG:
875        common->has_then = TRUE;
876        common->control_head_ptr = 1;
877        /* Fall through. */
878    
879        case OP_PRUNE_ARG:
880      case OP_MARK:      case OP_MARK:
881      if (common->mark_ptr == 0)      if (common->mark_ptr == 0)
882        {        {
883        common->mark_ptr = common->ovector_start;        common->mark_ptr = common->ovector_start;
884        common->ovector_start += sizeof(sljit_w);        common->ovector_start += sizeof(sljit_sw);
885        }        }
886      cc += 1 + 2 + cc[1];      cc += 1 + 2 + cc[1];
887      break;      break;
888    
889        case OP_THEN:
890        common->has_then = TRUE;
891        common->control_head_ptr = 1;
892        cc += 1;
893        break;
894    
895        case OP_SKIP:
896        if (cc < assert_back_end)
897          common->has_skip_in_assert_back = TRUE;
898        cc += 1;
899        break;
900    
901        case OP_SKIP_ARG:
902        common->control_head_ptr = 1;
903        common->has_skip_arg = TRUE;
904        if (cc < assert_back_end)
905          common->has_skip_in_assert_back = TRUE;
906        cc += 1 + 2 + cc[1];
907        break;
908    
909      default:      default:
910      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
911      if (cc == NULL)      if (cc == NULL)
912        return -1;        return FALSE;
913      break;      break;
914      }      }
915    }    }
916  return localspace;  return TRUE;
917  }  }
918    
919  static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)  static BOOL is_accelerated_repeat(pcre_uchar *cc)
920  {  {
921  pcre_uchar *cc = common->start;  switch(*cc)
 pcre_uchar *alternative;  
 while (cc < ccend)  
922    {    {
923    switch(*cc)    case OP_TYPESTAR:
924      {    case OP_TYPEMINSTAR:
925      case OP_ASSERT:    case OP_TYPEPLUS:
926      case OP_ASSERT_NOT:    case OP_TYPEMINPLUS:
927      case OP_ASSERTBACK:    case OP_TYPEPOSSTAR:
928      case OP_ASSERTBACK_NOT:    case OP_TYPEPOSPLUS:
929      case OP_ONCE:    return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
     case OP_ONCE_NC:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
     case OP_SCOND:  
     common->localptrs[cc - common->start] = localptr;  
     localptr += sizeof(sljit_w);  
     cc += 1 + LINK_SIZE;  
     break;  
930    
931      case OP_CBRAPOS:    case OP_STAR:
932      case OP_SCBRAPOS:    case OP_MINSTAR:
933      common->localptrs[cc - common->start] = localptr;    case OP_PLUS:
934      localptr += sizeof(sljit_w);    case OP_MINPLUS:
935      cc += 1 + LINK_SIZE + IMM2_SIZE;    case OP_POSSTAR:
936      break;    case OP_POSPLUS:
937    
938      case OP_COND:    case OP_STARI:
939      /* Might be a hidden SCOND. */    case OP_MINSTARI:
940      alternative = cc + GET(cc, 1);    case OP_PLUSI:
941      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)    case OP_MINPLUSI:
942        {    case OP_POSSTARI:
943        common->localptrs[cc - common->start] = localptr;    case OP_POSPLUSI:
       localptr += sizeof(sljit_w);  
       }  
     cc += 1 + LINK_SIZE;  
     break;  
944    
945      default:    case OP_NOTSTAR:
946      cc = next_opcode(common, cc);    case OP_NOTMINSTAR:
947      SLJIT_ASSERT(cc != NULL);    case OP_NOTPLUS:
948      break;    case OP_NOTMINPLUS:
949      case OP_NOTPOSSTAR:
950      case OP_NOTPOSPLUS:
951    
952      case OP_NOTSTARI:
953      case OP_NOTMINSTARI:
954      case OP_NOTPLUSI:
955      case OP_NOTMINPLUSI:
956      case OP_NOTPOSSTARI:
957      case OP_NOTPOSPLUSI:
958      return TRUE;
959    
960      case OP_CLASS:
961      case OP_NCLASS:
962    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
963      case OP_XCLASS:
964      cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
965    #else
966      cc += (1 + (32 / sizeof(pcre_uchar)));
967    #endif
968    
969      switch(*cc)
970        {
971        case OP_CRSTAR:
972        case OP_CRMINSTAR:
973        case OP_CRPLUS:
974        case OP_CRMINPLUS:
975        case OP_CRPOSSTAR:
976        case OP_CRPOSPLUS:
977        return TRUE;
978      }      }
979      break;
980    }    }
981    return FALSE;
982  }  }
983    
984  /* Returns with -1 if no need for frame. */  static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)  
985  {  {
986  pcre_uchar *ccend = bracketend(cc);  pcre_uchar *cc = common->start;
987  int length = 0;  pcre_uchar *end;
 BOOL possessive = FALSE;  
 BOOL setsom_found = recursive;  
 BOOL setmark_found = recursive;  
988    
989  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  /* Skip non-repeated brackets. */
990    while (TRUE)
991    {    {
   length = 3;  
   possessive = TRUE;  
   }  
   
 cc = next_opcode(common, cc);  
 SLJIT_ASSERT(cc != NULL);  
 while (cc < ccend)  
992    switch(*cc)    switch(*cc)
993      {      {
994        case OP_SOD:
995        case OP_SOM:
996      case OP_SET_SOM:      case OP_SET_SOM:
997      SLJIT_ASSERT(common->has_set_som);      case OP_NOT_WORD_BOUNDARY:
998      if (!setsom_found)      case OP_WORD_BOUNDARY:
999        {      case OP_EODN:
1000        length += 2;      case OP_EOD:
1001        setsom_found = TRUE;      case OP_CIRC:
1002        }      case OP_CIRCM:
1003      cc += 1;      case OP_DOLL:
1004      break;      case OP_DOLLM:
1005        /* Zero width assertions. */
1006        cc++;
1007        continue;
1008        }
1009    
1010      case OP_MARK:    if (*cc != OP_BRA && *cc != OP_CBRA)
     SLJIT_ASSERT(common->mark_ptr != 0);  
     if (!setmark_found)  
       {  
       length += 2;  
       setmark_found = TRUE;  
       }  
     cc += 1 + 2 + cc[1];  
1011      break;      break;
1012    
1013      case OP_RECURSE:    end = cc + GET(cc, 1);
1014      if (common->has_set_som && !setsom_found)    if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1015        return FALSE;
1016      if (*cc == OP_CBRA)
1017        {
1018        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1019          return FALSE;
1020        cc += IMM2_SIZE;
1021        }
1022      cc += 1 + LINK_SIZE;
1023      }
1024    
1025    if (is_accelerated_repeat(cc))
1026      {
1027      common->fast_forward_bc_ptr = cc;
1028      common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1029      *private_data_start += sizeof(sljit_sw);
1030      return TRUE;
1031      }
1032    return FALSE;
1033    }
1034    
1035    static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1036    {
1037      pcre_uchar *next_alt;
1038    
1039      SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1040    
1041      if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1042        return;
1043    
1044      next_alt = bracketend(cc) - (1 + LINK_SIZE);
1045      if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1046        return;
1047    
1048      do
1049        {
1050        next_alt = cc + GET(cc, 1);
1051    
1052        cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1053    
1054        while (TRUE)
1055        {        {
1056        length += 2;        switch(*cc)
1057        setsom_found = TRUE;          {
1058            case OP_SOD:
1059            case OP_SOM:
1060            case OP_SET_SOM:
1061            case OP_NOT_WORD_BOUNDARY:
1062            case OP_WORD_BOUNDARY:
1063            case OP_EODN:
1064            case OP_EOD:
1065            case OP_CIRC:
1066            case OP_CIRCM:
1067            case OP_DOLL:
1068            case OP_DOLLM:
1069            /* Zero width assertions. */
1070            cc++;
1071            continue;
1072            }
1073          break;
1074        }        }
1075      if (common->mark_ptr != 0 && !setmark_found)  
1076        if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1077          detect_fast_fail(common, cc, private_data_start, depth - 1);
1078    
1079        if (is_accelerated_repeat(cc))
1080        {        {
1081        length += 2;        common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
       setmark_found = TRUE;  
       }  
     cc += 1 + LINK_SIZE;  
     break;  
1082    
1083      case OP_CBRA:        if (common->fast_fail_start_ptr == 0)
1084      case OP_CBRAPOS:          common->fast_fail_start_ptr = *private_data_start;
     case OP_SCBRA:  
     case OP_SCBRAPOS:  
     length += 3;  
     cc += 1 + LINK_SIZE + IMM2_SIZE;  
     break;  
1085    
1086      default:        *private_data_start += sizeof(sljit_sw);
1087      cc = next_opcode(common, cc);        common->fast_fail_end_ptr = *private_data_start;
1088      SLJIT_ASSERT(cc != NULL);  
1089      break;        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1090            return;
1091          }
1092    
1093        cc = next_alt;
1094      }      }
1095      while (*cc == OP_ALT);
1096    }
1097    
1098  /* Possessive quantifiers can use a special case. */  static int get_class_iterator_size(pcre_uchar *cc)
1099  if (SLJIT_UNLIKELY(possessive) && length == 3)  {
1100    return -1;  sljit_u32 min;
1101    sljit_u32 max;
1102    switch(*cc)
1103      {
1104      case OP_CRSTAR:
1105      case OP_CRPLUS:
1106      return 2;
1107    
1108  if (length > 0)    case OP_CRMINSTAR:
1109    return length + 1;    case OP_CRMINPLUS:
1110  return -1;    case OP_CRQUERY:
1111      case OP_CRMINQUERY:
1112      return 1;
1113    
1114      case OP_CRRANGE:
1115      case OP_CRMINRANGE:
1116      min = GET2(cc, 1);
1117      max = GET2(cc, 1 + IMM2_SIZE);
1118      if (max == 0)
1119        return (*cc == OP_CRRANGE) ? 2 : 1;
1120      max -= min;
1121      if (max > 2)
1122        max = 2;
1123      return max;
1124    
1125      default:
1126      return 0;
1127      }
1128  }  }
1129    
1130  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)  static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1131  {  {
1132  DEFINE_COMPILER;  pcre_uchar *end = bracketend(begin);
1133  pcre_uchar *ccend = bracketend(cc);  pcre_uchar *next;
1134  BOOL setsom_found = recursive;  pcre_uchar *next_end;
1135  BOOL setmark_found = recursive;  pcre_uchar *max_end;
1136  int offset;  pcre_uchar type;
1137    sljit_sw length = end - begin;
1138    int min, max, i;
1139    
1140  /* >= 1 + shortest item size (2) */  /* Detect fixed iterations first. */
1141  SLJIT_UNUSED_ARG(stacktop);  if (end[-(1 + LINK_SIZE)] != OP_KET)
1142  SLJIT_ASSERT(stackpos >= stacktop + 2);    return FALSE;
1143    
1144    /* Already detected repeat. */
1145    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1146      return TRUE;
1147    
1148  stackpos = STACK(stackpos);  next = end;
1149  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  min = 1;
1150    cc = next_opcode(common, cc);  while (1)
1151  SLJIT_ASSERT(cc != NULL);    {
1152  while (cc < ccend)    if (*next != *begin)
   switch(*cc)  
     {  
     case OP_SET_SOM:  
     SLJIT_ASSERT(common->has_set_som);  
     if (!setsom_found)  
       {  
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);  
       stackpos += (int)sizeof(sljit_w);  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  
       stackpos += (int)sizeof(sljit_w);  
       setsom_found = TRUE;  
       }  
     cc += 1;  
1153      break;      break;
1154      next_end = bracketend(next);
1155      case OP_MARK:    if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
     SLJIT_ASSERT(common->mark_ptr != 0);  
     if (!setmark_found)  
       {  
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);  
       stackpos += (int)sizeof(sljit_w);  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  
       stackpos += (int)sizeof(sljit_w);  
       setmark_found = TRUE;  
       }  
     cc += 1 + 2 + cc[1];  
1156      break;      break;
1157      next = next_end;
1158      min++;
1159      }
1160    
1161      case OP_RECURSE:  if (min == 2)
1162      if (common->has_set_som && !setsom_found)    return FALSE;
1163        {  
1164        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));  max = 0;
1165        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);  max_end = next;
1166        stackpos += (int)sizeof(sljit_w);  if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1167        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);    {
1168        stackpos += (int)sizeof(sljit_w);    type = *next;
1169        setsom_found = TRUE;    while (1)
1170        }      {
1171      if (common->mark_ptr != 0 && !setmark_found)      if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1172          break;
1173        next_end = bracketend(next + 2 + LINK_SIZE);
1174        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1175          break;
1176        next = next_end;
1177        max++;
1178        }
1179    
1180      if (next[0] == type && next[1] == *begin && max >= 1)
1181        {
1182        next_end = bracketend(next + 1);
1183        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1184        {        {
1185        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);        for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1186        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);          if (*next_end != OP_KET)
1187        stackpos += (int)sizeof(sljit_w);            break;
1188        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  
1189        stackpos += (int)sizeof(sljit_w);        if (i == max)
1190        setmark_found = TRUE;          {
1191            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1192            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1193            /* +2 the original and the last. */
1194            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1195            if (min == 1)
1196              return TRUE;
1197            min--;
1198            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1199            }
1200        }        }
1201      cc += 1 + LINK_SIZE;      }
1202      break;    }
1203    
1204      case OP_CBRA:  if (min >= 3)
1205      case OP_CBRAPOS:    {
1206      case OP_SCBRA:    common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1207      case OP_SCBRAPOS:    common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1208      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;    common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1209      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));    return TRUE;
1210      stackpos += (int)sizeof(sljit_w);    }
1211      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
1212      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  return FALSE;
1213      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  }
1214      stackpos += (int)sizeof(sljit_w);  
1215      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);  #define CASE_ITERATOR_PRIVATE_DATA_1 \
1216      stackpos += (int)sizeof(sljit_w);      case OP_MINSTAR: \
1217        case OP_MINPLUS: \
1218        case OP_QUERY: \
1219        case OP_MINQUERY: \
1220        case OP_MINSTARI: \
1221        case OP_MINPLUSI: \
1222        case OP_QUERYI: \
1223        case OP_MINQUERYI: \
1224        case OP_NOTMINSTAR: \
1225        case OP_NOTMINPLUS: \
1226        case OP_NOTQUERY: \
1227        case OP_NOTMINQUERY: \
1228        case OP_NOTMINSTARI: \
1229        case OP_NOTMINPLUSI: \
1230        case OP_NOTQUERYI: \
1231        case OP_NOTMINQUERYI:
1232    
1233      cc += 1 + LINK_SIZE + IMM2_SIZE;  #define CASE_ITERATOR_PRIVATE_DATA_2A \
1234      break;      case OP_STAR: \
1235        case OP_PLUS: \
1236        case OP_STARI: \
1237        case OP_PLUSI: \
1238        case OP_NOTSTAR: \
1239        case OP_NOTPLUS: \
1240        case OP_NOTSTARI: \
1241        case OP_NOTPLUSI:
1242    
1243      default:  #define CASE_ITERATOR_PRIVATE_DATA_2B \
1244      cc = next_opcode(common, cc);      case OP_UPTO: \
1245      SLJIT_ASSERT(cc != NULL);      case OP_MINUPTO: \
1246      break;      case OP_UPTOI: \
1247      }      case OP_MINUPTOI: \
1248        case OP_NOTUPTO: \
1249        case OP_NOTMINUPTO: \
1250        case OP_NOTUPTOI: \
1251        case OP_NOTMINUPTOI:
1252    
1253  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1254  SLJIT_ASSERT(stackpos == STACK(stacktop));      case OP_TYPEMINSTAR: \
1255  }      case OP_TYPEMINPLUS: \
1256        case OP_TYPEQUERY: \
1257        case OP_TYPEMINQUERY:
1258    
1259    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1260        case OP_TYPESTAR: \
1261        case OP_TYPEPLUS:
1262    
1263    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1264        case OP_TYPEUPTO: \
1265        case OP_TYPEMINUPTO:
1266    
1267  static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1268  {  {
1269  int localsize = 2;  pcre_uchar *cc = common->start;
1270  pcre_uchar *alternative;  pcre_uchar *alternative;
1271  /* Calculate the sum of the local variables. */  pcre_uchar *end = NULL;
1272    int private_data_ptr = *private_data_start;
1273    int space, size, bracketlen;
1274    BOOL repeat_check = TRUE;
1275    
1276  while (cc < ccend)  while (cc < ccend)
1277    {    {
1278      space = 0;
1279      size = 0;
1280      bracketlen = 0;
1281      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1282        break;
1283    
1284      if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1285        {
1286        if (detect_repeat(common, cc))
1287          {
1288          /* These brackets are converted to repeats, so no global
1289          based single character repeat is allowed. */
1290          if (cc >= end)
1291            end = bracketend(cc);
1292          }
1293        }
1294      repeat_check = TRUE;
1295    
1296    switch(*cc)    switch(*cc)
1297      {      {
1298        case OP_KET:
1299        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1300          {
1301          common->private_data_ptrs[cc - common->start] = private_data_ptr;
1302          private_data_ptr += sizeof(sljit_sw);
1303          cc += common->private_data_ptrs[cc + 1 - common->start];
1304          }
1305        cc += 1 + LINK_SIZE;
1306        break;
1307    
1308      case OP_ASSERT:      case OP_ASSERT:
1309      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1310      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 979  while (cc < ccend) Line 1315  while (cc < ccend)
1315      case OP_SBRA:      case OP_SBRA:
1316      case OP_SBRAPOS:      case OP_SBRAPOS:
1317      case OP_SCOND:      case OP_SCOND:
1318      localsize++;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1319      cc += 1 + LINK_SIZE;      private_data_ptr += sizeof(sljit_sw);
1320      break;      bracketlen = 1 + LINK_SIZE;
   
     case OP_CBRA:  
     case OP_SCBRA:  
     localsize++;  
     cc += 1 + LINK_SIZE + IMM2_SIZE;  
1321      break;      break;
1322    
1323      case OP_CBRAPOS:      case OP_CBRAPOS:
1324      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1325      localsize += 2;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1326      cc += 1 + LINK_SIZE + IMM2_SIZE;      private_data_ptr += sizeof(sljit_sw);
1327        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1328      break;      break;
1329    
1330      case OP_COND:      case OP_COND:
1331      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1332      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1333      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1334        localsize++;        {
1335      cc += 1 + LINK_SIZE;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1336          private_data_ptr += sizeof(sljit_sw);
1337          }
1338        bracketlen = 1 + LINK_SIZE;
1339      break;      break;
1340    
1341      default:      case OP_BRA:
1342      cc = next_opcode(common, cc);      bracketlen = 1 + LINK_SIZE;
     SLJIT_ASSERT(cc != NULL);  
1343      break;      break;
     }  
   }  
 SLJIT_ASSERT(cc == ccend);  
 return localsize;  
 }  
1344    
1345  static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,      case OP_CBRA:
1346    BOOL save, int stackptr, int stacktop)      case OP_SCBRA:
1347  {      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1348  DEFINE_COMPILER;      break;
 int srcw[2];  
 int count;  
 BOOL tmp1next = TRUE;  
 BOOL tmp1empty = TRUE;  
 BOOL tmp2empty = TRUE;  
 pcre_uchar *alternative;  
 enum {  
   start,  
   loop,  
   end  
 } status;  
1349    
1350  status = save ? start : loop;      case OP_BRAZERO:
1351  stackptr = STACK(stackptr - 2);      case OP_BRAMINZERO:
1352  stacktop = STACK(stacktop - 1);      case OP_BRAPOSZERO:
1353        repeat_check = FALSE;
1354        size = 1;
1355        break;
1356    
1357  if (!save)      CASE_ITERATOR_PRIVATE_DATA_1
1358    {      space = 1;
1359    stackptr += sizeof(sljit_w);      size = -2;
1360    if (stackptr < stacktop)      break;
     {  
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);  
     stackptr += sizeof(sljit_w);  
     tmp1empty = FALSE;  
     }  
   if (stackptr < stacktop)  
     {  
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);  
     stackptr += sizeof(sljit_w);  
     tmp2empty = FALSE;  
     }  
   /* The tmp1next must be TRUE in either way. */  
   }  
1361    
1362  while (status != end)      CASE_ITERATOR_PRIVATE_DATA_2A
1363    {      space = 2;
1364    count = 0;      size = -2;
   switch(status)  
     {  
     case start:  
     SLJIT_ASSERT(save && common->recursive_head != 0);  
     count = 1;  
     srcw[0] = common->recursive_head;  
     status = loop;  
1365      break;      break;
1366    
1367      case loop:      CASE_ITERATOR_PRIVATE_DATA_2B
1368      if (cc >= ccend)      space = 2;
1369        {      size = -(2 + IMM2_SIZE);
1370        status = end;      break;
       break;  
       }  
1371    
1372      switch(*cc)      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1373        {      space = 1;
1374        case OP_ASSERT:      size = 1;
1375        case OP_ASSERT_NOT:      break;
       case OP_ASSERTBACK:  
       case OP_ASSERTBACK_NOT:  
       case OP_ONCE:  
       case OP_ONCE_NC:  
       case OP_BRAPOS:  
       case OP_SBRA:  
       case OP_SBRAPOS:  
       case OP_SCOND:  
       count = 1;  
       srcw[0] = PRIV_DATA(cc);  
       SLJIT_ASSERT(srcw[0] != 0);  
       cc += 1 + LINK_SIZE;  
       break;  
1376    
1377        case OP_CBRA:      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1378        case OP_SCBRA:      if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1379        count = 1;        space = 2;
1380        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));      size = 1;
1381        cc += 1 + LINK_SIZE + IMM2_SIZE;      break;
       break;  
1382    
1383        case OP_CBRAPOS:      case OP_TYPEUPTO:
1384        case OP_SCBRAPOS:      if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1385        count = 2;        space = 2;
1386        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));      size = 1 + IMM2_SIZE;
1387        srcw[0] = PRIV_DATA(cc);      break;
       SLJIT_ASSERT(srcw[0] != 0);  
       cc += 1 + LINK_SIZE + IMM2_SIZE;  
       break;  
1388    
1389        case OP_COND:      case OP_TYPEMINUPTO:
1390        /* Might be a hidden SCOND. */      space = 2;
1391        alternative = cc + GET(cc, 1);      size = 1 + IMM2_SIZE;
1392        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      break;
         {  
         count = 1;  
         srcw[0] = PRIV_DATA(cc);  
         SLJIT_ASSERT(srcw[0] != 0);  
         }  
       cc += 1 + LINK_SIZE;  
       break;  
1393    
1394        default:      case OP_CLASS:
1395        cc = next_opcode(common, cc);      case OP_NCLASS:
1396        SLJIT_ASSERT(cc != NULL);      size += 1 + 32 / sizeof(pcre_uchar);
1397        break;      space = get_class_iterator_size(cc + size);
       }  
1398      break;      break;
1399    
1400      case end:  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1401      SLJIT_ASSERT_STOP();      case OP_XCLASS:
1402        size = GET(cc, 1);
1403        space = get_class_iterator_size(cc + size);
1404        break;
1405    #endif
1406    
1407        default:
1408        cc = next_opcode(common, cc);
1409        SLJIT_ASSERT(cc != NULL);
1410      break;      break;
1411      }      }
1412    
1413    while (count > 0)    /* Character iterators, which are not inside a repeated bracket,
1414         gets a private slot instead of allocating it on the stack. */
1415      if (space > 0 && cc >= end)
1416      {      {
1417      count--;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1418      if (save)      private_data_ptr += sizeof(sljit_sw) * space;
       {  
       if (tmp1next)  
         {  
         if (!tmp1empty)  
           {  
           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);  
           stackptr += sizeof(sljit_w);  
           }  
         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);  
         tmp1empty = FALSE;  
         tmp1next = FALSE;  
         }  
       else  
         {  
         if (!tmp2empty)  
           {  
           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);  
           stackptr += sizeof(sljit_w);  
           }  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);  
         tmp2empty = FALSE;  
         tmp1next = TRUE;  
         }  
       }  
     else  
       {  
       if (tmp1next)  
         {  
         SLJIT_ASSERT(!tmp1empty);  
         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);  
         tmp1empty = stackptr >= stacktop;  
         if (!tmp1empty)  
           {  
           OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);  
           stackptr += sizeof(sljit_w);  
           }  
         tmp1next = FALSE;  
         }  
       else  
         {  
         SLJIT_ASSERT(!tmp2empty);  
         OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);  
         tmp2empty = stackptr >= stacktop;  
         if (!tmp2empty)  
           {  
           OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);  
           stackptr += sizeof(sljit_w);  
           }  
         tmp1next = TRUE;  
         }  
       }  
1419      }      }
   }  
1420    
1421  if (save)    if (size != 0)
   {  
   if (tmp1next)  
1422      {      {
1423      if (!tmp1empty)      if (size < 0)
       {  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);  
       stackptr += sizeof(sljit_w);  
       }  
     if (!tmp2empty)  
1424        {        {
1425        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        cc += -size;
1426        stackptr += sizeof(sljit_w);  #ifdef SUPPORT_UTF
1427          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1428    #endif
1429        }        }
1430        else
1431          cc += size;
1432      }      }
1433    else  
1434      if (bracketlen > 0)
1435      {      {
1436      if (!tmp2empty)      if (cc >= end)
       {  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);  
       stackptr += sizeof(sljit_w);  
       }  
     if (!tmp1empty)  
1437        {        {
1438        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        end = bracketend(cc);
1439        stackptr += sizeof(sljit_w);        if (end[-1 - LINK_SIZE] == OP_KET)
1440            end = NULL;
1441        }        }
1442        cc += bracketlen;
1443      }      }
1444    }    }
1445  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  *private_data_start = private_data_ptr;
 }  
   
 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  
 {  
 return (value & (value - 1)) == 0;  
 }  
   
 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)  
 {  
 while (list)  
   {  
   /* sljit_set_label is clever enough to do nothing  
   if either the jump or the label is NULL */  
   sljit_set_label(list->jump, label);  
   list = list->next;  
   }  
 }  
   
 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)  
 {  
 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));  
 if (list_item)  
   {  
   list_item->next = *list;  
   list_item->jump = jump;  
   *list = list_item;  
   }  
1446  }  }
1447    
1448  static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)  /* Returns with a frame_types (always < 0) if no need for frame. */
1449    static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1450  {  {
        /* get_framesize: walks the opcodes in [cc, ccend) and adds up, in
        "length", the machine words of the frame entries that init_frame()
        stores for the same opcodes: 2 words for the first OP_SET_SOM, 2 words
        for the first mark-type opcode, 2 words for the first capture_last
        save and 3 words for every capturing bracket.

        Arguments:
          cc                  first opcode to scan
          ccend               end of the scan; NULL means "the bracket that
                              starts at cc" (see below)
          recursive           when TRUE the start-of-match and mark entries are
                              treated as already handled (their "found" flags
                              start as TRUE) and the (S)CBRAPOS pre-count below
                              is not applied
          needs_control_head  output flag; set to TRUE when a MARK, PRUNE_ARG,
                              THEN_ARG or THEN opcode is seen while
                              common->control_head_ptr is non-zero, or always
                              when DEBUG_FORCE_CONTROL_HEAD is defined non-zero

        Returns: length + 1 when at least one entry is required (the extra
        word presumably covers the terminating zero word that init_frame()
        stores - TODO confirm); otherwise no_frame if any opcode set
        stack_restore, or no_stack if none did. */
1451  DEFINE_COMPILER;  int length = 0;
1452  stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));  int possessive = 0;
1453    BOOL stack_restore = FALSE;
1454  if (list_item)  BOOL setsom_found = recursive;
1455    {  BOOL setmark_found = recursive;
1456    list_item->type = type;  /* The last capture is a local variable even for recursions. */
1457    list_item->data = data;  BOOL capture_last_found = FALSE;
   list_item->start = start;  
   list_item->leave = LABEL();  
   list_item->next = common->stubs;  
   common->stubs = list_item;  
   }  
 }  
1458    
1459  static void flush_stubs(compiler_common *common)  #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1460  {  SLJIT_ASSERT(common->control_head_ptr != 0);
1461  DEFINE_COMPILER;  *needs_control_head = TRUE;
1462  stub_list* list_item = common->stubs;  #else
1463    *needs_control_head = FALSE;
1464    #endif
1465    
        /* No explicit end was given: scan the bracket starting at cc. The end
        is taken as bracketend(cc) minus one opcode plus link (presumably the
        position of the bracket's final ket - TODO confirm). For a
        non-recursive (S)CBRAPOS the opening bracket's own entry is counted
        here up front (3 words, or 5 when capture_last is tracked) and
        remembered in "possessive"; the opening opcode is then skipped. */
1466  while (list_item)  if (ccend == NULL)
1467    {    {
1468    JUMPHERE(list_item->start);    ccend = bracketend(cc) - (1 + LINK_SIZE);
1469    switch(list_item->type)    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1470      {      {
1471      case stack_alloc:      possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1472      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));      /* This is correct regardless of common->capture_last_ptr. */
1473      break;      capture_last_found = TRUE;
1474      }      }
1475    JUMPTO(SLJIT_JUMP, list_item->leave);    cc = next_opcode(common, cc);
   list_item = list_item->next;  
1476    }    }
 common->stubs = NULL;  
 }  
1477    
1478  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  SLJIT_ASSERT(cc != NULL);
1479  {  while (cc < ccend)
1480  DEFINE_COMPILER;    switch(*cc)
1481        {
            /* Start-of-match entry: two words, counted only once. */
1482        case OP_SET_SOM:
1483        SLJIT_ASSERT(common->has_set_som);
1484        stack_restore = TRUE;
1485        if (!setsom_found)
1486          {
1487          length += 2;
1488          setsom_found = TRUE;
1489          }
1490        cc += 1;
1491        break;
1492    
            /* Mark entry: two words, counted only once. These opcodes also
            request the control head when one is allocated. */
1493        case OP_MARK:
1494        case OP_PRUNE_ARG:
1495        case OP_THEN_ARG:
1496        SLJIT_ASSERT(common->mark_ptr != 0);
1497        stack_restore = TRUE;
1498        if (!setmark_found)
1499          {
1500          length += 2;
1501          setmark_found = TRUE;
1502          }
1503        if (common->control_head_ptr != 0)
1504          *needs_control_head = TRUE;
1505        cc += 1 + 2 + cc[1];
1506        break;
1507    
            /* A recursion call reserves every not-yet-counted entry kind that
            the pattern uses: start-of-match, mark and capture_last. */
1508        case OP_RECURSE:
1509        stack_restore = TRUE;
1510        if (common->has_set_som && !setsom_found)
1511          {
1512          length += 2;
1513          setsom_found = TRUE;
1514          }
1515        if (common->mark_ptr != 0 && !setmark_found)
1516          {
1517          length += 2;
1518          setmark_found = TRUE;
1519          }
1520        if (common->capture_last_ptr != 0 && !capture_last_found)
1521          {
1522          length += 2;
1523          capture_last_found = TRUE;
1524          }
1525        cc += 1 + LINK_SIZE;
1526        break;
1527    
            /* Every capturing bracket adds a three word entry; the capture_last
            entry (two words) is added once, with the first bracket seen. */
1528        case OP_CBRA:
1529        case OP_CBRAPOS:
1530        case OP_SCBRA:
1531        case OP_SCBRAPOS:
1532        stack_restore = TRUE;
1533        if (common->capture_last_ptr != 0 && !capture_last_found)
1534          {
1535          length += 2;
1536          capture_last_found = TRUE;
1537          }
1538        length += 3;
1539        cc += 1 + LINK_SIZE + IMM2_SIZE;
1540        break;
1541    
            /* OP_THEN adds no frame words, it only requests the control head. */
1542        case OP_THEN:
1543        stack_restore = TRUE;
1544        if (common->control_head_ptr != 0)
1545          *needs_control_head = TRUE;
1546        cc ++;
1547        break;
1548    
            /* Any opcode that is not named in this switch sets stack_restore.
            The opcodes listed after the default label leave stack_restore
            untouched and are only stepped over with next_opcode(). */
1549        default:
1550        stack_restore = TRUE;
1551        /* Fall through. */
1552    
1553        case OP_NOT_WORD_BOUNDARY:
1554        case OP_WORD_BOUNDARY:
1555        case OP_NOT_DIGIT:
1556        case OP_DIGIT:
1557        case OP_NOT_WHITESPACE:
1558        case OP_WHITESPACE:
1559        case OP_NOT_WORDCHAR:
1560        case OP_WORDCHAR:
1561        case OP_ANY:
1562        case OP_ALLANY:
1563        case OP_ANYBYTE:
1564        case OP_NOTPROP:
1565        case OP_PROP:
1566        case OP_ANYNL:
1567        case OP_NOT_HSPACE:
1568        case OP_HSPACE:
1569        case OP_NOT_VSPACE:
1570        case OP_VSPACE:
1571        case OP_EXTUNI:
1572        case OP_EODN:
1573        case OP_EOD:
1574        case OP_CIRC:
1575        case OP_CIRCM:
1576        case OP_DOLL:
1577        case OP_DOLLM:
1578        case OP_CHAR:
1579        case OP_CHARI:
1580        case OP_NOT:
1581        case OP_NOTI:
1582    
1583        case OP_EXACT:
1584        case OP_POSSTAR:
1585        case OP_POSPLUS:
1586        case OP_POSQUERY:
1587        case OP_POSUPTO:
1588    
1589        case OP_EXACTI:
1590        case OP_POSSTARI:
1591        case OP_POSPLUSI:
1592        case OP_POSQUERYI:
1593        case OP_POSUPTOI:
1594    
1595        case OP_NOTEXACT:
1596        case OP_NOTPOSSTAR:
1597        case OP_NOTPOSPLUS:
1598        case OP_NOTPOSQUERY:
1599        case OP_NOTPOSUPTO:
1600    
1601        case OP_NOTEXACTI:
1602        case OP_NOTPOSSTARI:
1603        case OP_NOTPOSPLUSI:
1604        case OP_NOTPOSQUERYI:
1605        case OP_NOTPOSUPTOI:
1606    
1607        case OP_TYPEEXACT:
1608        case OP_TYPEPOSSTAR:
1609        case OP_TYPEPOSPLUS:
1610        case OP_TYPEPOSQUERY:
1611        case OP_TYPEPOSUPTO:
1612    
1613        case OP_CLASS:
1614        case OP_NCLASS:
1615        case OP_XCLASS:
1616        case OP_CALLOUT:
1617    
1618        cc = next_opcode(common, cc);
1619        SLJIT_ASSERT(cc != NULL);
1620        break;
1621        }
1622    
        /* "possessive" is non-zero only for the pre-counted (S)CBRAPOS case. When
        both values are equal nothing inside the range added to the length (this
        also holds when both are still zero), so no frame size is reported. */
1623    /* Possessive quantifiers can use a special case. */
1624    if (SLJIT_UNLIKELY(possessive == length))
1625      return stack_restore ? no_frame : no_stack;
1626    
1627    if (length > 0)
1628      return length + 1;
1629    return stack_restore ? no_frame : no_stack;
1630    }
1631    
1632    static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1633    {
        /* init_frame: emits (through OP1) the instructions that store a frame
        into consecutive STACK_TOP relative slots, starting at STACK(stackpos).
        The opcodes in [cc, ccend) are scanned in the same way as in
        get_framesize(), and one entry is stored for each counted item:

          start of match:   -OVECTOR(0), then the value read at OVECTOR(0)
          mark:             -common->mark_ptr, then the value read there
          capture_last:     -common->capture_last_ptr, then the value read there
          capturing group:  OVECTOR(offset), then the values read at
                            OVECTOR(offset) and OVECTOR(offset + 1)

        A single zero word is stored after the last entry. The negated first
        word of the two word entries presumably lets the code which reads the
        frame tell them apart from capturing group entries - TODO confirm
        against the frame restoring code.

        stacktop is used only by the assertions: after the scan stackpos must
        be equal to STACK(stacktop). When recursive is TRUE no start-of-match
        or mark entry is stored (their "found" flags start as TRUE). */
1634    DEFINE_COMPILER;
1635    BOOL setsom_found = recursive;
1636    BOOL setmark_found = recursive;
1637    /* The last capture is a local variable even for recursions. */
1638    BOOL capture_last_found = FALSE;
1639    int offset;
1640    
1641    /* >= 1 + shortest item size (2) */
1642    SLJIT_UNUSED_ARG(stacktop);
1643    SLJIT_ASSERT(stackpos >= stacktop + 2);
1644    
        /* From here on stackpos is the STACK() form of the slot index; it is
        advanced by sizeof(sljit_sw) after every stored word. */
1645    stackpos = STACK(stackpos);
        /* No explicit end: process the bracket which starts at cc. The opening
        opcode is skipped, except for a non-recursive (S)CBRAPOS, which is left
        in place so that the loop below stores its capturing group entry (this
        is the entry get_framesize() counts up front for the same case). */
1646    if (ccend == NULL)
1647      {
1648      ccend = bracketend(cc) - (1 + LINK_SIZE);
1649      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1650        cc = next_opcode(common, cc);
1651      }
1652    
1653    SLJIT_ASSERT(cc != NULL);
1654    while (cc < ccend)
1655      switch(*cc)
1656        {
            /* Start-of-match entry, stored once. */
1657        case OP_SET_SOM:
1658        SLJIT_ASSERT(common->has_set_som);
1659        if (!setsom_found)
1660          {
1661          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1662          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1663          stackpos += (int)sizeof(sljit_sw);
1664          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1665          stackpos += (int)sizeof(sljit_sw);
1666          setsom_found = TRUE;
1667          }
1668        cc += 1;
1669        break;
1670    
            /* Mark entry, stored once. */
1671        case OP_MARK:
1672        case OP_PRUNE_ARG:
1673        case OP_THEN_ARG:
1674        SLJIT_ASSERT(common->mark_ptr != 0);
1675        if (!setmark_found)
1676          {
1677          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1678          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1679          stackpos += (int)sizeof(sljit_sw);
1680          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1681          stackpos += (int)sizeof(sljit_sw);
1682          setmark_found = TRUE;
1683          }
1684        cc += 1 + 2 + cc[1];
1685        break;
1686    
            /* A recursion call stores every entry kind which is in use by the
            pattern and has not been stored yet. */
1687        case OP_RECURSE:
1688        if (common->has_set_som && !setsom_found)
1689          {
1690          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1691          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1692          stackpos += (int)sizeof(sljit_sw);
1693          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694          stackpos += (int)sizeof(sljit_sw);
1695          setsom_found = TRUE;
1696          }
1697        if (common->mark_ptr != 0 && !setmark_found)
1698          {
1699          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1700          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1701          stackpos += (int)sizeof(sljit_sw);
1702          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1703          stackpos += (int)sizeof(sljit_sw);
1704          setmark_found = TRUE;
1705          }
1706        if (common->capture_last_ptr != 0 && !capture_last_found)
1707          {
1708          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1709          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1710          stackpos += (int)sizeof(sljit_sw);
1711          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1712          stackpos += (int)sizeof(sljit_sw);
1713          capture_last_found = TRUE;
1714          }
1715        cc += 1 + LINK_SIZE;
1716        break;
1717    
            /* Capturing brackets: the capture_last entry is stored once, then a
            three word entry is stored for this group. */
1718        case OP_CBRA:
1719        case OP_CBRAPOS:
1720        case OP_SCBRA:
1721        case OP_SCBRAPOS:
1722        if (common->capture_last_ptr != 0 && !capture_last_found)
1723          {
1724          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1725          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1726          stackpos += (int)sizeof(sljit_sw);
1727          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1728          stackpos += (int)sizeof(sljit_sw);
1729          capture_last_found = TRUE;
1730          }
            /* The group number is doubled because each group uses two
            consecutive OVECTOR slots (offset and offset + 1 below). */
1731        offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1732        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1733        stackpos += (int)sizeof(sljit_sw);
1734        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1735        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1736        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1737        stackpos += (int)sizeof(sljit_sw);
1738        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1739        stackpos += (int)sizeof(sljit_sw);
1740    
1741        cc += 1 + LINK_SIZE + IMM2_SIZE;
1742        break;
1743    
            /* Everything else stores nothing and is only stepped over. */
1744        default:
1745        cc = next_opcode(common, cc);
1746        SLJIT_ASSERT(cc != NULL);
1747        break;
1748        }
1749    
        /* Store the closing zero word. stackpos is not advanced afterwards, so
        the assertion checks the position of this last word. */
1750    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1751    SLJIT_ASSERT(stackpos == STACK(stacktop));
1752    }
1753    
1754    static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1755    {
        /* get_private_data_copy_length: returns the number of machine words
        needed for the private data of the opcodes in [cc, ccend). The scan
        must end exactly at ccend (asserted at the end).

        The count starts at 2, or at 3 when needs_control_head is TRUE.
        copy_private_data() saves common->recursive_head_ptr first, and
        common->control_head_ptr as well when the control head is needed; the
        one remaining base word is presumably a slot reserved by the caller -
        TODO confirm against the recursion compiling code. */
1756    int private_data_length = needs_control_head ? 3 : 2;
1757    int size;
1758    pcre_uchar *alternative;
1759    /* Calculate the sum of the private machine words. */
1760    while (cc < ccend)
1761      {
1762      size = 0;
1763      switch(*cc)
1764        {
            /* A ket which has private data uses one word. The entry which
            belongs to cc + 1 then holds an extra distance to skip, on top of
            the normal size of the ket. */
1765        case OP_KET:
1766        if (PRIVATE_DATA(cc) != 0)
1767          {
1768          private_data_length++;
1769          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1770          cc += PRIVATE_DATA(cc + 1);
1771          }
1772        cc += 1 + LINK_SIZE;
1773        break;
1774    
            /* These brackets always have exactly one private word. */
1775        case OP_ASSERT:
1776        case OP_ASSERT_NOT:
1777        case OP_ASSERTBACK:
1778        case OP_ASSERTBACK_NOT:
1779        case OP_ONCE:
1780        case OP_ONCE_NC:
1781        case OP_BRAPOS:
1782        case OP_SBRA:
1783        case OP_SBRAPOS:
1784        case OP_SCOND:
1785        private_data_length++;
1786        SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1787        cc += 1 + LINK_SIZE;
1788        break;
1789    
            /* A capturing bracket is counted only when the optimized_cbracket
            entry of its group number is zero. */
1790        case OP_CBRA:
1791        case OP_SCBRA:
1792        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1793          private_data_length++;
1794        cc += 1 + LINK_SIZE + IMM2_SIZE;
1795        break;
1796    
            /* Two words: copy_private_data() saves both PRIVATE_DATA(cc) and
            the OVECTOR_PRIV slot of the group for these opcodes. */
1797        case OP_CBRAPOS:
1798        case OP_SCBRAPOS:
1799        private_data_length += 2;
1800        cc += 1 + LINK_SIZE + IMM2_SIZE;
1801        break;
1802    
1803        case OP_COND:
1804        /* Might be a hidden SCOND. */
1805        alternative = cc + GET(cc, 1);
1806        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1807          private_data_length++;
1808        cc += 1 + LINK_SIZE;
1809        break;
1810    
            /* Character iterators are counted only when a private slot was
            assigned to them (PRIVATE_DATA(cc) is non-zero). In UTF mode the
            extra code units of the character operand are skipped as well. */
1811        CASE_ITERATOR_PRIVATE_DATA_1
1812        if (PRIVATE_DATA(cc))
1813          private_data_length++;
1814        cc += 2;
1815    #ifdef SUPPORT_UTF
1816        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1817    #endif
1818        break;
1819    
1820        CASE_ITERATOR_PRIVATE_DATA_2A
1821        if (PRIVATE_DATA(cc))
1822          private_data_length += 2;
1823        cc += 2;
1824    #ifdef SUPPORT_UTF
1825        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1826    #endif
1827        break;
1828    
            /* Same as the 2A group, but the opcode carries an IMM2_SIZE operand
            before the character. */
1829        CASE_ITERATOR_PRIVATE_DATA_2B
1830        if (PRIVATE_DATA(cc))
1831          private_data_length += 2;
1832        cc += 2 + IMM2_SIZE;
1833    #ifdef SUPPORT_UTF
1834        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1835    #endif
1836        break;
1837    
            /* Type iterators: only the iterator opcode (and its IMM2_SIZE
            operand in the 2B group) is skipped here; the opcode which follows
            is processed by the next round of the loop. */
1838        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1839        if (PRIVATE_DATA(cc))
1840          private_data_length++;
1841        cc += 1;
1842        break;
1843    
1844        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1845        if (PRIVATE_DATA(cc))
1846          private_data_length += 2;
1847        cc += 1;
1848        break;
1849    
1850        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1851        if (PRIVATE_DATA(cc))
1852          private_data_length += 2;
1853        cc += 1 + IMM2_SIZE;
1854        break;
1855    
            /* Character classes: size is the length of the class itself (read
            from the opcode for OP_XCLASS, otherwise the opcode plus a 32 byte
            bitmap). The number of words comes from get_class_iterator_size(),
            which is given the position right after the class. */
1856        case OP_CLASS:
1857        case OP_NCLASS:
1858    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1859        case OP_XCLASS:
1860        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1861    #else
1862        size = 1 + 32 / (int)sizeof(pcre_uchar);
1863    #endif
1864        if (PRIVATE_DATA(cc))
1865          private_data_length += get_class_iterator_size(cc + size);
1866        cc += size;
1867        break;
1868    
            /* Opcodes without private data are only stepped over. */
1869        default:
1870        cc = next_opcode(common, cc);
1871        SLJIT_ASSERT(cc != NULL);
1872        break;
1873        }
1874      }
1875    SLJIT_ASSERT(cc == ccend);
1876    return private_data_length;
1877    }
1878    
1879    static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1880      BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1881    {
1882    DEFINE_COMPILER;
1883    int srcw[2];
1884    int count, size;
1885    BOOL tmp1next = TRUE;
1886    BOOL tmp1empty = TRUE;
1887    BOOL tmp2empty = TRUE;
1888    pcre_uchar *alternative;
1889    enum {
1890      start,
1891      loop,
1892      end
1893    } status;
1894    
1895    status = save ? start : loop;
1896    stackptr = STACK(stackptr - 2);
1897    stacktop = STACK(stacktop - 1);
1898    
1899    if (!save)
1900      {
1901      stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1902      if (stackptr < stacktop)
1903        {
1904        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1905        stackptr += sizeof(sljit_sw);
1906        tmp1empty = FALSE;
1907        }
1908      if (stackptr < stacktop)
1909        {
1910        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1911        stackptr += sizeof(sljit_sw);
1912        tmp2empty = FALSE;
1913        }
1914      /* The tmp1next must be TRUE in either way. */
1915      }
1916    
1917    do
1918      {
1919      count = 0;
1920      switch(status)
1921        {
1922        case start:
1923        SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1924        count = 1;
1925        srcw[0] = common->recursive_head_ptr;
1926        if (needs_control_head)
1927          {
1928          SLJIT_ASSERT(common->control_head_ptr != 0);
1929          count = 2;
1930          srcw[1] = common->control_head_ptr;
1931          }
1932        status = loop;
1933        break;
1934    
1935        case loop:
1936        if (cc >= ccend)
1937          {
1938          status = end;
1939          break;
1940          }
1941    
1942        switch(*cc)
1943          {
1944          case OP_KET:
1945          if (PRIVATE_DATA(cc) != 0)
1946            {
1947            count = 1;
1948            srcw[0] = PRIVATE_DATA(cc);
1949            SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1950            cc += PRIVATE_DATA(cc + 1);
1951            }
1952          cc += 1 + LINK_SIZE;
1953          break;
1954    
1955          case OP_ASSERT:
1956          case OP_ASSERT_NOT:
1957          case OP_ASSERTBACK:
1958          case OP_ASSERTBACK_NOT:
1959          case OP_ONCE:
1960          case OP_ONCE_NC:
1961          case OP_BRAPOS:
1962          case OP_SBRA:
1963          case OP_SBRAPOS:
1964          case OP_SCOND:
1965          count = 1;
1966          srcw[0] = PRIVATE_DATA(cc);
1967          SLJIT_ASSERT(srcw[0] != 0);
1968          cc += 1 + LINK_SIZE;
1969          break;
1970    
1971          case OP_CBRA:
1972          case OP_SCBRA:
1973          if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1974            {
1975            count = 1;
1976            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1977            }
1978          cc += 1 + LINK_SIZE + IMM2_SIZE;
1979          break;
1980    
1981          case OP_CBRAPOS:
1982          case OP_SCBRAPOS:
1983          count = 2;
1984          srcw[0] = PRIVATE_DATA(cc);
1985          srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1986          SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1987          cc += 1 + LINK_SIZE + IMM2_SIZE;
1988          break;
1989    
1990          case OP_COND:
1991          /* Might be a hidden SCOND. */
1992          alternative = cc + GET(cc, 1);
1993          if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1994            {
1995            count = 1;
1996            srcw[0] = PRIVATE_DATA(cc);
1997            SLJIT_ASSERT(srcw[0] != 0);
1998            }
1999          cc += 1 + LINK_SIZE;
2000          break;
2001    
2002          CASE_ITERATOR_PRIVATE_DATA_1
2003          if (PRIVATE_DATA(cc))
2004            {
2005            count = 1;
2006            srcw[0] = PRIVATE_DATA(cc);
2007            }
2008          cc += 2;
2009    #ifdef SUPPORT_UTF
2010          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2011    #endif
2012          break;
2013    
2014          CASE_ITERATOR_PRIVATE_DATA_2A
2015          if (PRIVATE_DATA(cc))
2016            {
2017            count = 2;
2018            srcw[0] = PRIVATE_DATA(cc);
2019            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2020            }
2021          cc += 2;
2022    #ifdef SUPPORT_UTF
2023          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2024    #endif
2025          break;
2026    
2027          CASE_ITERATOR_PRIVATE_DATA_2B
2028          if (PRIVATE_DATA(cc))
2029            {
2030            count = 2;
2031            srcw[0] = PRIVATE_DATA(cc);
2032            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2033            }
2034          cc += 2 + IMM2_SIZE;
2035    #ifdef SUPPORT_UTF
2036          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2037    #endif
2038          break;
2039    
2040          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2041          if (PRIVATE_DATA(cc))
2042            {
2043            count = 1;
2044            srcw[0] = PRIVATE_DATA(cc);
2045            }
2046          cc += 1;
2047          break;
2048    
2049          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2050          if (PRIVATE_DATA(cc))
2051            {
2052            count = 2;
2053            srcw[0] = PRIVATE_DATA(cc);
2054            srcw[1] = srcw[0] + sizeof(sljit_sw);
2055            }
2056          cc += 1;
2057          break;
2058    
2059          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2060          if (PRIVATE_DATA(cc))
2061            {
2062            count = 2;
2063            srcw[0] = PRIVATE_DATA(cc);
2064            srcw[1] = srcw[0] + sizeof(sljit_sw);
2065            }
2066          cc += 1 + IMM2_SIZE;
2067          break;
2068    
2069          case OP_CLASS:
2070          case OP_NCLASS:
2071    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2072          case OP_XCLASS:
2073          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2074    #else
2075          size = 1 + 32 / (int)sizeof(pcre_uchar);
2076    #endif
2077          if (PRIVATE_DATA(cc))
2078            switch(get_class_iterator_size(cc + size))
2079              {
2080              case 1:
2081              count = 1;
2082              srcw[0] = PRIVATE_DATA(cc);
2083              break;
2084    
2085              case 2:
2086              count = 2;
2087              srcw[0] = PRIVATE_DATA(cc);
2088              srcw[1] = srcw[0] + sizeof(sljit_sw);
2089              break;
2090    
2091              default:
2092              SLJIT_ASSERT_STOP();
2093              break;
2094              }
2095          cc += size;
2096          break;
2097    
2098          default:
2099          cc = next_opcode(common, cc);
2100          SLJIT_ASSERT(cc != NULL);
2101          break;
2102          }
2103        break;
2104    
2105        case end:
2106        SLJIT_ASSERT_STOP();
2107        break;
2108        }
2109    
2110      while (count > 0)
2111        {
2112        count--;
2113        if (save)
2114          {
2115          if (tmp1next)
2116            {
2117            if (!tmp1empty)
2118              {
2119              OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2120              stackptr += sizeof(sljit_sw);
2121              }
2122            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2123            tmp1empty = FALSE;
2124            tmp1next = FALSE;
2125            }
2126          else
2127            {
2128            if (!tmp2empty)
2129              {
2130              OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2131              stackptr += sizeof(sljit_sw);
2132              }
2133            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2134            tmp2empty = FALSE;
2135            tmp1next = TRUE;
2136            }
2137          }
2138        else
2139          {
2140          if (tmp1next)
2141            {
2142            SLJIT_ASSERT(!tmp1empty);
2143            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2144            tmp1empty = stackptr >= stacktop;
2145            if (!tmp1empty)
2146              {
2147              OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2148              stackptr += sizeof(sljit_sw);
2149              }
2150            tmp1next = FALSE;
2151            }
2152          else
2153            {
2154            SLJIT_ASSERT(!tmp2empty);
2155            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2156            tmp2empty = stackptr >= stacktop;
2157            if (!tmp2empty)
2158              {
2159              OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2160              stackptr += sizeof(sljit_sw);
2161              }
2162            tmp1next = TRUE;
2163            }
2164          }
2165        }
2166      }
2167    while (status != end);
2168    
2169    if (save)
2170      {
2171      if (tmp1next)
2172        {
2173        if (!tmp1empty)
2174          {
2175          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2176          stackptr += sizeof(sljit_sw);
2177          }
2178        if (!tmp2empty)
2179          {
2180          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2181          stackptr += sizeof(sljit_sw);
2182          }
2183        }
2184      else
2185        {
2186        if (!tmp2empty)
2187          {
2188          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2189          stackptr += sizeof(sljit_sw);
2190          }
2191        if (!tmp1empty)
2192          {
2193          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2194          stackptr += sizeof(sljit_sw);
2195          }
2196        }
2197      }
2198    SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2199    }
2200    
2201    static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
2202    {
2203    pcre_uchar *end = bracketend(cc);
2204    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2205    
2206    /* Assert captures then. */
2207    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2208      current_offset = NULL;
2209    /* Conditional block does not. */
2210    if (*cc == OP_COND || *cc == OP_SCOND)
2211      has_alternatives = FALSE;
2212    
2213    cc = next_opcode(common, cc);
2214    if (has_alternatives)
2215      current_offset = common->then_offsets + (cc - common->start);
2216    
2217    while (cc < end)
2218      {
2219      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2220        cc = set_then_offsets(common, cc, current_offset);
2221      else
2222        {
2223        if (*cc == OP_ALT && has_alternatives)
2224          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2225        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2226          *current_offset = 1;
2227        cc = next_opcode(common, cc);
2228        }
2229      }
2230    
2231    return end;
2232    }
2233    
2234    #undef CASE_ITERATOR_PRIVATE_DATA_1
2235    #undef CASE_ITERATOR_PRIVATE_DATA_2A
2236    #undef CASE_ITERATOR_PRIVATE_DATA_2B
2237    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2238    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2239    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2240    
2241    static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2242    {
2243    return (value & (value - 1)) == 0;
2244    }
2245    
2246    static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2247    {
2248    while (list)
2249      {
2250      /* sljit_set_label is clever enough to do nothing
2251      if either the jump or the label is NULL. */
2252      SET_LABEL(list->jump, label);
2253      list = list->next;
2254      }
2255    }
2256    
2257    static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2258    {
2259    jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2260    if (list_item)
2261      {
2262      list_item->next = *list;
2263      list_item->jump = jump;
2264      *list = list_item;
2265      }
2266    }
2267    
2268    static void add_stub(compiler_common *common, struct sljit_jump *start)
2269    {
2270    DEFINE_COMPILER;
2271    stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2272    
2273    if (list_item)
2274      {
2275      list_item->start = start;
2276      list_item->quit = LABEL();
2277      list_item->next = common->stubs;
2278      common->stubs = list_item;
2279      }
2280    }
2281    
2282    static void flush_stubs(compiler_common *common)
2283    {
2284    DEFINE_COMPILER;
2285    stub_list *list_item = common->stubs;
2286    
2287    while (list_item)
2288      {
2289      JUMPHERE(list_item->start);
2290      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2291      JUMPTO(SLJIT_JUMP, list_item->quit);
2292      list_item = list_item->next;
2293      }
2294    common->stubs = NULL;
2295    }
2296    
2297    static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2298    {
2299    DEFINE_COMPILER;
2300    label_addr_list *label_addr;
2301    
2302    label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2303    if (label_addr == NULL)
2304      return;
2305    label_addr->label = LABEL();
2306    label_addr->update_addr = update_addr;
2307    label_addr->next = common->label_addrs;
2308    common->label_addrs = label_addr;
2309    }
2310    
2311    static SLJIT_INLINE void count_match(compiler_common *common)
2312    {
2313    DEFINE_COMPILER;
2314    
2315    OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2316    add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2317    }
2318    
2319    static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2320    {
2321    /* May destroy all locals and registers except TMP2. */
2322    DEFINE_COMPILER;
2323    
2324    SLJIT_ASSERT(size > 0);
2325    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2326    #ifdef DESTROY_REGISTERS
2327    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2328    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2329    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2330    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2331    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2332    #endif
2333    add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2334    }
2335    
2336    static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2337    {
2338    DEFINE_COMPILER;
2339    
2340    SLJIT_ASSERT(size > 0);
2341    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2342    }
2343    
2344    static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2345    {
2346    DEFINE_COMPILER;
2347    sljit_uw *result;
2348    
2349    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2350      return NULL;
2351    
2352    result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2353    if (SLJIT_UNLIKELY(result == NULL))
2354      {
2355      sljit_set_compiler_memory_error(compiler);
2356      return NULL;
2357      }
2358    
2359    *(void**)result = common->read_only_data_head;
2360    common->read_only_data_head = (void *)result;
2361    return result + 1;
2362    }
2363    
2364    static void free_read_only_data(void *current, void *allocator_data)
2365    {
2366    void *next;
2367    
2368    SLJIT_UNUSED_ARG(allocator_data);
2369    
2370    while (current != NULL)
2371      {
2372      next = *(void**)current;
2373      SLJIT_FREE(current, allocator_data);
2374      current = next;
2375      }
2376    }
2377    
2378    static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2379    {
2380    DEFINE_COMPILER;
2381    struct sljit_label *loop;
2382    int i;
2383    
2384    /* At this point we can freely use all temporary registers. */
2385    SLJIT_ASSERT(length > 1);
2386    /* TMP1 returns with begin - 1. */
2387    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2388    if (length < 8)
2389      {
2390      for (i = 1; i < length; i++)
2391        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2392      }
2393    else
2394      {
2395      GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2396      OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2397      loop = LABEL();
2398      OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2399      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2400      JUMPTO(SLJIT_NOT_ZERO, loop);
2401      }
2402    }
2403    
2404    static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2405    {
2406    DEFINE_COMPILER;
2407    sljit_s32 i;
2408    
2409    SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2410    
2411    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2412    for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2413      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2414    }
2415    
2416    static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2417    {
2418    DEFINE_COMPILER;
2419    struct sljit_label *loop;
2420    int i;
2421    
2422    SLJIT_ASSERT(length > 1);
2423    /* OVECTOR(1) contains the "string begin - 1" constant. */
2424    if (length > 2)
2425      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2426    if (length < 8)
2427      {
2428      for (i = 2; i < length; i++)
2429        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2430      }
2431    else
2432      {
2433      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2434      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2435      loop = LABEL();
2436      OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2437      OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2438      JUMPTO(SLJIT_NOT_ZERO, loop);
2439      }
2440    
2441    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2442    if (common->mark_ptr != 0)
2443      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2444    if (common->control_head_ptr != 0)
2445      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2446    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2447    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2448    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2449    }
2450    
2451    static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2452    {
2453    while (current != NULL)
2454      {
2455      switch (current[-2])
2456        {
2457        case type_then_trap:
2458        break;
2459    
2460        case type_mark:
2461        if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2462          return current[-4];
2463        break;
2464    
2465        default:
2466        SLJIT_ASSERT_STOP();
2467        break;
2468        }
2469      SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2470      current = (sljit_sw*)current[-1];
2471      }
2472    return -1;
2473    }
2474    
2475    static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2476    {
2477    DEFINE_COMPILER;
2478    struct sljit_label *loop;
2479    struct sljit_jump *early_quit;
2480    
2481    /* At this point we can freely use all registers. */
2482    OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2483    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2484    
2485    OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2486    if (common->mark_ptr != 0)
2487      OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2488    OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2489    if (common->mark_ptr != 0)
2490      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2491    OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2492    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2493    GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2494    /* Unlikely, but possible */
2495    early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2496    loop = LABEL();
2497    OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2498    OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2499    /* Copy the integer value to the output buffer */
2500    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2501    OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2502    #endif
2503    OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2504    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2505    JUMPTO(SLJIT_NOT_ZERO, loop);
2506    JUMPHERE(early_quit);
2507    
2508    /* Calculate the return value, which is the maximum ovector value. */
2509    if (topbracket > 1)
2510      {
2511      GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2512      OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2513    
2514      /* OVECTOR(0) is never equal to SLJIT_S2. */
2515      loop = LABEL();
2516      OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2517      OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2518      CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2519      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2520      }
2521    else
2522      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2523    }
2524    
2525    static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2526    {
2527    DEFINE_COMPILER;
2528    struct sljit_jump *jump;
2529    
2530    SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2531    SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2532      && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2533    
2534    OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2535    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2536    OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2537    CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2538    
2539    /* Store match begin and end. */
2540    OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2541    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2542    
2543    jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2544    OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2545    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2546    OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2547    #endif
2548    OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2549    JUMPHERE(jump);
2550    
2551    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2552    OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2553    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2554    OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2555    #endif
2556    OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2557    
2558    OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2559    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2560    OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2561    #endif
2562    OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2563    
2564    JUMPTO(SLJIT_JUMP, quit);
2565    }
2566    
2567    static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2568    {
2569    /* May destroy TMP1. */
2570    DEFINE_COMPILER;
2571    struct sljit_jump *jump;
2572    
2573    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2574      {
2575      /* The value of -1 must be kept for start_used_ptr! */
2576      OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2577      /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2578      is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2579      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2580      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2581      JUMPHERE(jump);
2582      }
2583    else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2584      {
2585      jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2586      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2587      JUMPHERE(jump);
2588      }
2589    }
2590    
2591    static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2592    {
2593    /* Detects if the character has an othercase. */
2594    unsigned int c;
2595    
2596    #ifdef SUPPORT_UTF
2597    if (common->utf)
2598      {
2599      GETCHAR(c, cc);
2600      if (c > 127)
2601        {
2602    #ifdef SUPPORT_UCP
2603        return c != UCD_OTHERCASE(c);
2604    #else
2605        return FALSE;
2606    #endif
2607        }
2608    #ifndef COMPILE_PCRE8
2609      return common->fcc[c] != c;
2610    #endif
2611      }
2612    else
2613    #endif
2614      c = *cc;
2615    return MAX_255(c) ? common->fcc[c] != c : FALSE;
2616    }
2617    
2618    static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2619    {
2620    /* Returns with the othercase. */
2621    #ifdef SUPPORT_UTF
2622    if (common->utf && c > 127)
2623      {
2624    #ifdef SUPPORT_UCP
2625      return UCD_OTHERCASE(c);
2626    #else
2627      return c;
2628    #endif
2629      }
2630    #endif
2631    return TABLE_GET(c, common->fcc, c);
2632    }
2633    
2634    static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2635    {
2636    /* Detects if the character and its othercase has only 1 bit difference. */
2637    unsigned int c, oc, bit;
2638    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2639    int n;
2640    #endif
2641    
2642    #ifdef SUPPORT_UTF
2643    if (common->utf)
2644      {
2645      GETCHAR(c, cc);
2646      if (c <= 127)
2647        oc = common->fcc[c];
2648      else
2649        {
2650    #ifdef SUPPORT_UCP
2651        oc = UCD_OTHERCASE(c);
2652    #else
2653        oc = c;
2654    #endif
2655        }
2656      }
2657    else
2658      {
2659      c = *cc;
2660      oc = TABLE_GET(c, common->fcc, c);
2661      }
2662    #else
2663    c = *cc;
2664    oc = TABLE_GET(c, common->fcc, c);
2665    #endif
2666    
2667    SLJIT_ASSERT(c != oc);
2668    
2669    bit = c ^ oc;
2670    /* Optimized for English alphabet. */
2671    if (c <= 127 && bit == 0x20)
2672      return (0 << 8) | 0x20;
2673    
2674    /* Since c != oc, they must have at least 1 bit difference. */
2675    if (!is_powerof2(bit))
2676      return 0;
2677    
2678    #if defined COMPILE_PCRE8
2679    
2680    #ifdef SUPPORT_UTF
2681    if (common->utf && c > 127)
2682      {
2683      n = GET_EXTRALEN(*cc);
2684      while ((bit & 0x3f) == 0)
2685        {
2686        n--;
2687        bit >>= 6;
2688        }
2689      return (n << 8) | bit;
2690      }
2691    #endif /* SUPPORT_UTF */
2692    return (0 << 8) | bit;
2693    
2694    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2695    
2696    #ifdef SUPPORT_UTF
2697    if (common->utf && c > 65535)
2698      {
2699      if (bit >= (1 << 10))
2700        bit >>= 10;
2701      else
2702        return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2703      }
2704    #endif /* SUPPORT_UTF */
2705    return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2706    
2707    #endif /* COMPILE_PCRE[8|16|32] */
2708    }
2709    
2710    static void check_partial(compiler_common *common, BOOL force)
2711    {
2712    /* Checks whether a partial matching is occurred. Does not modify registers. */
2713    DEFINE_COMPILER;
2714    struct sljit_jump *jump = NULL;
2715    
2716    SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2717    
2718    if (common->mode == JIT_COMPILE)
2719      return;
2720    
2721    if (!force)
2722      jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2723    else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2724      jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2725    
2726    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2727      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2728    else
2729      {
2730      if (common->partialmatchlabel != NULL)
2731        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2732      else
2733        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2734      }
2735    
2736    if (jump != NULL)
2737      JUMPHERE(jump);
2738    }
2739    
2740    static void check_str_end(compiler_common *common, jump_list **end_reached)
2741    {
2742    /* Does not affect registers. Usually used in a tight spot. */
2743    DEFINE_COMPILER;
2744    struct sljit_jump *jump;
2745    
2746    if (common->mode == JIT_COMPILE)
2747      {
2748      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2749      return;
2750      }
2751    
2752    jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2753    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2754      {
2755      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2756      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2757      add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2758      }
2759    else
2760      {
2761      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2762      if (common->partialmatchlabel != NULL)
2763        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2764      else
2765        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2766      }
2767    JUMPHERE(jump);
2768    }
2769    
2770    static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2771    {
2772    DEFINE_COMPILER;
2773    struct sljit_jump *jump;
2774    
2775    if (common->mode == JIT_COMPILE)
2776      {
2777      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2778      return;
2779      }
2780    
2781    /* Partial matching mode. */
2782    jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2783    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2784    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2785      {
2786      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2787      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2788      }
2789    else
2790      {
2791      if (common->partialmatchlabel != NULL)
2792        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2793      else
2794        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2795      }
2796    JUMPHERE(jump);
2797    }
2798    
2799    static void peek_char(compiler_common *common, sljit_u32 max)
2800    {
2801    /* Reads the character into TMP1, keeps STR_PTR.
2802    Does not check STR_END. TMP2 Destroyed. */
2803    DEFINE_COMPILER;
2804    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2805    struct sljit_jump *jump;
2806    #endif
2807    
2808    SLJIT_UNUSED_ARG(max);
2809    
2810    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2811    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2812    if (common->utf)
2813      {
2814      if (max < 128) return;
2815    
2816      jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2817      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2818      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2819      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2820      JUMPHERE(jump);
2821      }
2822    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2823    
2824    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2825    if (common->utf)
2826      {
2827      if (max < 0xd800) return;
2828    
2829      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2830      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2831      /* TMP2 contains the high surrogate. */
2832      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2833      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2834      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2835      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2836      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2837      JUMPHERE(jump);
2838      }
2839    #endif
2840    }
2841    
2842    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2843    
2844    static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2845    {
2846    /* Tells whether the character codes below 128 are enough
2847    to determine a match. */
2848    const sljit_u8 value = nclass ? 0xff : 0;
2849    const sljit_u8 *end = bitset + 32;
2850    
2851    bitset += 16;
2852    do
2853      {
2854      if (*bitset++ != value)
2855        return FALSE;
2856      }
2857    while (bitset < end);
2858    return TRUE;
2859    }
2860    
2861    static void read_char7_type(compiler_common *common, BOOL full_read)
2862    {
2863    /* Reads the precise character type of a character into TMP1, if the character
2864    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2865    full_read argument tells whether characters above max are accepted or not. */
2866    DEFINE_COMPILER;
2867    struct sljit_jump *jump;
2868    
2869    SLJIT_ASSERT(common->utf);
2870    
2871    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2872    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2873    
2874    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2875    
2876    if (full_read)
2877      {
2878      jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2879      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2880      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2881      JUMPHERE(jump);
2882      }
2883    }
2884    
2885    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2886    
2887    static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2888    {
2889    /* Reads the precise value of a character into TMP1, if the character is
2890    between min and max (c >= min && c <= max). Otherwise it returns with a value
2891    outside the range. Does not check STR_END. */
2892    DEFINE_COMPILER;
2893    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2894    struct sljit_jump *jump;
2895    #endif
2896    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2897    struct sljit_jump *jump2;
2898    #endif
2899    
2900    SLJIT_UNUSED_ARG(update_str_ptr);
2901    SLJIT_UNUSED_ARG(min);
2902    SLJIT_UNUSED_ARG(max);
2903    SLJIT_ASSERT(min <= max);
2904    
2905    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2906    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2907    
2908    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2909    if (common->utf)
2910      {
2911      if (max < 128 && !update_str_ptr) return;
2912    
2913      jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2914      if (min >= 0x10000)
2915        {
2916        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2917        if (update_str_ptr)
2918          OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2919        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2920        jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2921        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2922        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2923        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2924        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2925        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2926        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2927        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2928        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2929        if (!update_str_ptr)
2930          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2931        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2932        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2933        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2934        JUMPHERE(jump2);
2935        if (update_str_ptr)
2936          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2937        }
2938      else if (min >= 0x800 && max <= 0xffff)
2939        {
2940        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2941        if (update_str_ptr)
2942          OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2943        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2944        jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2945        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2946        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2947        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2948        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2949        if (!update_str_ptr)
2950          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2951        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2952        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2953        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2954        JUMPHERE(jump2);
2955        if (update_str_ptr)
2956          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2957        }
2958      else if (max >= 0x800)
2959        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2960      else if (max < 128)
2961        {
2962        OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2963        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2964        }
2965      else
2966        {
2967        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2968        if (!update_str_ptr)
2969          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2970        else
2971          OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2972        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2973        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2974        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976        if (update_str_ptr)
2977          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2978        }
2979      JUMPHERE(jump);
2980      }
2981    #endif
2982    
2983    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2984    if (common->utf)
2985      {
2986      if (max >= 0x10000)
2987        {
2988        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2989        jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2990        /* TMP2 contains the high surrogate. */
2991        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2992        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2993        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2994        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2995        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2996        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2997        JUMPHERE(jump);
2998        return;
2999        }
3000    
3001      if (max < 0xd800 && !update_str_ptr) return;
3002    
3003      /* Skip low surrogate if necessary. */
3004      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3005      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3006      if (update_str_ptr)
3007        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3008      if (max >= 0xd800)
3009        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3010      JUMPHERE(jump);
3011      }
3012    #endif
3013    }
3014    
3015    static SLJIT_INLINE void read_char(compiler_common *common)
3016    {
        /* Convenience wrapper: emits a character read through read_char_range()
        using the widest range (0 .. READ_CHAR_MAX) and update_str_ptr == TRUE. */
3017    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3018    }
3019    
3020    static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3021    {
3022    /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
        /* The emitted code loads one code unit into TMP2, advances STR_PTR past it
        and leaves the matching common->ctypes entry in TMP1. Values above 255
        produce type 0, since only entries 0..255 of the table are indexed.
        update_str_ptr: in the UTF-8 path it selects the utfreadtype8 helper call
        (TRUE) instead of the inline two-byte decode (FALSE); in the UTF-16 path
        it enables stepping over the code unit that follows a high surrogate. */
3023    DEFINE_COMPILER;
3024    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3025    struct sljit_jump *jump;
3026    #endif
3027    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3028    struct sljit_jump *jump2;
3029    #endif
3030    
3031    SLJIT_UNUSED_ARG(update_str_ptr);
3032    
3033    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3034    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3035    
3036    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3037    if (common->utf)
3038      {
3039      /* This can be an extra read in some situations, but hopefully
3040      it is needed in most cases. */
3041      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
          /* Bytes below 0xc0 keep the type loaded above and skip the decoding. */
3042      jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3043      if (!update_str_ptr)
3044        {
            /* Inline decode that treats the input as a two-byte sequence:
            TMP2 = ((lead & 0x3f) << 6) | (next & 0x3f). */
3045        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3046        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3047        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3048        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3049        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3050        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
            /* Type defaults to 0; the table is consulted only for values <= 255. */
3051        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3052        jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3053        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3054        JUMPHERE(jump2);
3055        }
3056      else
3057        add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3058      JUMPHERE(jump);
3059      return;
3060      }
3061    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3062    
3063    #if !defined COMPILE_PCRE8
3064    /* The ctypes array contains only 256 values. */
3065    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3066    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3067    #endif
3068    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3069    #if !defined COMPILE_PCRE8
3070    JUMPHERE(jump);
3071    #endif
3072    
3073    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3074    if (common->utf && update_str_ptr)
3075      {
3076      /* Skip low surrogate if necessary. */
          /* (TMP2 - 0xd800) <= 0x3ff holds for code units 0xd800..0xdbff, i.e. a
          high surrogate; only then is STR_PTR moved past the following unit.
          NOTE(review): relies on SLJIT_GREATER being an unsigned comparison. */
3077      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3078      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3079      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3080      JUMPHERE(jump);
3081      }
3082    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3083    }
3084    
3085    static void skip_char_back(compiler_common *common)
3086    {
3087    /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
        /* Three variants are emitted depending on the build and common->utf:
        a UTF-8 loop, a UTF-16 surrogate-aware step, or a plain one-unit step. */
3088    DEFINE_COMPILER;
3089    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3090    #if defined COMPILE_PCRE8
3091    struct sljit_label *label;
3092    
3093    if (common->utf)
3094      {
          /* Step back one byte at a time for as long as the byte just stepped
          over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
3095      label = LABEL();
3096      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3097      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3099      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3100      return;
3101      }
3102    #elif defined COMPILE_PCRE16
3103    if (common->utf)
3104      {
3105      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3106      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3107      /* Skip low surrogate if necessary. */
          /* (unit & 0xfc00) == 0xdc00 matches 0xdc00..0xdfff. The comparison
          result is stored in TMP1, shifted left by one and subtracted from
          STR_PTR. NOTE(review): presumably the flag materialises as 0 or 1, so
          the extra step is 0 or 2 bytes (one 16-bit unit) - confirm. */
3108      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3109      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3110      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3111      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3112      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3113      return;
3114      }
3115    #endif /* COMPILE_PCRE[8|16] */
3116    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
        /* Non-UTF case (and builds without the branches above): one code unit back. */
3117    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3118    }
3119    
3120    static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3121    {
3122    /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
        /* nltype      - NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED (asserted below).
        backtracks  - jump list that receives every conditional jump emitted here.
        jumpifmatch - TRUE: the jumps are taken when TMP1 is a newline;
                      FALSE: the jumps are taken when it is not. */
3123    DEFINE_COMPILER;
3124    struct sljit_jump *jump;
3125    
3126  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  if (nltype == NLTYPE_ANY)
3127  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));    {
          /* Delegates to the shared anynewline helper via a fast call and then
          branches on the zero flag. NOTE(review): presumably the helper leaves
          the flag "not zero" when TMP1 is a newline - confirm in the helper. */
3128      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3129      add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3130      }
3131    else if (nltype == NLTYPE_ANYCRLF)
3132      {
3133      if (jumpifmatch)
3134        {
3135        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3136        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3137        }
3138      else
3139        {
            /* CR falls through without a backtrack jump; anything else must be NL. */
3140        jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3141        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3142        JUMPHERE(jump);
3143        }
3144      }
3145    else
3146      {
          /* Fixed newline below 256: a single compare against common->newline. */
3147      SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3148      add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3149      }
3150  }  }
3151    
3152  static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)  #ifdef SUPPORT_UTF
3153    
3154    #if defined COMPILE_PCRE8
3155    static void do_utfreadchar(compiler_common *common)
3156  {  {
        /* Emits a helper routine bracketed by sljit_emit_fast_enter() and
        sljit_emit_fast_return(), with the return address kept in RETURN_ADDR.
        It decodes two-, three- and four-byte sequences, reading the bytes after
        the lead byte at STR_PTR offsets 0, 1 and 2 and advancing STR_PTR by
        1, 2 or 3 units respectively. */
3157  /* May destroy all locals and registers except TMP2. */  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3158    of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3159  DEFINE_COMPILER;  DEFINE_COMPILER;
3160    struct sljit_jump *jump;
3161    
3162  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3163  #ifdef DESTROY_REGISTERS  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3164  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3165  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3166  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3167  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3168  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);  
        /* After the first merge, bit 0x800 of TMP1 is bit 0x20 of the lead byte
        (0x20 << 6); if it is clear the sequence has two bytes. */
3169  #endif  /* Searching for the first zero. */
3170  add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3171    jump = JUMP(SLJIT_NOT_ZERO);
3172    /* Two byte sequence. */
3173    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3174    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3175    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3176    
3177    JUMPHERE(jump);
        /* Drop the length marker bit (0x800), then merge the next byte. */
3178    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3179    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3180    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3181    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3182    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3183    
        /* Bit 0x10000 is now bit 0x10 of the lead byte (0x10 << 12); if it is
        clear the sequence has three bytes. */
3184    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3185    jump = JUMP(SLJIT_NOT_ZERO);
3186    /* Three byte sequence. */
3187    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3188    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3189    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3190    
3191    /* Four byte sequence. */
3192    JUMPHERE(jump);
3193    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3194    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3195    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3196    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3197    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3198    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3199    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3200    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3201  }  }
3202    
3203  static SLJIT_INLINE void free_stack(compiler_common *common, int size)  static void do_utfreadchar16(compiler_common *common)
3204  {  {
        /* Emits a helper routine (fast enter / fast return via RETURN_ADDR) that
        decodes only two- and three-byte sequences exactly. Unlike
        do_utfreadchar it does not set TMP2 to the sequence length. */
3205    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3206    of the character (>= 0xc0). Return value in TMP1. */
3207  DEFINE_COMPILER;  DEFINE_COMPILER;
3208  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  struct sljit_jump *jump;
3209    
3210    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3211    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3212    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3213    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3214    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3215    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3216    
3217    /* Searching for the first zero. */
        /* Bit 0x800 of TMP1 is bit 0x20 of the lead byte (0x20 << 6). */
3218    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3219    jump = JUMP(SLJIT_NOT_ZERO);
3220    /* Two byte sequence. */
3221    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3222    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3223    
3224    JUMPHERE(jump);
        /* Bit 0x400 is bit 0x10 of the lead byte (0x10 << 6), which is set for a
        four-byte lead. The test result is stored in TMP2 and added to STR_PTR.
        The bit is not cleared afterwards, so after the shift below it becomes
        bit 0x10000 of the result.
        NOTE(review): presumably TMP2 is 0 or 1, i.e. one extra byte is skipped
        for four-byte sequences - confirm. */
3225    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3226    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3227    /* This code runs only in 8 bit mode. No need to shift the value. */
3228    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3229    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3230    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3231    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3232    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3233    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3234    /* Three byte sequence. */
3235    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3236    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3237  }  }
3238    
3239  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)  static void do_utfreadtype8(compiler_common *common)
3240  {  {
        /* Emits a helper routine (fast enter / fast return via RETURN_ADDR).
        Only two-byte sequences whose value fits in 0..255 get a real
        common->ctypes lookup; every other case returns type 0 in TMP1. */
3241    /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3242    of the character (>= 0xc0). Return value in TMP1. */
3243  DEFINE_COMPILER;  DEFINE_COMPILER;
3244  struct sljit_label *loop;  struct sljit_jump *jump;
3245  int i;  struct sljit_jump *compare;
3246  /* At this point we can freely use all temporary registers. */  
3247  /* TMP1 returns with begin - 1. */  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3248  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));  
        /* Bit 0x20 of the lead byte set: the sequence is longer than two bytes. */
3249  if (length < 8)  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3250    jump = JUMP(SLJIT_NOT_ZERO);
3251    /* Two byte sequence. */
3252    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3253    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3254    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3255    /* The upper 5 bits are known at this point. */
        /* A value above 3 here means the decoded character would exceed 255. */
3256    compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3257    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3258    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3259    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3260    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3261    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3262    
3263    JUMPHERE(compare);
3264    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3265    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3266    
3267    /* We only have types for characters less than 256. */
3268    JUMPHERE(jump);
        /* Looks up utf8_table4[lead - 0xc0] and adds the result to STR_PTR.
        NOTE(review): presumably the number of bytes that follow the lead
        byte - confirm against the table definition. */
3269    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3270    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3271    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3272    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3273    }
3274    
3275    #endif /* COMPILE_PCRE8 */
3276    
3277    #endif /* SUPPORT_UTF */
3278    
3279    #ifdef SUPPORT_UCP
3280    
3281    /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3282    #define UCD_BLOCK_MASK 127
3283    #define UCD_BLOCK_SHIFT 7
3284    
3285    static void do_getucd(compiler_common *common)
3286    {
3287    /* Search the UCD record for the character that comes in TMP1.
3288    Returns chartype in TMP1 and UCD offset in TMP2. */
        /* Emits a helper routine (fast enter / fast return via RETURN_ADDR) that
        performs a two-stage table lookup followed by a read of the record's
        chartype field. */
3289    DEFINE_COMPILER;
3290    
        /* The shift amounts used below (7 for the block, 3 for the record index)
        depend on these two sizes. */
3291    SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3292    
3293    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
        /* Stage 1: byte-sized entry selected by (c >> UCD_BLOCK_SHIFT). */
3294    OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3295    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
        /* Stage 2 index = (stage1 << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK);
        the 16-bit entry is read with the index passed together with shift 1. */
3296    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3297    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3298    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3299    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3300    OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
        /* Record lookup: base is &ucd_records[0].chartype, with the index passed
        together with shift 3 (8-byte records, see the assertion above). */
3301    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3302    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3303    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3304    }
3305    #endif
3306    
3307    static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3308    {
3309    DEFINE_COMPILER;
3310    struct sljit_label *mainloop;
3311    struct sljit_label *newlinelabel = NULL;
3312    struct sljit_jump *start;
3313    struct sljit_jump *end = NULL;
3314    struct sljit_jump *end2 = NULL;
3315    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3316    struct sljit_jump *singlechar;
3317    #endif
3318    jump_list *newline = NULL;
3319    BOOL newlinecheck = FALSE;
3320    BOOL readuchar = FALSE;
3321    
3322    if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3323        (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3324      newlinecheck = TRUE;
3325    
3326    if (common->match_end_ptr != 0)
3327      {
3328      /* Search for the end of the first line. */
3329      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3330    
3331      if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3332        {
3333        mainloop = LABEL();
3334        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3335        end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3336        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3337        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3338        CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3339        CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3340        JUMPHERE(end);
3341        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3342        }
3343      else
3344        {
3345        end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3346        mainloop = LABEL();
3347        /* Continual stores does not cause data dependency. */
3348        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3349        read_char_range(common, common->nlmin, common->nlmax, TRUE);
3350        check_newlinechar(common, common->nltype, &newline, TRUE);
3351        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);