/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 695 by zherczeg, Sat Sep 17 06:05:38 2011 UTC revision 1626 by zherczeg, Mon Feb 8 09:29:08 2016 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2013
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49  #ifdef SUPPORT_JIT  #if defined SUPPORT_JIT
50    
51  /* All-in-one: Since we use the JIT compiler only from here,  /* All-in-one: Since we use the JIT compiler only from here,
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55    #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56    #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58    #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
60  #define SLJIT_DEBUG 0  #define SLJIT_DEBUG 0
61    
62  #include "sljit/sljitLir.c"  #include "sljit/sljitLir.c"
63    
64  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65  #error "Unsupported architecture"  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Defines for debugging purposes. */
 #define LOCAL_SPACE_SIZE 32768  
69    
70    /* 1 - Use unoptimized capturing brackets.
71       2 - Enable capture_last_ptr (includes option 1). */
72    /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* 1 - Always have a control head. */
75    /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77    /* Allocate memory for the regex stack on the real machine stack.
78    Fast, but limited size. */
79    #define MACHINE_STACK_SIZE 32768
80    
81    /* Growth rate for stack allocated by the OS. Should be a multiple
82    of the page size. */
83  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
84    
85  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 79  The code generator follows the recursive Line 94  The code generator follows the recursive
94  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
95  which execute different commands depending on the result of the condition check.  which execute different commands depending on the result of the condition check.
96  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
97  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
98    
99    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
100    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
101    
102  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
103  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
104  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
105  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106  branches on the hot path.  branches on the matching path.
107    
108   Greedy star operator (*) :   Greedy star operator (*) :
109     Hot path: match happens.     Matching path: match happens.
110     Fallback path: match failed.     Backtrack path: match failed.
111   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
112     Hot path: no need to perform a match.     Matching path: no need to perform a match.
113     Fallback path: match is required.     Backtrack path: match is required.
114    
115  The following example shows how the code generated for a capturing bracket  The following example shows how the code generated for a capturing bracket
116  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 105  we have the following regular expression Line 120  we have the following regular expression
120    
121  The generated code will be the following:  The generated code will be the following:
122    
123   A hot path   A matching path
124   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
125   B hot path   B matching path
126   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
127   D hot path   D matching path
128   return with successful match   return with successful match
129    
130   D fallback path   D backtrack path
131   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132   B fallback path   B backtrack path
133   C expected path   C expected path
134   jump to D hot path   jump to D matching path
135   C fallback path   C backtrack path
136   A fallback path   A backtrack path
137    
138   Notice that the order of fallback code paths is the opposite of the fast   Notice that the order of backtrack code paths is the opposite of the fast
139   code paths. In this way the topmost value on the stack always belongs   code paths. In this way the topmost value on the stack always belongs
140   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
141   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
142   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
143   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
144  */  */
145    
146  /*  /*
147  Saved stack frames:  Saved stack frames:
148    
149  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
150  when the fallback mechanism performed. Because of OP_RECURSE, the locals  when the backtrack mechanism performed. Because of OP_RECURSE, the data
151  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
152  mechanism.  mechanism.
153    
154  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
155  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156    
157  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
158  */  */
159    
160  typedef struct jit_arguments {  typedef struct jit_arguments {
161    /* Pointers first. */    /* Pointers first. */
162    struct sljit_stack *stack;    struct sljit_stack *stack;
163    PCRE_SPTR str;    const pcre_uchar *str;
164    PCRE_SPTR begin;    const pcre_uchar *begin;
165    PCRE_SPTR end;    const pcre_uchar *end;
166    int *offsets;    int *offsets;
167    uschar *ptr;    pcre_uchar *uchar_ptr;
168      pcre_uchar *mark_ptr;
169      void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171    int offsetcount;    pcre_uint32 limit_match;
172    int calllimit;    int real_offset_count;
173    uschar notbol;    int offset_count;
174    uschar noteol;    pcre_uint8 notbol;
175    uschar notempty;    pcre_uint8 noteol;
176    uschar notempty_atstart;    pcre_uint8 notempty;
177      pcre_uint8 notempty_atstart;
178  } jit_arguments;  } jit_arguments;
179    
180  typedef struct executable_function {  typedef struct executable_functions {
181    void *executable_func;    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182    pcre_jit_callback callback;    void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184      PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186  } executable_function;    pcre_uint32 top_bracket;
187      pcre_uint32 limit_match;
188    } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
191    struct sljit_jump *jump;    struct sljit_jump *jump;
192    struct jump_list *next;    struct jump_list *next;
193  } jump_list;  } jump_list;
194    
 enum stub_types { stack_alloc, max_index };  
   
195  typedef struct stub_list {  typedef struct stub_list {
   enum stub_types type;  
   int data;  
196    struct sljit_jump *start;    struct sljit_jump *start;
197    struct sljit_label *leave;    struct sljit_label *quit;
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201    typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *update_addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207    enum frame_types {
208      no_frame = -1,
209      no_stack = -2
210    };
211    
212    enum control_types {
213      type_mark = 0,
214      type_then_trap = 1
215    };
216    
217  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218    
219  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
220  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
221  the aguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
222  of its descendants. */  of its descendants. */
223  typedef struct fallback_common {  typedef struct backtrack_common {
224    /* Concatenation stack. */    /* Concatenation stack. */
225    struct fallback_common *prev;    struct backtrack_common *prev;
226    jump_list *nextfallbacks;    jump_list *nextbacktracks;
227    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
228    struct fallback_common *top;    struct backtrack_common *top;
229    jump_list *topfallbacks;    jump_list *topbacktracks;
230    /* Opcode pointer. */    /* Opcode pointer. */
231    uschar *cc;    pcre_uchar *cc;
232  } fallback_common;  } backtrack_common;
233    
234  typedef struct assert_fallback {  typedef struct assert_backtrack {
235    fallback_common common;    backtrack_common common;
236    jump_list *condfailed;    jump_list *condfailed;
237    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
238    int framesize;    int framesize;
239    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
240    int localptr;    int private_data_ptr;
241    /* For iterators. */    /* For iterators. */
242    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
243  } assert_fallback;  } assert_backtrack;
244    
245  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
246    fallback_common common;    backtrack_common common;
247    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
248    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
249    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
250    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
251    /* For greedy ? operator. */    /* For greedy ? operator. */
252    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
253    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
254    union {    union {
255      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
256      jump_list *condfailed;      jump_list *condfailed;
257      assert_fallback *assert;      assert_backtrack *assert;
258      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
259      int framesize;      int framesize;
260    } u;    } u;
261    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
262    int localptr;    int private_data_ptr;
263  } bracket_fallback;  } bracket_backtrack;
264    
265  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
266    fallback_common common;    backtrack_common common;
267    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
268    int localptr;    int private_data_ptr;
269    /* Reverting stack is needed. */    /* Reverting stack is needed. */
270    int framesize;    int framesize;
271    /* Allocated stack size. */    /* Allocated stack size. */
272    int stacksize;    int stacksize;
273  } bracketpos_fallback;  } bracketpos_backtrack;
274    
275  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
276    fallback_common common;    backtrack_common common;
277    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
278  } braminzero_fallback;  } braminzero_backtrack;
279    
280  typedef struct iterator_fallback {  typedef struct iterator_backtrack {
281    fallback_common common;    backtrack_common common;
282    /* Next iteration. */    /* Next iteration. */
283    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
284  } iterator_fallback;  } iterator_backtrack;
285    
286  typedef struct recurse_entry {  typedef struct recurse_entry {
287    struct recurse_entry *next;    struct recurse_entry *next;
# Line 256  typedef struct recurse_entry { Line 290  typedef struct recurse_entry {
290    /* Collects the calls until the function is not created. */    /* Collects the calls until the function is not created. */
291    jump_list *calls;    jump_list *calls;
292    /* Points to the starting opcode. */    /* Points to the starting opcode. */
293    int start;    sljit_sw start;
294  } recurse_entry;  } recurse_entry;
295    
296  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
297    fallback_common common;    backtrack_common common;
298  } recurse_fallback;    BOOL inlined_pattern;
299    } recurse_backtrack;
300    
301    #define OP_THEN_TRAP OP_TABLE_LENGTH
302    
303    typedef struct then_trap_backtrack {
304      backtrack_common common;
305      /* If then_trap is not NULL, this structure contains the real
306      then_trap for the backtracking path. */
307      struct then_trap_backtrack *then_trap;
308      /* Points to the starting opcode. */
309      sljit_sw start;
310      /* Exit point for the then opcodes of this alternative. */
311      jump_list *quit;
312      /* Frame size of the current alternative. */
313      int framesize;
314    } then_trap_backtrack;
315    
316    #define MAX_RANGE_SIZE 4
317    
318  typedef struct compiler_common {  typedef struct compiler_common {
319      /* The sljit generic compiler. */
320    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
321    uschar *start;    /* First byte code. */
322    int localsize;    pcre_uchar *start;
323    int *localptrs;    /* Maps private data offset to each opcode. */
324    const uschar *fcc;    sljit_si *private_data_ptrs;
325    sljit_w lcc;    /* Chain list of read-only data ptrs. */
326    int cbraptr;    void *read_only_data_head;
327      /* Tells whether the capturing bracket is optimized. */
328      sljit_ub *optimized_cbracket;
329      /* Tells whether the starting offset is a target of then. */
330      sljit_ub *then_offsets;
331      /* Current position where a THEN must jump. */
332      then_trap_backtrack *then_trap;
333      /* Starting offset of private data for capturing brackets. */
334      sljit_si cbra_ptr;
335      /* Output vector starting point. Must be divisible by 2. */
336      sljit_si ovector_start;
337      /* Points to the starting character of the current match. */
338      sljit_si start_ptr;
339      /* Last known position of the requested byte. */
340      sljit_si req_char_ptr;
341      /* Head of the last recursion. */
342      sljit_si recursive_head_ptr;
343      /* First inspected character for partial matching.
344         (Needed for avoiding zero length partial matches.) */
345      sljit_si start_used_ptr;
346      /* Starting pointer for partial soft matches. */
347      sljit_si hit_start;
348      /* End pointer of the first line. */
349      sljit_si first_line_end;
350      /* Points to the marked string. */
351      sljit_si mark_ptr;
352      /* Recursive control verb management chain. */
353      sljit_si control_head_ptr;
354      /* Points to the last matched capture block index. */
355      sljit_si capture_last_ptr;
356    
357      /* Flipped and lower case tables. */
358      const sljit_ub *fcc;
359      sljit_sw lcc;
360      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
361      int mode;
362      /* TRUE, when minlength is greater than 0. */
363      BOOL might_be_empty;
364      /* \K is found in the pattern. */
365      BOOL has_set_som;
366      /* (*SKIP:arg) is found in the pattern. */
367      BOOL has_skip_arg;
368      /* (*THEN) is found in the pattern. */
369      BOOL has_then;
370      /* Currently in recurse or negative assert. */
371      BOOL local_exit;
372      /* Currently in a positive assert. */
373      BOOL positive_assert;
374      /* Newline control. */
375    int nltype;    int nltype;
376      sljit_ui nlmax;
377      sljit_ui nlmin;
378    int newline;    int newline;
379    int bsr_nltype;    int bsr_nltype;
380      sljit_ui bsr_nlmax;
381      sljit_ui bsr_nlmin;
382      /* Dollar endonly. */
383    int endonly;    int endonly;
384    sljit_w ctypes;    /* Tables. */
385    struct sljit_label *acceptlabel;    sljit_sw ctypes;
386      /* Named capturing brackets. */
387      pcre_uchar *name_table;
388      sljit_sw name_count;
389      sljit_sw name_entry_size;
390    
391      /* Labels and jump lists. */
392      struct sljit_label *partialmatchlabel;
393      struct sljit_label *quit_label;
394      struct sljit_label *forced_quit_label;
395      struct sljit_label *accept_label;
396      struct sljit_label *ff_newline_shortcut;
397    stub_list *stubs;    stub_list *stubs;
398      label_addr_list *label_addrs;
399    recurse_entry *entries;    recurse_entry *entries;
400    recurse_entry *currententry;    recurse_entry *currententry;
401      jump_list *partialmatch;
402      jump_list *quit;
403      jump_list *positive_assert_quit;
404      jump_list *forced_quit;
405    jump_list *accept;    jump_list *accept;
406    jump_list *calllimit;    jump_list *calllimit;
407    jump_list *stackalloc;    jump_list *stackalloc;
# Line 290  typedef struct compiler_common { Line 412  typedef struct compiler_common {
412    jump_list *vspace;    jump_list *vspace;
413    jump_list *casefulcmp;    jump_list *casefulcmp;
414    jump_list *caselesscmp;    jump_list *caselesscmp;
415      jump_list *reset_match;
416    BOOL jscript_compat;    BOOL jscript_compat;
417  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
418    BOOL utf8;    BOOL utf;
419  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
420    BOOL useucp;    BOOL use_ucp;
421  #endif  #endif
422    jump_list *utf8readchar;  #ifdef COMPILE_PCRE8
423    jump_list *utf8readtype8;    jump_list *utfreadchar;
424      jump_list *utfreadchar16;
425      jump_list *utfreadtype8;
426  #endif  #endif
427    #endif /* SUPPORT_UTF */
428  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
429    jump_list *getucd;    jump_list *getucd;
430  #endif  #endif
# Line 310  typedef struct compare_context { Line 436  typedef struct compare_context {
436    int length;    int length;
437    int sourcereg;    int sourcereg;
438  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
439    int byteptr;    int ucharptr;
440    union {    union {
441      int asint;      sljit_si asint;
442      short asshort;      sljit_uh asushort;
443    #if defined COMPILE_PCRE8
444      sljit_ub asbyte;      sljit_ub asbyte;
445      sljit_ub asbytes[4];      sljit_ub asuchars[4];
446    #elif defined COMPILE_PCRE16
447        sljit_uh asuchars[2];
448    #elif defined COMPILE_PCRE32
449        sljit_ui asuchars[1];
450    #endif
451    } c;    } c;
452    union {    union {
453      int asint;      sljit_si asint;
454      short asshort;      sljit_uh asushort;
455    #if defined COMPILE_PCRE8
456      sljit_ub asbyte;      sljit_ub asbyte;
457      sljit_ub asbytes[4];      sljit_ub asuchars[4];
458    #elif defined COMPILE_PCRE16
459        sljit_uh asuchars[2];
460    #elif defined COMPILE_PCRE32
461        sljit_ui asuchars[1];
462    #endif
463    } oc;    } oc;
464  #endif  #endif
465  } compare_context;  } compare_context;
466    
467  enum {  /* Undefine sljit macros. */
468    frame_end = 0,  #undef CMP
   frame_setmaxindex = -1,  
   frame_setstrbegin = -2  
 };  
469    
470  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
471  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
472    
473  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_R0
474  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_R2
475  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_R3
476  #define STR_PTR       SLJIT_GENERAL_REG1  #define STR_PTR       SLJIT_S0
477  #define STR_END       SLJIT_GENERAL_REG2  #define STR_END       SLJIT_S1
478  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_R1
479  #define STACK_LIMIT   SLJIT_GENERAL_REG3  #define STACK_LIMIT   SLJIT_S2
480  #define ARGUMENTS     SLJIT_GENERAL_EREG1  #define COUNT_MATCH   SLJIT_S3
481  #define CALL_COUNT    SLJIT_GENERAL_EREG2  #define ARGUMENTS     SLJIT_S4
482  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_R4
483    
484  /* Locals layout. */  /* Local space layout. */
485  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
486  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_sw))
487  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_sw))
488  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
489  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
490  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
 /* Head of the saved local variables */  
 #define LOCALS_HEAD      (4 * sizeof(sljit_w))  
 /* Head of the last recursion. */  
 #define RECURSIVE_HEAD   (5 * sizeof(sljit_w))  
 /* Number of recursions. */  
 #define MAX_INDEX        (6 * sizeof(sljit_w))  
491  /* Max limit of recursions. */  /* Max limit of recursions. */
492  #define CALL_LIMIT       (7 * sizeof(sljit_w))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
 /* Last known position of the requested byte. */  
 #define REQ_BYTE_PTR     (8 * sizeof(sljit_w))  
 /* End pointer of the first line. */  
 #define FIRSTLINE_END    (9 * sizeof(sljit_w))  
493  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
494  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
495  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
496  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
497  #define OVECTOR_START    (10 * sizeof(sljit_w))  #define OVECTOR_START    (common->ovector_start)
498  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
499  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
500  #define PRIV(cc)         (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
501    
502    #if defined COMPILE_PCRE8
503    #define MOV_UCHAR  SLJIT_MOV_UB
504    #define MOVU_UCHAR SLJIT_MOVU_UB
505    #elif defined COMPILE_PCRE16
506    #define MOV_UCHAR  SLJIT_MOV_UH
507    #define MOVU_UCHAR SLJIT_MOVU_UH
508    #elif defined COMPILE_PCRE32
509    #define MOV_UCHAR  SLJIT_MOV_UI
510    #define MOVU_UCHAR SLJIT_MOVU_UI
511    #else
512    #error Unsupported compiling mode
513    #endif
514    
515  /* Shortcuts. */  /* Shortcuts. */
516  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 389  the start pointers when the end of the c Line 527  the start pointers when the end of the c
527    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
528  #define JUMPHERE(jump) \  #define JUMPHERE(jump) \
529    sljit_set_label((jump), sljit_emit_label(compiler))    sljit_set_label((jump), sljit_emit_label(compiler))
530    #define SET_LABEL(jump, label) \
531      sljit_set_label((jump), (label))
532  #define CMP(type, src1, src1w, src2, src2w) \  #define CMP(type, src1, src1w, src2, src2w) \
533    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
534  #define CMPTO(type, src1, src1w, src2, src2w, label) \  #define CMPTO(type, src1, src1w, src2, src2w, label) \
535    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
536  #define COND_VALUE(op, dst, dstw, type) \  #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
537    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
538    #define GET_LOCAL_BASE(dst, dstw, offset) \
539      sljit_get_local_base(compiler, (dst), (dstw), (offset))
540    
541    #define READ_CHAR_MAX 0x7fffffff
542    
543  static uschar* bracketend(uschar* cc)  static pcre_uchar *bracketend(pcre_uchar *cc)
544  {  {
545  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
546  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 405  cc += 1 + LINK_SIZE; Line 549  cc += 1 + LINK_SIZE;
549  return cc;  return cc;
550  }  }
551    
552    static int no_alternatives(pcre_uchar *cc)
553    {
554    int count = 0;
555    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
556    do
557      {
558      cc += GET(cc, 1);
559      count++;
560      }
561    while (*cc == OP_ALT);
562    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
563    return count;
564    }
565    
566    static int ones_in_half_byte[16] = {
567      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
568      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
569    };
570    
571  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
572   next_opcode   next_opcode
573   get_localspace   check_opcode_types
574   set_localptrs   set_private_data_ptrs
575   get_framesize   get_framesize
576   init_frame   init_frame
577   get_localsize   get_private_data_copy_length
578   copy_locals   copy_private_data
579   compile_hotpath   compile_matchingpath
580   compile_fallbackpath   compile_backtrackingpath
581  */  */
582    
583  static uschar *next_opcode(compiler_common *common, uschar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
584  {  {
585  SLJIT_UNUSED_ARG(common);  SLJIT_UNUSED_ARG(common);
586  switch(*cc)  switch(*cc)
# Line 435  switch(*cc) Line 598  switch(*cc)
598    case OP_WORDCHAR:    case OP_WORDCHAR:
599    case OP_ANY:    case OP_ANY:
600    case OP_ALLANY:    case OP_ALLANY:
601      case OP_NOTPROP:
602      case OP_PROP:
603    case OP_ANYNL:    case OP_ANYNL:
604    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
605    case OP_HSPACE:    case OP_HSPACE:
# Line 447  switch(*cc) Line 612  switch(*cc)
612    case OP_CIRCM:    case OP_CIRCM:
613    case OP_DOLL:    case OP_DOLL:
614    case OP_DOLLM:    case OP_DOLLM:
   case OP_TYPESTAR:  
   case OP_TYPEMINSTAR:  
   case OP_TYPEPLUS:  
   case OP_TYPEMINPLUS:  
   case OP_TYPEQUERY:  
   case OP_TYPEMINQUERY:  
   case OP_TYPEPOSSTAR:  
   case OP_TYPEPOSPLUS:  
   case OP_TYPEPOSQUERY:  
615    case OP_CRSTAR:    case OP_CRSTAR:
616    case OP_CRMINSTAR:    case OP_CRMINSTAR:
617    case OP_CRPLUS:    case OP_CRPLUS:
618    case OP_CRMINPLUS:    case OP_CRMINPLUS:
619    case OP_CRQUERY:    case OP_CRQUERY:
620    case OP_CRMINQUERY:    case OP_CRMINQUERY:
621      case OP_CRRANGE:
622      case OP_CRMINRANGE:
623      case OP_CRPOSSTAR:
624      case OP_CRPOSPLUS:
625      case OP_CRPOSQUERY:
626      case OP_CRPOSRANGE:
627      case OP_CLASS:
628      case OP_NCLASS:
629      case OP_REF:
630      case OP_REFI:
631      case OP_DNREF:
632      case OP_DNREFI:
633      case OP_RECURSE:
634      case OP_CALLOUT:
635      case OP_ALT:
636      case OP_KET:
637      case OP_KETRMAX:
638      case OP_KETRMIN:
639      case OP_KETRPOS:
640      case OP_REVERSE:
641      case OP_ASSERT:
642      case OP_ASSERT_NOT:
643      case OP_ASSERTBACK:
644      case OP_ASSERTBACK_NOT:
645      case OP_ONCE:
646      case OP_ONCE_NC:
647      case OP_BRA:
648      case OP_BRAPOS:
649      case OP_CBRA:
650      case OP_CBRAPOS:
651      case OP_COND:
652      case OP_SBRA:
653      case OP_SBRAPOS:
654      case OP_SCBRA:
655      case OP_SCBRAPOS:
656      case OP_SCOND:
657      case OP_CREF:
658      case OP_DNCREF:
659      case OP_RREF:
660      case OP_DNRREF:
661    case OP_DEF:    case OP_DEF:
662    case OP_BRAZERO:    case OP_BRAZERO:
663    case OP_BRAMINZERO:    case OP_BRAMINZERO:
664    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
665      case OP_PRUNE:
666      case OP_SKIP:
667      case OP_THEN:
668      case OP_COMMIT:
669    case OP_FAIL:    case OP_FAIL:
670    case OP_ACCEPT:    case OP_ACCEPT:
671    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
672      case OP_CLOSE:
673    case OP_SKIPZERO:    case OP_SKIPZERO:
674    return cc + 1;    return cc + PRIV(OP_lengths)[*cc];
675    
676    case OP_CHAR:    case OP_CHAR:
677    case OP_CHARI:    case OP_CHARI:
678    case OP_NOT:    case OP_NOT:
679    case OP_NOTI:    case OP_NOTI:
   
680    case OP_STAR:    case OP_STAR:
681    case OP_MINSTAR:    case OP_MINSTAR:
682    case OP_PLUS:    case OP_PLUS:
683    case OP_MINPLUS:    case OP_MINPLUS:
684    case OP_QUERY:    case OP_QUERY:
685    case OP_MINQUERY:    case OP_MINQUERY:
686      case OP_UPTO:
687      case OP_MINUPTO:
688      case OP_EXACT:
689    case OP_POSSTAR:    case OP_POSSTAR:
690    case OP_POSPLUS:    case OP_POSPLUS:
691    case OP_POSQUERY:    case OP_POSQUERY:
692      case OP_POSUPTO:
693    case OP_STARI:    case OP_STARI:
694    case OP_MINSTARI:    case OP_MINSTARI:
695    case OP_PLUSI:    case OP_PLUSI:
696    case OP_MINPLUSI:    case OP_MINPLUSI:
697    case OP_QUERYI:    case OP_QUERYI:
698    case OP_MINQUERYI:    case OP_MINQUERYI:
699      case OP_UPTOI:
700      case OP_MINUPTOI:
701      case OP_EXACTI:
702    case OP_POSSTARI:    case OP_POSSTARI:
703    case OP_POSPLUSI:    case OP_POSPLUSI:
704    case OP_POSQUERYI:    case OP_POSQUERYI:
705      case OP_POSUPTOI:
706    case OP_NOTSTAR:    case OP_NOTSTAR:
707    case OP_NOTMINSTAR:    case OP_NOTMINSTAR:
708    case OP_NOTPLUS:    case OP_NOTPLUS:
709    case OP_NOTMINPLUS:    case OP_NOTMINPLUS:
710    case OP_NOTQUERY:    case OP_NOTQUERY:
711    case OP_NOTMINQUERY:    case OP_NOTMINQUERY:
712      case OP_NOTUPTO:
713      case OP_NOTMINUPTO:
714      case OP_NOTEXACT:
715    case OP_NOTPOSSTAR:    case OP_NOTPOSSTAR:
716    case OP_NOTPOSPLUS:    case OP_NOTPOSPLUS:
717    case OP_NOTPOSQUERY:    case OP_NOTPOSQUERY:
718      case OP_NOTPOSUPTO:
719    case OP_NOTSTARI:    case OP_NOTSTARI:
720    case OP_NOTMINSTARI:    case OP_NOTMINSTARI:
721    case OP_NOTPLUSI:    case OP_NOTPLUSI:
722    case OP_NOTMINPLUSI:    case OP_NOTMINPLUSI:
723    case OP_NOTQUERYI:    case OP_NOTQUERYI:
724    case OP_NOTMINQUERYI:    case OP_NOTMINQUERYI:
   case OP_NOTPOSSTARI:  
   case OP_NOTPOSPLUSI:  
   case OP_NOTPOSQUERYI:  
   cc += 2;  
 #ifdef SUPPORT_UTF8  
   if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];  
 #endif  
   return cc;  
   
   case OP_UPTO:  
   case OP_MINUPTO:  
   case OP_EXACT:  
   case OP_POSUPTO:  
   case OP_UPTOI:  
   case OP_MINUPTOI:  
   case OP_EXACTI:  
   case OP_POSUPTOI:  
   case OP_NOTUPTO:  
   case OP_NOTMINUPTO:  
   case OP_NOTEXACT:  
   case OP_NOTPOSUPTO:  
725    case OP_NOTUPTOI:    case OP_NOTUPTOI:
726    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
727    case OP_NOTEXACTI:    case OP_NOTEXACTI:
728      case OP_NOTPOSSTARI:
729      case OP_NOTPOSPLUSI:
730      case OP_NOTPOSQUERYI:
731    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
732    cc += 4;    cc += PRIV(OP_lengths)[*cc];
733  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
734    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
735  #endif  #endif
736    return cc;    return cc;
737    
738    case OP_NOTPROP:    /* Special cases. */
739    case OP_PROP:    case OP_TYPESTAR:
740      case OP_TYPEMINSTAR:
741      case OP_TYPEPLUS:
742      case OP_TYPEMINPLUS:
743      case OP_TYPEQUERY:
744      case OP_TYPEMINQUERY:
745    case OP_TYPEUPTO:    case OP_TYPEUPTO:
746    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
747    case OP_TYPEEXACT:    case OP_TYPEEXACT:
748      case OP_TYPEPOSSTAR:
749      case OP_TYPEPOSPLUS:
750      case OP_TYPEPOSQUERY:
751    case OP_TYPEPOSUPTO:    case OP_TYPEPOSUPTO:
752    case OP_REF:    return cc + PRIV(OP_lengths)[*cc] - 1;
   case OP_REFI:  
   case OP_CREF:  
   case OP_CLOSE:  
   cc += 3;  
   return cc;  
   
   case OP_CRRANGE:  
   case OP_CRMINRANGE:  
   return cc + 5;  
753    
754    case OP_CLASS:    case OP_ANYBYTE:
755    case OP_NCLASS:  #ifdef SUPPORT_UTF
756    return cc + 33;    if (common->utf) return NULL;
757    #endif
758      return cc + 1;
759    
760  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
761    case OP_XCLASS:    case OP_XCLASS:
762    return cc + GET(cc, 1);    return cc + GET(cc, 1);
763  #endif  #endif
764    
765    case OP_RECURSE:    case OP_MARK:
766    case OP_ASSERT:    case OP_PRUNE_ARG:
767    case OP_ASSERT_NOT:    case OP_SKIP_ARG:
768    case OP_ASSERTBACK:    case OP_THEN_ARG:
769    case OP_ASSERTBACK_NOT:    return cc + 1 + 2 + cc[1];
   case OP_REVERSE:  
   case OP_ONCE:  
   case OP_BRA:  
   case OP_BRAPOS:  
   case OP_COND:  
   case OP_SBRA:  
   case OP_SBRAPOS:  
   case OP_SCOND:  
   case OP_ALT:  
   case OP_KET:  
   case OP_KETRMAX:  
   case OP_KETRMIN:  
   case OP_KETRPOS:  
   return cc + 1 + LINK_SIZE;  
   
   case OP_CBRA:  
   case OP_CBRAPOS:  
   case OP_SCBRA:  
   case OP_SCBRAPOS:  
   return cc + 1 + LINK_SIZE + 2;  
770    
771    default:    default:
772      /* All opcodes are supported now! */
773      SLJIT_ASSERT_STOP();
774    return NULL;    return NULL;
775    }    }
776  }  }
777    
778  static int get_localspace(compiler_common *common, uschar *cc, uschar *ccend)  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
779  {  {
780  int localspace = 0;  int count;
781  uschar *alternative;  pcre_uchar *slot;
782    
783  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
784  while (cc < ccend)  while (cc < ccend)
785    {    {
786    switch(*cc)    switch(*cc)
787      {      {
788      case OP_ASSERT:      case OP_SET_SOM:
789      case OP_ASSERT_NOT:      common->has_set_som = TRUE;
790      case OP_ASSERTBACK:      common->might_be_empty = TRUE;
791      case OP_ASSERTBACK_NOT:      cc += 1;
792      case OP_ONCE:      break;
793      case OP_BRAPOS:  
794      case OP_SBRA:      case OP_REF:
795      case OP_SBRAPOS:      case OP_REFI:
796      case OP_SCOND:      common->optimized_cbracket[GET2(cc, 1)] = 0;
797      localspace += sizeof(sljit_w);      cc += 1 + IMM2_SIZE;
     cc += 1 + LINK_SIZE;  
798      break;      break;
799    
800      case OP_CBRAPOS:      case OP_CBRAPOS:
801      case OP_SCBRAPOS:      case OP_SCBRAPOS:
802      localspace += sizeof(sljit_w);      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
803      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
804      break;      break;
805    
806      case OP_COND:      case OP_COND:
     /* Might be a hidden SCOND. */  
     alternative = cc + GET(cc, 1);  
     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
       localspace += sizeof(sljit_w);  
     cc += 1 + LINK_SIZE;  
     break;  
   
     default:  
     cc = next_opcode(common, cc);  
     if (cc == NULL)  
       return -1;  
     break;  
     }  
   }  
 return localspace;  
 }  
   
 static void set_localptrs(compiler_common *common, int localptr, uschar *ccend)  
 {  
 uschar *cc = common->start;  
 uschar *alternative;  
 while (cc < ccend)  
   {  
   switch(*cc)  
     {  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_BRAPOS:  
     case OP_SBRA:  
     case OP_SBRAPOS:  
807      case OP_SCOND:      case OP_SCOND:
808      common->localptrs[cc - common->start] = localptr;      /* Only AUTO_CALLOUT can insert this opcode. We do
809      localptr += sizeof(sljit_w);         not intend to support this case. */
810        if (cc[1 + LINK_SIZE] == OP_CALLOUT)
811          return FALSE;
812      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
813      break;      break;
814    
815      case OP_CBRAPOS:      case OP_CREF:
816      case OP_SCBRAPOS:      common->optimized_cbracket[GET2(cc, 1)] = 0;
817      common->localptrs[cc - common->start] = localptr;      cc += 1 + IMM2_SIZE;
818      localptr += sizeof(sljit_w);      break;
819      cc += 1 + LINK_SIZE + 2;  
820        case OP_DNREF:
821        case OP_DNREFI:
822        case OP_DNCREF:
823        count = GET2(cc, 1 + IMM2_SIZE);
824        slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
825        while (count-- > 0)
826          {
827          common->optimized_cbracket[GET2(slot, 0)] = 0;
828          slot += common->name_entry_size;
829          }
830        cc += 1 + 2 * IMM2_SIZE;
831      break;      break;
832    
833      case OP_COND:      case OP_RECURSE:
834      /* Might be a hidden SCOND. */      /* Set its value only once. */
835      alternative = cc + GET(cc, 1);      if (common->recursive_head_ptr == 0)
     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
836        {        {
837        common->localptrs[cc - common->start] = localptr;        common->recursive_head_ptr = common->ovector_start;
838        localptr += sizeof(sljit_w);        common->ovector_start += sizeof(sljit_sw);
839        }        }
840      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
841      break;      break;
842    
843      default:      case OP_CALLOUT:
844      cc = next_opcode(common, cc);      if (common->capture_last_ptr == 0)
845      SLJIT_ASSERT(cc != NULL);        {
846          common->capture_last_ptr = common->ovector_start;
847          common->ovector_start += sizeof(sljit_sw);
848          }
849        cc += 2 + 2 * LINK_SIZE;
850      break;      break;
     }  
   }  
 }  
851    
852  /* Returns with -1 if no need for frame. */      case OP_THEN_ARG:
853  static int get_framesize(compiler_common *common, uschar *cc, BOOL recursive)      common->has_then = TRUE;
854  {      common->control_head_ptr = 1;
855  uschar *ccend = bracketend(cc);      /* Fall through. */
 uschar *end;  
 int length = 0;  
 BOOL possessive = FALSE;  
 BOOL needs_frame = FALSE;  
 BOOL needs_maxindex = FALSE;  
 BOOL setsom_found = FALSE;  
   
 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  
   {  
   length = 3 + 2;  
   needs_maxindex = TRUE;  
   possessive = TRUE;  
   }  
856    
857  cc = next_opcode(common, cc);      case OP_PRUNE_ARG:
858  SLJIT_ASSERT(cc != NULL);      case OP_MARK:
859  while (cc < ccend)      if (common->mark_ptr == 0)
   switch(*cc)  
     {  
     case OP_SET_SOM:  
     case OP_RECURSE:  
     if (!setsom_found)  
860        {        {
861        length += 2;        common->mark_ptr = common->ovector_start;
862        setsom_found = TRUE;        common->ovector_start += sizeof(sljit_sw);
863        }        }
864      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      cc += 1 + 2 + cc[1];
865      break;      break;
866    
867      case OP_ASSERT:      case OP_THEN:
868      case OP_ASSERT_NOT:      common->has_then = TRUE;
869      case OP_ASSERTBACK:      common->control_head_ptr = 1;
870      case OP_ASSERTBACK_NOT:      /* Fall through. */
871      case OP_ONCE:  
872      if (needs_frame || length > 0)      case OP_PRUNE:
873        {      case OP_SKIP:
874        cc = bracketend(cc);      cc += 1;
       break;  
       }  
     /* Check whether a frame must be created. */  
     end = bracketend(cc);  
     while (cc < end)  
       {  
       if (*cc == OP_SET_SOM || *cc == OP_CBRA || *cc == OP_CBRAPOS  
           || *cc == OP_SCBRA || *cc == OP_SCBRAPOS || *cc == OP_RECURSE)  
         needs_frame = TRUE;  
       cc = next_opcode(common, cc);  
       SLJIT_ASSERT(cc != NULL);  
       }  
875      break;      break;
876    
877      case OP_CBRA:      case OP_SKIP_ARG:
878      case OP_CBRAPOS:      common->control_head_ptr = 1;
879      case OP_SCBRA:      common->has_skip_arg = TRUE;
880      case OP_SCBRAPOS:      cc += 1 + 2 + cc[1];
     if (!needs_maxindex)  
       {  
       needs_maxindex = TRUE;  
       length += 2;  
       }  
     length += 3;  
     cc += 1 + LINK_SIZE + 2;  
881      break;      break;
882    
883      default:      default:
884      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
885      SLJIT_ASSERT(cc != NULL);      if (cc == NULL)
886          return FALSE;
887      break;      break;
888      }      }
889      }
890  /* Possessive quantifiers can use a special case. */  return TRUE;
 if (SLJIT_UNLIKELY(possessive) && !needs_frame && length == 3 + 2)  
   return -1;  
   
 if (length > 0)  
   return length + 2;  
 return needs_frame ? 0 : -1;  
891  }  }
892    
893  static void init_frame(compiler_common *common, uschar *cc, int stackpos, int stacktop, BOOL recursive)  static int get_class_iterator_size(pcre_uchar *cc)
894  {  {
895  /* TMP2 must contain STACK_TOP - (-STACK(stackpos)) */  switch(*cc)
896  DEFINE_COMPILER;    {
897  uschar *ccend = bracketend(cc);    case OP_CRSTAR:
898  BOOL needs_maxindex = FALSE;    case OP_CRPLUS:
899  BOOL setsom_found = FALSE;    return 2;
900  int offset;  
901      case OP_CRMINSTAR:
902      case OP_CRMINPLUS:
903      case OP_CRQUERY:
904      case OP_CRMINQUERY:
905      return 1;
906    
907      case OP_CRRANGE:
908      case OP_CRMINRANGE:
909      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
910        return 0;
911      return 2;
912    
913      default:
914      return 0;
915      }
916    }
917    
918    static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
919    {
920    pcre_uchar *end = bracketend(begin);
921    pcre_uchar *next;
922    pcre_uchar *next_end;
923    pcre_uchar *max_end;
924    pcre_uchar type;
925    sljit_sw length = end - begin;
926    int min, max, i;
927    
928    /* Detect fixed iterations first. */
929    if (end[-(1 + LINK_SIZE)] != OP_KET)
930      return FALSE;
931    
932    /* Already detected repeat. */
933    if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
934      return TRUE;
935    
936  if (stackpos < stacktop)  next = end;
937    min = 1;
938    while (1)
939    {    {
940    SLJIT_ASSERT(stackpos + 1 == stacktop);    if (*next != *begin)
941    return;      break;
942      next_end = bracketend(next);
943      if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
944        break;
945      next = next_end;
946      min++;
947    }    }
948    
949  stackpos = STACK(stackpos);  if (min == 2)
950  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD);    return FALSE;
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD, TMP2, 0);  
 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacktop), TMP1, 0);  
951    
952  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  max = 0;
953    cc = next_opcode(common, cc);  max_end = next;
954  SLJIT_ASSERT(cc != NULL);  if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
955  while (cc < ccend)    {
956    switch(*cc)    type = *next;
957      while (1)
958      {      {
959      case OP_SET_SOM:      if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
960      case OP_RECURSE:        break;
961      if (!setsom_found)      next_end = bracketend(next + 2 + LINK_SIZE);
962        if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
963          break;
964        next = next_end;
965        max++;
966        }
967    
968      if (next[0] == type && next[1] == *begin && max >= 1)
969        {
970        next_end = bracketend(next + 1);
971        if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
972        {        {
973        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
974        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);          if (*next_end != OP_KET)
975        stackpos += (int)sizeof(sljit_w);            break;
976        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  
977        stackpos += (int)sizeof(sljit_w);        if (i == max)
978        setsom_found = TRUE;          {
979            common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
980            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
981            /* +2 the original and the last. */
982            common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
983            if (min == 1)
984              return TRUE;
985            min--;
986            max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
987            }
988        }        }
989      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      }
990      break;    }
991    
992      case OP_ASSERT:  if (min >= 3)
993      case OP_ASSERT_NOT:    {
994      case OP_ASSERTBACK:    common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
995      case OP_ASSERTBACK_NOT:    common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
996      case OP_ONCE:    common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
997      cc = bracketend(cc);    return TRUE;
998      break;    }
999    
1000    return FALSE;
1001    }
1002    
1003    #define CASE_ITERATOR_PRIVATE_DATA_1 \
1004        case OP_MINSTAR: \
1005        case OP_MINPLUS: \
1006        case OP_QUERY: \
1007        case OP_MINQUERY: \
1008        case OP_MINSTARI: \
1009        case OP_MINPLUSI: \
1010        case OP_QUERYI: \
1011        case OP_MINQUERYI: \
1012        case OP_NOTMINSTAR: \
1013        case OP_NOTMINPLUS: \
1014        case OP_NOTQUERY: \
1015        case OP_NOTMINQUERY: \
1016        case OP_NOTMINSTARI: \
1017        case OP_NOTMINPLUSI: \
1018        case OP_NOTQUERYI: \
1019        case OP_NOTMINQUERYI:
1020    
1021      case OP_CBRA:  #define CASE_ITERATOR_PRIVATE_DATA_2A \
1022      case OP_CBRAPOS:      case OP_STAR: \
1023      case OP_SCBRA:      case OP_PLUS: \
1024      case OP_SCBRAPOS:      case OP_STARI: \
1025      if (!needs_maxindex)      case OP_PLUSI: \
1026        {      case OP_NOTSTAR: \
1027        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), MAX_INDEX);      case OP_NOTPLUS: \
1028        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmaxindex);      case OP_NOTSTARI: \
1029        stackpos += (int)sizeof(sljit_w);      case OP_NOTPLUSI:
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  
       stackpos += (int)sizeof(sljit_w);  
       needs_maxindex = TRUE;  
       }  
     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;  
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));  
     stackpos += (int)sizeof(sljit_w);  
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);  
     stackpos += (int)sizeof(sljit_w);  
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);  
     stackpos += (int)sizeof(sljit_w);  
1030    
1031      cc += 1 + LINK_SIZE + 2;  #define CASE_ITERATOR_PRIVATE_DATA_2B \
1032      break;      case OP_UPTO: \
1033        case OP_MINUPTO: \
1034        case OP_UPTOI: \
1035        case OP_MINUPTOI: \
1036        case OP_NOTUPTO: \
1037        case OP_NOTMINUPTO: \
1038        case OP_NOTUPTOI: \
1039        case OP_NOTMINUPTOI:
1040    
1041      default:  #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1042      cc = next_opcode(common, cc);      case OP_TYPEMINSTAR: \
1043      SLJIT_ASSERT(cc != NULL);      case OP_TYPEMINPLUS: \
1044      break;      case OP_TYPEQUERY: \
1045      }      case OP_TYPEMINQUERY:
1046    
1047  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1048  SLJIT_ASSERT(stackpos == STACK(stacktop + 1));      case OP_TYPESTAR: \
1049  }      case OP_TYPEPLUS:
1050    
1051    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1052        case OP_TYPEUPTO: \
1053        case OP_TYPEMINUPTO:
1054    
1055  static SLJIT_INLINE int get_localsize(compiler_common *common, uschar *cc, uschar *ccend)  static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1056  {  {
1057  int localsize = 2;  pcre_uchar *cc = common->start;
1058  uschar *alternative;  pcre_uchar *alternative;
1059  /* Calculate the sum of the local variables. */  pcre_uchar *end = NULL;
1060    int private_data_ptr = *private_data_start;
1061    int space, size, bracketlen;
1062    BOOL repeat_check = TRUE;
1063    
1064  while (cc < ccend)  while (cc < ccend)
1065    {    {
1066      space = 0;
1067      size = 0;
1068      bracketlen = 0;
1069      if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1070        break;
1071    
1072      if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1073        {
1074        if (detect_repeat(common, cc))
1075          {
1076          /* These brackets are converted to repeats, so no global
1077          based single character repeat is allowed. */
1078          if (cc >= end)
1079            end = bracketend(cc);
1080          }
1081        }
1082      repeat_check = TRUE;
1083    
1084    switch(*cc)    switch(*cc)
1085      {      {
1086        case OP_KET:
1087        if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1088          {
1089          common->private_data_ptrs[cc - common->start] = private_data_ptr;
1090          private_data_ptr += sizeof(sljit_sw);
1091          cc += common->private_data_ptrs[cc + 1 - common->start];
1092          }
1093        cc += 1 + LINK_SIZE;
1094        break;
1095    
1096      case OP_ASSERT:      case OP_ASSERT:
1097      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1098      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1099      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1100      case OP_ONCE:      case OP_ONCE:
1101        case OP_ONCE_NC:
1102      case OP_BRAPOS:      case OP_BRAPOS:
1103      case OP_SBRA:      case OP_SBRA:
1104      case OP_SBRAPOS:      case OP_SBRAPOS:
1105      case OP_SCOND:      case OP_SCOND:
1106      localsize++;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1107      cc += 1 + LINK_SIZE;      private_data_ptr += sizeof(sljit_sw);
1108      break;      bracketlen = 1 + LINK_SIZE;
   
     case OP_CBRA:  
     case OP_SCBRA:  
     localsize++;  
     cc += 1 + LINK_SIZE + 2;  
1109      break;      break;
1110    
1111      case OP_CBRAPOS:      case OP_CBRAPOS:
1112      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1113      localsize += 2;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1114      cc += 1 + LINK_SIZE + 2;      private_data_ptr += sizeof(sljit_sw);
1115        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1116      break;      break;
1117    
1118      case OP_COND:      case OP_COND:
1119      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1120      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1121      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1122        localsize++;        {
1123      cc += 1 + LINK_SIZE;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1124          private_data_ptr += sizeof(sljit_sw);
1125          }
1126        bracketlen = 1 + LINK_SIZE;
1127        break;
1128    
1129        case OP_BRA:
1130        bracketlen = 1 + LINK_SIZE;
1131        break;
1132    
1133        case OP_CBRA:
1134        case OP_SCBRA:
1135        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1136        break;
1137    
1138        case OP_BRAZERO:
1139        case OP_BRAMINZERO:
1140        case OP_BRAPOSZERO:
1141        repeat_check = FALSE;
1142        size = 1;
1143        break;
1144    
1145        CASE_ITERATOR_PRIVATE_DATA_1
1146        space = 1;
1147        size = -2;
1148        break;
1149    
1150        CASE_ITERATOR_PRIVATE_DATA_2A
1151        space = 2;
1152        size = -2;
1153        break;
1154    
1155        CASE_ITERATOR_PRIVATE_DATA_2B
1156        space = 2;
1157        size = -(2 + IMM2_SIZE);
1158        break;
1159    
1160        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1161        space = 1;
1162        size = 1;
1163        break;
1164    
1165        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1166        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1167          space = 2;
1168        size = 1;
1169        break;
1170    
1171        case OP_TYPEUPTO:
1172        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1173          space = 2;
1174        size = 1 + IMM2_SIZE;
1175      break;      break;
1176    
1177        case OP_TYPEMINUPTO:
1178        space = 2;
1179        size = 1 + IMM2_SIZE;
1180        break;
1181    
1182        case OP_CLASS:
1183        case OP_NCLASS:
1184        size += 1 + 32 / sizeof(pcre_uchar);
1185        space = get_class_iterator_size(cc + size);
1186        break;
1187    
1188    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1189        case OP_XCLASS:
1190        size = GET(cc, 1);
1191        space = get_class_iterator_size(cc + size);
1192        break;
1193    #endif
1194    
1195      default:      default:
1196      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1197      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1198      break;      break;
1199      }      }
1200    
1201      /* Character iterators, which are not inside a repeated bracket,
1202         gets a private slot instead of allocating it on the stack. */
1203      if (space > 0 && cc >= end)
1204        {
1205        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1206        private_data_ptr += sizeof(sljit_sw) * space;
1207        }
1208    
1209      if (size != 0)
1210        {
1211        if (size < 0)
1212          {
1213          cc += -size;
1214    #ifdef SUPPORT_UTF
1215          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1216    #endif
1217          }
1218        else
1219          cc += size;
1220        }
1221    
1222      if (bracketlen > 0)
1223        {
1224        if (cc >= end)
1225          {
1226          end = bracketend(cc);
1227          if (end[-1 - LINK_SIZE] == OP_KET)
1228            end = NULL;
1229          }
1230        cc += bracketlen;
1231        }
1232    }    }
1233  SLJIT_ASSERT(cc == ccend);  *private_data_start = private_data_ptr;
 return localsize;  
1234  }  }
1235    
1236  static void copy_locals(compiler_common *common, uschar *cc, uschar *ccend,  /* Returns with a frame_types (always < 0) if no need for frame. */
1237    BOOL save, int stackptr, int stacktop)  static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1238  {  {
1239  DEFINE_COMPILER;  int length = 0;
1240  int srcw[2];  int possessive = 0;
1241  int count;  BOOL stack_restore = FALSE;
1242  BOOL tmp1next = TRUE;  BOOL setsom_found = recursive;
1243  BOOL tmp1empty = TRUE;  BOOL setmark_found = recursive;
1244  BOOL tmp2empty = TRUE;  /* The last capture is a local variable even for recursions. */
1245  uschar *alternative;  BOOL capture_last_found = FALSE;
1246  enum {  
1247    start,  #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1248    loop,  SLJIT_ASSERT(common->control_head_ptr != 0);
1249    end  *needs_control_head = TRUE;
1250  } status;  #else
1251    *needs_control_head = FALSE;
1252  status = save ? start : loop;  #endif
 stackptr = STACK(stackptr - 2);  
 stacktop = STACK(stacktop - 1);  
1253    
1254  if (!save)  if (ccend == NULL)
1255    {    {
1256    stackptr += sizeof(sljit_w);    ccend = bracketend(cc) - (1 + LINK_SIZE);
1257    if (stackptr < stacktop)    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
     {  
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);  
     stackptr += sizeof(sljit_w);  
     tmp1empty = FALSE;  
     }  
   if (stackptr < stacktop)  
1258      {      {
1259      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);      possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1260      stackptr += sizeof(sljit_w);      /* This is correct regardless of common->capture_last_ptr. */
1261      tmp2empty = FALSE;      capture_last_found = TRUE;
1262      }      }
1263    /* The tmp1next must be TRUE in either way. */    cc = next_opcode(common, cc);
1264    }    }
1265    
1266  while (status != end)  SLJIT_ASSERT(cc != NULL);
1267    {  while (cc < ccend)
1268    count = 0;    switch(*cc)
   switch(status)  
1269      {      {
1270      case start:      case OP_SET_SOM:
1271      SLJIT_ASSERT(save);      SLJIT_ASSERT(common->has_set_som);
1272      count = 1;      stack_restore = TRUE;
1273      srcw[0] = RECURSIVE_HEAD;      if (!setsom_found)
1274      status = loop;        {
1275          length += 2;
1276          setsom_found = TRUE;
1277          }
1278        cc += 1;
1279      break;      break;
1280    
1281      case loop:      case OP_MARK:
1282      if (cc >= ccend)      case OP_PRUNE_ARG:
1283        case OP_THEN_ARG:
1284        SLJIT_ASSERT(common->mark_ptr != 0);
1285        stack_restore = TRUE;
1286        if (!setmark_found)
1287        {        {
1288        status = end;        length += 2;
1289        break;        setmark_found = TRUE;
1290        }        }
1291        if (common->control_head_ptr != 0)
1292          *needs_control_head = TRUE;
1293        cc += 1 + 2 + cc[1];
1294        break;
1295    
1296      switch(*cc)      case OP_RECURSE:
1297        stack_restore = TRUE;
1298        if (common->has_set_som && !setsom_found)
1299        {        {
1300        case OP_ASSERT:        length += 2;
1301        case OP_ASSERT_NOT:        setsom_found = TRUE;
1302        case OP_ASSERTBACK:        }
1303        case OP_ASSERTBACK_NOT:      if (common->mark_ptr != 0 && !setmark_found)
1304        case OP_ONCE:        {
1305        case OP_BRAPOS:        length += 2;
1306        case OP_SBRA:        setmark_found = TRUE;
1307        case OP_SBRAPOS:        }
1308        case OP_SCOND:      if (common->capture_last_ptr != 0 && !capture_last_found)
1309        count = 1;        {
1310        srcw[0] = PRIV(cc);        length += 2;
1311        SLJIT_ASSERT(srcw[0] != 0);        capture_last_found = TRUE;
1312        cc += 1 + LINK_SIZE;        }
1313        break;      cc += 1 + LINK_SIZE;
1314        break;
1315    
1316        case OP_CBRA:      case OP_CBRA:
1317        case OP_SCBRA:      case OP_CBRAPOS:
1318        count = 1;      case OP_SCBRA:
1319        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));      case OP_SCBRAPOS:
1320        cc += 1 + LINK_SIZE + 2;      stack_restore = TRUE;
1321        break;      if (common->capture_last_ptr != 0 && !capture_last_found)
1322          {
1323          length += 2;
1324          capture_last_found = TRUE;
1325          }
1326        length += 3;
1327        cc += 1 + LINK_SIZE + IMM2_SIZE;
1328        break;
1329    
1330        case OP_CBRAPOS:      case OP_THEN:
1331        case OP_SCBRAPOS:      stack_restore = TRUE;
1332        count = 2;      if (common->control_head_ptr != 0)
1333        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));        *needs_control_head = TRUE;
1334        srcw[0] = PRIV(cc);      cc ++;
1335        SLJIT_ASSERT(srcw[0] != 0);      break;
       cc += 1 + LINK_SIZE + 2;  
       break;  
1336    
1337        case OP_COND:      default:
1338        /* Might be a hidden SCOND. */      stack_restore = TRUE;
1339        alternative = cc + GET(cc, 1);      /* Fall through. */
       if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)  
         {  
         count = 1;  
         srcw[0] = PRIV(cc);  
         SLJIT_ASSERT(srcw[0] != 0);  
         }  
       cc += 1 + LINK_SIZE;  
       break;  
1340    
1341        default:      case OP_NOT_WORD_BOUNDARY:
1342        cc = next_opcode(common, cc);      case OP_WORD_BOUNDARY:
1343        SLJIT_ASSERT(cc != NULL);      case OP_NOT_DIGIT:
1344        break;      case OP_DIGIT:
1345        }      case OP_NOT_WHITESPACE:
1346      break;      case OP_WHITESPACE:
1347        case OP_NOT_WORDCHAR:
1348        case OP_WORDCHAR:
1349        case OP_ANY:
1350        case OP_ALLANY:
1351        case OP_ANYBYTE:
1352        case OP_NOTPROP:
1353        case OP_PROP:
1354        case OP_ANYNL:
1355        case OP_NOT_HSPACE:
1356        case OP_HSPACE:
1357        case OP_NOT_VSPACE:
1358        case OP_VSPACE:
1359        case OP_EXTUNI:
1360        case OP_EODN:
1361        case OP_EOD:
1362        case OP_CIRC:
1363        case OP_CIRCM:
1364        case OP_DOLL:
1365        case OP_DOLLM:
1366        case OP_CHAR:
1367        case OP_CHARI:
1368        case OP_NOT:
1369        case OP_NOTI:
1370    
1371      case end:      case OP_EXACT:
1372      SLJIT_ASSERT_STOP();      case OP_POSSTAR:
1373        case OP_POSPLUS:
1374        case OP_POSQUERY:
1375        case OP_POSUPTO:
1376    
1377        case OP_EXACTI:
1378        case OP_POSSTARI:
1379        case OP_POSPLUSI:
1380        case OP_POSQUERYI:
1381        case OP_POSUPTOI:
1382    
1383        case OP_NOTEXACT:
1384        case OP_NOTPOSSTAR:
1385        case OP_NOTPOSPLUS:
1386        case OP_NOTPOSQUERY:
1387        case OP_NOTPOSUPTO:
1388    
1389        case OP_NOTEXACTI:
1390        case OP_NOTPOSSTARI:
1391        case OP_NOTPOSPLUSI:
1392        case OP_NOTPOSQUERYI:
1393        case OP_NOTPOSUPTOI:
1394    
1395        case OP_TYPEEXACT:
1396        case OP_TYPEPOSSTAR:
1397        case OP_TYPEPOSPLUS:
1398        case OP_TYPEPOSQUERY:
1399        case OP_TYPEPOSUPTO:
1400    
1401        case OP_CLASS:
1402        case OP_NCLASS:
1403        case OP_XCLASS:
1404        case OP_CALLOUT:
1405    
1406        cc = next_opcode(common, cc);
1407        SLJIT_ASSERT(cc != NULL);
1408      break;      break;
1409      }      }
1410    
1411    while (count > 0)  /* Possessive quantifiers can use a special case. */
1412    if (SLJIT_UNLIKELY(possessive == length))
1413      return stack_restore ? no_frame : no_stack;
1414    
1415    if (length > 0)
1416      return length + 1;
1417    return stack_restore ? no_frame : no_stack;
1418    }
1419    
1420    static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1421    {
1422    DEFINE_COMPILER;
1423    BOOL setsom_found = recursive;
1424    BOOL setmark_found = recursive;
1425    /* The last capture is a local variable even for recursions. */
1426    BOOL capture_last_found = FALSE;
1427    int offset;
1428    
1429    /* >= 1 + shortest item size (2) */
1430    SLJIT_UNUSED_ARG(stacktop);
1431    SLJIT_ASSERT(stackpos >= stacktop + 2);
1432    
1433    stackpos = STACK(stackpos);
1434    if (ccend == NULL)
1435      {
1436      ccend = bracketend(cc) - (1 + LINK_SIZE);
1437      if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1438        cc = next_opcode(common, cc);
1439      }
1440    
1441    SLJIT_ASSERT(cc != NULL);
1442    while (cc < ccend)
1443      switch(*cc)
1444      {      {
1445      count--;      case OP_SET_SOM:
1446      if (save)      SLJIT_ASSERT(common->has_set_som);
1447        if (!setsom_found)
1448        {        {
1449        if (tmp1next)        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1450          {        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1451          if (!tmp1empty)        stackpos += (int)sizeof(sljit_sw);
1452            {        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1453            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        stackpos += (int)sizeof(sljit_sw);
1454            stackptr += sizeof(sljit_w);        setsom_found = TRUE;
           }  
         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);  
         tmp1empty = FALSE;  
         tmp1next = FALSE;  
         }  
       else  
         {  
         if (!tmp2empty)  
           {  
           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);  
           stackptr += sizeof(sljit_w);  
           }  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);  
         tmp2empty = FALSE;  
         tmp1next = TRUE;  
         }  
1455        }        }
1456      else      cc += 1;
1457        break;
1458    
1459        case OP_MARK:
1460        case OP_PRUNE_ARG:
1461        case OP_THEN_ARG:
1462        SLJIT_ASSERT(common->mark_ptr != 0);
1463        if (!setmark_found)
1464          {
1465          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1466          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1467          stackpos += (int)sizeof(sljit_sw);
1468          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469          stackpos += (int)sizeof(sljit_sw);
1470          setmark_found = TRUE;
1471          }
1472        cc += 1 + 2 + cc[1];
1473        break;
1474    
1475        case OP_RECURSE:
1476        if (common->has_set_som && !setsom_found)
1477        {        {
1478        if (tmp1next)        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1479          {        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1480          SLJIT_ASSERT(!tmp1empty);        stackpos += (int)sizeof(sljit_sw);
1481          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1482          tmp1empty = stackptr >= stacktop;        stackpos += (int)sizeof(sljit_sw);
1483          if (!tmp1empty)        setsom_found = TRUE;
1484            {        }
1485            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      if (common->mark_ptr != 0 && !setmark_found)
1486            stackptr += sizeof(sljit_w);        {
1487            }        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1488          tmp1next = FALSE;        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1489          }        stackpos += (int)sizeof(sljit_sw);
1490        else        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1491          {        stackpos += (int)sizeof(sljit_sw);
1492          SLJIT_ASSERT(!tmp2empty);        setmark_found = TRUE;
1493          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);        }
1494          tmp2empty = stackptr >= stacktop;      if (common->capture_last_ptr != 0 && !capture_last_found)
1495          if (!tmp2empty)        {
1496            {        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1497            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1498            stackptr += sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1499            }        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1500          tmp1next = TRUE;        stackpos += (int)sizeof(sljit_sw);
1501          }        capture_last_found = TRUE;
1502          }
1503        cc += 1 + LINK_SIZE;
1504        break;
1505    
1506        case OP_CBRA:
1507        case OP_CBRAPOS:
1508        case OP_SCBRA:
1509        case OP_SCBRAPOS:
1510        if (common->capture_last_ptr != 0 && !capture_last_found)
1511          {
1512          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1513          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1514          stackpos += (int)sizeof(sljit_sw);
1515          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1516          stackpos += (int)sizeof(sljit_sw);
1517          capture_last_found = TRUE;
1518          }
1519        offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1520        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1521        stackpos += (int)sizeof(sljit_sw);
1522        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1523        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1524        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1525        stackpos += (int)sizeof(sljit_sw);
1526        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1527        stackpos += (int)sizeof(sljit_sw);
1528    
1529        cc += 1 + LINK_SIZE + IMM2_SIZE;
1530        break;
1531    
1532        default:
1533        cc = next_opcode(common, cc);
1534        SLJIT_ASSERT(cc != NULL);
1535        break;
1536        }
1537    
1538    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1539    SLJIT_ASSERT(stackpos == STACK(stacktop));
1540    }
1541    
1542    static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1543    {
1544    int private_data_length = needs_control_head ? 3 : 2;
1545    int size;
1546    pcre_uchar *alternative;
1547    /* Calculate the sum of the private machine words. */
1548    while (cc < ccend)
1549      {
1550      size = 0;
1551      switch(*cc)
1552        {
1553        case OP_KET:
1554        if (PRIVATE_DATA(cc) != 0)
1555          {
1556          private_data_length++;
1557          SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1558          cc += PRIVATE_DATA(cc + 1);
1559        }        }
1560        cc += 1 + LINK_SIZE;
1561        break;
1562    
1563        case OP_ASSERT:
1564        case OP_ASSERT_NOT:
1565        case OP_ASSERTBACK:
1566        case OP_ASSERTBACK_NOT:
1567        case OP_ONCE:
1568        case OP_ONCE_NC:
1569        case OP_BRAPOS:
1570        case OP_SBRA:
1571        case OP_SBRAPOS:
1572        case OP_SCOND:
1573        private_data_length++;
1574        SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1575        cc += 1 + LINK_SIZE;
1576        break;
1577    
1578        case OP_CBRA:
1579        case OP_SCBRA:
1580        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1581          private_data_length++;
1582        cc += 1 + LINK_SIZE + IMM2_SIZE;
1583        break;
1584    
1585        case OP_CBRAPOS:
1586        case OP_SCBRAPOS:
1587        private_data_length += 2;
1588        cc += 1 + LINK_SIZE + IMM2_SIZE;
1589        break;
1590    
1591        case OP_COND:
1592        /* Might be a hidden SCOND. */
1593        alternative = cc + GET(cc, 1);
1594        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1595          private_data_length++;
1596        cc += 1 + LINK_SIZE;
1597        break;
1598    
1599        CASE_ITERATOR_PRIVATE_DATA_1
1600        if (PRIVATE_DATA(cc))
1601          private_data_length++;
1602        cc += 2;
1603    #ifdef SUPPORT_UTF
1604        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1605    #endif
1606        break;
1607    
1608        CASE_ITERATOR_PRIVATE_DATA_2A
1609        if (PRIVATE_DATA(cc))
1610          private_data_length += 2;
1611        cc += 2;
1612    #ifdef SUPPORT_UTF
1613        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1614    #endif
1615        break;
1616    
1617        CASE_ITERATOR_PRIVATE_DATA_2B
1618        if (PRIVATE_DATA(cc))
1619          private_data_length += 2;
1620        cc += 2 + IMM2_SIZE;
1621    #ifdef SUPPORT_UTF
1622        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1623    #endif
1624        break;
1625    
1626        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1627        if (PRIVATE_DATA(cc))
1628          private_data_length++;
1629        cc += 1;
1630        break;
1631    
1632        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1633        if (PRIVATE_DATA(cc))
1634          private_data_length += 2;
1635        cc += 1;
1636        break;
1637    
1638        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1639        if (PRIVATE_DATA(cc))
1640          private_data_length += 2;
1641        cc += 1 + IMM2_SIZE;
1642        break;
1643    
1644        case OP_CLASS:
1645        case OP_NCLASS:
1646    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1647        case OP_XCLASS:
1648        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1649    #else
1650        size = 1 + 32 / (int)sizeof(pcre_uchar);
1651    #endif
1652        if (PRIVATE_DATA(cc))
1653          private_data_length += get_class_iterator_size(cc + size);
1654        cc += size;
1655        break;
1656    
1657        default:
1658        cc = next_opcode(common, cc);
1659        SLJIT_ASSERT(cc != NULL);
1660        break;
1661      }      }
1662    }    }
1663    SLJIT_ASSERT(cc == ccend);
1664    return private_data_length;
1665    }
1666    
/* Emits the code that transfers the private data words used by the bytecode
range [cc, ccend) between the locals area (addressed through SLJIT_SP) and the
stack (addressed through STACK_TOP). With save set, the words are read from
the locals and written to the stack; otherwise they are read from the stack
and written back to the locals. Every word travels through TMP1 or TMP2, and
tmp1next selects which of the two registers is used for the next word. */
1667    static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1668      BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1669    {
1670    DEFINE_COMPILER;
        /* Locals offsets queued by the current step (at most two per step). */
1671    int srcw[2];
1672    int count, size;
1673    BOOL tmp1next = TRUE;
1674    BOOL tmp1empty = TRUE;
1675    BOOL tmp2empty = TRUE;
1676    pcre_uchar *alternative;
1677    enum {
1678      start,
1679      loop,
1680      end
1681    } status;
1682    
        /* Only the save pass visits the "start" state, which queues the
        recursive head (and the control head when requested). */
1683    status = save ? start : loop;
1684    stackptr = STACK(stackptr - 2);
1685    stacktop = STACK(stacktop - 1);
1686    
1687    if (!save)
1688      {
          /* Step over the one or two slots that the save pass fills from its
          "start" state, then preload up to two words from the stack. */
1689      stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1690      if (stackptr < stacktop)
1691        {
1692        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1693        stackptr += sizeof(sljit_sw);
1694        tmp1empty = FALSE;
1695        }
1696      if (stackptr < stacktop)
1697        {
1698        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1699        stackptr += sizeof(sljit_sw);
1700        tmp2empty = FALSE;
1701        }
1702      /* The tmp1next must be TRUE in either way. */
1703      }
1704    
1705    do
1706      {
          /* Each pass selects zero, one or two locals offsets into srcw[];
          the transfer loop at the bottom of the pass then moves them. */
1707      count = 0;
1708      switch(status)
1709        {
1710        case start:
1711        SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1712        count = 1;
1713        srcw[0] = common->recursive_head_ptr;
1714        if (needs_control_head)
1715          {
1716          SLJIT_ASSERT(common->control_head_ptr != 0);
1717          count = 2;
1718          srcw[1] = common->control_head_ptr;
1719          }
1720        status = loop;
1721        break;
1722    
1723        case loop:
1724        if (cc >= ccend)
1725          {
1726          status = end;
1727          break;
1728          }
1729    
1730        switch(*cc)
1731          {
1732          case OP_KET:
1733          if (PRIVATE_DATA(cc) != 0)
1734            {
1735            count = 1;
1736            srcw[0] = PRIVATE_DATA(cc);
1737            SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1738            cc += PRIVATE_DATA(cc + 1);
1739            }
1740          cc += 1 + LINK_SIZE;
1741          break;
1742    
1743          case OP_ASSERT:
1744          case OP_ASSERT_NOT:
1745          case OP_ASSERTBACK:
1746          case OP_ASSERTBACK_NOT:
1747          case OP_ONCE:
1748          case OP_ONCE_NC:
1749          case OP_BRAPOS:
1750          case OP_SBRA:
1751          case OP_SBRAPOS:
1752          case OP_SCOND:
1753          count = 1;
1754          srcw[0] = PRIVATE_DATA(cc);
1755          SLJIT_ASSERT(srcw[0] != 0);
1756          cc += 1 + LINK_SIZE;
1757          break;
1758    
1759          case OP_CBRA:
1760          case OP_SCBRA:
1761          if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1762            {
1763            count = 1;
1764            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1765            }
1766          cc += 1 + LINK_SIZE + IMM2_SIZE;
1767          break;
1768    
1769          case OP_CBRAPOS:
1770          case OP_SCBRAPOS:
1771          count = 2;
1772          srcw[0] = PRIVATE_DATA(cc);
1773          srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1774          SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1775          cc += 1 + LINK_SIZE + IMM2_SIZE;
1776          break;
1777    
1778          case OP_COND:
1779          /* Might be a hidden SCOND. */
1780          alternative = cc + GET(cc, 1);
1781          if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1782            {
1783            count = 1;
1784            srcw[0] = PRIVATE_DATA(cc);
1785            SLJIT_ASSERT(srcw[0] != 0);
1786            }
1787          cc += 1 + LINK_SIZE;
1788          break;
1789    
1790          CASE_ITERATOR_PRIVATE_DATA_1
1791          if (PRIVATE_DATA(cc))
1792            {
1793            count = 1;
1794            srcw[0] = PRIVATE_DATA(cc);
1795            }
1796          cc += 2;
1797    #ifdef SUPPORT_UTF
1798          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1799    #endif
1800          break;
1801    
1802          CASE_ITERATOR_PRIVATE_DATA_2A
1803          if (PRIVATE_DATA(cc))
1804            {
                /* Two adjacent words: the second follows the first. */
1805            count = 2;
1806            srcw[0] = PRIVATE_DATA(cc);
1807            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1808            }
1809          cc += 2;
1810    #ifdef SUPPORT_UTF
1811          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1812    #endif
1813          break;
1814    
1815          CASE_ITERATOR_PRIVATE_DATA_2B
1816          if (PRIVATE_DATA(cc))
1817            {
1818            count = 2;
1819            srcw[0] = PRIVATE_DATA(cc);
1820            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1821            }
1822          cc += 2 + IMM2_SIZE;
1823    #ifdef SUPPORT_UTF
1824          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1825    #endif
1826          break;
1827    
1828          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1829          if (PRIVATE_DATA(cc))
1830            {
1831            count = 1;
1832            srcw[0] = PRIVATE_DATA(cc);
1833            }
1834          cc += 1;
1835          break;
1836    
1837          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1838          if (PRIVATE_DATA(cc))
1839            {
1840            count = 2;
1841            srcw[0] = PRIVATE_DATA(cc);
1842            srcw[1] = srcw[0] + sizeof(sljit_sw);
1843            }
1844          cc += 1;
1845          break;
1846    
1847          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1848          if (PRIVATE_DATA(cc))
1849            {
1850            count = 2;
1851            srcw[0] = PRIVATE_DATA(cc);
1852            srcw[1] = srcw[0] + sizeof(sljit_sw);
1853            }
1854          cc += 1 + IMM2_SIZE;
1855          break;
1856    
1857          case OP_CLASS:
1858          case OP_NCLASS:
1859    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1860          case OP_XCLASS:
1861          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1862    #else
1863          size = 1 + 32 / (int)sizeof(pcre_uchar);
1864    #endif
              /* The iterator that follows the class decides whether one or
              two words are owned; any other size is treated as a bug. */
1865          if (PRIVATE_DATA(cc))
1866            switch(get_class_iterator_size(cc + size))
1867              {
1868              case 1:
1869              count = 1;
1870              srcw[0] = PRIVATE_DATA(cc);
1871              break;
1872    
1873              case 2:
1874              count = 2;
1875              srcw[0] = PRIVATE_DATA(cc);
1876              srcw[1] = srcw[0] + sizeof(sljit_sw);
1877              break;
1878    
1879              default:
1880              SLJIT_ASSERT_STOP();
1881              break;
1882              }
1883          cc += size;
1884          break;
1885    
1886          default:
1887          cc = next_opcode(common, cc);
1888          SLJIT_ASSERT(cc != NULL);
1889          break;
1890          }
1891        break;
1892    
1893        case end:
1894        SLJIT_ASSERT_STOP();
1895        break;
1896        }
1897    
          /* Move the queued words, highest srcw[] index first. */
1898      while (count > 0)
1899        {
1900        count--;
1901        if (save)
1902          {
              /* Save: write the register's previous value (if any) to the
              stack, then load the next local into that register. */
1903          if (tmp1next)
1904            {
1905            if (!tmp1empty)
1906              {
1907              OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1908              stackptr += sizeof(sljit_sw);
1909              }
1910            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1911            tmp1empty = FALSE;
1912            tmp1next = FALSE;
1913            }
1914          else
1915            {
1916            if (!tmp2empty)
1917              {
1918              OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1919              stackptr += sizeof(sljit_sw);
1920              }
1921            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1922            tmp2empty = FALSE;
1923            tmp1next = TRUE;
1924            }
1925          }
1926        else
1927          {
              /* Restore: store the preloaded register into the local, then
              refill the register from the stack while words remain. */
1928          if (tmp1next)
1929            {
1930            SLJIT_ASSERT(!tmp1empty);
1931            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1932            tmp1empty = stackptr >= stacktop;
1933            if (!tmp1empty)
1934              {
1935              OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936              stackptr += sizeof(sljit_sw);
1937              }
1938            tmp1next = FALSE;
1939            }
1940          else
1941            {
1942            SLJIT_ASSERT(!tmp2empty);
1943            OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1944            tmp2empty = stackptr >= stacktop;
1945            if (!tmp2empty)
1946              {
1947              OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1948              stackptr += sizeof(sljit_sw);
1949              }
1950            tmp1next = TRUE;
1951            }
1952          }
1953        }
1954      }
1955    while (status != end);
1956    
        /* On save, write out whatever is still held in the two registers,
        starting with the register that would have been reused next. */
1957    if (save)
1958      {
1959      if (tmp1next)
1960        {
1961        if (!tmp1empty)
1962          {
1963          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1964          stackptr += sizeof(sljit_sw);
1965          }
1966        if (!tmp2empty)
1967          {
1968          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1969          stackptr += sizeof(sljit_sw);
1970          }
1971        }
1972      else
1973        {
1974        if (!tmp2empty)
1975          {
1976          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1977          stackptr += sizeof(sljit_sw);
1978          }
1979        if (!tmp1empty)
1980          {
1981          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1982          stackptr += sizeof(sljit_sw);
1983          }
1984        }
1985      }
        /* The whole range was walked and the stack area was used exactly. */
1986    SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1987    }
1988    
1989    static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1990    {
1991    pcre_uchar *end = bracketend(cc);
1992    BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1993    
1994    /* Assert captures then. */
1995    if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1996      current_offset = NULL;
1997    /* Conditional block does not. */
1998    if (*cc == OP_COND || *cc == OP_SCOND)
1999      has_alternatives = FALSE;
2000    
2001    cc = next_opcode(common, cc);
2002    if (has_alternatives)
2003      current_offset = common->then_offsets + (cc - common->start);
2004    
2005    while (cc < end)
2006      {
2007      if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2008        cc = set_then_offsets(common, cc, current_offset);
2009      else
2010        {
2011        if (*cc == OP_ALT && has_alternatives)
2012          current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2013        if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2014          *current_offset = 1;
2015        cc = next_opcode(common, cc);
2016        }
2017      }
2018    
2019    return end;
2020    }
2021    
/* Drop the CASE_ITERATOR_* helper macros; they serve as case labels in the
private data switch statements above. */
2022    #undef CASE_ITERATOR_PRIVATE_DATA_1
2023    #undef CASE_ITERATOR_PRIVATE_DATA_2A
2024    #undef CASE_ITERATOR_PRIVATE_DATA_2B
2025    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2026    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2027    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2028    
2029    static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2030    {
2031    return (value & (value - 1)) == 0;
2032    }
2033    
2034    static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2035    {
2036    while (list)
2037      {
2038      /* sljit_set_label is clever enough to do nothing
2039      if either the jump or the label is NULL. */
2040      SET_LABEL(list->jump, label);
2041      list = list->next;
2042      }
2043    }
2044    
2045    static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2046    {
2047    jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2048    if (list_item)
2049      {
2050      list_item->next = *list;
2051      list_item->jump = jump;
2052      *list = list_item;
2053      }
2054    }
2055    
2056    static void add_stub(compiler_common *common, struct sljit_jump *start)
2057    {
2058    DEFINE_COMPILER;
2059    stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2060    
2061    if (list_item)
2062      {
2063      list_item->start = start;
2064      list_item->quit = LABEL();
2065      list_item->next = common->stubs;
2066      common->stubs = list_item;
2067      }
2068    }
2069    
2070    static void flush_stubs(compiler_common *common)
2071    {
2072    DEFINE_COMPILER;
2073    stub_list *list_item = common->stubs;
2074    
2075    while (list_item)
2076      {
2077      JUMPHERE(list_item->start);
2078      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2079      JUMPTO(SLJIT_JUMP, list_item->quit);
2080      list_item = list_item->next;
2081      }
2082    common->stubs = NULL;
2083    }
2084    
2085    static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2086    {
2087    DEFINE_COMPILER;
2088    label_addr_list *label_addr;
2089    
2090    label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2091    if (label_addr == NULL)
2092      return;
2093    label_addr->label = LABEL();
2094    label_addr->update_addr = update_addr;
2095    label_addr->next = common->label_addrs;
2096    common->label_addrs = label_addr;
2097    }
2098    
2099    static SLJIT_INLINE void count_match(compiler_common *common)
2100    {
2101    DEFINE_COMPILER;
2102    
2103    OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2104    add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2105    }
2106    
2107    static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2108    {
2109    /* May destroy all locals and registers except TMP2. */
2110    DEFINE_COMPILER;
2111    
2112    SLJIT_ASSERT(size > 0);
2113    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2114    #ifdef DESTROY_REGISTERS
2115    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2116    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2117    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2118    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2119    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2120    #endif
2121    add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2122    }
2123    
2124    static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2125    {
2126    DEFINE_COMPILER;
2127    
2128    SLJIT_ASSERT(size > 0);
2129    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2130    }
2131    
2132    static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2133    {
2134    DEFINE_COMPILER;
2135    sljit_uw *result;
2136    
2137    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2138      return NULL;
2139    
2140    result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2141    if (SLJIT_UNLIKELY(result == NULL))
2142      {
2143      sljit_set_compiler_memory_error(compiler);
2144      return NULL;
2145      }
2146    
2147    *(void**)result = common->read_only_data_head;
2148    common->read_only_data_head = (void *)result;
2149    return result + 1;
2150    }
2151    
2152    static void free_read_only_data(void *current, void *allocator_data)
2153    {
2154    void *next;
2155    
2156    SLJIT_UNUSED_ARG(allocator_data);
2157    
2158    while (current != NULL)
2159      {
2160      next = *(void**)current;
2161      SLJIT_FREE(current, allocator_data);
2162      current = next;
2163      }
2164    }
2165    
2166    static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2167    {
2168    DEFINE_COMPILER;
2169    struct sljit_label *loop;
2170    int i;
2171    
2172    /* At this point we can freely use all temporary registers. */
2173    SLJIT_ASSERT(length > 1);
2174    /* TMP1 returns with begin - 1. */
2175    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2176    if (length < 8)
2177      {
2178      for (i = 1; i < length; i++)
2179        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2180      }
2181    else
2182      {
2183      GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2184      OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2185      loop = LABEL();
2186      OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2187      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2188      JUMPTO(SLJIT_NOT_ZERO, loop);
2189      }
2190    }
2191    
2192    static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2193    {
2194    DEFINE_COMPILER;
2195    struct sljit_label *loop;
2196    int i;
2197    
2198    SLJIT_ASSERT(length > 1);
2199    /* OVECTOR(1) contains the "string begin - 1" constant. */
2200    if (length > 2)
2201      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2202    if (length < 8)
2203      {
2204      for (i = 2; i < length; i++)
2205        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2206      }
2207    else
2208      {
2209      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2210      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2211      loop = LABEL();
2212      OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2213      OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2214      JUMPTO(SLJIT_NOT_ZERO, loop);
2215      }
2216    
2217    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2218    if (common->mark_ptr != 0)
2219      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2220    if (common->control_head_ptr != 0)
2221      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2222    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2223    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2224    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2225    }
2226    
2227    static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2228    {
2229    while (current != NULL)
2230      {
2231      switch (current[-2])
2232        {
2233        case type_then_trap:
2234        break;
2235    
2236        case type_mark:
2237        if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2238          return current[-4];
2239        break;
2240    
2241        default:
2242        SLJIT_ASSERT_STOP();
2243        break;
2244        }
2245      SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2246      current = (sljit_sw*)current[-1];
2247      }
2248    return -1;
2249    }
2250    
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code that runs after a successful match: it converts the
pointer values kept in the local ovector into int offsets relative to
the subject begin, writes them to the caller supplied offsets array
(offset_count entries), stores the mark pointer into the arguments when
marks are in use, and leaves the match return value in SLJIT_RETURN_REG.
topbracket is used only for computing that return value. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* Remember the old OVECTOR(1) value in SLJIT_S2 (it is compared against
below), then overwrite OVECTOR(1) with the current STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
/* SLJIT_R1 = number of entries to copy. */
OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2 = offsets - sizeof(int): the store in the loop addresses the
destination with a sizeof(int) displacement (NOTE(review): SLJIT_MOVU_SI
presumably also advances SLJIT_R2 by that displacement - confirm in sljit). */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* SLJIT_R0 = subject begin, SLJIT_S0 = address of the local ovector. */
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();
/* SLJIT_S1 = current ovector pointer - begin; then step to the next slot. */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte distance into a code unit count. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan the ovector backwards, two slots at a time, starting from the
  pair of the highest bracket. SLJIT_R1 starts at topbracket + 1 and is
  decremented once per inspected pair; the scan continues while the
  loaded slot still equals the value saved in SLJIT_S2. */
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  }
else
  /* No capturing pair above the first one can be set: always return 1. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2300    
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the exit sequence for a partial match. The return register is
set to PCRE_ERROR_PARTIAL and, depending on how many entries the caller's
offsets array has (real_offset_count), up to three offsets relative to
the subject begin are stored before jumping to quit:
  offsets[0] - start_used_ptr (hard partial) or hit_start (soft partial)
  offsets[1] - STR_END
  offsets[2] - start_ptr (hard partial) or the slot after hit_start (soft)
SLJIT_S0, SLJIT_S1, SLJIT_R1 and SLJIT_R2 are overwritten. */
DEFINE_COMPILER;
struct sljit_jump *jump;

/* STR_END is read below just before SLJIT_S1 is reused as a scratch
destination, hence the requirement that they are the same register. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output entries: nothing can be stored. */
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* offsets[2] is written only when at least three entries are available. */
jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

/* SLJIT_R2 = pointer that becomes offsets[0]; SLJIT_S1 = STR_END - begin. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2342    
2343    static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2344    {
2345    /* May destroy TMP1. */
2346    DEFINE_COMPILER;
2347    struct sljit_jump *jump;
2348    
2349    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2350      {
2351      /* The value of -1 must be kept for start_used_ptr! */
2352      OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2353      /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2354      is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2355      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2356      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2357      JUMPHERE(jump);
2358      }
2359    else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2360      {
2361      jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2362      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363      JUMPHERE(jump);
2364      }
2365    }
2366    
2367    static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2368    {
2369    /* Detects if the character has an othercase. */
2370    unsigned int c;
2371    
2372    #ifdef SUPPORT_UTF
2373    if (common->utf)
2374      {
2375      GETCHAR(c, cc);
2376      if (c > 127)
2377        {
2378    #ifdef SUPPORT_UCP
2379        return c != UCD_OTHERCASE(c);
2380    #else
2381        return FALSE;
2382    #endif
2383        }
2384    #ifndef COMPILE_PCRE8
2385      return common->fcc[c] != c;
2386    #endif
2387      }
2388    else
2389    #endif
2390      c = *cc;
2391    return MAX_255(c) ? common->fcc[c] != c : FALSE;
2392    }
2393    
2394    static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2395    {
2396    /* Returns with the othercase. */
2397    #ifdef SUPPORT_UTF
2398    if (common->utf && c > 127)
2399      {
2400    #ifdef SUPPORT_UCP
2401      return UCD_OTHERCASE(c);
2402    #else
2403      return c;
2404    #endif
2405      }
2406    #endif
2407    return TABLE_GET(c, common->fcc, c);
2408    }
2409    
2410    static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2411    {
2412    /* Detects if the character and its othercase has only 1 bit difference. */
2413    unsigned int c, oc, bit;
2414    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2415    int n;
2416    #endif
2417    
2418    #ifdef SUPPORT_UTF
2419    if (common->utf)
2420      {
2421      GETCHAR(c, cc);
2422      if (c <= 127)
2423        oc = common->fcc[c];
2424      else
2425        {
2426    #ifdef SUPPORT_UCP
2427        oc = UCD_OTHERCASE(c);
2428    #else
2429        oc = c;
2430    #endif
2431        }
2432      }
2433    else
2434      {
2435      c = *cc;
2436      oc = TABLE_GET(c, common->fcc, c);
2437      }
2438    #else
2439    c = *cc;
2440    oc = TABLE_GET(c, common->fcc, c);
2441    #endif
2442    
2443    SLJIT_ASSERT(c != oc);
2444    
2445    bit = c ^ oc;
2446    /* Optimized for English alphabet. */
2447    if (c <= 127 && bit == 0x20)
2448      return (0 << 8) | 0x20;
2449    
2450    /* Since c != oc, they must have at least 1 bit difference. */
2451    if (!is_powerof2(bit))
2452      return 0;
2453    
2454    #if defined COMPILE_PCRE8
2455    
2456    #ifdef SUPPORT_UTF
2457    if (common->utf && c > 127)
2458      {
2459      n = GET_EXTRALEN(*cc);
2460      while ((bit & 0x3f) == 0)
2461        {
2462        n--;
2463        bit >>= 6;
2464        }
2465      return (n << 8) | bit;
2466      }
2467    #endif /* SUPPORT_UTF */
2468    return (0 << 8) | bit;
2469    
2470    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2471    
2472    #ifdef SUPPORT_UTF
2473    if (common->utf && c > 65535)
2474      {
2475      if (bit >= (1 << 10))
2476        bit >>= 10;
2477      else
2478        return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2479      }
2480    #endif /* SUPPORT_UTF */
2481    return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2482    
2483    #endif /* COMPILE_PCRE[8|16|32] */
2484    }
2485    
2486    static void check_partial(compiler_common *common, BOOL force)
2487    {
2488    /* Checks whether a partial matching is occurred. Does not modify registers. */
2489    DEFINE_COMPILER;
2490    struct sljit_jump *jump = NULL;
2491    
2492    SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2493    
2494    if (common->mode == JIT_COMPILE)
2495      return;
2496    
2497    if (!force)
2498      jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2499    else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2500      jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2501    
2502    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2503      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2504    else
2505      {
2506      if (common->partialmatchlabel != NULL)
2507        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2508      else
2509        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2510      }
2511    
2512    if (jump != NULL)
2513      JUMPHERE(jump);
2514    }
2515    
2516    static void check_str_end(compiler_common *common, jump_list **end_reached)
2517    {
2518    /* Does not affect registers. Usually used in a tight spot. */
2519    DEFINE_COMPILER;
2520    struct sljit_jump *jump;
2521    
2522    if (common->mode == JIT_COMPILE)
2523      {
2524      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2525      return;
2526      }
2527    
2528    jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2529    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2530      {
2531      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2532      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2533      add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2534      }
2535    else
2536      {
2537      add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2538      if (common->partialmatchlabel != NULL)
2539        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2540      else
2541        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2542      }
2543    JUMPHERE(jump);
2544    }
2545    
2546    static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2547    {
2548    DEFINE_COMPILER;
2549    struct sljit_jump *jump;
2550    
2551    if (common->mode == JIT_COMPILE)
2552      {
2553      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2554      return;
2555      }
2556    
2557    /* Partial matching mode. */
2558    jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2559    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2560    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2561      {
2562      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2563      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2564      }
2565    else
2566      {
2567      if (common->partialmatchlabel != NULL)
2568        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2569      else
2570        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2571      }
2572    JUMPHERE(jump);
2573    }
2574    
2575    static void peek_char(compiler_common *common, pcre_uint32 max)
2576    {
2577    /* Reads the character into TMP1, keeps STR_PTR.
2578    Does not check STR_END. TMP2 Destroyed. */
2579    DEFINE_COMPILER;
2580    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2581    struct sljit_jump *jump;
2582    #endif
2583    
2584    SLJIT_UNUSED_ARG(max);
2585    
2586    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2587    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2588    if (common->utf)
2589      {
2590      if (max < 128) return;
2591    
2592      jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2593      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2594      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2595      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2596      JUMPHERE(jump);
2597      }
2598    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2599    
2600    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2601    if (common->utf)
2602      {
2603      if (max < 0xd800) return;
2604    
2605      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2606      jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2607      /* TMP2 contains the high surrogate. */
2608      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2609      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2610      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2611      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2612      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2613      JUMPHERE(jump);
2614      }
2615    #endif
2616    }
2617    
2618    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2619    
2620    static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2621    {
2622    /* Tells whether the character codes below 128 are enough
2623    to determine a match. */
2624    const sljit_ub value = nclass ? 0xff : 0;
2625    const sljit_ub *end = bitset + 32;
2626    
2627    bitset += 16;
2628    do
2629      {
2630      if (*bitset++ != value)
2631        return FALSE;
2632      }
2633    while (bitset < end);
2634    return TRUE;
2635    }
2636    
2637    static void read_char7_type(compiler_common *common, BOOL full_read)
2638    {
2639    /* Reads the precise character type of a character into TMP1, if the character
2640    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2641    full_read argument tells whether characters above max are accepted or not. */
2642    DEFINE_COMPILER;
2643    struct sljit_jump *jump;
2644    
2645    SLJIT_ASSERT(common->utf);
2646    
2647    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2648    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649    
2650    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2651    
2652    if (full_read)
2653      {
2654      jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2655      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2657      JUMPHERE(jump);
2658      }
2659    }
2660    
2661    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2662    
static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

The first code unit is always loaded and STR_PTR is always moved over it.
The min / max pair is used at compile time to pick the cheapest decoder
that is still exact inside the range. When update_str_ptr is set, STR_PTR
is moved over the whole character even when its value is not decoded
precisely. TMP2 is used as scratch, and the 8 bit UTF paths may also
overwrite RETURN_ADDR. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* A single byte is enough when only values below 128 matter and the
  rest of the character does not need to be skipped. */
  if (max < 128 && !update_str_ptr) return;

  /* Bytes below 0xc0 are handled as single byte characters. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range. TMP2 = lead byte - 0xf0;
    the decoding is skipped (jump2) when this is above 7. With
    update_str_ptr the number of bytes to skip is fetched from
    utf8_table4 into RETURN_ADDR and added to STR_PTR at the end. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range. Same scheme as above
    with TMP2 = lead byte - 0xe0, skipping the decoding when it is
    above 0xf. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* Mixed lengths: call the shared decoder. The cheaper utfreadchar16
    variant is used when nothing above 0xffff is of interest. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Reached only with update_str_ptr set (see the early return above):
    the value is not needed, only the rest of the character is skipped. */
    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is between 128 and 0x7ff: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Values above 0xffff matter: combine a surrogate pair. The jump is
    taken when the first unit is not in the 0xd800 - 0xdbff range. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    /* STR_PTR already points to the second unit, hence offset 0. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    /* Replace the high surrogate with a value above max (max < 0x10000
    on this path), so it cannot be mistaken for an in-range character. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
2790    
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character without any range restriction: calls
read_char_range with the 0 .. READ_CHAR_MAX range and with STR_PTR
updating enabled. */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
2795    
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the ctypes table, which has entries only for the
values 0..255; TMP1 is set to zero for anything larger. TMP2 is used
for the character value and is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  /* Bytes below 0xc0 are complete: the type loaded above is the result. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Inline decoding that reads exactly one more byte and combines it
    with the lead byte as a two byte sequence. The type is looked up
    only when the combined value is at most 255, otherwise TMP1 stays
    zero. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* The shared utfreadtype8 helper is called for multi-byte characters. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  /* The extra step is taken when the unit just read is in the
  0xd800 - 0xdbff (high surrogate) range. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
2860    
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Without UTF (and always in 32 bit mode) this is a single subtraction
of one code unit. The UTF variants below also step over the remaining
units of a multi-unit character. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one byte at a time while the byte just passed has the
  10xxxxxx form, i.e. (byte & 0xc0) == 0x80. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* Branch free form: TMP1 becomes 1 when (unit & 0xfc00) == 0xdc00 and 0
  otherwise; shifted left by one it is the byte size of one 16 bit unit,
  which is then subtracted from STR_PTR. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
2895    
2896    static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2897    {
2898    /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2899    DEFINE_COMPILER;
2900    struct sljit_jump *jump;
2901    
2902    if (nltype == NLTYPE_ANY)
2903      {
2904      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2905      add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2906      }
2907    else if (nltype == NLTYPE_ANYCRLF)
2908      {
2909      if (jumpifmatch)
2910        {
2911        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2912        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2913        }
2914      else
2915        {
2916        jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2917        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2918        JUMPHERE(jump);
2919        }
2920      }
2921    else
2922      {
2923      SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2924      add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2925      }
2926    }
2927    
2928    #ifdef SUPPORT_UTF
2929    
2930    #if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* This emits a shared helper that is entered with a fast call. On entry
STR_PTR points to the second byte of the sequence; on return it points
after the last one. The sequence length is found while decoding: after
each merge step one bit of the accumulated value still holds a length
marker bit of the lead byte, and that bit is tested and then cleared. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first two bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the lead byte shifted left by six: when it is set
the sequence is longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Clear the marker bit and merge the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 0x10 of the lead byte shifted left by twelve: when it
is set the sequence has four bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2978    
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1. */
/* Reduced variant of the generic decoder: two and three byte sequences
are decoded, and unlike do_utfreadchar no length is returned in TMP2.
For a four byte sequence STR_PTR is still moved over all of its bytes,
but only the bytes of a three byte form are merged into TMP1
(NOTE(review): callers presumably only need such values to fall outside
the range they test - confirm against the users of utfreadchar16). */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first two bytes. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the lead byte shifted left by six: when it is set
the sequence is longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* 0x400 is bit 0x10 of the lead byte shifted left by six. TMP2 becomes 1
when it is set (four byte lead) and 0 otherwise, and is added to STR_PTR
as the extra byte to step over. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3014    
3015    static void do_utfreadtype8(compiler_common *common)
3016    {
3017    /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3018    of the character (>= 0xc0). Return value in TMP1.
        The type is a byte loaded from the common->ctypes table and is only
        looked up for two byte sequences whose value is below 256; in every
        other case TMP1 is set to 0. STR_PTR is advanced past the following
        bytes of the character and TMP2 is destroyed. */
3019    DEFINE_COMPILER;
3020    struct sljit_jump *jump;
3021    struct sljit_jump *compare;
3022    
3023    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3024    
        /* Bit 0x20 of the first byte is clear only for a two byte sequence. */
3025    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3026    jump = JUMP(SLJIT_NOT_ZERO);
3027    /* Two byte sequence. */
3028    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3029    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3030    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3031    /* The upper 5 bits are known at this point. A value above 0x3 here
        means that the decoded character would be 256 or more. */
3032    compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3033    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3034    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3035    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
        /* TMP2 is the character (< 256), used as index into ctypes. */
3036    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3037    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3038    
3039    JUMPHERE(compare);
3040    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3041    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3042    
3043    /* We only have types for characters less than 256. */
3044    JUMPHERE(jump);
        /* The step is loaded from utf8_table4, indexed by first byte - 0xc0
        (NOTE(review): presumably the number of following bytes; confirm
        against the table definition). */
3045    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3046    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3047    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3048    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3049    }
3050    
3051    #endif /* COMPILE_PCRE8 */
3052    
3053    #endif /* SUPPORT_UTF */
3054    
3055    #ifdef SUPPORT_UCP
3056    
3057    /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3058    #define UCD_BLOCK_MASK 127
3059    #define UCD_BLOCK_SHIFT 7
3060    
3061    static void do_getucd(compiler_common *common)
3062    {
3063    /* Looks up the UCD record of the character passed in TMP1.
3064    Returns chartype in TMP1 and UCD offset in TMP2. The UCD offset is the
        16 bit value loaded from ucd_stage2, which is then used as the index
        of the record in ucd_records. */
3065    DEFINE_COMPILER;
3066    
3067    SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3068    
3069    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
        /* Stage 1: the block number (character >> UCD_BLOCK_SHIFT) selects a
        byte in ucd_stage1. */
3070    OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3071    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
        /* Stage 2 index: (stage 1 value << UCD_BLOCK_SHIFT) plus the position
        of the character inside its block. */
3072    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3073    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3074    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3075    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
        /* NOTE(review): the trailing 1 is presumably the index shift of
        SLJIT_MEM2 (16 bit entries); confirm against sljitLir.h. */
3076    OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3077    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
        /* NOTE(review): the trailing 3 presumably scales the index by 8,
        which is the record size asserted above. */
3078    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3079    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3080    }
3081    #endif
3082    
3083    static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3084    {
        /* Emits the code which moves STR_PTR to the next position and returns
        the label of that code. Jumping to the returned label advances STR_PTR
        by one code unit, plus the remaining units of a multi-unit character
        in UTF mode (8 and 16 bit builds). When newlinecheck is set and the
        current unit equals the first unit of the newline, the following unit
        is skipped as well if it equals the second unit of the newline.
        Falling into the emitted code from above takes the start jump, which
        bypasses the advance; when firstline is set, the end of the first line
        is searched for before that and stored in the first_line_end local.
        TMP1 and TMP2 (and TMP3 when firstline is set) are used as scratch. */
3085    DEFINE_COMPILER;
3086    struct sljit_label *mainloop;
3087    struct sljit_label *newlinelabel = NULL;
3088    struct sljit_jump *start;
3089    struct sljit_jump *end = NULL;
3090    struct sljit_jump *nl = NULL;
3091    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3092    struct sljit_jump *singlechar;
3093    #endif
3094    jump_list *newline = NULL;
3095    BOOL newlinecheck = FALSE;
3096    BOOL readuchar = FALSE;
3097    
3098    if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3099        common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3100      newlinecheck = TRUE;
3101    
3102    if (firstline)
3103      {
3104      /* Search for the end of the first line. STR_PTR is saved in TMP3
          and restored after the search. */
3105      SLJIT_ASSERT(common->first_line_end != 0);
3106      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3107    
3108      if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3109        {
            /* Two unit fixed newline: compare each adjacent pair of units.
            The stored value is STR_PTR minus one unit, which is the position
            of the first newline unit when the pair was found. */
3110        mainloop = LABEL();
3111        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3112        end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3113        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3114        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3115        CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3116        CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3117        JUMPHERE(end);
3118        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119        }
3120      else
3121        {
3122        end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3123        mainloop = LABEL();
3124        /* Continual stores do not cause data dependency. */
3125        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3126        read_char_range(common, common->nlmin, common->nlmax, TRUE);
3127        check_newlinechar(common, common->nltype, &newline, TRUE);
3128        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3129        JUMPHERE(end);
3130        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3131        set_jumps(newline, LABEL());
3132        }
3133    
3134      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3135      }
3136    
        /* Code falling in from above must not advance STR_PTR: jump to the
        end of the emitted code. */
3137    start = JUMP(SLJIT_JUMP);
3138    
3139    if (newlinecheck)
3140      {
          /* Reached from the main loop when the current unit equals the first
          unit of the newline: step over it, then add one more unit if the
          next unit equals the second unit of the newline. */
3141      newlinelabel = LABEL();
3142      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3143      end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3144      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3145      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3146      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3147    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3148      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3149    #endif
3150      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3151      nl = JUMP(SLJIT_JUMP);
3152      }
3153    
3154    mainloop = LABEL();
3155    
3156    /* Increasing the STR_PTR here requires one less jump in the most common case. */
3157    #ifdef SUPPORT_UTF
3158    if (common->utf) readuchar = TRUE;
3159    #endif
3160    if (newlinecheck) readuchar = TRUE;
3161    
3162    if (readuchar)
3163      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3164    
3165    if (newlinecheck)
3166      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3167    
3168    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3169    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3170    #if defined COMPILE_PCRE8
3171    if (common->utf)
3172      {
          /* Units of 0xc0 and above add a further step loaded from
          utf8_table4. */
3173      singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3174      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3175      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3176      JUMPHERE(singlechar);
3177      }
3178    #elif defined COMPILE_PCRE16
3179    if (common->utf)
3180      {
          /* A unit whose top six bits equal 0xd800 adds one more 16 bit
          unit (two bytes) to STR_PTR. */
3181      singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3182      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3183      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3184      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3185      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3186      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3187      JUMPHERE(singlechar);
3188      }
3189    #endif /* COMPILE_PCRE[8|16] */
3190    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3191    JUMPHERE(start);
3192    
3193  if (save)  if (newlinecheck)
3194    {    {
3195    if (tmp1next)    JUMPHERE(end);
3196      {    JUMPHERE(nl);
     if (!tmp1empty)  
       {  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);  
       stackptr += sizeof(sljit_w);  
       }  
     if (!tmp2empty)  
       {  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);  
       stackptr += sizeof(sljit_w);  
       }  
     }  
   else  
     {  
     if (!tmp2empty)  
       {  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);  
       stackptr += sizeof(sljit_w);  
       }  
     if (!tmp1empty)  
       {  
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);  
       stackptr += sizeof(sljit_w);  
       }  
     }  
3197    }    }
 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  
 }  
3198    
3199  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  return mainloop;
 {  
 return (value & (value - 1)) == 0;  
3200  }  }
3201    
3202  static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)  #define MAX_N_CHARS 16
3203  {  #define MAX_N_BYTES 8
 while (list)  
   {  
   /* sljit_set_label is clever enough to do nothing  
   if either the jump or the label is NULL */  
   sljit_set_label(list->jump, label);  
   list = list->next;  
   }  
 }  
3204    
3205  static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)  static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3206  {  {
        /* add_prefix_byte (right hand column): adds byte to the set stored in
        bytes. bytes[0] is the number of values kept in bytes[1..bytes[0]];
        the value 255 marks a set which has overflowed and is never changed
        again. A byte which is already present is not stored twice. At most
        MAX_N_BYTES - 1 values are kept: adding one more distinct value
        sets bytes[0] to 255 instead. */
3207  jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));  pcre_uint8 len = bytes[0];
3208  if (list_item)  int i;
3209    
3210    if (len == 255)
3211      return;
3212    
3213    if (len == 0)
3214    {    {
3215    list_item->next = *list;    bytes[0] = 1;
3216    list_item->jump = jump;    bytes[1] = byte;
3217    *list = list_item;    return;
3218    }    }
 }  
3219    
3220  static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)  for (i = len; i > 0; i--)
3221  {    if (bytes[i] == byte)
3222  DEFINE_COMPILER;      return;
 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));  
3223    
3224  if (list_item)  if (len >= MAX_N_BYTES - 1)
3225    {    {
3226    list_item->type = type;    bytes[0] = 255;
3227    list_item->data = data;    return;
   list_item->start = start;  
   list_item->leave = LABEL();  
   list_item->next = common->stubs;  
   common->stubs = list_item;  
3228    }    }
 }  
3229    
3230  static void flush_stubs(compiler_common *common)  len++;
3231  {  bytes[len] = byte;
3232  DEFINE_COMPILER;  bytes[0] = len;
3233  stub_list* list_item = common->stubs;  }
3234    
3235    static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3236    {
3237    /* Recursive function, which scans prefix literals. */
3238    BOOL last, any, caseless;
3239    int len, repeat, len_save, consumed = 0;
3240    pcre_uint32 chr, mask;
3241    pcre_uchar *alternative, *cc_save, *oc;
3242    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3243    pcre_uchar othercase[8];
3244    #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3245    pcre_uchar othercase[2];
3246    #else
3247    pcre_uchar othercase[1];
3248    #endif
3249    
3250  while (list_item)  repeat = 1;
3251    while (TRUE)
3252    {    {
3253    JUMPHERE(list_item->start);    if (*rec_count == 0)
3254    switch(list_item->type)      return 0;
3255      (*rec_count)--;
3256    
3257      last = TRUE;
3258      any = FALSE;
3259      caseless = FALSE;
3260    
3261      switch (*cc)
3262      {      {
3263      case stack_alloc:      case OP_CHARI:
3264      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));      caseless = TRUE;
3265        case OP_CHAR:
3266        last = FALSE;
3267        cc++;
3268      break;      break;
3269    
3270      case max_index:      case OP_SOD:
3271      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), MAX_INDEX, SLJIT_IMM, list_item->data);      case OP_SOM:
3272      break;      case OP_SET_SOM:
3273      }      case OP_NOT_WORD_BOUNDARY:
3274    JUMPTO(SLJIT_JUMP, list_item->leave);      case OP_WORD_BOUNDARY:
3275    list_item = list_item->next;      case OP_EODN:
3276    }      case OP_EOD:
3277  common->stubs = NULL;      case OP_CIRC:
3278  }      case OP_CIRCM:
3279        case OP_DOLL:
3280        case OP_DOLLM:
3281        /* Zero width assertions. */
3282        cc++;
3283        continue;
3284    
3285  static SLJIT_INLINE void decrease_call_count(compiler_common *common)      case OP_ASSERT:
3286  {      case OP_ASSERT_NOT:
3287  DEFINE_COMPILER;      case OP_ASSERTBACK:
3288        case OP_ASSERTBACK_NOT:
3289        cc = bracketend(cc);
3290        continue;
3291    
3292  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);      case OP_PLUSI:
3293  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));      case OP_MINPLUSI:
3294  }      case OP_POSPLUSI:
3295        caseless = TRUE;
3296        case OP_PLUS:
3297        case OP_MINPLUS:
3298        case OP_POSPLUS:
3299        cc++;
3300        break;
3301    
3302  static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)      case OP_EXACTI:
3303  {      caseless = TRUE;
3304  /* May destroy all locals and registers except TMP2. */      case OP_EXACT:
3305  DEFINE_COMPILER;      repeat = GET2(cc, 1);
3306        last = FALSE;
3307        cc += 1 + IMM2_SIZE;
3308        break;
3309    
3310  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));      case OP_QUERYI:
3311  #ifdef DESTROY_REGISTERS      case OP_MINQUERYI:
3312  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);      case OP_POSQUERYI:
3313  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);      caseless = TRUE;
3314  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);      case OP_QUERY:
3315  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);      case OP_MINQUERY:
3316  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);      case OP_POSQUERY:
3317        len = 1;
3318        cc++;
3319    #ifdef SUPPORT_UTF
3320        if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3321  #endif  #endif
3322  add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));      max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3323  }      if (max_chars == 0)
3324          return consumed;
3325        last = FALSE;
3326        break;
3327    
3328  static SLJIT_INLINE void free_stack(compiler_common *common, int size)      case OP_KET:
3329  {      cc += 1 + LINK_SIZE;
3330  DEFINE_COMPILER;      continue;
 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  
 }  
3331    
3332  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)      case OP_ALT:
3333  {      cc += GET(cc, 1);
3334  DEFINE_COMPILER;      continue;
 struct sljit_label *loop;  
 int i;  
 /* At this point we can freely use all temporary registers. */  
 /* TMP1 returns with begin - 1. */  
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), MAX_INDEX, SLJIT_IMM, 1);  
 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);  
 if (length < 8)  
   {  
   for (i = 0; i < length; i++)  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);  
   }  
 else  
   {  
   OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));  
   OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);  
   loop = LABEL();  
   OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);  
   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);  
   JUMPTO(SLJIT_C_NOT_ZERO, loop);  
   }  
 }  
3335    
3336  static SLJIT_INLINE void copy_ovector(compiler_common *common)      case OP_ONCE:
3337  {      case OP_ONCE_NC:
3338  DEFINE_COMPILER;      case OP_BRA:
3339  struct sljit_label *loop;      case OP_BRAPOS:
3340  struct sljit_jump *earlyexit;      case OP_CBRA:
3341        case OP_CBRAPOS:
3342        alternative = cc + GET(cc, 1);
3343        while (*alternative == OP_ALT)
3344          {
3345          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3346          if (max_chars == 0)
3347            return consumed;
3348          alternative += GET(alternative, 1);
3349          }
3350    
3351  /* At this point we can freely use all registers. */      if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3352  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);        cc += IMM2_SIZE;
3353  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));      cc += 1 + LINK_SIZE;
3354  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));      continue;
 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));  
 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);  
 /* Unlikely, but possible */  
 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);  
 loop = LABEL();  
 OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);  
 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));  
 /* Copy the integer value to the output buffer */  
 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);  
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);  
 JUMPTO(SLJIT_C_NOT_ZERO, loop);  
 JUMPHERE(earlyexit);  
 }  
3355    
3356  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, uschar* cc)      case OP_CLASS:
3357  {  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3358  /* Detects if the character has an othercase. */      if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3359  unsigned int c;  #endif
3360        any = TRUE;
3361        cc += 1 + 32 / sizeof(pcre_uchar);
3362        break;
3363    
3364  #ifdef SUPPORT_UTF8      case OP_NCLASS:
3365  if (common->utf8)  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3366    {      if (common->utf) return consumed;
   GETCHAR(c, cc);  
   if (c > 127)  
     {  
 #ifdef SUPPORT_UCP  
     return c != UCD_OTHERCASE(c);  
 #else  
     return FALSE;  
3367  #endif  #endif
3368      }      any = TRUE;
3369    }      cc += 1 + 32 / sizeof(pcre_uchar);
3370  else      break;
3371    
3372    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3373        case OP_XCLASS:
3374    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375        if (common->utf) return consumed;
3376    #endif
3377        any = TRUE;
3378        cc += GET(cc, 1);
3379        break;
3380  #endif  #endif
   c = *cc;  
 return common->fcc[c] != c;  
 }  
3381    
3382  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)      case OP_DIGIT:
3383  {  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3384  /* Returns with the othercase. */      if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3385  #ifdef SUPPORT_UTF8        return consumed;
 if (common->utf8 && c > 127)  
   {  
 #ifdef SUPPORT_UCP  
   return UCD_OTHERCASE(c);  
 #else  
   return c;  
3386  #endif  #endif
3387    }      any = TRUE;
3388        cc++;
3389        break;
3390    
3391        case OP_WHITESPACE:
3392    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3393        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3394          return consumed;
3395  #endif  #endif
3396  return common->fcc[c];      any = TRUE;
3397  }      cc++;
3398        break;
3399    
3400  static unsigned int char_get_othercase_bit(compiler_common *common, uschar* cc)      case OP_WORDCHAR:
3401  {  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3402  /* Detects if the character and its othercase has only 1 bit difference. */      if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3403  unsigned int c, oc, bit;        return consumed;
 #ifdef SUPPORT_UTF8  
 int n;  
3404  #endif  #endif
3405        any = TRUE;
3406        cc++;
3407        break;
3408    
3409        case OP_NOT:
3410        case OP_NOTI:
3411        cc++;
3412        /* Fall through. */
3413        case OP_NOT_DIGIT:
3414        case OP_NOT_WHITESPACE:
3415        case OP_NOT_WORDCHAR:
3416        case OP_ANY:
3417        case OP_ALLANY:
3418    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3419        if (common->utf) return consumed;
3420    #endif
3421        any = TRUE;
3422        cc++;
3423        break;
3424    
 #ifdef SUPPORT_UTF8  
 if (common->utf8)  
   {  
   GETCHAR(c, cc);  
   if (c <= 127)  
     oc = common->fcc[c];  
   else  
     {  
3425  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3426      oc = UCD_OTHERCASE(c);      case OP_NOTPROP:
3427  #else      case OP_PROP:
3428      oc = c;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3429        if (common->utf) return consumed;
3430  #endif  #endif
3431      }      any = TRUE;
3432    }      cc += 1 + 2;
3433  else      break;
   {  
   c = *cc;  
   oc = common->fcc[c];  
   }  
 #else  
 c = *cc;  
 oc = common->fcc[c];  
3434  #endif  #endif
3435    
3436  SLJIT_ASSERT(c != oc);      case OP_TYPEEXACT:
3437        repeat = GET2(cc, 1);
3438        cc += 1 + IMM2_SIZE;
3439        continue;
3440    
3441  bit = c ^ oc;      case OP_NOTEXACT:
3442  /* Optimized for English alphabet. */      case OP_NOTEXACTI:
3443  if (c <= 127 && bit == 0x20)  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3444    return (0 << 8) | 0x20;      if (common->utf) return consumed;
3445    #endif
3446        any = TRUE;