/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 792 by ph10, Wed Dec 7 16:44:48 2011 UTC revision 1278 by zherczeg, Tue Mar 12 06:15:04 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2013
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49  #ifdef SUPPORT_JIT  #if defined SUPPORT_JIT
50    
51  /* All-in-one: Since we use the JIT compiler only from here,  /* All-in-one: Since we use the JIT compiler only from here,
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55  #define SLJIT_MALLOC(size) (pcre_malloc)(size)  #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56  #define SLJIT_FREE(ptr) (pcre_free)(ptr)  #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58  #define SLJIT_CONFIG_STATIC 1  #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
# Line 62  system files. */ Line 62  system files. */
62  #include "sljit/sljitLir.c"  #include "sljit/sljitLir.c"
63    
64  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65  #error "Unsupported architecture"  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Defines for debugging purposes. */
 #define LOCAL_SPACE_SIZE 32768  
69    
70    /* 1 - Use unoptimized capturing brackets.
71       2 - Enable capture_last_ptr (includes option 1). */
72    /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* 1 - Always have a control head. */
75    /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77    /* Allocate memory for the regex stack on the real machine stack.
78    Fast, but limited size. */
79    #define MACHINE_STACK_SIZE 32768
80    
81    /* Growth rate for stack allocated by the OS. Should be a multiple
82    of the page size. */
83  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
84    
85  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 82  The code generator follows the recursive Line 94  The code generator follows the recursive
94  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
95  which execute different commands depending on the result of the condition check.  which execute different commands depending on the result of the condition check.
96  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
97  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
98    
99    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
100    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
101    
102  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
103  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
104  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
105  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106  branches on the hot path.  branches on the matching path.
107    
108   Greedy star operator (*) :   Greedy star operator (*) :
109     Hot path: match happens.     Matching path: match happens.
110     Fallback path: match failed.     Backtrack path: match failed.
111   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
112     Hot path: no need to perform a match.     Matching path: no need to perform a match.
113     Fallback path: match is required.     Backtrack path: match is required.
114    
115  The following example shows how the code generated for a capturing bracket  The following example shows how the code generated for a capturing bracket
116  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 108  we have the following regular expression Line 120  we have the following regular expression
120    
121  The generated code will be the following:  The generated code will be the following:
122    
123   A hot path   A matching path
124   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
125   B hot path   B matching path
126   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
127   D hot path   D matching path
128   return with successful match   return with successful match
129    
130   D fallback path   D backtrack path
131   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132   B fallback path   B backtrack path
133   C expected path   C expected path
134   jump to D hot path   jump to D matching path
135   C fallback path   C backtrack path
136   A fallback path   A backtrack path
137    
138   Notice, that the order of fallback code paths are the opposite of the fast   Notice, that the order of backtrack code paths are the opposite of the fast
139   code paths. In this way the topmost value on the stack always belongs   code paths. In this way the topmost value on the stack always belongs
140   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
141   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
142   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
143   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
144  */  */
145    
146  /*  /*
147  Saved stack frames:  Saved stack frames:
148    
149  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
150  when the fallback mechanism is performed. Because of OP_RECURSE, the locals  when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
152  mechanism.  mechanism.
153    
154  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
155  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156    
157  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
158  */  */
159    
160  typedef struct jit_arguments {  typedef struct jit_arguments {
161    /* Pointers first. */    /* Pointers first. */
162    struct sljit_stack *stack;    struct sljit_stack *stack;
163    PCRE_SPTR str;    const pcre_uchar *str;
164    PCRE_SPTR begin;    const pcre_uchar *begin;
165    PCRE_SPTR end;    const pcre_uchar *end;
166    int *offsets;    int *offsets;
167    uschar *ptr;    pcre_uchar *uchar_ptr;
168      pcre_uchar *mark_ptr;
169      void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171    int offsetcount;    int real_offset_count;
172    int calllimit;    int offset_count;
173    uschar notbol;    int call_limit;
174    uschar noteol;    pcre_uint8 notbol;
175    uschar notempty;    pcre_uint8 noteol;
176    uschar notempty_atstart;    pcre_uint8 notempty;
177      pcre_uint8 notempty_atstart;
178  } jit_arguments;  } jit_arguments;
179    
180  typedef struct executable_function {  typedef struct executable_functions {
181    void *executable_func;    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182    pcre_jit_callback callback;    PUBL(jit_callback) callback;
183    void *userdata;    void *userdata;
184    sljit_uw executable_size;    pcre_uint32 top_bracket;
185  } executable_function;    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186    } executable_functions;
187    
188  typedef struct jump_list {  typedef struct jump_list {
189    struct sljit_jump *jump;    struct sljit_jump *jump;
190    struct jump_list *next;    struct jump_list *next;
191  } jump_list;  } jump_list;
192    
 enum stub_types { stack_alloc };  
   
193  typedef struct stub_list {  typedef struct stub_list {
   enum stub_types type;  
   int data;  
194    struct sljit_jump *start;    struct sljit_jump *start;
195    struct sljit_label *leave;    struct sljit_label *quit;
196    struct stub_list *next;    struct stub_list *next;
197  } stub_list;  } stub_list;
198    
199    enum frame_types {
200      no_frame = -1,
201      no_stack = -2
202    };
203    
204    enum control_types {
205      type_commit = 0,
206      type_prune = 1,
207      type_skip = 2,
208      type_skip_arg = 3,
209      type_mark = 4
210    };
211    
212  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
213    
214  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
215  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
216  the arguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
217  of its descendants. */  of its descendants. */
218  typedef struct fallback_common {  typedef struct backtrack_common {
219    /* Concatenation stack. */    /* Concatenation stack. */
220    struct fallback_common *prev;    struct backtrack_common *prev;
221    jump_list *nextfallbacks;    jump_list *nextbacktracks;
222    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
223    struct fallback_common *top;    struct backtrack_common *top;
224    jump_list *topfallbacks;    jump_list *topbacktracks;
225    /* Opcode pointer. */    /* Opcode pointer. */
226    uschar *cc;    pcre_uchar *cc;
227  } fallback_common;  } backtrack_common;
228    
229  typedef struct assert_fallback {  typedef struct assert_backtrack {
230    fallback_common common;    backtrack_common common;
231    jump_list *condfailed;    jump_list *condfailed;
232    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 if a frame is not needed. */
233    int framesize;    int framesize;
234    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
235    int localptr;    int private_data_ptr;
236    /* For iterators. */    /* For iterators. */
237    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
238  } assert_fallback;  } assert_backtrack;
239    
240  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
241    fallback_common common;    backtrack_common common;
242    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
243    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
244    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
245    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
246    /* For greedy ? operator. */    /* For greedy ? operator. */
247    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
248    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
249    union {    union {
250      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
251      jump_list *condfailed;      jump_list *condfailed;
252      assert_fallback *assert;      assert_backtrack *assert;
253      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. Less than 0 if not needed. */
254      int framesize;      int framesize;
255    } u;    } u;
256    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
257    int localptr;    int private_data_ptr;
258  } bracket_fallback;  } bracket_backtrack;
259    
260  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
261    fallback_common common;    backtrack_common common;
262    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
263    int localptr;    int private_data_ptr;
264    /* Reverting stack is needed. */    /* Reverting stack is needed. */
265    int framesize;    int framesize;
266    /* Allocated stack size. */    /* Allocated stack size. */
267    int stacksize;    int stacksize;
268  } bracketpos_fallback;  } bracketpos_backtrack;
269    
270  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
271    fallback_common common;    backtrack_common common;
272    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
273  } braminzero_fallback;  } braminzero_backtrack;
274    
275  typedef struct iterator_fallback {  typedef struct iterator_backtrack {
276    fallback_common common;    backtrack_common common;
277    /* Next iteration. */    /* Next iteration. */
278    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
279  } iterator_fallback;  } iterator_backtrack;
280    
281  typedef struct recurse_entry {  typedef struct recurse_entry {
282    struct recurse_entry *next;    struct recurse_entry *next;
# Line 263  typedef struct recurse_entry { Line 288  typedef struct recurse_entry {
288    int start;    int start;
289  } recurse_entry;  } recurse_entry;
290    
291  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
292    fallback_common common;    backtrack_common common;
293  } recurse_fallback;    BOOL inlined_pattern;
294    } recurse_backtrack;
295    
296    #define MAX_RANGE_SIZE 6
297    
298  typedef struct compiler_common {  typedef struct compiler_common {
299      /* The sljit generic compiler. */
300    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
301    uschar *start;    /* First byte code. */
302    int localsize;    pcre_uchar *start;
303    int *localptrs;    /* Maps private data offset to each opcode. */
304    const uschar *fcc;    int *private_data_ptrs;
305    sljit_w lcc;    /* Tells whether the capturing bracket is optimized. */
306    int cbraptr;    pcre_uint8 *optimized_cbracket;
307      /* Starting offset of private data for capturing brackets. */
308      int cbra_ptr;
309      /* Output vector starting point. Must be divisible by 2. */
310      int ovector_start;
311      /* Last known position of the requested byte. */
312      int req_char_ptr;
313      /* Head of the last recursion. */
314      int recursive_head_ptr;
315      /* First inspected character for partial matching. */
316      int start_used_ptr;
317      /* Starting pointer for partial soft matches. */
318      int hit_start;
319      /* End pointer of the first line. */
320      int first_line_end;
321      /* Points to the marked string. */
322      int mark_ptr;
323      /* Recursive control verb management chain. */
324      int control_head_ptr;
325      /* Points to the last matched capture block index. */
326      int capture_last_ptr;
327      /* Points to the starting position of the current match. */
328      int start_ptr;
329    
330      /* Flipped and lower case tables. */
331      const pcre_uint8 *fcc;
332      sljit_sw lcc;
333      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
334      int mode;
335      /* \K is in the pattern. */
336      BOOL has_set_som;
337      /* (*SKIP:arg) is in the pattern. */
338      BOOL has_skip_arg;
339      /* Needs to know the start position anytime. */
340      BOOL needs_start_ptr;
341      /* Currently in recurse or assert. */
342      BOOL local_exit;
343      /* Newline control. */
344    int nltype;    int nltype;
345    int newline;    int newline;
346    int bsr_nltype;    int bsr_nltype;
347      /* Dollar endonly. */
348    int endonly;    int endonly;
349    sljit_w ctypes;    /* Tables. */
350      sljit_sw ctypes;
351      int digits[2 + MAX_RANGE_SIZE];
352      /* Named capturing brackets. */
353    sljit_uw name_table;    sljit_uw name_table;
354    sljit_w name_count;    sljit_sw name_count;
355    sljit_w name_entry_size;    sljit_sw name_entry_size;
356    struct sljit_label *acceptlabel;  
357      /* Labels and jump lists. */
358      struct sljit_label *partialmatchlabel;
359      struct sljit_label *quit_label;
360      struct sljit_label *forced_quit_label;
361      struct sljit_label *accept_label;
362    stub_list *stubs;    stub_list *stubs;
363    recurse_entry *entries;    recurse_entry *entries;
364    recurse_entry *currententry;    recurse_entry *currententry;
365      jump_list *partialmatch;
366      jump_list *quit;
367      jump_list *forced_quit;
368    jump_list *accept;    jump_list *accept;
369    jump_list *calllimit;    jump_list *calllimit;
370    jump_list *stackalloc;    jump_list *stackalloc;
# Line 297  typedef struct compiler_common { Line 375  typedef struct compiler_common {
375    jump_list *vspace;    jump_list *vspace;
376    jump_list *casefulcmp;    jump_list *casefulcmp;
377    jump_list *caselesscmp;    jump_list *caselesscmp;
378      jump_list *reset_match;
379    BOOL jscript_compat;    BOOL jscript_compat;
380  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
381    BOOL utf8;    BOOL utf;
382  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
383    BOOL useucp;    BOOL use_ucp;
384    #endif
385    #ifndef COMPILE_PCRE32
386      jump_list *utfreadchar;
387  #endif  #endif
388    jump_list *utf8readchar;  #ifdef COMPILE_PCRE8
389    jump_list *utf8readtype8;    jump_list *utfreadtype8;
390  #endif  #endif
391    #endif /* SUPPORT_UTF */
392  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
393    jump_list *getucd;    jump_list *getucd;
394  #endif  #endif
# Line 317  typedef struct compare_context { Line 400  typedef struct compare_context {
400    int length;    int length;
401    int sourcereg;    int sourcereg;
402  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
403    int byteptr;    int ucharptr;
404    union {    union {
405      int asint;      sljit_si asint;
406      short asshort;      sljit_uh asushort;
407    #if defined COMPILE_PCRE8
408      sljit_ub asbyte;      sljit_ub asbyte;
409      sljit_ub asbytes[4];      sljit_ub asuchars[4];
410    #elif defined COMPILE_PCRE16
411        sljit_uh asuchars[2];
412    #elif defined COMPILE_PCRE32
413        sljit_ui asuchars[1];
414    #endif
415    } c;    } c;
416    union {    union {
417      int asint;      sljit_si asint;
418      short asshort;      sljit_uh asushort;
419    #if defined COMPILE_PCRE8
420      sljit_ub asbyte;      sljit_ub asbyte;
421      sljit_ub asbytes[4];      sljit_ub asuchars[4];
422    #elif defined COMPILE_PCRE16
423        sljit_uh asuchars[2];
424    #elif defined COMPILE_PCRE32
425        sljit_ui asuchars[1];
426    #endif
427    } oc;    } oc;
428  #endif  #endif
429  } compare_context;  } compare_context;
430    
431  enum {  /* Undefine sljit macros. */
432    frame_end = 0,  #undef CMP
   frame_setstrbegin = -1  
 };  
433    
434  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
435  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
436    
437  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_SCRATCH_REG1
438  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_SCRATCH_REG3
439  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_TEMPORARY_EREG2
440  #define STR_PTR       SLJIT_GENERAL_REG1  #define STR_PTR       SLJIT_SAVED_REG1
441  #define STR_END       SLJIT_GENERAL_REG2  #define STR_END       SLJIT_SAVED_REG2
442  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
443  #define STACK_LIMIT   SLJIT_GENERAL_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
444  #define ARGUMENTS     SLJIT_GENERAL_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
445  #define CALL_COUNT    SLJIT_GENERAL_EREG2  #define CALL_COUNT    SLJIT_SAVED_EREG2
446  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
447    
448  /* Locals layout. */  /* Local space layout. */
449  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
450  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_sw))
451  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_sw))
452  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
453  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
454  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
 /* Head of the last recursion. */  
 #define RECURSIVE_HEAD   (4 * sizeof(sljit_w))  
455  /* Max limit of recursions. */  /* Max limit of recursions. */
456  #define CALL_LIMIT       (5 * sizeof(sljit_w))  #define CALL_LIMIT       (4 * sizeof(sljit_sw))
 /* Last known position of the requested byte. */  
 #define REQ_BYTE_PTR     (6 * sizeof(sljit_w))  
 /* End pointer of the first line. */  
 #define FIRSTLINE_END    (7 * sizeof(sljit_w))  
457  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
458  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
459  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
460  the start pointers when the end of the capturing group has not yet reached. */  the start pointers when the end of the capturing group has not yet reached. */
461  #define OVECTOR_START    (8 * sizeof(sljit_w))  #define OVECTOR_START    (common->ovector_start)
462  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))
463  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * sizeof(sljit_sw))
464  #define PRIV(cc)         (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
465    
466    #if defined COMPILE_PCRE8
467    #define MOV_UCHAR  SLJIT_MOV_UB
468    #define MOVU_UCHAR SLJIT_MOVU_UB
469    #elif defined COMPILE_PCRE16
470    #define MOV_UCHAR  SLJIT_MOV_UH
471    #define MOVU_UCHAR SLJIT_MOVU_UH
472    #elif defined COMPILE_PCRE32
473    #define MOV_UCHAR  SLJIT_MOV_UI
474    #define MOVU_UCHAR SLJIT_MOVU_UI
475    #else
476    #error Unsupported compiling mode
477    #endif
478    
479  /* Shortcuts. */  /* Shortcuts. */
480  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 391  the start pointers when the end of the c Line 491  the start pointers when the end of the c
491    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
492  #define JUMPHERE(jump) \  #define JUMPHERE(jump) \
493    sljit_set_label((jump), sljit_emit_label(compiler))    sljit_set_label((jump), sljit_emit_label(compiler))
494    #define SET_LABEL(jump, label) \
495      sljit_set_label((jump), (label))
496  #define CMP(type, src1, src1w, src2, src2w) \  #define CMP(type, src1, src1w, src2, src2w) \
497    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
498  #define CMPTO(type, src1, src1w, src2, src2w, label) \  #define CMPTO(type, src1, src1w, src2, src2w, label) \
499    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
500  #define COND_VALUE(op, dst, dstw, type) \  #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
501    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
502    #define GET_LOCAL_BASE(dst, dstw, offset) \
503      sljit_get_local_base(compiler, (dst), (dstw), (offset))
504    
505  static uschar* bracketend(uschar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
506  {  {
507  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
508  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 409  return cc; Line 513  return cc;
513    
514  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
515   next_opcode   next_opcode
516   get_localspace   get_private_data_length
517   set_localptrs   set_private_data_ptrs
518   get_framesize   get_framesize
519   init_frame   init_frame
520   get_localsize   get_private_data_copy_length
521   copy_locals   copy_private_data
522   compile_hotpath   compile_matchingpath
523   compile_fallbackpath   compile_backtrackingpath
524  */  */
525    
526  static uschar *next_opcode(compiler_common *common, uschar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
527  {  {
528  SLJIT_UNUSED_ARG(common);  SLJIT_UNUSED_ARG(common);
529  switch(*cc)  switch(*cc)
# Line 437  switch(*cc) Line 541  switch(*cc)
541    case OP_WORDCHAR:    case OP_WORDCHAR:
542    case OP_ANY:    case OP_ANY:
543    case OP_ALLANY:    case OP_ALLANY:
544      case OP_NOTPROP:
545      case OP_PROP:
546    case OP_ANYNL:    case OP_ANYNL:
547    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
548    case OP_HSPACE:    case OP_HSPACE:
# Line 449  switch(*cc) Line 555  switch(*cc)
555    case OP_CIRCM:    case OP_CIRCM:
556    case OP_DOLL:    case OP_DOLL:
557    case OP_DOLLM:    case OP_DOLLM:
   case OP_TYPESTAR:  
   case OP_TYPEMINSTAR:  
   case OP_TYPEPLUS:  
   case OP_TYPEMINPLUS:  
   case OP_TYPEQUERY:  
   case OP_TYPEMINQUERY:  
   case OP_TYPEPOSSTAR:  
   case OP_TYPEPOSPLUS:  
   case OP_TYPEPOSQUERY:  
558    case OP_CRSTAR:    case OP_CRSTAR:
559    case OP_CRMINSTAR:    case OP_CRMINSTAR:
560    case OP_CRPLUS:    case OP_CRPLUS:
561    case OP_CRMINPLUS:    case OP_CRMINPLUS:
562    case OP_CRQUERY:    case OP_CRQUERY:
563    case OP_CRMINQUERY:    case OP_CRMINQUERY:
564      case OP_CRRANGE:
565      case OP_CRMINRANGE:
566      case OP_CLASS:
567      case OP_NCLASS:
568      case OP_REF:
569      case OP_REFI:
570      case OP_RECURSE:
571      case OP_CALLOUT:
572      case OP_ALT:
573      case OP_KET:
574      case OP_KETRMAX:
575      case OP_KETRMIN:
576      case OP_KETRPOS:
577      case OP_REVERSE:
578      case OP_ASSERT:
579      case OP_ASSERT_NOT:
580      case OP_ASSERTBACK:
581      case OP_ASSERTBACK_NOT:
582      case OP_ONCE:
583      case OP_ONCE_NC:
584      case OP_BRA:
585      case OP_BRAPOS:
586      case OP_CBRA:
587      case OP_CBRAPOS:
588      case OP_COND:
589      case OP_SBRA:
590      case OP_SBRAPOS:
591      case OP_SCBRA:
592      case OP_SCBRAPOS:
593      case OP_SCOND:
594      case OP_CREF:
595      case OP_NCREF:
596      case OP_RREF:
597      case OP_NRREF:
598    case OP_DEF:    case OP_DEF:
599    case OP_BRAZERO:    case OP_BRAZERO:
600    case OP_BRAMINZERO:    case OP_BRAMINZERO:
601    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
602      case OP_PRUNE:
603      case OP_SKIP:
604      case OP_COMMIT:
605    case OP_FAIL:    case OP_FAIL:
606    case OP_ACCEPT:    case OP_ACCEPT:
607    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
608      case OP_CLOSE:
609    case OP_SKIPZERO:    case OP_SKIPZERO:
610    return cc + 1;    return cc + PRIV(OP_lengths)[*cc];
   
   case OP_ANYBYTE:  
 #ifdef SUPPORT_UTF8  
   if (common->utf8) return NULL;  
 #endif  
   return cc + 1;  
611    
612    case OP_CHAR:    case OP_CHAR:
613    case OP_CHARI:    case OP_CHARI:
614    case OP_NOT:    case OP_NOT:
615    case OP_NOTI:    case OP_NOTI:
   
616    case OP_STAR:    case OP_STAR:
617    case OP_MINSTAR:    case OP_MINSTAR:
618    case OP_PLUS:    case OP_PLUS:
619    case OP_MINPLUS:    case OP_MINPLUS:
620    case OP_QUERY:    case OP_QUERY:
621    case OP_MINQUERY:    case OP_MINQUERY:
622      case OP_UPTO:
623      case OP_MINUPTO:
624      case OP_EXACT:
625    case OP_POSSTAR:    case OP_POSSTAR:
626    case OP_POSPLUS:    case OP_POSPLUS:
627    case OP_POSQUERY:    case OP_POSQUERY:
628      case OP_POSUPTO:
629    case OP_STARI:    case OP_STARI:
630    case OP_MINSTARI:    case OP_MINSTARI:
631    case OP_PLUSI:    case OP_PLUSI:
632    case OP_MINPLUSI:    case OP_MINPLUSI:
633    case OP_QUERYI:    case OP_QUERYI:
634    case OP_MINQUERYI:    case OP_MINQUERYI:
635      case OP_UPTOI:
636      case OP_MINUPTOI:
637      case OP_EXACTI:
638    case OP_POSSTARI:    case OP_POSSTARI:
639    case OP_POSPLUSI:    case OP_POSPLUSI:
640    case OP_POSQUERYI:    case OP_POSQUERYI:
641      case OP_POSUPTOI:
642    case OP_NOTSTAR:    case OP_NOTSTAR:
643    case OP_NOTMINSTAR:    case OP_NOTMINSTAR:
644    case OP_NOTPLUS:    case OP_NOTPLUS:
645    case OP_NOTMINPLUS:    case OP_NOTMINPLUS:
646    case OP_NOTQUERY:    case OP_NOTQUERY:
647    case OP_NOTMINQUERY:    case OP_NOTMINQUERY:
648      case OP_NOTUPTO:
649      case OP_NOTMINUPTO:
650      case OP_NOTEXACT:
651    case OP_NOTPOSSTAR:    case OP_NOTPOSSTAR:
652    case OP_NOTPOSPLUS:    case OP_NOTPOSPLUS:
653    case OP_NOTPOSQUERY:    case OP_NOTPOSQUERY:
654      case OP_NOTPOSUPTO:
655    case OP_NOTSTARI:    case OP_NOTSTARI:
656    case OP_NOTMINSTARI:    case OP_NOTMINSTARI:
657    case OP_NOTPLUSI:    case OP_NOTPLUSI:
658    case OP_NOTMINPLUSI:    case OP_NOTMINPLUSI:
659    case OP_NOTQUERYI:    case OP_NOTQUERYI:
660    case OP_NOTMINQUERYI:    case OP_NOTMINQUERYI:
   case OP_NOTPOSSTARI:  
   case OP_NOTPOSPLUSI:  
   case OP_NOTPOSQUERYI:  
   cc += 2;  
 #ifdef SUPPORT_UTF8  
   if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];  
 #endif  
   return cc;  
   
   case OP_UPTO:  
   case OP_MINUPTO:  
   case OP_EXACT:  
   case OP_POSUPTO:  
   case OP_UPTOI:  
   case OP_MINUPTOI:  
   case OP_EXACTI:  
   case OP_POSUPTOI:  
   case OP_NOTUPTO:  
   case OP_NOTMINUPTO:  
   case OP_NOTEXACT:  
   case OP_NOTPOSUPTO:  
661    case OP_NOTUPTOI:    case OP_NOTUPTOI:
662    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
663    case OP_NOTEXACTI:    case OP_NOTEXACTI:
664      case OP_NOTPOSSTARI:
665      case OP_NOTPOSPLUSI:
666      case OP_NOTPOSQUERYI:
667    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
668    cc += 4;    cc += PRIV(OP_lengths)[*cc];
669  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
670    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
671  #endif  #endif
672    return cc;    return cc;
673    
674    case OP_NOTPROP:    /* Special cases. */
675    case OP_PROP:    case OP_TYPESTAR:
676      case OP_TYPEMINSTAR:
677      case OP_TYPEPLUS:
678      case OP_TYPEMINPLUS:
679      case OP_TYPEQUERY:
680      case OP_TYPEMINQUERY:
681    case OP_TYPEUPTO:    case OP_TYPEUPTO:
682    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
683    case OP_TYPEEXACT:    case OP_TYPEEXACT:
684      case OP_TYPEPOSSTAR:
685      case OP_TYPEPOSPLUS:
686      case OP_TYPEPOSQUERY:
687    case OP_TYPEPOSUPTO:    case OP_TYPEPOSUPTO:
688    case OP_REF:    return cc + PRIV(OP_lengths)[*cc] - 1;
   case OP_REFI:  
   case OP_CREF:  
   case OP_NCREF:  
   case OP_RREF:  
   case OP_NRREF:  
   case OP_CLOSE:  
   cc += 3;  
   return cc;  
689    
690    case OP_CRRANGE:    case OP_ANYBYTE:
691    case OP_CRMINRANGE:  #ifdef SUPPORT_UTF
692    return cc + 5;    if (common->utf) return NULL;
693    #endif
694    case OP_CLASS:    return cc + 1;
   case OP_NCLASS:  
   return cc + 33;  
695    
696  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
697    case OP_XCLASS:    case OP_XCLASS:
698    return cc + GET(cc, 1);    return cc + GET(cc, 1);
699  #endif  #endif
700    
701    case OP_RECURSE:    case OP_MARK:
702    case OP_ASSERT:    case OP_PRUNE_ARG:
703    case OP_ASSERT_NOT:    case OP_SKIP_ARG:
704    case OP_ASSERTBACK:    return cc + 1 + 2 + cc[1];
   case OP_ASSERTBACK_NOT:  
   case OP_REVERSE:  
   case OP_ONCE:  
   case OP_ONCE_NC:  
   case OP_BRA:  
   case OP_BRAPOS:  
   case OP_COND:  
   case OP_SBRA:  
   case OP_SBRAPOS:  
   case OP_SCOND:  
   case OP_ALT:  
   case OP_KET:  
   case OP_KETRMAX:  
   case OP_KETRMIN:  
   case OP_KETRPOS:  
   return cc + 1 + LINK_SIZE;  
   
   case OP_CBRA:  
   case OP_CBRAPOS:  
   case OP_SCBRA:  
   case OP_SCBRAPOS:  
   return cc + 1 + LINK_SIZE + 2;  
705    
706    default:    default:
707    return NULL;    return NULL;
708    }    }
709  }  }
710    
711  static int get_localspace(compiler_common *common, uschar *cc, uschar *ccend)  #define CASE_ITERATOR_PRIVATE_DATA_1 \
712        case OP_MINSTAR: \
713        case OP_MINPLUS: \
714        case OP_QUERY: \
715        case OP_MINQUERY: \
716        case OP_MINSTARI: \
717        case OP_MINPLUSI: \
718        case OP_QUERYI: \
719        case OP_MINQUERYI: \
720        case OP_NOTMINSTAR: \
721        case OP_NOTMINPLUS: \
722        case OP_NOTQUERY: \
723        case OP_NOTMINQUERY: \
724        case OP_NOTMINSTARI: \
725        case OP_NOTMINPLUSI: \
726        case OP_NOTQUERYI: \
727        case OP_NOTMINQUERYI:
728    
729    #define CASE_ITERATOR_PRIVATE_DATA_2A \
730        case OP_STAR: \
731        case OP_PLUS: \
732        case OP_STARI: \
733        case OP_PLUSI: \
734        case OP_NOTSTAR: \
735        case OP_NOTPLUS: \
736        case OP_NOTSTARI: \
737        case OP_NOTPLUSI:
738    
739    #define CASE_ITERATOR_PRIVATE_DATA_2B \
740        case OP_UPTO: \
741        case OP_MINUPTO: \
742        case OP_UPTOI: \
743        case OP_MINUPTOI: \
744        case OP_NOTUPTO: \
745        case OP_NOTMINUPTO: \
746        case OP_NOTUPTOI: \
747        case OP_NOTMINUPTOI:
748    
749    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
750        case OP_TYPEMINSTAR: \
751        case OP_TYPEMINPLUS: \
752        case OP_TYPEQUERY: \
753        case OP_TYPEMINQUERY:
754    
755    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
756        case OP_TYPESTAR: \
757        case OP_TYPEPLUS:
758    
759    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
760        case OP_TYPEUPTO: \
761        case OP_TYPEMINUPTO:
762    
763    static int get_class_iterator_size(pcre_uchar *cc)
764    {
765    switch(*cc)
766      {
767      case OP_CRSTAR:
768      case OP_CRPLUS:
769      return 2;
770    
771      case OP_CRMINSTAR:
772      case OP_CRMINPLUS:
773      case OP_CRQUERY:
774      case OP_CRMINQUERY:
775      return 1;
776    
777      case OP_CRRANGE:
778      case OP_CRMINRANGE:
779      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
780        return 0;
781      return 2;
782    
783      default:
784      return 0;
785      }
786    }
787    
788    static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
789  {  {
790  int localspace = 0;  int private_data_length = 0;
791  uschar *alternative;  pcre_uchar *alternative;
792    pcre_uchar *name;
793    pcre_uchar *end = NULL;
794    int space, size, i;
795    pcre_uint32 bracketlen;
796    
797  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
798  while (cc < ccend)  while (cc < ccend)
799    {    {
800      space = 0;
801      size = 0;
802      bracketlen = 0;
803    switch(*cc)    switch(*cc)
804      {      {
805        case OP_SET_SOM:
806        common->has_set_som = TRUE;
807        cc += 1;
808        break;
809    
810        case OP_REF:
811        case OP_REFI:
812        common->optimized_cbracket[GET2(cc, 1)] = 0;
813        cc += 1 + IMM2_SIZE;
814        break;
815    
816      case OP_ASSERT:      case OP_ASSERT:
817      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
818      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 628  while (cc < ccend) Line 822  while (cc < ccend)
822      case OP_BRAPOS:      case OP_BRAPOS:
823      case OP_SBRA:      case OP_SBRA:
824      case OP_SBRAPOS:      case OP_SBRAPOS:
825      case OP_SCOND:      private_data_length += sizeof(sljit_sw);
826      localspace += sizeof(sljit_w);      bracketlen = 1 + LINK_SIZE;
     cc += 1 + LINK_SIZE;  
827      break;      break;
828    
829      case OP_CBRAPOS:      case OP_CBRAPOS:
830      case OP_SCBRAPOS:      case OP_SCBRAPOS:
831      localspace += sizeof(sljit_w);      private_data_length += sizeof(sljit_sw);
832      cc += 1 + LINK_SIZE + 2;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
833        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
834      break;      break;
835    
836      case OP_COND:      case OP_COND:
837      /* Might be a hidden SCOND. */      case OP_SCOND:
838      alternative = cc + GET(cc, 1);      /* Only AUTO_CALLOUT can insert this opcode. We do
839      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)         not intend to support this case. */
840        localspace += sizeof(sljit_w);      if (cc[1 + LINK_SIZE] == OP_CALLOUT)
841          return -1;
842    
843        if (*cc == OP_COND)
844          {
845          /* Might be a hidden SCOND. */
846          alternative = cc + GET(cc, 1);
847          if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
848            private_data_length += sizeof(sljit_sw);
849          }
850        else
851          private_data_length += sizeof(sljit_sw);
852        bracketlen = 1 + LINK_SIZE;
853        break;
854    
855        case OP_CREF:
856        i = GET2(cc, 1);
857        common->optimized_cbracket[i] = 0;
858        cc += 1 + IMM2_SIZE;
859        break;
860    
861        case OP_NCREF:
862        bracketlen = GET2(cc, 1);
863        name = (pcre_uchar *)common->name_table;
864        alternative = name;
865        for (i = 0; i < common->name_count; i++)
866          {
867          if (GET2(name, 0) == bracketlen) break;
868          name += common->name_entry_size;
869          }
870        SLJIT_ASSERT(i != common->name_count);
871    
872        for (i = 0; i < common->name_count; i++)
873          {
874          if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
875            common->optimized_cbracket[GET2(alternative, 0)] = 0;
876          alternative += common->name_entry_size;
877          }
878        bracketlen = 0;
879        cc += 1 + IMM2_SIZE;
880        break;
881    
882        case OP_BRA:
883        bracketlen = 1 + LINK_SIZE;
884        break;
885    
886        case OP_CBRA:
887        case OP_SCBRA:
888        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
889        break;
890    
891        CASE_ITERATOR_PRIVATE_DATA_1
892        space = 1;
893        size = -2;
894        break;
895    
896        CASE_ITERATOR_PRIVATE_DATA_2A
897        space = 2;
898        size = -2;
899        break;
900    
901        CASE_ITERATOR_PRIVATE_DATA_2B
902        space = 2;
903        size = -(2 + IMM2_SIZE);
904        break;
905    
906        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
907        space = 1;
908        size = 1;
909        break;
910    
911        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
912        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
913          space = 2;
914        size = 1;
915        break;
916    
917        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
918        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
919          space = 2;
920        size = 1 + IMM2_SIZE;
921        break;
922    
923        case OP_CLASS:
924        case OP_NCLASS:
925        size += 1 + 32 / sizeof(pcre_uchar);
926        space = get_class_iterator_size(cc + size);
927        break;
928    
929    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
930        case OP_XCLASS:
931        size = GET(cc, 1);
932        space = get_class_iterator_size(cc + size);
933        break;
934    #endif
935    
936        case OP_RECURSE:
937        /* Set its value only once. */
938        if (common->recursive_head_ptr == 0)
939          {
940          common->recursive_head_ptr = common->ovector_start;
941          common->ovector_start += sizeof(sljit_sw);
942          }
943      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
944      break;      break;
945    
946        case OP_CALLOUT:
947        if (common->capture_last_ptr == 0)
948          {
949          common->capture_last_ptr = common->ovector_start;
950          common->ovector_start += sizeof(sljit_sw);
951          }
952        cc += 2 + 2 * LINK_SIZE;
953        break;
954    
955        case OP_PRUNE_ARG:
956        common->needs_start_ptr = TRUE;
957        common->control_head_ptr = 1;
958        /* Fall through. */
959    
960        case OP_MARK:
961        if (common->mark_ptr == 0)
962          {
963          common->mark_ptr = common->ovector_start;
964          common->ovector_start += sizeof(sljit_sw);
965          }
966        cc += 1 + 2 + cc[1];
967        break;
968    
969        case OP_PRUNE:
970        case OP_SKIP:
971        common->needs_start_ptr = TRUE;
972        common->control_head_ptr = 1;
973        cc += 1;
974        break;
975    
976        case OP_SKIP_ARG:
977        common->control_head_ptr = 1;
978        common->has_skip_arg = TRUE;
979        cc += 1 + 2 + cc[1];
980        break;
981    
982      default:      default:
983      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
984      if (cc == NULL)      if (cc == NULL)
985        return -1;        return -1;
986      break;      break;
987      }      }
988    
989      if (space > 0 && cc >= end)
990        private_data_length += sizeof(sljit_sw) * space;
991    
992      if (size != 0)
993        {
994        if (size < 0)
995          {
996          cc += -size;
997    #ifdef SUPPORT_UTF
998          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
999    #endif
1000          }
1001        else
1002          cc += size;
1003        }
1004    
1005      if (bracketlen != 0)
1006        {
1007        if (cc >= end)
1008          {
1009          end = bracketend(cc);
1010          if (end[-1 - LINK_SIZE] == OP_KET)
1011            end = NULL;
1012          }
1013        cc += bracketlen;
1014        }
1015    }    }
1016  return localspace;  return private_data_length;
1017  }  }
1018    
1019  static void set_localptrs(compiler_common *common, int localptr, uschar *ccend)  static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1020  {  {
1021  uschar *cc = common->start;  pcre_uchar *cc = common->start;
1022  uschar *alternative;  pcre_uchar *alternative;
1023    pcre_uchar *end = NULL;
1024    int space, size, bracketlen;
1025    
1026  while (cc < ccend)  while (cc < ccend)
1027    {    {
1028      space = 0;
1029      size = 0;
1030      bracketlen = 0;
1031    switch(*cc)    switch(*cc)
1032      {      {
1033      case OP_ASSERT:      case OP_ASSERT:
# Line 675  while (cc < ccend) Line 1040  while (cc < ccend)
1040      case OP_SBRA:      case OP_SBRA:
1041      case OP_SBRAPOS:      case OP_SBRAPOS:
1042      case OP_SCOND:      case OP_SCOND:
1043      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1044      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_sw);
1045      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1046      break;      break;
1047    
1048      case OP_CBRAPOS:      case OP_CBRAPOS:
1049      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1050      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1051      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_sw);
1052      cc += 1 + LINK_SIZE + 2;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1053      break;      break;
1054    
1055      case OP_COND:      case OP_COND:
# Line 692  while (cc < ccend) Line 1057  while (cc < ccend)
1057      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1058      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1059        {        {
1060        common->localptrs[cc - common->start] = localptr;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1061        localptr += sizeof(sljit_w);        private_data_ptr += sizeof(sljit_sw);
1062        }        }
1063      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1064        break;
1065    
1066        case OP_BRA:
1067        bracketlen = 1 + LINK_SIZE;
1068        break;
1069    
1070        case OP_CBRA:
1071        case OP_SCBRA:
1072        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1073        break;
1074    
1075        CASE_ITERATOR_PRIVATE_DATA_1
1076        space = 1;
1077        size = -2;
1078        break;
1079    
1080        CASE_ITERATOR_PRIVATE_DATA_2A
1081        space = 2;
1082        size = -2;
1083        break;
1084    
1085        CASE_ITERATOR_PRIVATE_DATA_2B
1086        space = 2;
1087        size = -(2 + IMM2_SIZE);
1088      break;      break;
1089    
1090        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1091        space = 1;
1092        size = 1;
1093        break;
1094    
1095        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1096        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1097          space = 2;
1098        size = 1;
1099        break;
1100    
1101        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1102        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1103          space = 2;
1104        size = 1 + IMM2_SIZE;
1105        break;
1106    
1107        case OP_CLASS:
1108        case OP_NCLASS:
1109        size += 1 + 32 / sizeof(pcre_uchar);
1110        space = get_class_iterator_size(cc + size);
1111        break;
1112    
1113    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1114        case OP_XCLASS:
1115        size = GET(cc, 1);
1116        space = get_class_iterator_size(cc + size);
1117        break;
1118    #endif
1119    
1120      default:      default:
1121      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1122      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1123      break;      break;
1124      }      }
1125    
1126      if (space > 0 && cc >= end)
1127        {
1128        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129        private_data_ptr += sizeof(sljit_sw) * space;
1130        }
1131    
1132      if (size != 0)
1133        {
1134        if (size < 0)
1135          {
1136          cc += -size;
1137    #ifdef SUPPORT_UTF
1138          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1139    #endif
1140          }
1141        else
1142          cc += size;
1143        }
1144    
1145      if (bracketlen > 0)
1146        {
1147        if (cc >= end)
1148          {
1149          end = bracketend(cc);
1150          if (end[-1 - LINK_SIZE] == OP_KET)
1151            end = NULL;
1152          }
1153        cc += bracketlen;
1154        }
1155    }    }
1156  }  }
1157    
1158  /* Returns with -1 if no need for frame. */  /* Returns with a frame_types (always < 0) if no need for frame. */
1159  static int get_framesize(compiler_common *common, uschar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive, BOOL* needs_control_head)
1160  {  {
1161  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1162  int length = 0;  int length = 0;
1163  BOOL possessive = FALSE;  int possessive = 0;
1164  BOOL setsom_found = FALSE;  BOOL stack_restore = FALSE;
1165    BOOL setsom_found = recursive;
1166    BOOL setmark_found = recursive;
1167    /* The last capture is a local variable even for recursions. */
1168    BOOL capture_last_found = FALSE;
1169    
1170    #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1171    SLJIT_ASSERT(common->control_head_ptr != 0);
1172    *needs_control_head = TRUE;
1173    #else
1174    *needs_control_head = FALSE;
1175    #endif
1176    
1177  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1178    {    {
1179    length = 3;    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1180    possessive = TRUE;    /* This is correct regardless of common->capture_last_ptr. */
1181      capture_last_found = TRUE;
1182    }    }
1183    
1184  cc = next_opcode(common, cc);  cc = next_opcode(common, cc);
1185  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
1186  while (cc < ccend)  while (cc < ccend)
1187    switch(*cc)    switch(*cc)
1188      {      {
1189      case OP_SET_SOM:      case OP_SET_SOM:
1190      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1191      if (!setsom_found)      stack_restore = TRUE;
1192        {      if (!setsom_found)
1193        length += 2;        {
1194        setsom_found = TRUE;        length += 2;
1195        }        setsom_found = TRUE;
1196      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;        }
1197      break;      cc += 1;
1198        break;
1199    
1200        case OP_MARK:
1201        case OP_PRUNE_ARG:
1202        SLJIT_ASSERT(common->mark_ptr != 0);
1203        stack_restore = TRUE;
1204        if (!setmark_found)
1205          {
1206          length += 2;
1207          setmark_found = TRUE;
1208          }
1209        if (common->control_head_ptr != 0)
1210          *needs_control_head = TRUE;
1211        cc += 1 + 2 + cc[1];
1212        break;
1213    
1214        case OP_RECURSE:
1215        stack_restore = TRUE;
1216        if (common->has_set_som && !setsom_found)
1217          {
1218          length += 2;
1219          setsom_found = TRUE;
1220          }
1221        if (common->mark_ptr != 0 && !setmark_found)
1222          {
1223          length += 2;
1224          setmark_found = TRUE;
1225          }
1226        if (common->capture_last_ptr != 0 && !capture_last_found)
1227          {
1228          length += 2;
1229          capture_last_found = TRUE;
1230          }
1231        cc += 1 + LINK_SIZE;
1232        break;
1233    
1234        case OP_CBRA:
1235        case OP_CBRAPOS:
1236        case OP_SCBRA:
1237        case OP_SCBRAPOS:
1238        stack_restore = TRUE;
1239        if (common->capture_last_ptr != 0 && !capture_last_found)
1240          {
1241          length += 2;
1242          capture_last_found = TRUE;
1243          }
1244        length += 3;
1245        cc += 1 + LINK_SIZE + IMM2_SIZE;
1246        break;
1247    
1248        case OP_PRUNE:
1249        case OP_SKIP:
1250        case OP_SKIP_ARG:
1251        case OP_COMMIT:
1252        if (common->control_head_ptr != 0)
1253          *needs_control_head = TRUE;
1254        /* Fall through. */
1255    
1256        default:
1257        stack_restore = TRUE;
1258        /* Fall through. */
1259    
1260        case OP_NOT_WORD_BOUNDARY:
1261        case OP_WORD_BOUNDARY:
1262        case OP_NOT_DIGIT:
1263        case OP_DIGIT:
1264        case OP_NOT_WHITESPACE:
1265        case OP_WHITESPACE:
1266        case OP_NOT_WORDCHAR:
1267        case OP_WORDCHAR:
1268        case OP_ANY:
1269        case OP_ALLANY:
1270        case OP_ANYBYTE:
1271        case OP_NOTPROP:
1272        case OP_PROP:
1273        case OP_ANYNL:
1274        case OP_NOT_HSPACE:
1275        case OP_HSPACE:
1276        case OP_NOT_VSPACE:
1277        case OP_VSPACE:
1278        case OP_EXTUNI:
1279        case OP_EODN:
1280        case OP_EOD:
1281        case OP_CIRC:
1282        case OP_CIRCM:
1283        case OP_DOLL:
1284        case OP_DOLLM:
1285        case OP_CHAR:
1286        case OP_CHARI:
1287        case OP_NOT:
1288        case OP_NOTI:
1289    
1290        case OP_EXACT:
1291        case OP_POSSTAR:
1292        case OP_POSPLUS:
1293        case OP_POSQUERY:
1294        case OP_POSUPTO:
1295    
1296        case OP_EXACTI:
1297        case OP_POSSTARI:
1298        case OP_POSPLUSI:
1299        case OP_POSQUERYI:
1300        case OP_POSUPTOI:
1301    
1302        case OP_NOTEXACT:
1303        case OP_NOTPOSSTAR:
1304        case OP_NOTPOSPLUS:
1305        case OP_NOTPOSQUERY:
1306        case OP_NOTPOSUPTO:
1307    
1308        case OP_NOTEXACTI:
1309        case OP_NOTPOSSTARI:
1310        case OP_NOTPOSPLUSI:
1311        case OP_NOTPOSQUERYI:
1312        case OP_NOTPOSUPTOI:
1313    
1314        case OP_TYPEEXACT:
1315        case OP_TYPEPOSSTAR:
1316        case OP_TYPEPOSPLUS:
1317        case OP_TYPEPOSQUERY:
1318        case OP_TYPEPOSUPTO:
1319    
1320      case OP_CBRA:      case OP_CLASS:
1321      case OP_CBRAPOS:      case OP_NCLASS:
1322      case OP_SCBRA:      case OP_XCLASS:
     case OP_SCBRAPOS:  
     length += 3;  
     cc += 1 + LINK_SIZE + 2;  
     break;  
1323    
     default:  
1324      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1325      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1326      break;      break;
1327      }      }
1328    
1329  /* Possessive quantifiers can use a special case. */  /* Possessive quantifiers can use a special case. */
1330  if (SLJIT_UNLIKELY(possessive) && length == 3)  if (SLJIT_UNLIKELY(possessive == length))
1331    return -1;    return stack_restore ? no_frame : no_stack;
1332    
1333  if (length > 0)  if (length > 0)
1334    return length + 1;    return length + 1;
1335  return -1;  return stack_restore ? no_frame : no_stack;
1336  }  }
1337    
1338  static void init_frame(compiler_common *common, uschar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1339  {  {
1340  DEFINE_COMPILER;  DEFINE_COMPILER;
1341  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1342  BOOL setsom_found = FALSE;  BOOL setsom_found = recursive;
1343    BOOL setmark_found = recursive;
1344    /* The last capture is a local variable even for recursions. */
1345    BOOL capture_last_found = FALSE;
1346  int offset;  int offset;
1347    
1348  /* >= 1 + shortest item size (2) */  /* >= 1 + shortest item size (2) */
1349    SLJIT_UNUSED_ARG(stacktop);
1350  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1351    
1352  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
# Line 776  while (cc < ccend) Line 1357  while (cc < ccend)
1357    switch(*cc)    switch(*cc)
1358      {      {
1359      case OP_SET_SOM:      case OP_SET_SOM:
1360      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1361      if (!setsom_found)      if (!setsom_found)
1362        {        {
1363        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1364        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1365        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1366          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1367          stackpos += (int)sizeof(sljit_sw);
1368          setsom_found = TRUE;
1369          }
1370        cc += 1;
1371        break;
1372    
1373        case OP_MARK:
1374        case OP_PRUNE_ARG:
1375        SLJIT_ASSERT(common->mark_ptr != 0);
1376        if (!setmark_found)
1377          {
1378          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1379          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1380          stackpos += (int)sizeof(sljit_sw);
1381          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1382          stackpos += (int)sizeof(sljit_sw);
1383          setmark_found = TRUE;
1384          }
1385        cc += 1 + 2 + cc[1];
1386        break;
1387    
1388        case OP_RECURSE:
1389        if (common->has_set_som && !setsom_found)
1390          {
1391          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1392          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1393          stackpos += (int)sizeof(sljit_sw);
1394        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1395        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1396        setsom_found = TRUE;        setsom_found = TRUE;
1397        }        }
1398      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      if (common->mark_ptr != 0 && !setmark_found)
1399          {
1400          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1401          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1402          stackpos += (int)sizeof(sljit_sw);
1403          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1404          stackpos += (int)sizeof(sljit_sw);
1405          setmark_found = TRUE;
1406          }
1407        if (common->capture_last_ptr != 0 && !capture_last_found)
1408          {
1409          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1410          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1411          stackpos += (int)sizeof(sljit_sw);
1412          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413          stackpos += (int)sizeof(sljit_sw);
1414          capture_last_found = TRUE;
1415          }
1416        cc += 1 + LINK_SIZE;
1417      break;      break;
1418    
1419      case OP_CBRA:      case OP_CBRA:
1420      case OP_CBRAPOS:      case OP_CBRAPOS:
1421      case OP_SCBRA:      case OP_SCBRA:
1422      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1423        if (common->capture_last_ptr != 0 && !capture_last_found)
1424          {
1425          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1426          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1427          stackpos += (int)sizeof(sljit_sw);
1428          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1429          stackpos += (int)sizeof(sljit_sw);
1430          capture_last_found = TRUE;
1431          }
1432      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1433      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1434      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1435      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1436      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1437      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1438      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1439      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1440      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1441    
1442      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1443      break;      break;
1444    
1445      default:      default:
# Line 812  while (cc < ccend) Line 1448  while (cc < ccend)
1448      break;      break;
1449      }      }
1450    
1451  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1452  SLJIT_ASSERT(stackpos == STACK(stacktop));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1453  }  }
1454    
1455  static SLJIT_INLINE int get_localsize(compiler_common *common, uschar *cc, uschar *ccend)  static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1456  {  {
1457  int localsize = 2;  int private_data_length = needs_control_head ? 3 : 2;
1458  uschar *alternative;  int size;
1459  /* Calculate the sum of the local variables. */  pcre_uchar *alternative;
1460    /* Calculate the sum of the private machine words. */
1461  while (cc < ccend)  while (cc < ccend)
1462    {    {
1463      size = 0;
1464    switch(*cc)    switch(*cc)
1465      {      {
1466      case OP_ASSERT:      case OP_ASSERT:
# Line 835  while (cc < ccend) Line 1473  while (cc < ccend)
1473      case OP_SBRA:      case OP_SBRA:
1474      case OP_SBRAPOS:      case OP_SBRAPOS:
1475      case OP_SCOND:      case OP_SCOND:
1476      localsize++;      private_data_length++;
1477      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1478      break;      break;
1479    
1480      case OP_CBRA:      case OP_CBRA:
1481      case OP_SCBRA:      case OP_SCBRA:
1482      localsize++;      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1483      cc += 1 + LINK_SIZE + 2;        private_data_length++;
1484        cc += 1 + LINK_SIZE + IMM2_SIZE;
1485      break;      break;
1486    
1487      case OP_CBRAPOS:      case OP_CBRAPOS:
1488      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1489      localsize += 2;      private_data_length += 2;
1490      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1491      break;      break;
1492    
1493      case OP_COND:      case OP_COND:
1494      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1495      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1496      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1497        localsize++;        private_data_length++;
1498      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1499      break;      break;
1500    
1501        CASE_ITERATOR_PRIVATE_DATA_1
1502        if (PRIVATE_DATA(cc))
1503          private_data_length++;
1504        cc += 2;
1505    #ifdef SUPPORT_UTF
1506        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1507    #endif
1508        break;
1509    
1510        CASE_ITERATOR_PRIVATE_DATA_2A
1511        if (PRIVATE_DATA(cc))
1512          private_data_length += 2;
1513        cc += 2;
1514    #ifdef SUPPORT_UTF
1515        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1516    #endif
1517        break;
1518    
1519        CASE_ITERATOR_PRIVATE_DATA_2B
1520        if (PRIVATE_DATA(cc))
1521          private_data_length += 2;
1522        cc += 2 + IMM2_SIZE;
1523    #ifdef SUPPORT_UTF
1524        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1525    #endif
1526        break;
1527    
1528        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1529        if (PRIVATE_DATA(cc))
1530          private_data_length++;
1531        cc += 1;
1532        break;
1533    
1534        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1535        if (PRIVATE_DATA(cc))
1536          private_data_length += 2;
1537        cc += 1;
1538        break;
1539    
1540        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1541        if (PRIVATE_DATA(cc))
1542          private_data_length += 2;
1543        cc += 1 + IMM2_SIZE;
1544        break;
1545    
1546        case OP_CLASS:
1547        case OP_NCLASS:
1548    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1549        case OP_XCLASS:
1550        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1551    #else
1552        size = 1 + 32 / (int)sizeof(pcre_uchar);
1553    #endif
1554        if (PRIVATE_DATA(cc))
1555          private_data_length += get_class_iterator_size(cc + size);
1556        cc += size;
1557        break;
1558    
1559      default:      default:
1560      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1561      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
# Line 866  while (cc < ccend) Line 1563  while (cc < ccend)
1563      }      }
1564    }    }
1565  SLJIT_ASSERT(cc == ccend);  SLJIT_ASSERT(cc == ccend);
1566  return localsize;  return private_data_length;
1567  }  }
1568    
1569  static void copy_locals(compiler_common *common, uschar *cc, uschar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1570    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1571  {  {
1572  DEFINE_COMPILER;  DEFINE_COMPILER;
1573  int srcw[2];  int srcw[2];
1574  int count;  int count, size;
1575  BOOL tmp1next = TRUE;  BOOL tmp1next = TRUE;
1576  BOOL tmp1empty = TRUE;  BOOL tmp1empty = TRUE;
1577  BOOL tmp2empty = TRUE;  BOOL tmp2empty = TRUE;
1578  uschar *alternative;  pcre_uchar *alternative;
1579  enum {  enum {
1580    start,    start,
1581    loop,    loop,
# Line 891  stacktop = STACK(stacktop - 1); Line 1588  stacktop = STACK(stacktop - 1);
1588    
1589  if (!save)  if (!save)
1590    {    {
1591    stackptr += sizeof(sljit_w);    stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1592    if (stackptr < stacktop)    if (stackptr < stacktop)
1593      {      {
1594      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1595      stackptr += sizeof(sljit_w);      stackptr += sizeof(sljit_sw);
1596      tmp1empty = FALSE;      tmp1empty = FALSE;
1597      }      }
1598    if (stackptr < stacktop)    if (stackptr < stacktop)
1599      {      {
1600      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1601      stackptr += sizeof(sljit_w);      stackptr += sizeof(sljit_sw);
1602      tmp2empty = FALSE;      tmp2empty = FALSE;
1603      }      }
1604    /* The tmp1next must be TRUE in either way. */    /* The tmp1next must be TRUE in either way. */
1605    }    }
1606    
1607  while (status != end)  do
1608    {    {
1609    count = 0;    count = 0;
1610    switch(status)    switch(status)
1611      {      {
1612      case start:      case start:
1613      SLJIT_ASSERT(save);      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1614      count = 1;      count = 1;
1615      srcw[0] = RECURSIVE_HEAD;      srcw[0] = common->recursive_head_ptr;
1616        if (needs_control_head)
1617          {
1618          SLJIT_ASSERT(common->control_head_ptr != 0);
1619          count = 2;
1620          srcw[1] = common->control_head_ptr;
1621          }
1622      status = loop;      status = loop;
1623      break;      break;
1624    
# Line 939  while (status != end) Line 1642  while (status != end)
1642        case OP_SBRAPOS:        case OP_SBRAPOS:
1643        case OP_SCOND:        case OP_SCOND:
1644        count = 1;        count = 1;
1645        srcw[0] = PRIV(cc);        srcw[0] = PRIVATE_DATA(cc);
1646        SLJIT_ASSERT(srcw[0] != 0);        SLJIT_ASSERT(srcw[0] != 0);
1647        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1648        break;        break;
1649    
1650        case OP_CBRA:        case OP_CBRA:
1651        case OP_SCBRA:        case OP_SCBRA:
1652        count = 1;        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1653        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));          {
1654        cc += 1 + LINK_SIZE + 2;          count = 1;
1655            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1656            }
1657          cc += 1 + LINK_SIZE + IMM2_SIZE;
1658        break;        break;
1659    
1660        case OP_CBRAPOS:        case OP_CBRAPOS:
1661        case OP_SCBRAPOS:        case OP_SCBRAPOS:
1662        count = 2;        count = 2;
1663          srcw[0] = PRIVATE_DATA(cc);
1664        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1665        srcw[0] = PRIV(cc);        SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1666        SLJIT_ASSERT(srcw[0] != 0);        cc += 1 + LINK_SIZE + IMM2_SIZE;
       cc += 1 + LINK_SIZE + 2;  
1667        break;        break;
1668    
1669        case OP_COND:        case OP_COND:
# Line 966  while (status != end) Line 1672  while (status != end)
1672        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1673          {          {
1674          count = 1;          count = 1;
1675          srcw[0] = PRIV(cc);          srcw[0] = PRIVATE_DATA(cc);
1676          SLJIT_ASSERT(srcw[0] != 0);          SLJIT_ASSERT(srcw[0] != 0);
1677          }          }
1678        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1679        break;        break;
1680    
1681          CASE_ITERATOR_PRIVATE_DATA_1
1682          if (PRIVATE_DATA(cc))
1683            {
1684            count = 1;
1685            srcw[0] = PRIVATE_DATA(cc);
1686            }
1687          cc += 2;
1688    #ifdef SUPPORT_UTF
1689          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1690    #endif
1691          break;
1692    
1693          CASE_ITERATOR_PRIVATE_DATA_2A
1694          if (PRIVATE_DATA(cc))
1695            {
1696            count = 2;
1697            srcw[0] = PRIVATE_DATA(cc);
1698            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1699            }
1700          cc += 2;
1701    #ifdef SUPPORT_UTF
1702          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1703    #endif
1704          break;
1705    
1706          CASE_ITERATOR_PRIVATE_DATA_2B
1707          if (PRIVATE_DATA(cc))
1708            {
1709            count = 2;
1710            srcw[0] = PRIVATE_DATA(cc);
1711            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1712            }
1713          cc += 2 + IMM2_SIZE;
1714    #ifdef SUPPORT_UTF
1715          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1716    #endif
1717          break;
1718    
1719          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1720          if (PRIVATE_DATA(cc))
1721            {
1722            count = 1;
1723            srcw[0] = PRIVATE_DATA(cc);
1724            }
1725          cc += 1;
1726          break;
1727    
1728          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1729          if (PRIVATE_DATA(cc))
1730            {
1731            count = 2;
1732            srcw[0] = PRIVATE_DATA(cc);
1733            srcw[1] = srcw[0] + sizeof(sljit_sw);
1734            }
1735          cc += 1;
1736          break;
1737    
1738          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1739          if (PRIVATE_DATA(cc))
1740            {
1741            count = 2;
1742            srcw[0] = PRIVATE_DATA(cc);
1743            srcw[1] = srcw[0] + sizeof(sljit_sw);
1744            }
1745          cc += 1 + IMM2_SIZE;
1746          break;
1747    
1748          case OP_CLASS:
1749          case OP_NCLASS:
1750    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1751          case OP_XCLASS:
1752          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1753    #else
1754          size = 1 + 32 / (int)sizeof(pcre_uchar);
1755    #endif
1756          if (PRIVATE_DATA(cc))
1757            switch(get_class_iterator_size(cc + size))
1758              {
1759              case 1:
1760              count = 1;
1761              srcw[0] = PRIVATE_DATA(cc);
1762              break;
1763    
1764              case 2:
1765              count = 2;
1766              srcw[0] = PRIVATE_DATA(cc);
1767              srcw[1] = srcw[0] + sizeof(sljit_sw);
1768              break;
1769    
1770              default:
1771              SLJIT_ASSERT_STOP();
1772              break;
1773              }
1774          cc += size;
1775          break;
1776    
1777        default:        default:
1778        cc = next_opcode(common, cc);        cc = next_opcode(common, cc);
1779        SLJIT_ASSERT(cc != NULL);        SLJIT_ASSERT(cc != NULL);
# Line 994  while (status != end) Line 1796  while (status != end)
1796          if (!tmp1empty)          if (!tmp1empty)
1797            {            {
1798            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1799            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1800            }            }
1801          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1802          tmp1empty = FALSE;          tmp1empty = FALSE;
# Line 1005  while (status != end) Line 1807  while (status != end)
1807          if (!tmp2empty)          if (!tmp2empty)
1808            {            {
1809            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1810            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1811            }            }
1812          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1813          tmp2empty = FALSE;          tmp2empty = FALSE;
# Line 1022  while (status != end) Line 1824  while (status != end)
1824          if (!tmp1empty)          if (!tmp1empty)
1825            {            {
1826            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1827            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1828            }            }
1829          tmp1next = FALSE;          tmp1next = FALSE;
1830          }          }
# Line 1034  while (status != end) Line 1836  while (status != end)
1836          if (!tmp2empty)          if (!tmp2empty)
1837            {            {
1838            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1839            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1840            }            }
1841          tmp1next = TRUE;          tmp1next = TRUE;
1842          }          }
1843        }        }
1844      }      }
1845    }    }
1846    while (status != end);
1847    
1848  if (save)  if (save)
1849    {    {
# Line 1049  if (save) Line 1852  if (save)
1852      if (!tmp1empty)      if (!tmp1empty)
1853        {        {
1854        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1855        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1856        }        }
1857      if (!tmp2empty)      if (!tmp2empty)
1858        {        {
1859        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1860        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1861        }        }
1862      }      }
1863    else    else
# Line 1062  if (save) Line 1865  if (save)
1865      if (!tmp2empty)      if (!tmp2empty)
1866        {        {
1867        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1868        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1869        }        }
1870      if (!tmp1empty)      if (!tmp1empty)
1871        {        {
1872        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1873        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1874        }        }
1875      }      }
1876    }    }
1877  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1878  }  }
1879    
1880  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  #undef CASE_ITERATOR_PRIVATE_DATA_1
1881    #undef CASE_ITERATOR_PRIVATE_DATA_2A
1882    #undef CASE_ITERATOR_PRIVATE_DATA_2B
1883    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1884    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1885    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1886    
1887    static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1888  {  {
        /* Returns TRUE when at most one bit of value is set. A value of 0 also
        yields TRUE, because 0 & (0 - 1) is 0; callers that need a strict
        power-of-two test must exclude 0 themselves. */
1889  return (value & (value - 1)) == 0;  return (value & (value - 1)) == 0;
1890  }  }
# Line 1084  static SLJIT_INLINE void set_jumps(jump_ Line 1894  static SLJIT_INLINE void set_jumps(jump_
1894  while (list)  while (list)
1895    {    {
1896    /* sljit_set_label is clever enough to do nothing    /* sljit_set_label is clever enough to do nothing
1897    if either the jump or the label is NULL */    if either the jump or the label is NULL. */
1898    sljit_set_label(list->jump, label);    SET_LABEL(list->jump, label);
1899    list = list->next;    list = list->next;
1900    }    }
1901  }  }
# Line 1101  if (list_item) Line 1911  if (list_item)
1911    }    }
1912  }  }
1913    
1914  static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)  static void add_stub(compiler_common *common, struct sljit_jump *start)
1915  {  {
1916  DEFINE_COMPILER;  DEFINE_COMPILER;
1917  stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));  stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1918    
1919  if (list_item)  if (list_item)
1920    {    {
   list_item->type = type;  
   list_item->data = data;  
1921    list_item->start = start;    list_item->start = start;
1922    list_item->leave = LABEL();    list_item->quit = LABEL();
1923    list_item->next = common->stubs;    list_item->next = common->stubs;
1924    common->stubs = list_item;    common->stubs = list_item;
1925    }    }
# Line 1125  stub_list* list_item = common->stubs; Line 1933  stub_list* list_item = common->stubs;
1933  while (list_item)  while (list_item)
1934    {    {
1935    JUMPHERE(list_item->start);    JUMPHERE(list_item->start);
1936    switch(list_item->type)    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1937      {    JUMPTO(SLJIT_JUMP, list_item->quit);
     case stack_alloc:  
     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));  
     break;  
     }  
   JUMPTO(SLJIT_JUMP, list_item->leave);  
1938    list_item = list_item->next;    list_item = list_item->next;
1939    }    }
1940  common->stubs = NULL;  common->stubs = NULL;
# Line 1150  static SLJIT_INLINE void allocate_stack( Line 1953  static SLJIT_INLINE void allocate_stack(
1953  /* May destroy all locals and registers except TMP2. */  /* May destroy all locals and registers except TMP2. */
1954  DEFINE_COMPILER;  DEFINE_COMPILER;
1955    
1956  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1957  #ifdef DESTROY_REGISTERS  #ifdef DESTROY_REGISTERS
1958  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1959  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
# Line 1158  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); Line 1961  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1961  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1962  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1963  #endif  #endif
1964  add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));  add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1965  }  }
1966    
1967  static SLJIT_INLINE void free_stack(compiler_common *common, int size)  static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1968  {  {
        /* Generates a subtraction of "size" machine words (size * sizeof of the
        sljit word type) from STACK_TOP. No limit check is generated here. */
1969  DEFINE_COMPILER;  DEFINE_COMPILER;
1970  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1971  }  }
1972    
1973  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
# Line 1172  static SLJIT_INLINE void reset_ovector(c Line 1975  static SLJIT_INLINE void reset_ovector(c
1975  DEFINE_COMPILER;  DEFINE_COMPILER;
1976  struct sljit_label *loop;  struct sljit_label *loop;
1977  int i;  int i;
1978    
1979  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
1980    SLJIT_ASSERT(length > 1);
1981  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
1982  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1983  if (length < 8)  if (length < 8)
1984    {    {
1985    for (i = 0; i < length; i++)    for (i = 1; i < length; i++)
1986      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1987    }    }
1988  else  else
1989    {    {
1990    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1991    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1992    loop = LABEL();    loop = LABEL();
1993    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1994    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1995    JUMPTO(SLJIT_C_NOT_ZERO, loop);    JUMPTO(SLJIT_C_NOT_ZERO, loop);
1996    }    }
1997  }  }
1998    
1999    static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2000    {
        /* Generates code that overwrites OVECTOR(2) .. OVECTOR(length - 1) with
        the value currently stored in OVECTOR(1), zeroes the mark local (when
        common->mark_ptr is in use) and the control head local, reloads
        STACK_TOP from arguments->stack->base and loads the local at
        common->start_ptr into TMP1. Requires length > 1 and a non-zero
        common->control_head_ptr (both asserted). */
2001    DEFINE_COMPILER;
2002    struct sljit_label *loop;
2003    int i;
2004    
2005    SLJIT_ASSERT(length > 1);
2006    /* OVECTOR(1) contains the "string begin - 1" constant. */
2007    if (length > 2)
2008      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2009    if (length < 8)
2010      {
        /* Short vectors: one store per entry, no run-time loop. */
2011      for (i = 2; i < length; i++)
2012        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2013      }
2014    else
2015      {
        /* Run-time loop of length - 2 iterations. STACK_TOP is borrowed as the
        counter; it is reloaded after the loop.
        NOTE(review): SLJIT_MOVU looks like a store that also advances TMP2 by
        the given offset, so the first store would land on OVECTOR(2) - confirm
        against the sljit documentation. */
2016      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2017      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2018      loop = LABEL();
2019      OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2020      OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2021      JUMPTO(SLJIT_C_NOT_ZERO, loop);
2022      }
2023    
        /* STACK_TOP is used as a scratch pointer below: first ARGUMENTS, then
        arguments->stack, finally the base field of that sljit_stack. */
2024    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2025    if (common->mark_ptr != 0)
2026      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2027    SLJIT_ASSERT(common->control_head_ptr != 0);
2028    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2029    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2030    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2031    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2032    }
2033    
2034    static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)
2035    {
        /* Walks a linked chain of control entries starting at "current" (must
        not be NULL) and decides which one takes effect.

        Layout read by this function, relative to an entry pointer:
          current[-1]  pointer to the next entry, NULL terminates the chain
          current[-2]  entry type (type_commit, type_prune, type_skip,
                       type_skip_arg or type_mark)
          current[-3]  type_skip: the value to return;
                       type_skip_arg / type_mark: pointer to a pcre_uchar string
          current[-4]  type_mark only: the value to return on a name match
        NOTE(review): the returned values are presumably subject positions -
        confirm against the code that builds these entries.

        Returns -1 as soon as a type_commit entry is met, -2 when a
        type_skip_arg entry was recorded but no type_mark entry with an equal
        string followed it in the chain, otherwise the recorded value (0 when
        nothing was recorded, e.g. only type_prune entries were seen). */
2036    sljit_sw return_value = 0;
2037    const pcre_uchar *skip_arg = NULL;
2038    
2039    SLJIT_ASSERT(current != NULL);
2040    do
2041      {
2042      switch (current[-2])
2043        {
2044        case type_commit:
2045        /* Commit overwrites all. */
2046        return -1;
2047    
2048        case type_prune:
2049        break;
2050    
2051        case type_skip:
2052        /* Overwrites prune, but not other skips. */
2053        if (return_value == 0 && skip_arg == NULL)
2054          return_value = current[-3];
2055        break;
2056    
2057        case type_skip_arg:
        /* Only the first skip (with or without argument) is remembered. */
2058        if (return_value == 0 && skip_arg == NULL)
2059          skip_arg = (pcre_uchar *)current[-3];
2060        break;
2061    
2062        case type_mark:
        /* A pending skip argument is resolved by the first mark entry whose
        string compares equal to it. */
2063        if (return_value == 0 && skip_arg != NULL)
2064          if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2065            return_value = current[-4];
2066        break;
2067    
2068        default:
2069        SLJIT_ASSERT_STOP();
2070        break;
2071        }
2072      current = (sljit_sw*)current[-1];
2073      }
2074    while (current != NULL);
2075    return (return_value != 0 || skip_arg == NULL) ? return_value : -2;
2076    }
2077    
2078  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2079  {  {
        /* Generates the code that copies the internal offset vector into the
        caller's output buffer (the offsets field of the arguments structure)
        and leaves the match return value in SLJIT_RETURN_REG. Every copied
        entry is the stored value minus the begin field of the arguments
        structure, written as an int. In the newer revision the mark local is
        additionally copied to the mark_ptr field of the arguments structure
        when common->mark_ptr is in use, and the difference is shifted right by
        UCHAR_SHIFT in 16 and 32 bit builds. */
2080  DEFINE_COMPILER;  DEFINE_COMPILER;
2081  struct sljit_label *loop;  struct sljit_label *loop;
2082  struct sljit_jump *earlyexit;  struct sljit_jump *early_quit;
2083    
2084  /* At this point we can freely use all registers. */  /* At this point we can freely use all registers. */
        /* The third saved register keeps the value OVECTOR(1) had on entry;
        OVECTOR(1) itself is then overwritten with STR_PTR. */
2085  OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2086  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2087    
2088  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2089  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));  if (common->mark_ptr != 0)
2090  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2091  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2092  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);  if (common->mark_ptr != 0)
2093      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2094    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2095    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2096    GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2097  /* Unlikely, but possible */  /* Unlikely, but possible */
        /* The copy loop is skipped entirely when the offset count is zero. */
2098  earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);  early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2099  loop = LABEL();  loop = LABEL();
2100  OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2101  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));  OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2102  /* Copy the integer value to the output buffer */  /* Copy the integer value to the output buffer */
2103  OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2104  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);  OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2105    #endif
2106    OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2107    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2108  JUMPTO(SLJIT_C_NOT_ZERO, loop);  JUMPTO(SLJIT_C_NOT_ZERO, loop);
2109  JUMPHERE(earlyexit);  JUMPHERE(early_quit);
2110    
2111  /* Calculate the return value, which is the maximum ovector value. */  /* Calculate the return value, which is the maximum ovector value. */
2112  if (topbracket > 1)  if (topbracket > 1)
2113    {    {
        /* The counter starts at topbracket + 1 and is decremented once per
        loaded entry; the loop repeats while the loaded entry equals the value
        saved from OVECTOR(1) on entry.
        NOTE(review): the load appears to step the pointer back by two words
        per iteration (SLJIT_MOVU with a negative offset) - confirm against the
        sljit documentation. */
2114    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2115    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2116    
2117    /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */    /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2118    loop = LABEL();    loop = LABEL();
2119    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));    OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2120    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2121    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);    CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2122    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2123    }    }
2124  else  else
2125    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2126  }  }
2127    
2128  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, uschar* cc)  static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2129    {
        /* Generates the exit sequence for a partial match. The return register
        is set to PCRE_ERROR_PARTIAL and control always ends up at "quit".
        Depending on the real_offset_count field of the arguments structure:
          less than 2:  nothing is written to the offsets buffer;
          2 or more:    offsets[0] and offsets[1] are written;
          3 or more:    offsets[2] is written as well.
        All stored values are differences from the begin field of the
        arguments structure, shifted right by UCHAR_SHIFT in 16 and 32 bit
        builds. In hard partial mode the locals at common->start_used_ptr and
        common->start_ptr are the sources; otherwise common->hit_start and the
        word following it are used. */
2130    DEFINE_COMPILER;
2131    struct sljit_jump *jump;
2132    
2133    SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2134    SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2135      && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2136    
2137    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2138    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2139    OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2140    CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2141    
2142    /* Store match begin and end. */
2143    OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2144    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2145    
        /* offsets[2] is only written when at least three ints are available. */
2146    jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2147    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2148    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2149    OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2150    #endif
2151    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2152    JUMPHERE(jump);
2153    
        /* offsets[1] = STR_END - begin. The result is computed into the second
        saved register, which the compile-time assert above identifies as
        STR_END, so STR_END no longer holds its original value afterwards. */
2154    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2155    OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2156    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2157    OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2158    #endif
2159    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2160    
        /* offsets[0] = (start_used_ptr or hit_start local) - begin. */
2161    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2162    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2163    OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2164    #endif
2165    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2166    
2167    JUMPTO(SLJIT_JUMP, quit);
2168    }
2169    
2170    static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2171    {
2172    /* Emits code that overwrites the start_used_ptr local with STR_PTR when STR_PTR is below it. Nothing is emitted unless the mode is JIT_PARTIAL_SOFT_COMPILE or JIT_PARTIAL_HARD_COMPILE. May destroy TMP1. */
2173    DEFINE_COMPILER;
2174    struct sljit_jump *jump;
2175    
2176    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2177      {
2178      /* The value of -1 must be kept for start_used_ptr! */
2179      OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);  /* TMP1 = start_used_ptr + 1, so a stored -1 becomes 0. */
2180      /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2181      is not necessary if start_used_ptr == STR_PTR, it does not hurt either. */
2182      jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2183      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);  /* Reached only when the jump above is not taken. */
2184      JUMPHERE(jump);
2185      }
2186    else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2187      {
2188      jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);  /* Keep the stored value when start_used_ptr <= STR_PTR; no +1 adjustment in this mode. */
2189      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2190      JUMPHERE(jump);
2191      }
2192    }
2193    
2194    static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2195  {  {
2196  /* Detects if the character has an othercase. */  /* Detects if the character has an othercase. */
2197  unsigned int c;  unsigned int c;
2198    
2199  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2200  if (common->utf8)  if (common->utf)
2201    {    {
2202    GETCHAR(c, cc);    GETCHAR(c, cc);
2203    if (c > 127)    if (c > 127)
# Line 1251  if (common->utf8) Line 2208  if (common->utf8)
2208      return FALSE;      return FALSE;
2209  #endif  #endif
2210      }      }
2211    #ifndef COMPILE_PCRE8
2212      return common->fcc[c] != c;
2213    #endif
2214    }    }
2215  else  else
2216  #endif  #endif
2217    c = *cc;    c = *cc;
2218  return common->fcc[c] != c;  return MAX_255(c) ? common->fcc[c] != c : FALSE;
2219  }  }
2220    
2221  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2222  {  {
2223  /* Returns with the othercase. */  /* Returns with the othercase. */
2224  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2225  if (common->utf8 && c > 127)  if (common->utf && c > 127)
2226    {    {
2227  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2228    return UCD_OTHERCASE(c);    return UCD_OTHERCASE(c);
# Line 1271  if (common->utf8 && c > 127) Line 2231  if (common->utf8 && c > 127)
2231  #endif  #endif
2232    }    }
2233  #endif  #endif
2234  return common->fcc[c];  return TABLE_GET(c, common->fcc, c);
2235  }  }
2236    
2237  static unsigned int char_get_othercase_bit(compiler_common *common, uschar* cc)  static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2238  {  {
2239  /* Detects if the character and its othercase have only 1 bit difference. */  /* Detects if the character and its othercase have only 1 bit difference. */
2240  unsigned int c, oc, bit;  unsigned int c, oc, bit;
2241  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2242  int n;  int n;
2243  #endif  #endif
2244    
2245  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2246  if (common->utf8)  if (common->utf)
2247    {    {
2248    GETCHAR(c, cc);    GETCHAR(c, cc);
2249    if (c <= 127)    if (c <= 127)
# Line 1300  if (common->utf8) Line 2260  if (common->utf8)
2260  else  else
2261    {    {
2262    c = *cc;    c = *cc;
2263    oc = common->fcc[c];    oc = TABLE_GET(c, common->fcc, c);
2264    }    }
2265  #else  #else
2266  c = *cc;  c = *cc;
2267  oc = common->fcc[c];  oc = TABLE_GET(c, common->fcc, c);
2268  #endif  #endif
2269    
2270  SLJIT_ASSERT(c != oc);  SLJIT_ASSERT(c != oc);
# Line 1315  if (c <= 127 && bit == 0x20) Line 2275  if (c <= 127 && bit == 0x20)
2275    return (0 << 8) | 0x20;    return (0 << 8) | 0x20;
2276    
2277  /* Since c != oc, they must have at least 1 bit difference. */  /* Since c != oc, they must have at least 1 bit difference. */
2278  if (!ispowerof2(bit))  if (!is_powerof2(bit))
2279    return 0;    return 0;
2280    
2281  #ifdef SUPPORT_UTF8  #if defined COMPILE_PCRE8
2282  if (common->utf8 && c > 127)  
2283    #ifdef SUPPORT_UTF
2284    if (common->utf && c > 127)
2285    {    {
2286    n = _pcre_utf8_table4[*cc & 0x3f];    n = GET_EXTRALEN(*cc);
2287    while ((bit & 0x3f) == 0)    while ((bit & 0x3f) == 0)
2288      {      {
2289      n--;      n--;
# Line 1329  if (common->utf8 && c > 127) Line 2291  if (common->utf8 && c > 127)
2291      }      }
2292    return (n << 8) | bit;    return (n << 8) | bit;
2293    }    }
2294  #endif  #endif /* SUPPORT_UTF */
2295  return (0 << 8) | bit;  return (0 << 8) | bit;
2296    
2297    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2298    
2299    #ifdef SUPPORT_UTF
2300    if (common->utf && c > 65535)
2301      {
2302      if (bit >= (1 << 10))
2303        bit >>= 10;
2304      else
2305        return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2306      }
2307    #endif /* SUPPORT_UTF */
2308    return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2309    
2310    #endif /* COMPILE_PCRE[8|16|32] */
2311    }
2312    
2313    static void check_partial(compiler_common *common, BOOL force)
2314    {
2315    /* Checks whether a partial match has occurred. Does not modify registers. With force set, the comparison of start_used_ptr against STR_PTR is not emitted. */
2316    DEFINE_COMPILER;
2317    struct sljit_jump *jump = NULL;
2318    
2319    SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);  /* force is only valid in the partial matching modes. */
2320    
2321    if (common->mode == JIT_COMPILE)
2322      return;  /* No code is emitted in JIT_COMPILE mode. */
2323    
2324    if (!force)
2325      jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);  /* Skip the action below when start_used_ptr >= STR_PTR. */
2326    else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2327      jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);  /* Forced, soft mode: skip only when start_used_ptr holds -1. */
2328    
2329    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2330      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);  /* Soft mode: store 0 into the hit_start local and continue. */
2331    else
2332      {
2333      /* Other partial mode: jump to the partial match exit, directly when its label already exists, otherwise by adding the jump to common->partialmatch. */
2334      if (common->partialmatchlabel != NULL)
2335        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2336      else
2337        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2338      }
2339    
2340    if (jump != NULL)
2341      JUMPHERE(jump);  /* Landing point of the skip emitted above, if any. */
2342    }
2342    
2343    static void check_str_end(compiler_common *common, jump_list **end_reached)
2344    {
2345    /* Emits a check of STR_PTR against STR_END; jumps taken at the end of the subject are added to end_reached. Does not affect registers. Usually used in a tight spot. */
2346    DEFINE_COMPILER;
2347    struct sljit_jump *jump;
2348    
2349    if (common->mode == JIT_COMPILE)
2350      {
2351      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));  /* JIT_COMPILE mode: a single compare-and-jump, nothing else. */
2352      return;
2353      }
2354    
2355    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);  /* Everything below is skipped while STR_PTR < STR_END. */
2356    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2357      {
2358      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));  /* start_used_ptr >= STR_PTR: go to end_reached without touching hit_start. */
2359      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);  /* Otherwise store 0 into hit_start first ... */
2360      add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));  /* ... and then go to end_reached unconditionally. */
2361      }
2362    else
2363      {
2364      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));  /* Same test as in the soft branch. */
2365      /* Otherwise jump to the partial match exit: directly if its label exists, else via the common->partialmatch jump list. */
2366      if (common->partialmatchlabel != NULL)
2367        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2368      else
2369        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2370      }
2371    JUMPHERE(jump);
2371  }  }
2372    
2373  static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)  static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2374  {  {
2375  DEFINE_COMPILER;  DEFINE_COMPILER;
2376  add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));  struct sljit_jump *jump;
2377    /* New revision: emits the end-of-subject check; jumps taken at the end of the subject are added to backtracks, or go to the partial match exit. */
2378    if (common->mode == JIT_COMPILE)
2379      {
2380      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));  /* JIT_COMPILE mode: same single compare-and-jump as the old revision. */
2381      return;
2382      }
2383    
2384    /* Partial matching mode. */
2385    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);  /* Everything below is skipped while STR_PTR < STR_END. */
2386    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));  /* start_used_ptr >= STR_PTR: plain backtrack. */
2387    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2388      {
2389      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);  /* Soft mode: store 0 into hit_start, then backtrack. */
2390      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2391      }
2392    else
2393      {
2394      /* Other partial mode: jump to the partial match exit, directly if its label exists, else via common->partialmatch. */
2395      if (common->partialmatchlabel != NULL)
2396        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2397      else
2398        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2399      }
2399    JUMPHERE(jump);
2400  }  }
2401    
2402  static void read_char(compiler_common *common)  static void read_char(compiler_common *common)
# Line 1344  static void read_char(compiler_common *c Line 2404  static void read_char(compiler_common *c
2404  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, updates STR_PTR.
2405  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2406  DEFINE_COMPILER;  DEFINE_COMPILER;
2407  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2408  struct sljit_jump *jump;  struct sljit_jump *jump;
2409  #endif  #endif
2410    
2411  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2412  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2413  if (common->utf8)  if (common->utf)
2414    {    {
2415    #if defined COMPILE_PCRE8
2416    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2417    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #elif defined COMPILE_PCRE16
2418      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2419    #endif /* COMPILE_PCRE[8|16] */
2420      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2421    JUMPHERE(jump);    JUMPHERE(jump);
2422    }    }
2423  #endif  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2424  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2425  }  }
2426    
2427  static void peek_char(compiler_common *common)  static void peek_char(compiler_common *common)
# Line 1365  static void peek_char(compiler_common *c Line 2429  static void peek_char(compiler_common *c
2429  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2430  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2431  DEFINE_COMPILER;  DEFINE_COMPILER;
2432  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2433  struct sljit_jump *jump;  struct sljit_jump *jump;
2434  #endif  #endif
2435    
2436  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2437  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2438  if (common->utf8)  if (common->utf)
2439    {    {
2440    #if defined COMPILE_PCRE8
2441    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2442    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #elif defined COMPILE_PCRE16
2443      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2444    #endif /* COMPILE_PCRE[8|16] */
2445      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2446    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2447    JUMPHERE(jump);    JUMPHERE(jump);
2448    }    }
2449  #endif  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2450  }  }
2451    
2452  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common)
2453  {  {
2454  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2455  DEFINE_COMPILER;  DEFINE_COMPILER;
2456  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2457  struct sljit_jump *jump;  struct sljit_jump *jump;
2458  #endif  #endif
2459    
2460  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2461  if (common->utf8)  if (common->utf)
2462    {    {
2463    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2464    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2465    #if defined COMPILE_PCRE8
2466    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2467    it is a clever early read in most cases. */    it is needed in most cases. */
2468    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2469    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);  /* Units below 0xc0 keep the type just loaded; others call the utfreadtype8 helper. */
2470    add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2471      JUMPHERE(jump);
2472    #elif defined COMPILE_PCRE16
2473      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  /* Type defaults to 0 for units above 255. */
2474      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2475      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  /* Table lookup, reached only for units <= 255. */
2476      JUMPHERE(jump);
2477      /* Skip low surrogate if necessary. */
2478      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2479      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);  /* Equal when the unit read was in 0xd800..0xdbff. */
2480      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2481      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2482      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  /* Presumably adds 0 or 2 (assuming OP_FLAGS yields 0/1) - TODO confirm against sljit. */
2483    #elif defined COMPILE_PCRE32
2484      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  /* Type defaults to 0 for units above 255. */
2485      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2486      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2487    JUMPHERE(jump);    JUMPHERE(jump);
2488    #endif /* COMPILE_PCRE[8|16|32] */
2489    return;    return;
2490    }    }
2491    #endif /* SUPPORT_UTF */
2492    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  /* Path used when common->utf is not set: TMP2 = current code unit. */
2493    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2494    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2495    /* The ctypes array contains only 256 values. */
2496    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2497    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2498    #endif
2499    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2500    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2501    JUMPHERE(jump);
2502  #endif  #endif
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);  
2503  }  }
2504    
2505  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
2506  {  {
2507  /* Goes one character back. Only affects STR_PTR. Does not check begin. */  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2508  DEFINE_COMPILER;  DEFINE_COMPILER;
2509  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2510    #if defined COMPILE_PCRE8
2511  struct sljit_label *label;  struct sljit_label *label;
2512    
2513  if (common->utf8)  if (common->utf)
2514    {    {
2515    label = LABEL();    label = LABEL();
2516    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2517    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2518    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2519    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);  /* Repeat while the byte just stepped over has the form 10xxxxxx (UTF-8 continuation byte). */
2520    return;    return;
2521    }    }
2522  #endif  #elif defined COMPILE_PCRE16
2523  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  if (common->utf)
2524      {
2525      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));  /* TMP1 = the code unit before STR_PTR. */
2526      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2527      /* Skip low surrogate if necessary. */
2528      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2529      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);  /* Equal when that unit was in 0xdc00..0xdfff. */
2530      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2531      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2532      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  /* Presumably steps back a further 0 or 2 bytes (assuming OP_FLAGS yields 0/1) - TODO confirm against sljit. */
2533      return;
2534      }
2535    #endif /* COMPILE_PCRE[8|16] */
2536    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2537    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  /* Reached when none of the UTF branches above returned: step back exactly one code unit. */
2538  }  }
2539    
2540  static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2541  {  {
2542  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2543  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 1436  DEFINE_COMPILER; Line 2545  DEFINE_COMPILER;
2545  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2546    {    {
2547    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2548    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2549    }    }
2550  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2551    {    {
2552    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2553    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2554    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2555    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2556    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2557    }    }
2558  else  else
2559    {    {
2560    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2561    add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2562    }    }
2563  }  }
2564    
2565  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2566  static void do_utf8readchar(compiler_common *common)  
2567    #if defined COMPILE_PCRE8
2568    static void do_utfreadchar(compiler_common *common)
2569  {  {
2570  /* Fast decoding an utf8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2571  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2572  DEFINE_COMPILER;  DEFINE_COMPILER;
2573  struct sljit_jump *jump;  struct sljit_jump *jump;
2574    
2575  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2576  /* Searching for the first zero. */  /* Searching for the first zero. */
2577  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2578  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);  /* Bit 0x20 set: not a two byte sequence, try the next length. */
2579  /* 2 byte sequence */  /* Two byte sequence. */
2580  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2581  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2582  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2583  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2584  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2585  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  /* TMP1 = ((first & 0x1f) << 6) | (second & 0x3f). */
2586  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  /* TMP2 = length - 1. */
2587  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2588  JUMPHERE(jump);  JUMPHERE(jump);
2589    
2590  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2591  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);  /* Bit 0x10 set: not a three byte sequence. */
2592  /* 3 byte sequence */  /* Three byte sequence. */
2593  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2594  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2595  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2596  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2597  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2598  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2599  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2600  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2601  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2602  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2603  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  /* TMP2 = length - 1. */
2604  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2605  JUMPHERE(jump);  JUMPHERE(jump);
2606    
2607  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);  /* Four byte sequence. */  /* New revision: no further length test; every remaining first byte is decoded as a four byte sequence. */
2608  jump = JUMP(SLJIT_C_NOT_ZERO);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
 /* 4 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
2609  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2610  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2611  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2612  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2613  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2614  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
  
 /* 5 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
2615  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2616  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2617  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2618  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2619  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2620  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2621  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2622  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* TMP2 = length - 1. */
2623  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2624  }  }
2625    
2626  static void do_utf8readtype8(compiler_common *common)  static void do_utfreadtype8(compiler_common *common)
2627  {  {
2628  /* Fast decoding an utf8 character type. TMP2 contains the first byte  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2629  of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */  of the character (>= 0xc0). Return value in TMP1. */
2630  DEFINE_COMPILER;  DEFINE_COMPILER;
2631  struct sljit_jump *jump;  struct sljit_jump *jump;
2632  struct sljit_jump *compare;  struct sljit_jump *compare;
2633    
2634  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2635    
2636  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2637  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2638  /* 2 byte sequence */  /* Two byte sequence. */
2639  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2640  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2641  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2642  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2643  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
# Line 1566  sljit_emit_fast_return(compiler, RETURN_ Line 2652  sljit_emit_fast_return(compiler, RETURN_
2652  JUMPHERE(jump);  JUMPHERE(jump);
2653    
2654  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
2655  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_utf8_char_sizes - 0xc0);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2657  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2658  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2659  }  }
2660    
2661  #endif  #elif defined COMPILE_PCRE16
2662    
2663    static void do_utfreadchar(compiler_common *common)
2664    {
2665    /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2666    of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2.
2666    NOTE(review): the early-return path below (TMP1 >= 0xdc00) does not write TMP2 - confirm that callers which read TMP2 afterwards tolerate this. */
2667    DEFINE_COMPILER;
2668    struct sljit_jump *jump;
2669    
2670    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2671    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  /* Values below 0xdc00 (0xd800..0xdbff given the entry condition) go to the combining path. */
2672    /* Do nothing, only return. */
2673    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2674    
2675    JUMPHERE(jump);
2676    /* Combine two 16 bit characters. */
2677    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  /* TMP2 = the code unit following the one in TMP1. */
2678    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2679    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2680    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2681    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2682    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  /* TMP1 = ((first & 0x3ff) << 10) | (second & 0x3ff). */
2683    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  /* TMP2 = length - 1. */
2684    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  /* Add the 0x10000 offset of the supplementary planes. */
2685    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2686    }
2687    
2688    #endif /* COMPILE_PCRE[8|16] */
2689    
2690    #endif /* SUPPORT_UTF */
2691    
2692  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2693    
# Line 1588  DEFINE_COMPILER; Line 2703  DEFINE_COMPILER;
2703    
2704  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2705    
2706  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2707  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2708  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_ucd_stage1);  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2709  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2710  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2711  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2712  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_stage2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2713  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2714  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, chartype));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2715  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2716  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2717  }  }
# Line 1610  struct sljit_label *newlinelabel = NULL; Line 2725  struct sljit_label *newlinelabel = NULL;
2725  struct sljit_jump *start;  struct sljit_jump *start;
2726  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
2727  struct sljit_jump *nl = NULL;  struct sljit_jump *nl = NULL;
2728  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2729  struct sljit_jump *singlebyte;  struct sljit_jump *singlechar;
2730  #endif  #endif
2731  jump_list *newline = NULL;  jump_list *newline = NULL;
2732  BOOL newlinecheck = FALSE;  BOOL newlinecheck = FALSE;
2733  BOOL readbyte = FALSE;  BOOL readuchar = FALSE;
2734    
2735  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2736      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
# Line 1624  if (!(hascrorlf || firstline) && (common Line 2739  if (!(hascrorlf || firstline) && (common
2739  if (firstline)  if (firstline)
2740    {    {
2741    /* Search for the end of the first line. */    /* Search for the end of the first line. */
2742    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2743    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2744    
2745    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2746      {      {
2747      mainloop = LABEL();      mainloop = LABEL();
2748      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2749      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2750      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2751      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2752      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2753      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2754      OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1);      JUMPHERE(end);
2755        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2756      }      }
2757    else    else
2758      {      {
2759      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2760      mainloop = LABEL();      mainloop = LABEL();
2761      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2762      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2763      read_char(common);      read_char(common);
2764      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2765      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2766      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      JUMPHERE(end);
2767        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2768      set_jumps(newline, LABEL());      set_jumps(newline, LABEL());
2769      }      }
2770    
2771    JUMPHERE(end);    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  
2772    }    }
2773    
2774  start = JUMP(SLJIT_JUMP);  start = JUMP(SLJIT_JUMP);
# Line 1660  start = JUMP(SLJIT_JUMP); Line 2776  start = JUMP(SLJIT_JUMP);
2776  if (newlinecheck)  if (newlinecheck)
2777    {    {
2778    newlinelabel = LABEL();    newlinelabel = LABEL();
2779    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2780    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2781    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2782    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2783    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2784    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2785      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2786    #endif
2787    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2788    nl = JUMP(SLJIT_JUMP);    nl = JUMP(SLJIT_JUMP);
2789    }    }
# Line 1672  if (newlinecheck) Line 2791  if (newlinecheck)
2791  mainloop = LABEL();  mainloop = LABEL();
2792    
2793  /* Increasing the STR_PTR here requires one less jump in the most common case. */  /* Increasing the STR_PTR here requires one less jump in the most common case. */
2794  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2795  if (common->utf8) readbyte = TRUE;  if (common->utf) readuchar = TRUE;
2796  #endif  #endif
2797  if (newlinecheck) readbyte = TRUE;  if (newlinecheck) readuchar = TRUE;
2798    
2799  if (readbyte)  if (readuchar)
2800    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2801    
2802  if (newlinecheck)  if (newlinecheck)
2803    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2804    
2805  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2807  if (common->utf8)  #if defined COMPILE_PCRE8
2808    if (common->utf)
2809    {    {
2810    singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2811    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2812    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2813    JUMPHERE(singlebyte);    JUMPHERE(singlechar);
2814    }    }
2815  #endif  #elif defined COMPILE_PCRE16
2816    if (common->utf)
2817      {
2818      singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2819      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2820      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2821      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2822      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2823      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2824      JUMPHERE(singlechar);
2825      }
2826    #endif /* COMPILE_PCRE[8|16] */
2827    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2828  JUMPHERE(start);  JUMPHERE(start);
2829    
2830  if (newlinecheck)  if (newlinecheck)
# Line 1704  if (newlinecheck) Line 2836  if (newlinecheck)
2836  return mainloop;  return mainloop;
2837  }  }
2838    
2839  static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)  #define MAX_N_CHARS 3
2840    
2841    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2842    {
2843    DEFINE_COMPILER;
2844    struct sljit_label *start;
2845    struct sljit_jump *quit;
2846    pcre_uint32 chars[MAX_N_CHARS * 2];
2847    pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2848    int location = 0;
2849    pcre_int32 len, c, bit, caseless;
2850    int must_stop;
2851    
2852    /* We do not support alternatives now. */
2853    if (*(common->start + GET(common->start, 1)) == OP_ALT)
2854      return FALSE;
2855    
2856    while (TRUE)
2857      {
2858      caseless = 0;
2859      must_stop = 1;
2860      switch(*cc)
2861        {
2862        case OP_CHAR:
2863        must_stop = 0;
2864        cc++;
2865        break;
2866    
2867        case OP_CHARI:
2868        caseless = 1;
2869        must_stop = 0;
2870        cc++;
2871        break;
2872    
2873        case OP_SOD:
2874        case OP_SOM:
2875        case OP_SET_SOM:
2876        case OP_NOT_WORD_BOUNDARY:
2877        case OP_WORD_BOUNDARY:
2878        case OP_EODN:
2879        case OP_EOD:
2880        case OP_CIRC:
2881        case OP_CIRCM:
2882        case OP_DOLL:
2883        case OP_DOLLM:
2884        /* Zero width assertions. */
2885        cc++;
2886        continue;
2887    
2888        case OP_PLUS:
2889        case OP_MINPLUS:
2890        case OP_POSPLUS:
2891        cc++;
2892        break;
2893    
2894        case OP_EXACT:
2895        cc += 1 + IMM2_SIZE;
2896        break;
2897    
2898        case OP_PLUSI:
2899        case OP_MINPLUSI:
2900        case OP_POSPLUSI:
2901        caseless = 1;
2902        cc++;
2903        break;
2904    
2905        case OP_EXACTI:
2906        caseless = 1;
2907        cc += 1 + IMM2_SIZE;
2908        break;
2909    
2910        default:
2911        must_stop = 2;
2912        break;
2913        }
2914    
2915      if (must_stop == 2)
2916          break;
2917    
2918      len = 1;
2919    #ifdef SUPPORT_UTF
2920      if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2921    #endif
2922    
2923      if (caseless && char_has_othercase(common, cc))
2924        {
2925        caseless = char_get_othercase_bit(common, cc);
2926        if (caseless == 0)
2927          return FALSE;
2928    #ifdef COMPILE_PCRE8
2929        caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2930    #else
2931        if ((caseless & 0x100) != 0)
2932          caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2933        else
2934          caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2935    #endif
2936        }
2937      else
2938        caseless = 0;
2939    
2940      while (len > 0 && location < MAX_N_CHARS * 2)
2941        {
2942        c = *cc;
2943        bit = 0;
2944        if (len == (caseless & 0xff))
2945          {
2946          bit = caseless >> 8;
2947          c |= bit;
2948          }
2949    
2950        chars[location] = c;
2951        chars[location + 1] = bit;
2952    
2953        len--;
2954        location += 2;
2955        cc++;
2956        }
2957    
2958      if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2959        break;
2960      }
2961    
2962    /* At least two characters are required. */
2963    if (location < 2 * 2)
2964        return FALSE;
2965    
2966    if (firstline)
2967      {
2968      SLJIT_ASSERT(common->first_line_end != 0);
2969      OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2970      OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2971      }
2972    else
2973      OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2974    
2975    start = LABEL();
2976    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2977    
2978    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2980    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2981    if (chars[1] != 0)
2982      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2983    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2984    if (location > 2 * 2)
2985      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2986    if (chars[3] != 0)
2987      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2988    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2989    if (location > 2 * 2)
2990      {
2991      if (chars[5] != 0)
2992        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2993      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2994      }
2995    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2996    
2997    JUMPHERE(quit);
2998    
2999    if (firstline)
3000      OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3001    else
3002      OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3003    return TRUE;
3004    }
3005    
3006    #undef MAX_N_CHARS
3007    
3008    static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3009  {  {
3010  DEFINE_COMPILER;  DEFINE_COMPILER;
3011  struct sljit_label *start;  struct sljit_label *start;
3012  struct sljit_jump *leave;  struct sljit_jump *quit;
3013  struct sljit_jump *found;  struct sljit_jump *found;
3014  pcre_uint16 oc, bit;  pcre_uchar oc, bit;
3015    
3016  if (firstline)  if (firstline)
3017    {    {
3018    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
3019    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3020      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3021    }    }
3022    
3023  start = LABEL();  start = LABEL();
3024  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3025  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3026    
3027  if ((firstbyte & REQ_CASELESS) == 0)  oc = first_char;
3028    found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);  if (caseless)
3029      {
3030      oc = TABLE_GET(first_char, common->fcc, first_char);
3031    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3032      if (first_char > 127 && common->utf)
3033        oc = UCD_OTHERCASE(first_char);
3034    #endif
3035      }
3036    if (first_char == oc)
3037      found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3038  else  else
3039    {    {
3040    firstbyte &= 0xff;    bit = first_char ^ oc;
3041    oc = common->fcc[firstbyte];    if (is_powerof2(bit))
   bit = firstbyte ^ oc;  
   if (ispowerof2(bit))  
3042      {      {
3043      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3044      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3045      }      }
3046    else    else
3047      {      {
3048      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3049      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3050      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3051      COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3052      found = JUMP(SLJIT_C_NOT_ZERO);      found = JUMP(SLJIT_C_NOT_ZERO);
3053      }      }
3054    }    }
3055    
3056  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 #ifdef SUPPORT_UTF8  
 if (common->utf8)  
   {  
   CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
   }  
 #endif  
3057  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
3058  JUMPHERE(found);  JUMPHERE(found);
3059  JUMPHERE(leave);  JUMPHERE(quit);
3060    
3061  if (firstline)  if (firstline)
3062    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3063  }  }
3064    
3065  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
# Line 1767  DEFINE_COMPILER; Line 3068  DEFINE_COMPILER;
3068  struct sljit_label *loop;  struct sljit_label *loop;
3069  struct sljit_jump *lastchar;  struct sljit_jump *lastchar;
3070  struct sljit_jump *firstchar;  struct sljit_jump *firstchar;
3071  struct sljit_jump *leave;  struct sljit_jump *quit;
3072  struct sljit_jump *foundcr = NULL;  struct sljit_jump *foundcr = NULL;
3073  struct sljit_jump *notfoundnl;  struct sljit_jump *notfoundnl;
3074  jump_list *newline = NULL;  jump_list *newline = NULL;
3075    
3076  if (firstline)  if (firstline)
3077    {    {
3078    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
3079    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3080      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3081    }    }
3082    
3083  if (common->nltype == NLTYPE_FIXED && common->newline > 255)  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
# Line 1786  if (common->nltype == NLTYPE_FIXED && co Line 3088  if (common->nltype == NLTYPE_FIXED && co
3088    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3089    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3090    
3091    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3092    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3093    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3094    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3095      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3096    #endif
3097    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3098    
3099    loop = LABEL();    loop = LABEL();
3100    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3101    leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3102    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3103    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3104    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3105    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3106    
3107    JUMPHERE(leave);    JUMPHERE(quit);
3108    JUMPHERE(firstchar);    JUMPHERE(firstchar);
3109    JUMPHERE(lastchar);    JUMPHERE(lastchar);
3110    
# Line 1823  set_jumps(newline, loop); Line 3128  set_jumps(newline, loop);
3128    
3129  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3130    {    {
3131    leave = JUMP(SLJIT_JUMP);    quit = JUMP(SLJIT_JUMP);
3132    JUMPHERE(foundcr);    JUMPHERE(foundcr);
3133    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3134    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3135    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3136    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3137    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3138      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3139    #endif
3140    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3141    JUMPHERE(notfoundnl);    JUMPHERE(notfoundnl);
3142    JUMPHERE(leave);    JUMPHERE(quit);
3143    }    }
3144  JUMPHERE(lastchar);  JUMPHERE(lastchar);
3145  JUMPHERE(firstchar);  JUMPHERE(firstchar);
3146    
3147  if (firstline)  if (firstline)
3148    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3149  }  }
3150    
3151    static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3152    
3153  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3154  {  {
3155  DEFINE_COMPILER;  DEFINE_COMPILER;
3156  struct sljit_label *start;  struct sljit_label *start;
3157  struct sljit_jump *leave;  struct sljit_jump *quit;
3158  struct sljit_jump *found;  struct sljit_jump *found = NULL;
3159    jump_list *matches = NULL;
3160    pcre_uint8 inverted_start_bits[32];
3161    int i;
3162    #ifndef COMPILE_PCRE8
3163    struct sljit_jump *jump;
3164    #endif
3165    
3166    for (i = 0; i < 32; ++i)
3167      inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3168    
3169  if (firstline)  if (firstline)
3170    {    {
3171    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
3172    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3173      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3174    }    }
3175    
3176  start = LABEL();  start = LABEL();
3177  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3178  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3179  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3180  if (common->utf8)  if (common->utf)
3181    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3182  #endif  #endif
 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);  
 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
 found = JUMP(SLJIT_C_NOT_ZERO);  
3183    
3184  #ifdef SUPPORT_UTF8  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3185  if (common->utf8)    {
3186    #ifndef COMPILE_PCRE8
3187      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3188      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3189      JUMPHERE(jump);
3190    #endif
3191      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3192      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3193      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3194      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3195      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3196      found = JUMP(SLJIT_C_NOT_ZERO);
3197      }
3198    
3199    #ifdef SUPPORT_UTF
3200    if (common->utf)
3201    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3202  #endif  #endif
3203  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3204  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3205  if (common->utf8)  #if defined COMPILE_PCRE8
3206    if (common->utf)
3207    {    {
3208    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3209    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3210    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3211    }    }
3212  #endif  #elif defined COMPILE_PCRE16
3213    if (common->utf)
3214      {
3215      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3216      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3217      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3218      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3219      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3220      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3221      }
3222    #endif /* COMPILE_PCRE[8|16] */
3223    #endif /* SUPPORT_UTF */
3224  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
3225  JUMPHERE(found);  if (found != NULL)
3226  JUMPHERE(leave);    JUMPHERE(found);
3227    if (matches != NULL)
3228      set_jumps(matches, LABEL());
3229    JUMPHERE(quit);
3230    
3231  if (firstline)  if (firstline)
3232    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3233  }  }
3234    
3235  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3236  {  {
3237  DEFINE_COMPILER;  DEFINE_COMPILER;
3238  struct sljit_label *loop;  struct sljit_label *loop;
# Line 1897  struct sljit_jump *alreadyfound; Line 3241  struct sljit_jump *alreadyfound;
3241  struct sljit_jump *found;  struct sljit_jump *found;
3242  struct sljit_jump *foundoc = NULL;  struct sljit_jump *foundoc = NULL;
3243  struct sljit_jump *notfound;  struct sljit_jump *notfound;
3244  pcre_uint16 oc, bit;  pcre_uint32 oc, bit;
3245    
3246  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);  SLJIT_ASSERT(common->req_char_ptr != 0);
3247    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3248  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3249  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3250  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3251    
3252  if (has_firstbyte)  if (has_firstchar)
3253    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3254  else  else
3255    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3256    
3257  loop = LABEL();  loop = LABEL();
3258  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3259    
3260  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3261  if ((reqbyte & REQ_CASELESS) == 0)  oc = req_char;
3262    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);  if (caseless)
3263      {
3264      oc = TABLE_GET(req_char, common->fcc, req_char);
3265    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3266      if (req_char > 127 && common->utf)
3267        oc = UCD_OTHERCASE(req_char);
3268    #endif
3269      }
3270    if (req_char == oc)
3271      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3272  else  else
3273    {    {
3274    reqbyte &= 0xff;    bit = req_char ^ oc;
3275    oc = common->fcc[reqbyte];    if (is_powerof2(bit))
   bit = reqbyte ^ oc;  
   if (ispowerof2(bit))  
3276      {      {
3277      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3278      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3279      }      }
3280    else    else
3281      {      {
3282      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3283      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3284      }      }
3285    }    }
3286  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3287  JUMPTO(SLJIT_JUMP, loop);  JUMPTO(SLJIT_JUMP, loop);
3288    
3289  JUMPHERE(found);  JUMPHERE(found);
3290  if (foundoc)  if (foundoc)
3291    JUMPHERE(foundoc);    JUMPHERE(foundoc);
3292  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3293  JUMPHERE(alreadyfound);  JUMPHERE(alreadyfound);
3294  JUMPHERE(toolong);  JUMPHERE(toolong);
3295  return notfound;  return notfound;
# Line 1949  DEFINE_COMPILER; Line 3301  DEFINE_COMPILER;
3301  struct sljit_jump *jump;  struct sljit_jump *jump;
3302  struct sljit_label *mainloop;  struct sljit_label *mainloop;
3303    
3304  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3305  OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);  OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3306    GET_LOCAL_BASE(TMP3, 0, 0);
3307    
3308  /* Drop frames until we reach STACK_TOP. */  /* Drop frames until we reach STACK_TOP. */
3309  mainloop = LABEL();  mainloop = LABEL();
3310  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3311  jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3312  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);  jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3313  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  
3314  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3315  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3316    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3317    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3318  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
3319    
3320  JUMPHERE(jump);  JUMPHERE(jump);
3321  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  jump = JUMP(SLJIT_C_SIG_LESS);
3322  /* End of dropping frames. */  /* End of dropping frames. */
3323  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3324    
3325  JUMPHERE(jump);  JUMPHERE(jump);
3326  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);  OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3327  /* Set string begin. */  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3328  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3329  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);  
 JUMPTO(SLJIT_JUMP, mainloop);  
   
 JUMPHERE(jump);  
 /* Unknown command. */  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));  
3330  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
3331  }  }
3332    
/* Emits a fast-call helper (entered with sljit_emit_fast_enter and left with
   sljit_emit_fast_return; the return address is parked in LOCALS0).
   The emitted code computes a 0/1 "word character" value for the character
   before STR_PTR (kept in LOCALS1) and for the character at STR_PTR (kept in
   TMP2), then XORs the two with SLJIT_SET_E so the caller can branch on the
   resulting flags. Each value stays 0 when the corresponding character is not
   examined (STR_PTR <= jit_arguments.begin, or the end-of-subject check
   jumps). */
3333  static void check_wordboundary(compiler_common *common)  static void check_wordboundary(compiler_common *common)
3334  {  {
3335  DEFINE_COMPILER;  DEFINE_COMPILER;
3336  struct sljit_jump *beginend;  struct sljit_jump *skipread;
3337  #ifdef SUPPORT_UTF8  jump_list *skipread_list = NULL;
3338    #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3339  struct sljit_jump *jump;  struct sljit_jump *jump;
3340  #endif  #endif
3341    
/* The ctypes lookups below isolate ctype_word with "shift right by 4, and 1",
   which is only valid while ctype_word is 0x10. */
3342  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3343    
3344  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3345  /* Get type of the previous char, and put it to LOCALS1. */  /* Get type of the previous char, and put it to LOCALS1. */
3346  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3347  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3348  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character when STR_PTR <= begin: jump over the read, leaving
   LOCALS1 at the 0 stored just above. */
3349  beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);  skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* NOTE(review): the code that follows treats TMP1 as the character value, so
   read_char() presumably leaves the character it read in TMP1 - confirm. */
3350  skip_char_back(common);  skip_char_back(common);
3351    check_start_used_ptr(common);
3352  read_char(common);  read_char(common);
3353    
3354  /* Testing char type. */  /* Testing char type. */
3355  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3356  if (common->useucp)  if (common->use_ucp)
3357    {    {
/* TMP2 starts as 1 and is kept for an underscore (the jump skips the rest).
   Otherwise the getucd helper is called and TMP2 becomes 1 when the value it
   leaves in TMP1 lies in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No (each tested as
   an unsigned "value - low <= high - low" compare).
   NOTE(review): presumably getucd returns the character type - confirm. */
3358    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3359    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3360    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3361    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3362    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3363    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3364    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3365    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3366    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3367    JUMPHERE(jump);    JUMPHERE(jump);
3368    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3369    }    }
3370  else  else
3371  #endif  #endif
3372    {    {
/* Table path: characters above 255 skip the lookup (LOCALS1 keeps its 0);
   otherwise the byte at common->ctypes + character is loaded and its
   ctype_word bit is stored in LOCALS1. */
3373  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3374    /* Here LOCALS1 has already been zeroed. */    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3375    jump = NULL;  #elif defined SUPPORT_UTF
3376    if (common->utf8)    /* Here LOCALS1 has already been zeroed. */
3377      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);    jump = NULL;
3378  #endif    if (common->utf)
3379    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3380    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);  #endif /* COMPILE_PCRE8 */
3381    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3382    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3383  #ifdef SUPPORT_UTF8    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3384    if (jump != NULL)    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3385      JUMPHERE(jump);  #ifndef COMPILE_PCRE8
3386  #endif    JUMPHERE(jump);
3387    #elif defined SUPPORT_UTF
3388      if (jump != NULL)
3389        JUMPHERE(jump);
3390    #endif /* COMPILE_PCRE8 */
3391      }
3392    JUMPHERE(skipread);
3393    
/* Second half: the same classification for the character at STR_PTR, with the
   result kept in TMP2. TMP2 is preset to 0; jumps collected in skipread_list
   by check_str_end() land after the classification, so TMP2 is still 0 there. */
3394    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3395    check_str_end(common, &skipread_list);
3396    peek_char(common);
3397    
3398    /* Testing char type. This is a code duplication. */
3399    #ifdef SUPPORT_UCP
3400    if (common->use_ucp)
3401      {
3402      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3403      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3404      add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3405      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3406      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3407      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3408      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3409      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3410      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3411      JUMPHERE(jump);
3412      }
3413    else
3414    #endif
3415      {
3416    #ifndef COMPILE_PCRE8
3417      /* TMP2 may be destroyed by peek_char. */
3418      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3419      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3420    #elif defined SUPPORT_UTF
3421      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3422      jump = NULL;
3423      if (common->utf)
3424        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3425    #endif
3426      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3427      OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3428      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3429    #ifndef COMPILE_PCRE8
3430      JUMPHERE(jump);
3431    #elif defined SUPPORT_UTF
3432      if (jump != NULL)
3433        JUMPHERE(jump);
3434    #endif /* COMPILE_PCRE8 */
3435      }
3436    set_jumps(skipread_list, LABEL());
3437    
/* XOR of the two 0/1 values: the result is zero (equal flag set) when both
   sides classify the same way, non-zero when they differ. Only the flags are
   kept (destination is SLJIT_UNUSED). */
3438    OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3439    sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3440    }
3441    
3442    /*
3443      range format:
3444    
3445      ranges[0] = number of bit-change positions stored (at most MAX_RANGE_SIZE; -1 means the range list is invalid).
3446      ranges[1] = value of the first bit (0 or 1), i.e. the bit of character 0
3447      ranges[2 .. ranges[0] + 1] = positions where the bit changes (the bit at that position differs from the previous one)
3448    */
3449    
3450    static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3451    {
3452    DEFINE_COMPILER;
3453    struct sljit_jump *jump;
3454    
3455    if (ranges[0] < 0)
3456      return FALSE;
3457    
3458    switch(ranges[0])
3459      {
3460      case 1:
3461      if (readch)
3462        read_char(common);
3463      add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3464      return TRUE;
3465    
3466      case 2:
3467      if (readch)
3468        read_char(common);
3469      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3470      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3471      return TRUE;
3472    
3473      case 4:
3474      if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3475        {
3476        if (readch)
3477          read_char(common);
3478        if (ranges[1] != 0)
3479          {
3480          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3481          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3482          }
3483        else
3484          {
3485          jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3486          add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3487          JUMPHERE(jump);
3488          }
3489        return TRUE;
3490        }
3491      if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3492        {
3493        if (readch)
3494          read_char(common);
3495        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3496        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3497        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3498        return TRUE;
3499        }
3500      return FALSE;
3501    
3502      default:
3503      return FALSE;
3504      }
3505    }
3506    
3507    static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3508    {
3509    int i, bit, length;
3510    const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3511    
3512    bit = ctypes[0] & flag;
3513    ranges[0] = -1;
3514    ranges[1] = bit != 0 ? 1 : 0;
3515    length = 0;
3516    
3517    for (i = 1; i < 256; i++)
3518      if ((ctypes[i] & flag) != bit)
3519        {
3520        if (length >= MAX_RANGE_SIZE)
3521          return;
3522        ranges[2 + length] = i;
3523        length++;
3524        bit ^= flag;
3525        }
3526    
3527    if (bit != 0)
3528      {
3529      if (length >= MAX_RANGE_SIZE)
3530        return;
3531      ranges[2 + length] = 256;
3532      length++;
3533    }    }
3534  JUMPHERE(beginend);  ranges[0] = length;
3535    }
3536    
3537  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3538  beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  {
3539  peek_char(common);  int ranges[2 + MAX_RANGE_SIZE];
3540    pcre_uint8 bit, cbit, all;
3541    int i, byte, length = 0;
3542    
3543  /* Testing char type. This is a code duplication. */  bit = bits[0] & 0x1;
3544  #ifdef SUPPORT_UCP  ranges[1] = bit;
3545  if (common->useucp)  /* Can be 0 or 255. */
3546    all = -bit;
3547    
3548    for (i = 0; i < 256; )
3549    {    {
3550    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    byte = i >> 3;