/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 792 by ph10, Wed Dec 7 16:44:48 2011 UTC revision 1272 by zherczeg, Thu Mar 7 11:30:01 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2013
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 46  POSSIBILITY OF SUCH DAMAGE.
46    
47  #include "pcre_internal.h"  #include "pcre_internal.h"
48    
49  #ifdef SUPPORT_JIT  #if defined SUPPORT_JIT
50    
51  /* All-in-one: Since we use the JIT compiler only from here,  /* All-in-one: Since we use the JIT compiler only from here,
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55  #define SLJIT_MALLOC(size) (pcre_malloc)(size)  #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56  #define SLJIT_FREE(ptr) (pcre_free)(ptr)  #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58  #define SLJIT_CONFIG_STATIC 1  #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
# Line 62  system files. */ Line 62  system files. */
62  #include "sljit/sljitLir.c"  #include "sljit/sljitLir.c"
63    
64  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65  #error "Unsupported architecture"  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Defines for debugging purposes. */
 #define LOCAL_SPACE_SIZE 32768  
69    
70    /* 1 - Use unoptimized capturing brackets.
71       2 - Enable capture_last_ptr (includes option 1). */
72    /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74    /* Allocate memory for the regex stack on the real machine stack.
75    Fast, but limited size. */
76    #define MACHINE_STACK_SIZE 32768
77    
78    /* Growth rate for stack allocated by the OS. Should be a multiple
79    of the page size. */
80  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
81    
82  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 82  The code generator follows the recursive Line 91  The code generator follows the recursive
91  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
92  which execute different commands depending on the result of the condition check.  which execute different commands depending on the result of the condition check.
93  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
94  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
95    
96    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
97    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
98    
99  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
100  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
101  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
102  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
103  branches on the hot path.  branches on the matching path.
104    
105   Greedy star operator (*) :   Greedy star operator (*) :
106     Hot path: match happens.     Matching path: match happens.
107     Fallback path: match failed.     Backtrack path: match failed.
108   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
109     Hot path: no need to perform a match.     Matching path: no need to perform a match.
110     Fallback path: match is required.     Backtrack path: match is required.
111    
112  The following example shows how the code is generated for a capturing bracket  The following example shows how the code is generated for a capturing bracket
113  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 108  we have the following regular expression Line 117  we have the following regular expression
117    
118  The generated code will be the following:  The generated code will be the following:
119    
120   A hot path   A matching path
121   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
122   B hot path   B matching path
123   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
124   D hot path   D matching path
125   return with successful match   return with successful match
126    
127   D fallback path   D backtrack path
128   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129   B fallback path   B backtrack path
130   C expected path   C expected path
131   jump to D hot path   jump to D matching path
132   C fallback path   C backtrack path
133   A fallback path   A backtrack path
134    
135   Notice that the order of fallback code paths is the opposite of the fast   Notice that the order of backtrack code paths is the opposite of the fast
136   code paths. In this way the topmost value on the stack always belongs   code paths. In this way the topmost value on the stack always belongs
137   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
138   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
139   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
140   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
141  */  */
142    
143  /*  /*
144  Saved stack frames:  Saved stack frames:
145    
146  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
147  when the fallback mechanism is performed. Because of OP_RECURSE, the locals  when the backtrack mechanism is performed. Because of OP_RECURSE, the data
148  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
149  mechanism.  mechanism.
150    
151  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
152  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153    
154  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
155  */  */
156    
157  typedef struct jit_arguments {  typedef struct jit_arguments {
158    /* Pointers first. */    /* Pointers first. */
159    struct sljit_stack *stack;    struct sljit_stack *stack;
160    PCRE_SPTR str;    const pcre_uchar *str;
161    PCRE_SPTR begin;    const pcre_uchar *begin;
162    PCRE_SPTR end;    const pcre_uchar *end;
163    int *offsets;    int *offsets;
164    uschar *ptr;    pcre_uchar *uchar_ptr;
165      pcre_uchar *mark_ptr;
166      void *callout_data;
167    /* Everything else after. */    /* Everything else after. */
168    int offsetcount;    int real_offset_count;
169    int calllimit;    int offset_count;
170    uschar notbol;    int call_limit;
171    uschar noteol;    pcre_uint8 notbol;
172    uschar notempty;    pcre_uint8 noteol;
173    uschar notempty_atstart;    pcre_uint8 notempty;
174      pcre_uint8 notempty_atstart;
175  } jit_arguments;  } jit_arguments;
176    
177  typedef struct executable_function {  typedef struct executable_functions {
178    void *executable_func;    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
179    pcre_jit_callback callback;    PUBL(jit_callback) callback;
180    void *userdata;    void *userdata;
181    sljit_uw executable_size;    pcre_uint32 top_bracket;
182  } executable_function;    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183    } executable_functions;
184    
185  typedef struct jump_list {  typedef struct jump_list {
186    struct sljit_jump *jump;    struct sljit_jump *jump;
187    struct jump_list *next;    struct jump_list *next;
188  } jump_list;  } jump_list;
189    
 enum stub_types { stack_alloc };  
   
190  typedef struct stub_list {  typedef struct stub_list {
   enum stub_types type;  
   int data;  
191    struct sljit_jump *start;    struct sljit_jump *start;
192    struct sljit_label *leave;    struct sljit_label *quit;
193    struct stub_list *next;    struct stub_list *next;
194  } stub_list;  } stub_list;
195    
196    enum frame_types { no_frame = -1, no_stack = -2 };
197    
198  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
199    
200  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
201  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
202  the arguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
203  of its descendants. */  of its descendants. */
204  typedef struct fallback_common {  typedef struct backtrack_common {
205    /* Concatenation stack. */    /* Concatenation stack. */
206    struct fallback_common *prev;    struct backtrack_common *prev;
207    jump_list *nextfallbacks;    jump_list *nextbacktracks;
208    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
209    struct fallback_common *top;    struct backtrack_common *top;
210    jump_list *topfallbacks;    jump_list *topbacktracks;
211    /* Opcode pointer. */    /* Opcode pointer. */
212    uschar *cc;    pcre_uchar *cc;
213  } fallback_common;  } backtrack_common;
214    
215  typedef struct assert_fallback {  typedef struct assert_backtrack {
216    fallback_common common;    backtrack_common common;
217    jump_list *condfailed;    jump_list *condfailed;
218    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 (-1) if a frame is not needed. */
219    int framesize;    int framesize;
220    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
221    int localptr;    int private_data_ptr;
222    /* For iterators. */    /* For iterators. */
223    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
224  } assert_fallback;  } assert_backtrack;
225    
226  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
227    fallback_common common;    backtrack_common common;
228    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
229    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
230    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
231    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
232    /* For greedy ? operator. */    /* For greedy ? operator. */
233    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
234    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
235    union {    union {
236      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
237      jump_list *condfailed;      jump_list *condfailed;
238      assert_fallback *assert;      assert_backtrack *assert;
239      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. -1 if not needed. */
240      int framesize;      int framesize;
241    } u;    } u;
242    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
243    int localptr;    int private_data_ptr;
244  } bracket_fallback;  } bracket_backtrack;
245    
246  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
247    fallback_common common;    backtrack_common common;
248    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
249    int localptr;    int private_data_ptr;
250    /* Reverting stack is needed. */    /* Reverting stack is needed. */
251    int framesize;    int framesize;
252    /* Allocated stack size. */    /* Allocated stack size. */
253    int stacksize;    int stacksize;
254  } bracketpos_fallback;  } bracketpos_backtrack;
255    
256  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
257    fallback_common common;    backtrack_common common;
258    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
259  } braminzero_fallback;  } braminzero_backtrack;
260    
261  typedef struct iterator_fallback {  typedef struct iterator_backtrack {
262    fallback_common common;    backtrack_common common;
263    /* Next iteration. */    /* Next iteration. */
264    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
265  } iterator_fallback;  } iterator_backtrack;
266    
267  typedef struct recurse_entry {  typedef struct recurse_entry {
268    struct recurse_entry *next;    struct recurse_entry *next;
# Line 263  typedef struct recurse_entry { Line 274  typedef struct recurse_entry {
274    int start;    int start;
275  } recurse_entry;  } recurse_entry;
276    
277  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
278    fallback_common common;    backtrack_common common;
279  } recurse_fallback;    BOOL inlined_pattern;
280    } recurse_backtrack;
281    
282    #define MAX_RANGE_SIZE 6
283    
284  typedef struct compiler_common {  typedef struct compiler_common {
285      /* The sljit generic compiler. */
286    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
287    uschar *start;    /* First byte code. */
288    int localsize;    pcre_uchar *start;
289    int *localptrs;    /* Maps private data offset to each opcode. */
290    const uschar *fcc;    int *private_data_ptrs;
291    sljit_w lcc;    /* Tells whether the capturing bracket is optimized. */
292    int cbraptr;    pcre_uint8 *optimized_cbracket;
293      /* Starting offset of private data for capturing brackets. */
294      int cbra_ptr;
295      /* Output vector starting point. Must be divisible by 2. */
296      int ovector_start;
297      /* Last known position of the requested byte. */
298      int req_char_ptr;
299      /* Head of the last recursion. */
300      int recursive_head_ptr;
301      /* First inspected character for partial matching. */
302      int start_used_ptr;
303      /* Starting pointer for partial soft matches. */
304      int hit_start;
305      /* End pointer of the first line. */
306      int first_line_end;
307      /* Points to the marked string. */
308      int mark_ptr;
309      /* Points to the last matched capture block index. */
310      int capture_last_ptr;
311      /* Points to the starting position of the current match. */
312      int start_ptr;
313    
314      /* Flipped and lower case tables. */
315      const pcre_uint8 *fcc;
316      sljit_sw lcc;
317      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
318      int mode;
319      /* \K is in the pattern. */
320      BOOL has_set_som;
321      /* Needs to know the start position anytime. */
322      BOOL needs_start_ptr;
323      /* Currently in compile_recurse. */
324      BOOL in_recurse;
325      /* Newline control. */
326    int nltype;    int nltype;
327    int newline;    int newline;
328    int bsr_nltype;    int bsr_nltype;
329      /* Dollar endonly. */
330    int endonly;    int endonly;
331    sljit_w ctypes;    /* Tables. */
332      sljit_sw ctypes;
333      int digits[2 + MAX_RANGE_SIZE];
334      /* Named capturing brackets. */
335    sljit_uw name_table;    sljit_uw name_table;
336    sljit_w name_count;    sljit_sw name_count;
337    sljit_w name_entry_size;    sljit_sw name_entry_size;
338    struct sljit_label *acceptlabel;  
339      /* Labels and jump lists. */
340      struct sljit_label *partialmatchlabel;
341      struct sljit_label *quit_label;
342      struct sljit_label *forced_quit_label;
343      struct sljit_label *accept_label;
344    stub_list *stubs;    stub_list *stubs;
345    recurse_entry *entries;    recurse_entry *entries;
346    recurse_entry *currententry;    recurse_entry *currententry;
347      jump_list *partialmatch;
348      jump_list *quit;
349      jump_list *forced_quit;
350    jump_list *accept;    jump_list *accept;
351    jump_list *calllimit;    jump_list *calllimit;
352    jump_list *stackalloc;    jump_list *stackalloc;
# Line 297  typedef struct compiler_common { Line 357  typedef struct compiler_common {
357    jump_list *vspace;    jump_list *vspace;
358    jump_list *casefulcmp;    jump_list *casefulcmp;
359    jump_list *caselesscmp;    jump_list *caselesscmp;
360      jump_list *reset_match;
361    BOOL jscript_compat;    BOOL jscript_compat;
362  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
363    BOOL utf8;    BOOL utf;
364  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
365    BOOL useucp;    BOOL use_ucp;
366    #endif
367    #ifndef COMPILE_PCRE32
368      jump_list *utfreadchar;
369  #endif  #endif
370    jump_list *utf8readchar;  #ifdef COMPILE_PCRE8
371    jump_list *utf8readtype8;    jump_list *utfreadtype8;
372  #endif  #endif
373    #endif /* SUPPORT_UTF */
374  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
375    jump_list *getucd;    jump_list *getucd;
376  #endif  #endif
# Line 317  typedef struct compare_context { Line 382  typedef struct compare_context {
382    int length;    int length;
383    int sourcereg;    int sourcereg;
384  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
385    int byteptr;    int ucharptr;
386    union {    union {
387      int asint;      sljit_si asint;
388      short asshort;      sljit_uh asushort;
389    #if defined COMPILE_PCRE8
390      sljit_ub asbyte;      sljit_ub asbyte;
391      sljit_ub asbytes[4];      sljit_ub asuchars[4];
392    #elif defined COMPILE_PCRE16
393        sljit_uh asuchars[2];
394    #elif defined COMPILE_PCRE32
395        sljit_ui asuchars[1];
396    #endif
397    } c;    } c;
398    union {    union {
399      int asint;      sljit_si asint;
400      short asshort;      sljit_uh asushort;
401    #if defined COMPILE_PCRE8
402      sljit_ub asbyte;      sljit_ub asbyte;
403      sljit_ub asbytes[4];      sljit_ub asuchars[4];
404    #elif defined COMPILE_PCRE16
405        sljit_uh asuchars[2];
406    #elif defined COMPILE_PCRE32
407        sljit_ui asuchars[1];
408    #endif
409    } oc;    } oc;
410  #endif  #endif
411  } compare_context;  } compare_context;
412    
413  enum {  /* Undefine sljit macros. */
414    frame_end = 0,  #undef CMP
   frame_setstrbegin = -1  
 };  
415    
416  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
417  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
418    
419  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_SCRATCH_REG1
420  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_SCRATCH_REG3
421  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_TEMPORARY_EREG2
422  #define STR_PTR       SLJIT_GENERAL_REG1  #define STR_PTR       SLJIT_SAVED_REG1
423  #define STR_END       SLJIT_GENERAL_REG2  #define STR_END       SLJIT_SAVED_REG2
424  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
425  #define STACK_LIMIT   SLJIT_GENERAL_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
426  #define ARGUMENTS     SLJIT_GENERAL_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
427  #define CALL_COUNT    SLJIT_GENERAL_EREG2  #define CALL_COUNT    SLJIT_SAVED_EREG2
428  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
429    
430  /* Locals layout. */  /* Local space layout. */
431  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
432  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_sw))
433  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_sw))
434  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
435  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
436  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
 /* Head of the last recursion. */  
 #define RECURSIVE_HEAD   (4 * sizeof(sljit_w))  
437  /* Max limit of recursions. */  /* Max limit of recursions. */
438  #define CALL_LIMIT       (5 * sizeof(sljit_w))  #define CALL_LIMIT       (4 * sizeof(sljit_sw))
 /* Last known position of the requested byte. */  
 #define REQ_BYTE_PTR     (6 * sizeof(sljit_w))  
 /* End pointer of the first line. */  
 #define FIRSTLINE_END    (7 * sizeof(sljit_w))  
439  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
440  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
441  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
442  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
443  #define OVECTOR_START    (8 * sizeof(sljit_w))  #define OVECTOR_START    (common->ovector_start)
444  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))
445  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * sizeof(sljit_sw))
446  #define PRIV(cc)         (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
447    
448    #if defined COMPILE_PCRE8
449    #define MOV_UCHAR  SLJIT_MOV_UB
450    #define MOVU_UCHAR SLJIT_MOVU_UB
451    #elif defined COMPILE_PCRE16
452    #define MOV_UCHAR  SLJIT_MOV_UH
453    #define MOVU_UCHAR SLJIT_MOVU_UH
454    #elif defined COMPILE_PCRE32
455    #define MOV_UCHAR  SLJIT_MOV_UI
456    #define MOVU_UCHAR SLJIT_MOVU_UI
457    #else
458    #error Unsupported compiling mode
459    #endif
460    
461  /* Shortcuts. */  /* Shortcuts. */
462  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 391  the start pointers when the end of the c Line 473  the start pointers when the end of the c
473    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))    sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
474  #define JUMPHERE(jump) \  #define JUMPHERE(jump) \
475    sljit_set_label((jump), sljit_emit_label(compiler))    sljit_set_label((jump), sljit_emit_label(compiler))
476    #define SET_LABEL(jump, label) \
477      sljit_set_label((jump), (label))
478  #define CMP(type, src1, src1w, src2, src2w) \  #define CMP(type, src1, src1w, src2, src2w) \
479    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))    sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
480  #define CMPTO(type, src1, src1w, src2, src2w, label) \  #define CMPTO(type, src1, src1w, src2, src2w, label) \
481    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
482  #define COND_VALUE(op, dst, dstw, type) \  #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
483    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
484    #define GET_LOCAL_BASE(dst, dstw, offset) \
485      sljit_get_local_base(compiler, (dst), (dstw), (offset))
486    
487  static uschar* bracketend(uschar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
488  {  {
489  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
490  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 409  return cc; Line 495  return cc;
495    
496  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
497   next_opcode   next_opcode
498   get_localspace   get_private_data_length
499   set_localptrs   set_private_data_ptrs
500   get_framesize   get_framesize
501   init_frame   init_frame
502   get_localsize   get_private_data_length_for_copy
503   copy_locals   copy_private_data
504   compile_hotpath   compile_matchingpath
505   compile_fallbackpath   compile_backtrackingpath
506  */  */
507    
508  static uschar *next_opcode(compiler_common *common, uschar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
509  {  {
510  SLJIT_UNUSED_ARG(common);  SLJIT_UNUSED_ARG(common);
511  switch(*cc)  switch(*cc)
# Line 437  switch(*cc) Line 523  switch(*cc)
523    case OP_WORDCHAR:    case OP_WORDCHAR:
524    case OP_ANY:    case OP_ANY:
525    case OP_ALLANY:    case OP_ALLANY:
526      case OP_NOTPROP:
527      case OP_PROP:
528    case OP_ANYNL:    case OP_ANYNL:
529    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
530    case OP_HSPACE:    case OP_HSPACE:
# Line 449  switch(*cc) Line 537  switch(*cc)
537    case OP_CIRCM:    case OP_CIRCM:
538    case OP_DOLL:    case OP_DOLL:
539    case OP_DOLLM:    case OP_DOLLM:
   case OP_TYPESTAR:  
   case OP_TYPEMINSTAR:  
   case OP_TYPEPLUS:  
   case OP_TYPEMINPLUS:  
   case OP_TYPEQUERY:  
   case OP_TYPEMINQUERY:  
   case OP_TYPEPOSSTAR:  
   case OP_TYPEPOSPLUS:  
   case OP_TYPEPOSQUERY:  
540    case OP_CRSTAR:    case OP_CRSTAR:
541    case OP_CRMINSTAR:    case OP_CRMINSTAR:
542    case OP_CRPLUS:    case OP_CRPLUS:
543    case OP_CRMINPLUS:    case OP_CRMINPLUS:
544    case OP_CRQUERY:    case OP_CRQUERY:
545    case OP_CRMINQUERY:    case OP_CRMINQUERY:
546      case OP_CRRANGE:
547      case OP_CRMINRANGE:
548      case OP_CLASS:
549      case OP_NCLASS:
550      case OP_REF:
551      case OP_REFI:
552      case OP_RECURSE:
553      case OP_CALLOUT:
554      case OP_ALT:
555      case OP_KET:
556      case OP_KETRMAX:
557      case OP_KETRMIN:
558      case OP_KETRPOS:
559      case OP_REVERSE:
560      case OP_ASSERT:
561      case OP_ASSERT_NOT:
562      case OP_ASSERTBACK:
563      case OP_ASSERTBACK_NOT:
564      case OP_ONCE:
565      case OP_ONCE_NC:
566      case OP_BRA:
567      case OP_BRAPOS:
568      case OP_CBRA:
569      case OP_CBRAPOS:
570      case OP_COND:
571      case OP_SBRA:
572      case OP_SBRAPOS:
573      case OP_SCBRA:
574      case OP_SCBRAPOS:
575      case OP_SCOND:
576      case OP_CREF:
577      case OP_NCREF:
578      case OP_RREF:
579      case OP_NRREF:
580    case OP_DEF:    case OP_DEF:
581    case OP_BRAZERO:    case OP_BRAZERO:
582    case OP_BRAMINZERO:    case OP_BRAMINZERO:
583    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
584      case OP_PRUNE:
585      case OP_COMMIT:
586    case OP_FAIL:    case OP_FAIL:
587    case OP_ACCEPT:    case OP_ACCEPT:
588    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
589      case OP_CLOSE:
590    case OP_SKIPZERO:    case OP_SKIPZERO:
591    return cc + 1;    return cc + PRIV(OP_lengths)[*cc];
   
   case OP_ANYBYTE:  
 #ifdef SUPPORT_UTF8  
   if (common->utf8) return NULL;  
 #endif  
   return cc + 1;  
592    
593    case OP_CHAR:    case OP_CHAR:
594    case OP_CHARI:    case OP_CHARI:
595    case OP_NOT:    case OP_NOT:
596    case OP_NOTI:    case OP_NOTI:
   
597    case OP_STAR:    case OP_STAR:
598    case OP_MINSTAR:    case OP_MINSTAR:
599    case OP_PLUS:    case OP_PLUS:
600    case OP_MINPLUS:    case OP_MINPLUS:
601    case OP_QUERY:    case OP_QUERY:
602    case OP_MINQUERY:    case OP_MINQUERY:
603      case OP_UPTO:
604      case OP_MINUPTO:
605      case OP_EXACT:
606    case OP_POSSTAR:    case OP_POSSTAR:
607    case OP_POSPLUS:    case OP_POSPLUS:
608    case OP_POSQUERY:    case OP_POSQUERY:
609      case OP_POSUPTO:
610    case OP_STARI:    case OP_STARI:
611    case OP_MINSTARI:    case OP_MINSTARI:
612    case OP_PLUSI:    case OP_PLUSI:
613    case OP_MINPLUSI:    case OP_MINPLUSI:
614    case OP_QUERYI:    case OP_QUERYI:
615    case OP_MINQUERYI:    case OP_MINQUERYI:
616      case OP_UPTOI:
617      case OP_MINUPTOI:
618      case OP_EXACTI:
619    case OP_POSSTARI:    case OP_POSSTARI:
620    case OP_POSPLUSI:    case OP_POSPLUSI:
621    case OP_POSQUERYI:    case OP_POSQUERYI:
622      case OP_POSUPTOI:
623    case OP_NOTSTAR:    case OP_NOTSTAR:
624    case OP_NOTMINSTAR:    case OP_NOTMINSTAR:
625    case OP_NOTPLUS:    case OP_NOTPLUS:
626    case OP_NOTMINPLUS:    case OP_NOTMINPLUS:
627    case OP_NOTQUERY:    case OP_NOTQUERY:
628    case OP_NOTMINQUERY:    case OP_NOTMINQUERY:
629      case OP_NOTUPTO:
630      case OP_NOTMINUPTO:
631      case OP_NOTEXACT:
632    case OP_NOTPOSSTAR:    case OP_NOTPOSSTAR:
633    case OP_NOTPOSPLUS:    case OP_NOTPOSPLUS:
634    case OP_NOTPOSQUERY:    case OP_NOTPOSQUERY:
635      case OP_NOTPOSUPTO:
636    case OP_NOTSTARI:    case OP_NOTSTARI:
637    case OP_NOTMINSTARI:    case OP_NOTMINSTARI:
638    case OP_NOTPLUSI:    case OP_NOTPLUSI:
639    case OP_NOTMINPLUSI:    case OP_NOTMINPLUSI:
640    case OP_NOTQUERYI:    case OP_NOTQUERYI:
641    case OP_NOTMINQUERYI:    case OP_NOTMINQUERYI:
   case OP_NOTPOSSTARI:  
   case OP_NOTPOSPLUSI:  
   case OP_NOTPOSQUERYI:  
   cc += 2;  
 #ifdef SUPPORT_UTF8  
   if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];  
 #endif  
   return cc;  
   
   case OP_UPTO:  
   case OP_MINUPTO:  
   case OP_EXACT:  
   case OP_POSUPTO:  
   case OP_UPTOI:  
   case OP_MINUPTOI:  
   case OP_EXACTI:  
   case OP_POSUPTOI:  
   case OP_NOTUPTO:  
   case OP_NOTMINUPTO:  
   case OP_NOTEXACT:  
   case OP_NOTPOSUPTO:  
642    case OP_NOTUPTOI:    case OP_NOTUPTOI:
643    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
644    case OP_NOTEXACTI:    case OP_NOTEXACTI:
645      case OP_NOTPOSSTARI:
646      case OP_NOTPOSPLUSI:
647      case OP_NOTPOSQUERYI:
648    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
649    cc += 4;    cc += PRIV(OP_lengths)[*cc];
650  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
651    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
652  #endif  #endif
653    return cc;    return cc;
654    
655    case OP_NOTPROP:    /* Special cases. */
656    case OP_PROP:    case OP_TYPESTAR:
657      case OP_TYPEMINSTAR:
658      case OP_TYPEPLUS:
659      case OP_TYPEMINPLUS:
660      case OP_TYPEQUERY:
661      case OP_TYPEMINQUERY:
662    case OP_TYPEUPTO:    case OP_TYPEUPTO:
663    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
664    case OP_TYPEEXACT:    case OP_TYPEEXACT:
665      case OP_TYPEPOSSTAR:
666      case OP_TYPEPOSPLUS:
667      case OP_TYPEPOSQUERY:
668    case OP_TYPEPOSUPTO:    case OP_TYPEPOSUPTO:
669    case OP_REF:    return cc + PRIV(OP_lengths)[*cc] - 1;
   case OP_REFI:  
   case OP_CREF:  
   case OP_NCREF:  
   case OP_RREF:  
   case OP_NRREF:  
   case OP_CLOSE:  
   cc += 3;  
   return cc;  
670    
671    case OP_CRRANGE:    case OP_ANYBYTE:
672    case OP_CRMINRANGE:  #ifdef SUPPORT_UTF
673    return cc + 5;    if (common->utf) return NULL;
674    #endif
675    case OP_CLASS:    return cc + 1;
   case OP_NCLASS:  
   return cc + 33;  
676    
677  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
678    case OP_XCLASS:    case OP_XCLASS:
679    return cc + GET(cc, 1);    return cc + GET(cc, 1);
680  #endif  #endif
681    
682    case OP_RECURSE:    case OP_MARK:
683    case OP_ASSERT:    case OP_PRUNE_ARG:
684    case OP_ASSERT_NOT:    return cc + 1 + 2 + cc[1];
   case OP_ASSERTBACK:  
   case OP_ASSERTBACK_NOT:  
   case OP_REVERSE:  
   case OP_ONCE:  
   case OP_ONCE_NC:  
   case OP_BRA:  
   case OP_BRAPOS:  
   case OP_COND:  
   case OP_SBRA:  
   case OP_SBRAPOS:  
   case OP_SCOND:  
   case OP_ALT:  
   case OP_KET:  
   case OP_KETRMAX:  
   case OP_KETRMIN:  
   case OP_KETRPOS:  
   return cc + 1 + LINK_SIZE;  
   
   case OP_CBRA:  
   case OP_CBRAPOS:  
   case OP_SCBRA:  
   case OP_SCBRAPOS:  
   return cc + 1 + LINK_SIZE + 2;  
685    
686    default:    default:
687    return NULL;    return NULL;
688    }    }
689  }  }
690    
691  static int get_localspace(compiler_common *common, uschar *cc, uschar *ccend)  #define CASE_ITERATOR_PRIVATE_DATA_1 \
692        case OP_MINSTAR: \
693        case OP_MINPLUS: \
694        case OP_QUERY: \
695        case OP_MINQUERY: \
696        case OP_MINSTARI: \
697        case OP_MINPLUSI: \
698        case OP_QUERYI: \
699        case OP_MINQUERYI: \
700        case OP_NOTMINSTAR: \
701        case OP_NOTMINPLUS: \
702        case OP_NOTQUERY: \
703        case OP_NOTMINQUERY: \
704        case OP_NOTMINSTARI: \
705        case OP_NOTMINPLUSI: \
706        case OP_NOTQUERYI: \
707        case OP_NOTMINQUERYI:
708    
709    #define CASE_ITERATOR_PRIVATE_DATA_2A \
710        case OP_STAR: \
711        case OP_PLUS: \
712        case OP_STARI: \
713        case OP_PLUSI: \
714        case OP_NOTSTAR: \
715        case OP_NOTPLUS: \
716        case OP_NOTSTARI: \
717        case OP_NOTPLUSI:
718    
719    #define CASE_ITERATOR_PRIVATE_DATA_2B \
720        case OP_UPTO: \
721        case OP_MINUPTO: \
722        case OP_UPTOI: \
723        case OP_MINUPTOI: \
724        case OP_NOTUPTO: \
725        case OP_NOTMINUPTO: \
726        case OP_NOTUPTOI: \
727        case OP_NOTMINUPTOI:
728    
729    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
730        case OP_TYPEMINSTAR: \
731        case OP_TYPEMINPLUS: \
732        case OP_TYPEQUERY: \
733        case OP_TYPEMINQUERY:
734    
735    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
736        case OP_TYPESTAR: \
737        case OP_TYPEPLUS:
738    
739    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
740        case OP_TYPEUPTO: \
741        case OP_TYPEMINUPTO:
742    
743    static int get_class_iterator_size(pcre_uchar *cc)
744    {
745    switch(*cc)
746      {
747      case OP_CRSTAR:
748      case OP_CRPLUS:
749      return 2;
750    
751      case OP_CRMINSTAR:
752      case OP_CRMINPLUS:
753      case OP_CRQUERY:
754      case OP_CRMINQUERY:
755      return 1;
756    
757      case OP_CRRANGE:
758      case OP_CRMINRANGE:
759      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
760        return 0;
761      return 2;
762    
763      default:
764      return 0;
765      }
766    }
767    
768    static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
769  {  {
770  int localspace = 0;  int private_data_length = 0;
771  uschar *alternative;  pcre_uchar *alternative;
772    pcre_uchar *name;
773    pcre_uchar *end = NULL;
774    int space, size, i;
775    pcre_uint32 bracketlen;
776    
777  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
778  while (cc < ccend)  while (cc < ccend)
779    {    {
780      space = 0;
781      size = 0;
782      bracketlen = 0;
783    switch(*cc)    switch(*cc)
784      {      {
785        case OP_SET_SOM:
786        common->has_set_som = TRUE;
787        cc += 1;
788        break;
789    
790        case OP_REF:
791        case OP_REFI:
792        common->optimized_cbracket[GET2(cc, 1)] = 0;
793        cc += 1 + IMM2_SIZE;
794        break;
795    
796      case OP_ASSERT:      case OP_ASSERT:
797      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
798      case OP_ASSERTBACK:      case OP_ASSERTBACK:
# Line 628  while (cc < ccend) Line 802  while (cc < ccend)
802      case OP_BRAPOS:      case OP_BRAPOS:
803      case OP_SBRA:      case OP_SBRA:
804      case OP_SBRAPOS:      case OP_SBRAPOS:
805      case OP_SCOND:      private_data_length += sizeof(sljit_sw);
806      localspace += sizeof(sljit_w);      bracketlen = 1 + LINK_SIZE;
     cc += 1 + LINK_SIZE;  
807      break;      break;
808    
809      case OP_CBRAPOS:      case OP_CBRAPOS:
810      case OP_SCBRAPOS:      case OP_SCBRAPOS:
811      localspace += sizeof(sljit_w);      private_data_length += sizeof(sljit_sw);
812      cc += 1 + LINK_SIZE + 2;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
813        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
814      break;      break;
815    
816      case OP_COND:      case OP_COND:
817      /* Might be a hidden SCOND. */      case OP_SCOND:
818      alternative = cc + GET(cc, 1);      /* Only AUTO_CALLOUT can insert this opcode. We do
819      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)         not intend to support this case. */
820        localspace += sizeof(sljit_w);      if (cc[1 + LINK_SIZE] == OP_CALLOUT)
821          return -1;
822    
823        if (*cc == OP_COND)
824          {
825          /* Might be a hidden SCOND. */
826          alternative = cc + GET(cc, 1);
827          if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
828            private_data_length += sizeof(sljit_sw);
829          }
830        else
831          private_data_length += sizeof(sljit_sw);
832        bracketlen = 1 + LINK_SIZE;
833        break;
834    
835        case OP_CREF:
836        i = GET2(cc, 1);
837        common->optimized_cbracket[i] = 0;
838        cc += 1 + IMM2_SIZE;
839        break;
840    
841        case OP_NCREF:
842        bracketlen = GET2(cc, 1);
843        name = (pcre_uchar *)common->name_table;
844        alternative = name;
845        for (i = 0; i < common->name_count; i++)
846          {
847          if (GET2(name, 0) == bracketlen) break;
848          name += common->name_entry_size;
849          }
850        SLJIT_ASSERT(i != common->name_count);
851    
852        for (i = 0; i < common->name_count; i++)
853          {
854          if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
855            common->optimized_cbracket[GET2(alternative, 0)] = 0;
856          alternative += common->name_entry_size;
857          }
858        bracketlen = 0;
859        cc += 1 + IMM2_SIZE;
860        break;
861    
862        case OP_BRA:
863        bracketlen = 1 + LINK_SIZE;
864        break;
865    
866        case OP_CBRA:
867        case OP_SCBRA:
868        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
869        break;
870    
871        CASE_ITERATOR_PRIVATE_DATA_1
872        space = 1;
873        size = -2;
874        break;
875    
876        CASE_ITERATOR_PRIVATE_DATA_2A
877        space = 2;
878        size = -2;
879        break;
880    
881        CASE_ITERATOR_PRIVATE_DATA_2B
882        space = 2;
883        size = -(2 + IMM2_SIZE);
884        break;
885    
886        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
887        space = 1;
888        size = 1;
889        break;
890    
891        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
892        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
893          space = 2;
894        size = 1;
895        break;
896    
897        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
898        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
899          space = 2;
900        size = 1 + IMM2_SIZE;
901        break;
902    
903        case OP_CLASS:
904        case OP_NCLASS:
905        size += 1 + 32 / sizeof(pcre_uchar);
906        space = get_class_iterator_size(cc + size);
907        break;
908    
909    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
910        case OP_XCLASS:
911        size = GET(cc, 1);
912        space = get_class_iterator_size(cc + size);
913        break;
914    #endif
915    
916        case OP_RECURSE:
917        /* Set its value only once. */
918        if (common->recursive_head_ptr == 0)
919          {
920          common->recursive_head_ptr = common->ovector_start;
921          common->ovector_start += sizeof(sljit_sw);
922          }
923      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
924      break;      break;
925    
926        case OP_CALLOUT:
927        if (common->capture_last_ptr == 0)
928          {
929          common->capture_last_ptr = common->ovector_start;
930          common->ovector_start += sizeof(sljit_sw);
931          }
932        cc += 2 + 2 * LINK_SIZE;
933        break;
934    
935        case OP_PRUNE_ARG:
936        common->needs_start_ptr = TRUE;
937        /* Fall through. */
938    
939        case OP_MARK:
940        if (common->mark_ptr == 0)
941          {
942          common->mark_ptr = common->ovector_start;
943          common->ovector_start += sizeof(sljit_sw);
944          }
945        cc += 1 + 2 + cc[1];
946        break;
947    
948        case OP_PRUNE:
949        common->needs_start_ptr = TRUE;
950        cc += 1;
951        break;
952    
953      default:      default:
954      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
955      if (cc == NULL)      if (cc == NULL)
956        return -1;        return -1;
957      break;      break;
958      }      }
959    
960      if (space > 0 && cc >= end)
961        private_data_length += sizeof(sljit_sw) * space;
962    
963      if (size != 0)
964        {
965        if (size < 0)
966          {
967          cc += -size;
968    #ifdef SUPPORT_UTF
969          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
970    #endif
971          }
972        else
973          cc += size;
974        }
975    
976      if (bracketlen != 0)
977        {
978        if (cc >= end)
979          {
980          end = bracketend(cc);
981          if (end[-1 - LINK_SIZE] == OP_KET)
982            end = NULL;
983          }
984        cc += bracketlen;
985        }
986    }    }
987  return localspace;  return private_data_length;
988  }  }
989    
990  static void set_localptrs(compiler_common *common, int localptr, uschar *ccend)  static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
991  {  {
992  uschar *cc = common->start;  pcre_uchar *cc = common->start;
993  uschar *alternative;  pcre_uchar *alternative;
994    pcre_uchar *end = NULL;
995    int space, size, bracketlen;
996    
997  while (cc < ccend)  while (cc < ccend)
998    {    {
999      space = 0;
1000      size = 0;
1001      bracketlen = 0;
1002    switch(*cc)    switch(*cc)
1003      {      {
1004      case OP_ASSERT:      case OP_ASSERT:
# Line 675  while (cc < ccend) Line 1011  while (cc < ccend)
1011      case OP_SBRA:      case OP_SBRA:
1012      case OP_SBRAPOS:      case OP_SBRAPOS:
1013      case OP_SCOND:      case OP_SCOND:
1014      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1015      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_sw);
1016      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1017      break;      break;
1018    
1019      case OP_CBRAPOS:      case OP_CBRAPOS:
1020      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1021      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
1022      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_sw);
1023      cc += 1 + LINK_SIZE + 2;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1024      break;      break;
1025    
1026      case OP_COND:      case OP_COND:
# Line 692  while (cc < ccend) Line 1028  while (cc < ccend)
1028      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1029      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1030        {        {
1031        common->localptrs[cc - common->start] = localptr;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1032        localptr += sizeof(sljit_w);        private_data_ptr += sizeof(sljit_sw);
1033          }
1034        bracketlen = 1 + LINK_SIZE;
1035        break;
1036    
1037        case OP_BRA:
1038        bracketlen = 1 + LINK_SIZE;
1039        break;
1040    
1041        case OP_CBRA:
1042        case OP_SCBRA:
1043        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1044        break;
1045    
1046        CASE_ITERATOR_PRIVATE_DATA_1
1047        space = 1;
1048        size = -2;
1049        break;
1050    
1051        CASE_ITERATOR_PRIVATE_DATA_2A
1052        space = 2;
1053        size = -2;
1054        break;
1055    
1056        CASE_ITERATOR_PRIVATE_DATA_2B
1057        space = 2;
1058        size = -(2 + IMM2_SIZE);
1059        break;
1060    
1061        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1062        space = 1;
1063        size = 1;
1064        break;
1065    
1066        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1067        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1068          space = 2;
1069        size = 1;
1070        break;
1071    
1072        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1073        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1074          space = 2;
1075        size = 1 + IMM2_SIZE;
1076        break;
1077    
1078        case OP_CLASS:
1079        case OP_NCLASS:
1080        size += 1 + 32 / sizeof(pcre_uchar);
1081        space = get_class_iterator_size(cc + size);
1082        break;
1083    
1084    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1085        case OP_XCLASS:
1086        size = GET(cc, 1);
1087        space = get_class_iterator_size(cc + size);
1088        break;
1089    #endif
1090    
1091        default:
1092        cc = next_opcode(common, cc);
1093        SLJIT_ASSERT(cc != NULL);
1094        break;
1095        }
1096    
1097      if (space > 0 && cc >= end)
1098        {
1099        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1100        private_data_ptr += sizeof(sljit_sw) * space;
1101        }
1102    
1103      if (size != 0)
1104        {
1105        if (size < 0)
1106          {
1107          cc += -size;
1108    #ifdef SUPPORT_UTF
1109          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1110    #endif
1111          }
1112        else
1113          cc += size;
1114        }
1115    
1116      if (bracketlen > 0)
1117        {
1118        if (cc >= end)
1119          {
1120          end = bracketend(cc);
1121          if (end[-1 - LINK_SIZE] == OP_KET)
1122            end = NULL;
1123          }
1124        cc += bracketlen;
1125        }
1126      }
1127    }
1128    
1129    /* Returns with a frame_types (always < 0) if no need for frame. */
1130    static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1131    {
1132    pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1133    int length = 0;
1134    int possessive = 0;
1135    BOOL stack_restore = FALSE;
1136    BOOL setsom_found = recursive;
1137    BOOL setmark_found = recursive;
1138    /* The last capture is a local variable even for recursions. */
1139    BOOL capture_last_found = FALSE;
1140    
1141    if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1142      {
1143      possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1144      /* This is correct regardless of common->capture_last_ptr. */
1145      capture_last_found = TRUE;
1146      }
1147    
1148    cc = next_opcode(common, cc);
1149    SLJIT_ASSERT(cc != NULL);
1150    while (cc < ccend)
1151      switch(*cc)
1152        {
1153        case OP_SET_SOM:
1154        SLJIT_ASSERT(common->has_set_som);
1155        stack_restore = TRUE;
1156        if (!setsom_found)
1157          {
1158          length += 2;
1159          setsom_found = TRUE;
1160          }
1161        cc += 1;
1162        break;
1163    
1164        case OP_MARK:
1165        case OP_PRUNE_ARG:
1166        SLJIT_ASSERT(common->mark_ptr != 0);
1167        stack_restore = TRUE;
1168        if (!setmark_found)
1169          {
1170          length += 2;
1171          setmark_found = TRUE;
1172          }
1173        cc += 1 + 2 + cc[1];
1174        break;
1175    
1176        case OP_RECURSE:
1177        stack_restore = TRUE;
1178        if (common->has_set_som && !setsom_found)
1179          {
1180          length += 2;
1181          setsom_found = TRUE;
1182          }
1183        if (common->mark_ptr != 0 && !setmark_found)
1184          {
1185          length += 2;
1186          setmark_found = TRUE;
1187          }
1188        if (common->capture_last_ptr != 0 && !capture_last_found)
1189          {
1190          length += 2;
1191          capture_last_found = TRUE;
1192        }        }
1193      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1194      break;      break;
1195    
1196      default:      case OP_CBRA:
1197      cc = next_opcode(common, cc);      case OP_CBRAPOS:
1198      SLJIT_ASSERT(cc != NULL);      case OP_SCBRA:
1199      break;      case OP_SCBRAPOS:
1200      }      stack_restore = TRUE;
1201    }      if (common->capture_last_ptr != 0 && !capture_last_found)
1202  }        {
1203          length += 2;
1204          capture_last_found = TRUE;
1205          }
1206        length += 3;
1207        cc += 1 + LINK_SIZE + IMM2_SIZE;
1208        break;
1209    
1210        default:
1211        stack_restore = TRUE;
1212        /* Fall through. */
1213    
1214        case OP_NOT_WORD_BOUNDARY:
1215        case OP_WORD_BOUNDARY:
1216        case OP_NOT_DIGIT:
1217        case OP_DIGIT:
1218        case OP_NOT_WHITESPACE:
1219        case OP_WHITESPACE:
1220        case OP_NOT_WORDCHAR:
1221        case OP_WORDCHAR:
1222        case OP_ANY:
1223        case OP_ALLANY:
1224        case OP_ANYBYTE:
1225        case OP_NOTPROP:
1226        case OP_PROP:
1227        case OP_ANYNL:
1228        case OP_NOT_HSPACE:
1229        case OP_HSPACE:
1230        case OP_NOT_VSPACE:
1231        case OP_VSPACE:
1232        case OP_EXTUNI:
1233        case OP_EODN:
1234        case OP_EOD:
1235        case OP_CIRC:
1236        case OP_CIRCM:
1237        case OP_DOLL:
1238        case OP_DOLLM:
1239        case OP_CHAR:
1240        case OP_CHARI:
1241        case OP_NOT:
1242        case OP_NOTI:
1243    
1244        case OP_EXACT:
1245        case OP_POSSTAR:
1246        case OP_POSPLUS:
1247        case OP_POSQUERY:
1248        case OP_POSUPTO:
1249    
1250        case OP_EXACTI:
1251        case OP_POSSTARI:
1252        case OP_POSPLUSI:
1253        case OP_POSQUERYI:
1254        case OP_POSUPTOI:
1255    
1256  /* Returns with -1 if no need for frame. */      case OP_NOTEXACT:
1257  static int get_framesize(compiler_common *common, uschar *cc, BOOL recursive)      case OP_NOTPOSSTAR:
1258  {      case OP_NOTPOSPLUS:
1259  uschar *ccend = bracketend(cc);      case OP_NOTPOSQUERY:
1260  int length = 0;      case OP_NOTPOSUPTO:
 BOOL possessive = FALSE;  
 BOOL setsom_found = FALSE;  
1261    
1262  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))      case OP_NOTEXACTI:
1263    {      case OP_NOTPOSSTARI:
1264    length = 3;      case OP_NOTPOSPLUSI:
1265    possessive = TRUE;      case OP_NOTPOSQUERYI:
1266    }      case OP_NOTPOSUPTOI:
1267    
1268  cc = next_opcode(common, cc);      case OP_TYPEEXACT:
1269  SLJIT_ASSERT(cc != NULL);      case OP_TYPEPOSSTAR:
1270  while (cc < ccend)      case OP_TYPEPOSPLUS:
1271    switch(*cc)      case OP_TYPEPOSQUERY:
1272      {      case OP_TYPEPOSUPTO:
     case OP_SET_SOM:  
     case OP_RECURSE:  
     if (!setsom_found)  
       {  
       length += 2;  
       setsom_found = TRUE;  
       }  
     cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;  
     break;  
1273    
1274      case OP_CBRA:      case OP_CLASS:
1275      case OP_CBRAPOS:      case OP_NCLASS:
1276      case OP_SCBRA:      case OP_XCLASS:
     case OP_SCBRAPOS:  
     length += 3;  
     cc += 1 + LINK_SIZE + 2;  
     break;  
1277    
     default:  
1278      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1279      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1280      break;      break;
1281      }      }
1282    
1283  /* Possessive quantifiers can use a special case. */  /* Possessive quantifiers can use a special case. */
1284  if (SLJIT_UNLIKELY(possessive) && length == 3)  if (SLJIT_UNLIKELY(possessive == length))
1285    return -1;    return stack_restore ? no_frame : no_stack;
1286    
1287  if (length > 0)  if (length > 0)
1288    return length + 1;    return length + 1;
1289  return -1;  return stack_restore ? no_frame : no_stack;
1290  }  }
1291    
1292  static void init_frame(compiler_common *common, uschar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1293  {  {
1294  DEFINE_COMPILER;  DEFINE_COMPILER;
1295  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1296  BOOL setsom_found = FALSE;  BOOL setsom_found = recursive;
1297    BOOL setmark_found = recursive;
1298    /* The last capture is a local variable even for recursions. */
1299    BOOL capture_last_found = FALSE;
1300  int offset;  int offset;
1301    
1302  /* >= 1 + shortest item size (2) */  /* >= 1 + shortest item size (2) */
1303    SLJIT_UNUSED_ARG(stacktop);
1304  SLJIT_ASSERT(stackpos >= stacktop + 2);  SLJIT_ASSERT(stackpos >= stacktop + 2);
1305    
1306  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
# Line 776  while (cc < ccend) Line 1311  while (cc < ccend)
1311    switch(*cc)    switch(*cc)
1312      {      {
1313      case OP_SET_SOM:      case OP_SET_SOM:
1314      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1315      if (!setsom_found)      if (!setsom_found)
1316        {        {
1317        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1318        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1319        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1320          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1321          stackpos += (int)sizeof(sljit_sw);
1322          setsom_found = TRUE;
1323          }
1324        cc += 1;
1325        break;
1326    
1327        case OP_MARK:
1328        case OP_PRUNE_ARG:
1329        SLJIT_ASSERT(common->mark_ptr != 0);
1330        if (!setmark_found)
1331          {
1332          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1333          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1334          stackpos += (int)sizeof(sljit_sw);
1335          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1336          stackpos += (int)sizeof(sljit_sw);
1337          setmark_found = TRUE;
1338          }
1339        cc += 1 + 2 + cc[1];
1340        break;
1341    
1342        case OP_RECURSE:
1343        if (common->has_set_som && !setsom_found)
1344          {
1345          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1346          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1347          stackpos += (int)sizeof(sljit_sw);
1348        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1349        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_sw);
1350        setsom_found = TRUE;        setsom_found = TRUE;
1351        }        }
1352      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      if (common->mark_ptr != 0 && !setmark_found)
1353          {
1354          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1355          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1356          stackpos += (int)sizeof(sljit_sw);
1357          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1358          stackpos += (int)sizeof(sljit_sw);
1359          setmark_found = TRUE;
1360          }
1361        if (common->capture_last_ptr != 0 && !capture_last_found)
1362          {
1363          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1364          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1365          stackpos += (int)sizeof(sljit_sw);
1366          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1367          stackpos += (int)sizeof(sljit_sw);
1368          capture_last_found = TRUE;
1369          }
1370        cc += 1 + LINK_SIZE;
1371      break;      break;
1372    
1373      case OP_CBRA:      case OP_CBRA:
1374      case OP_CBRAPOS:      case OP_CBRAPOS:
1375      case OP_SCBRA:      case OP_SCBRA:
1376      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1377        if (common->capture_last_ptr != 0 && !capture_last_found)
1378          {
1379          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1380          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1381          stackpos += (int)sizeof(sljit_sw);
1382          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1383          stackpos += (int)sizeof(sljit_sw);
1384          capture_last_found = TRUE;
1385          }
1386      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;      offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1387      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1388      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1389      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1390      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1391      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1392      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1393      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1394      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_sw);
1395    
1396      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1397      break;      break;
1398    
1399      default:      default:
# Line 812  while (cc < ccend) Line 1402  while (cc < ccend)
1402      break;      break;
1403      }      }
1404    
1405  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1406  SLJIT_ASSERT(stackpos == STACK(stacktop));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1407  }  }
1408    
1409  static SLJIT_INLINE int get_localsize(compiler_common *common, uschar *cc, uschar *ccend)  static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1410  {  {
1411  int localsize = 2;  int private_data_length = 2;
1412  uschar *alternative;  int size;
1413  /* Calculate the sum of the local variables. */  pcre_uchar *alternative;
1414    /* Calculate the sum of the private machine words. */
1415  while (cc < ccend)  while (cc < ccend)
1416    {    {
1417      size = 0;
1418    switch(*cc)    switch(*cc)
1419      {      {
1420      case OP_ASSERT:      case OP_ASSERT:
# Line 835  while (cc < ccend) Line 1427  while (cc < ccend)
1427      case OP_SBRA:      case OP_SBRA:
1428      case OP_SBRAPOS:      case OP_SBRAPOS:
1429      case OP_SCOND:      case OP_SCOND:
1430      localsize++;      private_data_length++;
1431      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1432      break;      break;
1433    
1434      case OP_CBRA:      case OP_CBRA:
1435      case OP_SCBRA:      case OP_SCBRA:
1436      localsize++;      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1437      cc += 1 + LINK_SIZE + 2;        private_data_length++;
1438        cc += 1 + LINK_SIZE + IMM2_SIZE;
1439      break;      break;
1440    
1441      case OP_CBRAPOS:      case OP_CBRAPOS:
1442      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1443      localsize += 2;      private_data_length += 2;
1444      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1445      break;      break;
1446    
1447      case OP_COND:      case OP_COND:
1448      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1449      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1450      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1451        localsize++;        private_data_length++;
1452      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1453      break;      break;
1454    
1455        CASE_ITERATOR_PRIVATE_DATA_1
1456        if (PRIVATE_DATA(cc))
1457          private_data_length++;
1458        cc += 2;
1459    #ifdef SUPPORT_UTF
1460        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1461    #endif
1462        break;
1463    
1464        CASE_ITERATOR_PRIVATE_DATA_2A
1465        if (PRIVATE_DATA(cc))
1466          private_data_length += 2;
1467        cc += 2;
1468    #ifdef SUPPORT_UTF
1469        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1470    #endif
1471        break;
1472    
1473        CASE_ITERATOR_PRIVATE_DATA_2B
1474        if (PRIVATE_DATA(cc))
1475          private_data_length += 2;
1476        cc += 2 + IMM2_SIZE;
1477    #ifdef SUPPORT_UTF
1478        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1479    #endif
1480        break;
1481    
1482        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1483        if (PRIVATE_DATA(cc))
1484          private_data_length++;
1485        cc += 1;
1486        break;
1487    
1488        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1489        if (PRIVATE_DATA(cc))
1490          private_data_length += 2;
1491        cc += 1;
1492        break;
1493    
1494        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1495        if (PRIVATE_DATA(cc))
1496          private_data_length += 2;
1497        cc += 1 + IMM2_SIZE;
1498        break;
1499    
1500        case OP_CLASS:
1501        case OP_NCLASS:
1502    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1503        case OP_XCLASS:
1504        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1505    #else
1506        size = 1 + 32 / (int)sizeof(pcre_uchar);
1507    #endif
1508        if (PRIVATE_DATA(cc))
1509          private_data_length += get_class_iterator_size(cc + size);
1510        cc += size;
1511        break;
1512    
1513      default:      default:
1514      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1515      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
# Line 866  while (cc < ccend) Line 1517  while (cc < ccend)
1517      }      }
1518    }    }
1519  SLJIT_ASSERT(cc == ccend);  SLJIT_ASSERT(cc == ccend);
1520  return localsize;  return private_data_length;
1521  }  }
1522    
1523  static void copy_locals(compiler_common *common, uschar *cc, uschar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1524    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop)
1525  {  {
1526  DEFINE_COMPILER;  DEFINE_COMPILER;
1527  int srcw[2];  int srcw[2];
1528  int count;  int count, size;
1529  BOOL tmp1next = TRUE;  BOOL tmp1next = TRUE;
1530  BOOL tmp1empty = TRUE;  BOOL tmp1empty = TRUE;
1531  BOOL tmp2empty = TRUE;  BOOL tmp2empty = TRUE;
1532  uschar *alternative;  pcre_uchar *alternative;
1533  enum {  enum {
1534    start,    start,
1535    loop,    loop,
# Line 891  stacktop = STACK(stacktop - 1); Line 1542  stacktop = STACK(stacktop - 1);
1542    
1543  if (!save)  if (!save)
1544    {    {
1545    stackptr += sizeof(sljit_w);    stackptr += sizeof(sljit_sw);
1546    if (stackptr < stacktop)    if (stackptr < stacktop)
1547      {      {
1548      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1549      stackptr += sizeof(sljit_w);      stackptr += sizeof(sljit_sw);
1550      tmp1empty = FALSE;      tmp1empty = FALSE;
1551      }      }
1552    if (stackptr < stacktop)    if (stackptr < stacktop)
1553      {      {
1554      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1555      stackptr += sizeof(sljit_w);      stackptr += sizeof(sljit_sw);
1556      tmp2empty = FALSE;      tmp2empty = FALSE;
1557      }      }
1558    /* The tmp1next must be TRUE in either way. */    /* The tmp1next must be TRUE in either way. */
# Line 913  while (status != end) Line 1564  while (status != end)
1564    switch(status)    switch(status)
1565      {      {
1566      case start:      case start:
1567      SLJIT_ASSERT(save);      SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1568      count = 1;      count = 1;
1569      srcw[0] = RECURSIVE_HEAD;      srcw[0] = common->recursive_head_ptr;
1570      status = loop;      status = loop;
1571      break;      break;
1572    
# Line 939  while (status != end) Line 1590  while (status != end)
1590        case OP_SBRAPOS:        case OP_SBRAPOS:
1591        case OP_SCOND:        case OP_SCOND:
1592        count = 1;        count = 1;
1593        srcw[0] = PRIV(cc);        srcw[0] = PRIVATE_DATA(cc);
1594        SLJIT_ASSERT(srcw[0] != 0);        SLJIT_ASSERT(srcw[0] != 0);
1595        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1596        break;        break;
1597    
1598        case OP_CBRA:        case OP_CBRA:
1599        case OP_SCBRA:        case OP_SCBRA:
1600        count = 1;        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1601        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));          {
1602        cc += 1 + LINK_SIZE + 2;          count = 1;
1603            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1604            }
1605          cc += 1 + LINK_SIZE + IMM2_SIZE;
1606        break;        break;
1607    
1608        case OP_CBRAPOS:        case OP_CBRAPOS:
1609        case OP_SCBRAPOS:        case OP_SCBRAPOS:
1610        count = 2;        count = 2;
1611          srcw[0] = PRIVATE_DATA(cc);
1612        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1613        srcw[0] = PRIV(cc);        SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1614        SLJIT_ASSERT(srcw[0] != 0);        cc += 1 + LINK_SIZE + IMM2_SIZE;
       cc += 1 + LINK_SIZE + 2;  
1615        break;        break;
1616    
1617        case OP_COND:        case OP_COND:
# Line 966  while (status != end) Line 1620  while (status != end)
1620        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1621          {          {
1622          count = 1;          count = 1;
1623          srcw[0] = PRIV(cc);          srcw[0] = PRIVATE_DATA(cc);
1624          SLJIT_ASSERT(srcw[0] != 0);          SLJIT_ASSERT(srcw[0] != 0);
1625          }          }
1626        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1627        break;        break;
1628    
1629          CASE_ITERATOR_PRIVATE_DATA_1
1630          if (PRIVATE_DATA(cc))
1631            {
1632            count = 1;
1633            srcw[0] = PRIVATE_DATA(cc);
1634            }
1635          cc += 2;
1636    #ifdef SUPPORT_UTF
1637          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1638    #endif
1639          break;
1640    
1641          CASE_ITERATOR_PRIVATE_DATA_2A
1642          if (PRIVATE_DATA(cc))
1643            {
1644            count = 2;
1645            srcw[0] = PRIVATE_DATA(cc);
1646            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1647            }
1648          cc += 2;
1649    #ifdef SUPPORT_UTF
1650          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1651    #endif
1652          break;
1653    
1654          CASE_ITERATOR_PRIVATE_DATA_2B
1655          if (PRIVATE_DATA(cc))
1656            {
1657            count = 2;
1658            srcw[0] = PRIVATE_DATA(cc);
1659            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1660            }
1661          cc += 2 + IMM2_SIZE;
1662    #ifdef SUPPORT_UTF
1663          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1664    #endif
1665          break;
1666    
1667          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1668          if (PRIVATE_DATA(cc))
1669            {
1670            count = 1;
1671            srcw[0] = PRIVATE_DATA(cc);
1672            }
1673          cc += 1;
1674          break;
1675    
1676          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1677          if (PRIVATE_DATA(cc))
1678            {
1679            count = 2;
1680            srcw[0] = PRIVATE_DATA(cc);
1681            srcw[1] = srcw[0] + sizeof(sljit_sw);
1682            }
1683          cc += 1;
1684          break;
1685    
1686          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1687          if (PRIVATE_DATA(cc))
1688            {
1689            count = 2;
1690            srcw[0] = PRIVATE_DATA(cc);
1691            srcw[1] = srcw[0] + sizeof(sljit_sw);
1692            }
1693          cc += 1 + IMM2_SIZE;
1694          break;
1695    
1696          case OP_CLASS:
1697          case OP_NCLASS:
1698    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1699          case OP_XCLASS:
1700          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1701    #else
1702          size = 1 + 32 / (int)sizeof(pcre_uchar);
1703    #endif
1704          if (PRIVATE_DATA(cc))
1705            switch(get_class_iterator_size(cc + size))
1706              {
1707              case 1:
1708              count = 1;
1709              srcw[0] = PRIVATE_DATA(cc);
1710              break;
1711    
1712              case 2:
1713              count = 2;
1714              srcw[0] = PRIVATE_DATA(cc);
1715              srcw[1] = srcw[0] + sizeof(sljit_sw);
1716              break;
1717    
1718              default:
1719              SLJIT_ASSERT_STOP();
1720              break;
1721              }
1722          cc += size;
1723          break;
1724    
1725        default:        default:
1726        cc = next_opcode(common, cc);        cc = next_opcode(common, cc);
1727        SLJIT_ASSERT(cc != NULL);        SLJIT_ASSERT(cc != NULL);
# Line 994  while (status != end) Line 1744  while (status != end)
1744          if (!tmp1empty)          if (!tmp1empty)
1745            {            {
1746            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1747            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1748            }            }
1749          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1750          tmp1empty = FALSE;          tmp1empty = FALSE;
# Line 1005  while (status != end) Line 1755  while (status != end)
1755          if (!tmp2empty)          if (!tmp2empty)
1756            {            {
1757            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1758            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1759            }            }
1760          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1761          tmp2empty = FALSE;          tmp2empty = FALSE;
# Line 1022  while (status != end) Line 1772  while (status != end)
1772          if (!tmp1empty)          if (!tmp1empty)
1773            {            {
1774            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);            OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1775            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1776            }            }
1777          tmp1next = FALSE;          tmp1next = FALSE;
1778          }          }
# Line 1034  while (status != end) Line 1784  while (status != end)
1784          if (!tmp2empty)          if (!tmp2empty)
1785            {            {
1786            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);            OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1787            stackptr += sizeof(sljit_w);            stackptr += sizeof(sljit_sw);
1788            }            }
1789          tmp1next = TRUE;          tmp1next = TRUE;
1790          }          }
# Line 1049  if (save) Line 1799  if (save)
1799      if (!tmp1empty)      if (!tmp1empty)
1800        {        {
1801        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1802        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1803        }        }
1804      if (!tmp2empty)      if (!tmp2empty)
1805        {        {
1806        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1807        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1808        }        }
1809      }      }
1810    else    else
# Line 1062  if (save) Line 1812  if (save)
1812      if (!tmp2empty)      if (!tmp2empty)
1813        {        {
1814        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1815        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1816        }        }
1817      if (!tmp1empty)      if (!tmp1empty)
1818        {        {
1819        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1820        stackptr += sizeof(sljit_w);        stackptr += sizeof(sljit_sw);
1821        }        }
1822      }      }
1823    }    }
1824  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1825  }  }
1826    
1827  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  #undef CASE_ITERATOR_PRIVATE_DATA_1
1828    #undef CASE_ITERATOR_PRIVATE_DATA_2A
1829    #undef CASE_ITERATOR_PRIVATE_DATA_2B
1830    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1831    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1832    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1833    
1834    static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1835  {  {
1836  return (value & (value - 1)) == 0;  return (value & (value - 1)) == 0;
1837  }  }
# Line 1084  static SLJIT_INLINE void set_jumps(jump_ Line 1841  static SLJIT_INLINE void set_jumps(jump_
1841  while (list)  while (list)
1842    {    {
1843    /* sljit_set_label is clever enough to do nothing    /* sljit_set_label is clever enough to do nothing
1844    if either the jump or the label is NULL */    if either the jump or the label is NULL. */
1845    sljit_set_label(list->jump, label);    SET_LABEL(list->jump, label);
1846    list = list->next;    list = list->next;
1847    }    }
1848  }  }
# Line 1101  if (list_item) Line 1858  if (list_item)
1858    }    }
1859  }  }
1860    
1861  static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)  static void add_stub(compiler_common *common, struct sljit_jump *start)
1862  {  {
1863  DEFINE_COMPILER;  DEFINE_COMPILER;
1864  stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));  stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1865    
1866  if (list_item)  if (list_item)
1867    {    {
   list_item->type = type;  
   list_item->data = data;  
1868    list_item->start = start;    list_item->start = start;
1869    list_item->leave = LABEL();    list_item->quit = LABEL();
1870    list_item->next = common->stubs;    list_item->next = common->stubs;
1871    common->stubs = list_item;    common->stubs = list_item;
1872    }    }
# Line 1125  stub_list* list_item = common->stubs; Line 1880  stub_list* list_item = common->stubs;
1880  while (list_item)  while (list_item)
1881    {    {
1882    JUMPHERE(list_item->start);    JUMPHERE(list_item->start);
1883    switch(list_item->type)    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1884      {    JUMPTO(SLJIT_JUMP, list_item->quit);
     case stack_alloc:  
     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));  
     break;  
     }  
   JUMPTO(SLJIT_JUMP, list_item->leave);  
1885    list_item = list_item->next;    list_item = list_item->next;
1886    }    }
1887  common->stubs = NULL;  common->stubs = NULL;
# Line 1150  static SLJIT_INLINE void allocate_stack( Line 1900  static SLJIT_INLINE void allocate_stack(
1900  /* May destroy all locals and registers except TMP2. */  /* May destroy all locals and registers except TMP2. */
1901  DEFINE_COMPILER;  DEFINE_COMPILER;
1902    
1903  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1904  #ifdef DESTROY_REGISTERS  #ifdef DESTROY_REGISTERS
1905  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1906  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
# Line 1158  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); Line 1908  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1908  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1909  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1910  #endif  #endif
1911  add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));  add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1912  }  }
1913    
1914  static SLJIT_INLINE void free_stack(compiler_common *common, int size)  static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1915  {  {
1916  DEFINE_COMPILER;  DEFINE_COMPILER;
1917  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1918  }  }
1919    
1920  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)  static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
# Line 1172  static SLJIT_INLINE void reset_ovector(c Line 1922  static SLJIT_INLINE void reset_ovector(c
1922  DEFINE_COMPILER;  DEFINE_COMPILER;
1923  struct sljit_label *loop;  struct sljit_label *loop;
1924  int i;  int i;
1925    
1926  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
1927    SLJIT_ASSERT(length > 1);
1928  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
1929  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1930    if (length < 8)
1931      {
1932      for (i = 1; i < length; i++)
1933        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1934      }
1935    else
1936      {
1937      GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1938      OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1939      loop = LABEL();
1940      OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1941      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1942      JUMPTO(SLJIT_C_NOT_ZERO, loop);
1943      }
1944    }
1945    
1946    static void do_reset_match(compiler_common *common, int length)
1947    {
1948    DEFINE_COMPILER;
1949    struct sljit_label *loop;
1950    int i;
1951    
1952    SLJIT_ASSERT(length > 1);
1953    /* OVECTOR(1) contains the "string begin - 1" constant. */
1954    if (length > 2)
1955      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1956    OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
1957  if (length < 8)  if (length < 8)
1958    {    {
1959    for (i = 0; i < length; i++)    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
1960      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);    for (i = 2; i < length; i++)
1961        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
1962    }    }
1963  else  else
1964    {    {
1965    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
1966    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, length - 2);
1967      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
1968    loop = LABEL();    loop = LABEL();
1969    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
1970    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1971    JUMPTO(SLJIT_C_NOT_ZERO, loop);    JUMPTO(SLJIT_C_NOT_ZERO, loop);
1972    }    }
1973    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
1974  }  }
1975    
1976  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)  static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1977  {  {
1978  DEFINE_COMPILER;  DEFINE_COMPILER;
1979  struct sljit_label *loop;  struct sljit_label *loop;
1980  struct sljit_jump *earlyexit;  struct sljit_jump *early_quit;
1981    
1982  /* At this point we can freely use all registers. */  /* At this point we can freely use all registers. */
1983  OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1984  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1985    
1986  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1987  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));  if (common->mark_ptr != 0)
1988  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1989  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1990  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);  if (common->mark_ptr != 0)
1991      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1992    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1993    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1994    GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1995  /* Unlikely, but possible */  /* Unlikely, but possible */
1996  earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);  early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1997  loop = LABEL();  loop = LABEL();
1998  OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1999  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));  OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2000  /* Copy the integer value to the output buffer */  /* Copy the integer value to the output buffer */
2001  OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2002  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);  OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2003    #endif
2004    OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2005    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2006  JUMPTO(SLJIT_C_NOT_ZERO, loop);  JUMPTO(SLJIT_C_NOT_ZERO, loop);
2007  JUMPHERE(earlyexit);  JUMPHERE(early_quit);
2008    
2009  /* Calculate the return value, which is the maximum ovector value. */  /* Calculate the return value, which is the maximum ovector value. */
2010  if (topbracket > 1)  if (topbracket > 1)
2011    {    {
2012    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2013    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2014    
2015    /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */    /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2016    loop = LABEL();    loop = LABEL();
2017    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));    OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2018    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2019    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);    CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2020    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2021    }    }
2022  else  else
2023    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2024  }  }
2025    
2026  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, uschar* cc)  static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2027    {
2028    DEFINE_COMPILER;
2029    struct sljit_jump *jump;
2030    
2031    SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2032    SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2033      && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2034    
2035    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2036    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2037    OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2038    CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2039    
2040    /* Store match begin and end. */
2041    OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2042    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2043    
2044    jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2045    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2046    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2047    OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2048    #endif
2049    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2050    JUMPHERE(jump);
2051    
2052    OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2053    OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2054    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2055    OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2056    #endif
2057    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2058    
2059    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2060    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2061    OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2062    #endif
2063    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2064    
2065    JUMPTO(SLJIT_JUMP, quit);
2066    }
2067    
2068    static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2069    {
2070    /* May destroy TMP1. */
2071    DEFINE_COMPILER;
2072    struct sljit_jump *jump;
2073    
2074    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2075      {
2076      /* The value of -1 must be kept for start_used_ptr! */
2077      OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2078      /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2079      is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2080      jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2081      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2082      JUMPHERE(jump);
2083      }
2084    else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2085      {
2086      jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2087      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2088      JUMPHERE(jump);
2089      }
2090    }
2091    
2092    static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2093  {  {
2094  /* Detects if the character has an othercase. */  /* Detects if the character has an othercase. */
2095  unsigned int c;  unsigned int c;
2096    
2097  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2098  if (common->utf8)  if (common->utf)
2099    {    {
2100    GETCHAR(c, cc);    GETCHAR(c, cc);
2101    if (c > 127)    if (c > 127)
# Line 1251  if (common->utf8) Line 2106  if (common->utf8)
2106      return FALSE;      return FALSE;
2107  #endif  #endif
2108      }      }
2109    #ifndef COMPILE_PCRE8
2110      return common->fcc[c] != c;
2111    #endif
2112    }    }
2113  else  else
2114  #endif  #endif
2115    c = *cc;    c = *cc;
2116  return common->fcc[c] != c;  return MAX_255(c) ? common->fcc[c] != c : FALSE;
2117  }  }
2118    
2119  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2120  {  {
2121  /* Returns with the othercase. */  /* Returns with the othercase. */
2122  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2123  if (common->utf8 && c > 127)  if (common->utf && c > 127)
2124    {    {
2125  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2126    return UCD_OTHERCASE(c);    return UCD_OTHERCASE(c);
# Line 1271  if (common->utf8 && c > 127) Line 2129  if (common->utf8 && c > 127)
2129  #endif  #endif
2130    }    }
2131  #endif  #endif
2132  return common->fcc[c];  return TABLE_GET(c, common->fcc, c);
2133  }  }
2134    
2135  static unsigned int char_get_othercase_bit(compiler_common *common, uschar* cc)  static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2136  {  {
2137  /* Detects if the character and its othercase has only 1 bit difference. */  /* Detects if the character and its othercase has only 1 bit difference. */
2138  unsigned int c, oc, bit;  unsigned int c, oc, bit;
2139  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2140  int n;  int n;
2141  #endif  #endif
2142    
2143  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2144  if (common->utf8)  if (common->utf)
2145    {    {
2146    GETCHAR(c, cc);    GETCHAR(c, cc);
2147    if (c <= 127)    if (c <= 127)
# Line 1300  if (common->utf8) Line 2158  if (common->utf8)
2158  else  else
2159    {    {
2160    c = *cc;    c = *cc;
2161    oc = common->fcc[c];    oc = TABLE_GET(c, common->fcc, c);
2162    }    }
2163  #else  #else
2164  c = *cc;  c = *cc;
2165  oc = common->fcc[c];  oc = TABLE_GET(c, common->fcc, c);
2166  #endif  #endif
2167    
2168  SLJIT_ASSERT(c != oc);  SLJIT_ASSERT(c != oc);
# Line 1315  if (c <= 127 && bit == 0x20) Line 2173  if (c <= 127 && bit == 0x20)
2173    return (0 << 8) | 0x20;    return (0 << 8) | 0x20;
2174    
2175  /* Since c != oc, they must have at least 1 bit difference. */  /* Since c != oc, they must have at least 1 bit difference. */
2176  if (!ispowerof2(bit))  if (!is_powerof2(bit))
2177    return 0;    return 0;
2178    
2179  #ifdef SUPPORT_UTF8  #if defined COMPILE_PCRE8
2180  if (common->utf8 && c > 127)  
2181    #ifdef SUPPORT_UTF
2182    if (common->utf && c > 127)
2183    {    {
2184    n = _pcre_utf8_table4[*cc & 0x3f];    n = GET_EXTRALEN(*cc);
2185    while ((bit & 0x3f) == 0)    while ((bit & 0x3f) == 0)
2186      {      {
2187      n--;      n--;
# Line 1329  if (common->utf8 && c > 127) Line 2189  if (common->utf8 && c > 127)
2189      }      }
2190    return (n << 8) | bit;    return (n << 8) | bit;
2191    }    }
2192  #endif  #endif /* SUPPORT_UTF */
2193  return (0 << 8) | bit;  return (0 << 8) | bit;
2194    
2195    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2196    
2197    #ifdef SUPPORT_UTF
2198    if (common->utf && c > 65535)
2199      {
2200      if (bit >= (1 << 10))
2201        bit >>= 10;
2202      else
2203        return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2204      }
2205    #endif /* SUPPORT_UTF */
2206    return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2207    
2208    #endif /* COMPILE_PCRE[8|16|32] */
2209    }
2210    
2211    static void check_partial(compiler_common *common, BOOL force)
2212    {
2213    /* Checks whether a partial matching is occured. Does not modify registers. */
2214    DEFINE_COMPILER;
2215    struct sljit_jump *jump = NULL;
2216    
2217    SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2218    
2219    if (common->mode == JIT_COMPILE)
2220      return;
2221    
2222    if (!force)
2223      jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2224    else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2225      jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2226    
2227    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2228      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2229    else
2230      {
2231      if (common->partialmatchlabel != NULL)
2232        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2233      else
2234        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2235      }
2236    
2237    if (jump != NULL)
2238      JUMPHERE(jump);
2239    }
2240    
2241    static void check_str_end(compiler_common *common, jump_list **end_reached)
2242    {
2243    /* Does not affect registers. Usually used in a tight spot. */
2244    DEFINE_COMPILER;
2245    struct sljit_jump *jump;
2246    
2247    if (common->mode == JIT_COMPILE)
2248      {
2249      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2250      return;
2251      }
2252    
2253    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2254    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2255      {
2256      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2257      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2258      add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2259      }
2260    else
2261      {
2262      add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2263      if (common->partialmatchlabel != NULL)
2264        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2265      else
2266        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2267      }
2268    JUMPHERE(jump);
2269  }  }
2270    
2271  static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)  static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2272  {  {
2273  DEFINE_COMPILER;  DEFINE_COMPILER;
2274  add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));  struct sljit_jump *jump;
2275    
2276    if (common->mode == JIT_COMPILE)
2277      {
2278      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2279      return;
2280      }
2281    
2282    /* Partial matching mode. */
2283    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2284    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2285    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2286      {
2287      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2288      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2289      }
2290    else
2291      {
2292      if (common->partialmatchlabel != NULL)
2293        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2294      else
2295        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2296      }
2297    JUMPHERE(jump);
2298  }  }
2299    
2300  static void read_char(compiler_common *common)  static void read_char(compiler_common *common)
# Line 1344  static void read_char(compiler_common *c Line 2302  static void read_char(compiler_common *c
2302  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, updates STR_PTR.
2303  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2304  DEFINE_COMPILER;  DEFINE_COMPILER;
2305  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2306  struct sljit_jump *jump;  struct sljit_jump *jump;
2307  #endif  #endif
2308    
2309  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2310  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2311  if (common->utf8)  if (common->utf)
2312    {    {
2313    #if defined COMPILE_PCRE8
2314    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2315    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #elif defined COMPILE_PCRE16
2316      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2317    #endif /* COMPILE_PCRE[8|16] */
2318      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2319    JUMPHERE(jump);    JUMPHERE(jump);
2320    }    }
2321  #endif  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2322  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2323  }  }
2324    
2325  static void peek_char(compiler_common *common)  static void peek_char(compiler_common *common)
# Line 1365  static void peek_char(compiler_common *c Line 2327  static void peek_char(compiler_common *c
2327  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2328  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2329  DEFINE_COMPILER;  DEFINE_COMPILER;
2330  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2331  struct sljit_jump *jump;  struct sljit_jump *jump;
2332  #endif  #endif
2333    
2334  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2335  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2336  if (common->utf8)  if (common->utf)
2337    {    {
2338    #if defined COMPILE_PCRE8
2339    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2340    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #elif defined COMPILE_PCRE16
2341      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2342    #endif /* COMPILE_PCRE[8|16] */
2343      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2344    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2345    JUMPHERE(jump);    JUMPHERE(jump);
2346    }    }
2347  #endif  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2348  }  }
2349    
2350  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common)
2351  {  {
2352  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2353  DEFINE_COMPILER;  DEFINE_COMPILER;
2354  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2355  struct sljit_jump *jump;  struct sljit_jump *jump;
2356  #endif  #endif
2357    
2358  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2359  if (common->utf8)  if (common->utf)
2360    {    {
2361    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2362    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2363    #if defined COMPILE_PCRE8
2364    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2365    it is a clever early read in most cases. */    it is needed in most cases. */
2366      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2367      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2368      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2369      JUMPHERE(jump);
2370    #elif defined COMPILE_PCRE16
2371      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2372      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2373      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2374      JUMPHERE(jump);
2375      /* Skip low surrogate if necessary. */
2376      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2377      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2378      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2379      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2380      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2381    #elif defined COMPILE_PCRE32
2382      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2383      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2384    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
   jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);  
   add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));  
2385    JUMPHERE(jump);    JUMPHERE(jump);
2386    #endif /* COMPILE_PCRE[8|16|32] */
2387    return;    return;
2388    }    }
2389    #endif /* SUPPORT_UTF */
2390    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2391    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2392    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2393    /* The ctypes array contains only 256 values. */
2394    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2395    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2396    #endif
2397    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2398    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2399    JUMPHERE(jump);
2400  #endif  #endif
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);  
2401  }  }
2402    
2403  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
2404  {  {
2405  /* Goes one character back. Only affects STR_PTR. Does not check begin. */  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2406  DEFINE_COMPILER;  DEFINE_COMPILER;
2407  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2408    #if defined COMPILE_PCRE8
2409  struct sljit_label *label;  struct sljit_label *label;
2410    
2411  if (common->utf8)  if (common->utf)
2412    {    {
2413    label = LABEL();    label = LABEL();
2414    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2415    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2416    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2417    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2418    return;    return;
2419    }    }
2420  #endif  #elif defined COMPILE_PCRE16
2421  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  if (common->utf)
2422      {
2423      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2424      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2425      /* Skip low surrogate if necessary. */
2426      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2427      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2428      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2429      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2430      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2431      return;
2432      }
2433    #endif /* COMPILE_PCRE[8|16] */
2434    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2435    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2436  }  }
2437    
2438  static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2439  {  {
2440  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2441  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 1436  DEFINE_COMPILER; Line 2443  DEFINE_COMPILER;
2443  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2444    {    {
2445    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2446    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2447    }    }
2448  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2449    {    {
2450    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2451    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2452    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2453    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2454    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2455    }    }
2456  else  else
2457    {    {
2458    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2459    add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2460    }    }
2461  }  }
2462    
2463  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2464  static void do_utf8readchar(compiler_common *common)  
2465    #if defined COMPILE_PCRE8
2466    static void do_utfreadchar(compiler_common *common)
2467  {  {
2468  /* Fast decoding an utf8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2469  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2470  DEFINE_COMPILER;  DEFINE_COMPILER;
2471  struct sljit_jump *jump;  struct sljit_jump *jump;
2472    
2473  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2474  /* Searching for the first zero. */  /* Searching for the first zero. */
2475  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2476  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2477  /* 2 byte sequence */  /* Two byte sequence. */
2478  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2479  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2480  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2481  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2482  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2483  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2484  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2485  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2486  JUMPHERE(jump);  JUMPHERE(jump);
2487    
2488  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2489  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2490  /* 3 byte sequence */  /* Three byte sequence. */
2491  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2492  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2493  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2494  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2495  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2496  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2497  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2498  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2499  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2500  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2501  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2502  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2503  JUMPHERE(jump);  JUMPHERE(jump);
2504    
2505  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);  /* Four byte sequence. */
2506  jump = JUMP(SLJIT_C_NOT_ZERO);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
 /* 4 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
2507  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2508  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2509  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2510  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2511  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2512  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
   
 /* 5 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
2513  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2514  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2515  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2516  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2517  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2518  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2519  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2520  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2521  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2522  }  }
2523    
2524  static void do_utf8readtype8(compiler_common *common)  static void do_utfreadtype8(compiler_common *common)
2525  {  {
2526  /* Fast decoding an utf8 character type. TMP2 contains the first byte  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2527  of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */  of the character (>= 0xc0). Return value in TMP1. */
2528  DEFINE_COMPILER;  DEFINE_COMPILER;
2529  struct sljit_jump *jump;  struct sljit_jump *jump;
2530  struct sljit_jump *compare;  struct sljit_jump *compare;
2531    
2532  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2533    
2534  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2535  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2536  /* 2 byte sequence */  /* Two byte sequence. */
2537  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2538  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2539  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2540  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2541  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
# Line 1566  sljit_emit_fast_return(compiler, RETURN_ Line 2550  sljit_emit_fast_return(compiler, RETURN_
2550  JUMPHERE(jump);  JUMPHERE(jump);
2551    
2552  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
2553  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_utf8_char_sizes - 0xc0);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2554  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2555  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2556  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2557  }  }
2558    
2559  #endif  #elif defined COMPILE_PCRE16
2560    
2561    static void do_utfreadchar(compiler_common *common)
2562    {
2563    /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2564    of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2565    DEFINE_COMPILER;
2566    struct sljit_jump *jump;
2567    
2568    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2569    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2570    /* Do nothing, only return. */
2571    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2572    
2573    JUMPHERE(jump);
2574    /* Combine two 16 bit characters. */
2575    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2576    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2577    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2578    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2579    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2580    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2581    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2582    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2583    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2584    }
2585    
2586    #endif /* COMPILE_PCRE[8|16] */
2587    
2588    #endif /* SUPPORT_UTF */
2589    
2590  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2591    
# Line 1588  DEFINE_COMPILER; Line 2601  DEFINE_COMPILER;
2601    
2602  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2603    
2604  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2605  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2606  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_ucd_stage1);  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2607  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2608  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2609  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2610  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_stage2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2611  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2612  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, chartype));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2613  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2614  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2615  }  }
# Line 1610  struct sljit_label *newlinelabel = NULL; Line 2623  struct sljit_label *newlinelabel = NULL;
2623  struct sljit_jump *start;  struct sljit_jump *start;
2624  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
2625  struct sljit_jump *nl = NULL;  struct sljit_jump *nl = NULL;
2626  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2627  struct sljit_jump *singlebyte;  struct sljit_jump *singlechar;
2628  #endif  #endif
2629  jump_list *newline = NULL;  jump_list *newline = NULL;
2630  BOOL newlinecheck = FALSE;  BOOL newlinecheck = FALSE;
2631  BOOL readbyte = FALSE;  BOOL readuchar = FALSE;
2632    
2633  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2634      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
# Line 1624  if (!(hascrorlf || firstline) && (common Line 2637  if (!(hascrorlf || firstline) && (common
2637  if (firstline)  if (firstline)
2638    {    {
2639    /* Search for the end of the first line. */    /* Search for the end of the first line. */
2640    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2641    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2642    
2643    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2644      {      {
2645      mainloop = LABEL();      mainloop = LABEL();
2646      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2647      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2648      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2649      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2650      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2651      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2652      OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1);      JUMPHERE(end);
2653        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2654      }      }
2655    else    else
2656      {      {
2657      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2658      mainloop = LABEL();      mainloop = LABEL();
2659      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2660      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2661      read_char(common);      read_char(common);
2662      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2663      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2664      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      JUMPHERE(end);
2665        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2666      set_jumps(newline, LABEL());      set_jumps(newline, LABEL());
2667      }      }
2668    
2669    JUMPHERE(end);    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  
2670    }    }
2671    
2672  start = JUMP(SLJIT_JUMP);  start = JUMP(SLJIT_JUMP);
# Line 1660  start = JUMP(SLJIT_JUMP); Line 2674  start = JUMP(SLJIT_JUMP);
2674  if (newlinecheck)  if (newlinecheck)
2675    {    {
2676    newlinelabel = LABEL();    newlinelabel = LABEL();
2677    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2678    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2679    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2680    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2681    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2682    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2683      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2684    #endif
2685    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2686    nl = JUMP(SLJIT_JUMP);    nl = JUMP(SLJIT_JUMP);
2687    }    }
# Line 1672  if (newlinecheck) Line 2689  if (newlinecheck)
2689  mainloop = LABEL();  mainloop = LABEL();
2690    
2691  /* Increasing the STR_PTR here requires one less jump in the most common case. */  /* Increasing the STR_PTR here requires one less jump in the most common case. */
2692  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2693  if (common->utf8) readbyte = TRUE;  if (common->utf) readuchar = TRUE;
2694  #endif  #endif
2695  if (newlinecheck) readbyte = TRUE;  if (newlinecheck) readuchar = TRUE;
2696    
2697  if (readbyte)  if (readuchar)
2698    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2699    
2700  if (newlinecheck)  if (newlinecheck)
2701    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2702    
2703  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2704  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2705  if (common->utf8)  #if defined COMPILE_PCRE8
2706    if (common->utf)
2707    {    {
2708    singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2709    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2710    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2711    JUMPHERE(singlebyte);    JUMPHERE(singlechar);
2712    }    }
2713  #endif  #elif defined COMPILE_PCRE16
2714    if (common->utf)
2715      {
2716      singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2717      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2718      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2719      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2720      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2721      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2722      JUMPHERE(singlechar);
2723      }
2724    #endif /* COMPILE_PCRE[8|16] */
2725    #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2726  JUMPHERE(start);  JUMPHERE(start);
2727    
2728  if (newlinecheck)  if (newlinecheck)
# Line 1704  if (newlinecheck) Line 2734  if (newlinecheck)
2734  return mainloop;  return mainloop;
2735  }  }
2736    
2737  static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)  #define MAX_N_CHARS 3
2738    
2739    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2740    {
2741    DEFINE_COMPILER;
2742    struct sljit_label *start;
2743    struct sljit_jump *quit;
2744    pcre_uint32 chars[MAX_N_CHARS * 2];
2745    pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2746    int location = 0;
2747    pcre_int32 len, c, bit, caseless;
2748    int must_stop;
2749    
2750    /* We do not support alternatives now. */
2751    if (*(common->start + GET(common->start, 1)) == OP_ALT)
2752      return FALSE;
2753    
2754    while (TRUE)
2755      {
2756      caseless = 0;
2757      must_stop = 1;
2758      switch(*cc)
2759        {
2760        case OP_CHAR:
2761        must_stop = 0;
2762        cc++;
2763        break;
2764    
2765        case OP_CHARI:
2766        caseless = 1;
2767        must_stop = 0;
2768        cc++;
2769        break;
2770    
2771        case OP_SOD:
2772        case OP_SOM:
2773        case OP_SET_SOM:
2774        case OP_NOT_WORD_BOUNDARY:
2775        case OP_WORD_BOUNDARY:
2776        case OP_EODN:
2777        case OP_EOD:
2778        case OP_CIRC:
2779        case OP_CIRCM:
2780        case OP_DOLL:
2781        case OP_DOLLM:
2782        /* Zero width assertions. */
2783        cc++;
2784        continue;
2785    
2786        case OP_PLUS:
2787        case OP_MINPLUS:
2788        case OP_POSPLUS:
2789        cc++;
2790        break;
2791    
2792        case OP_EXACT:
2793        cc += 1 + IMM2_SIZE;
2794        break;
2795    
2796        case OP_PLUSI:
2797        case OP_MINPLUSI:
2798        case OP_POSPLUSI:
2799        caseless = 1;
2800        cc++;
2801        break;
2802    
2803        case OP_EXACTI:
2804        caseless = 1;
2805        cc += 1 + IMM2_SIZE;
2806        break;
2807    
2808        default:
2809        must_stop = 2;
2810        break;
2811        }
2812    
2813      if (must_stop == 2)
2814          break;
2815    
2816      len = 1;
2817    #ifdef SUPPORT_UTF
2818      if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2819    #endif
2820    
2821      if (caseless && char_has_othercase(common, cc))
2822        {
2823        caseless = char_get_othercase_bit(common, cc);
2824        if (caseless == 0)
2825          return FALSE;
2826    #ifdef COMPILE_PCRE8
2827        caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2828    #else
2829        if ((caseless & 0x100) != 0)
2830          caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2831        else
2832          caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2833    #endif
2834        }
2835      else
2836        caseless = 0;
2837    
2838      while (len > 0 && location < MAX_N_CHARS * 2)
2839        {
2840        c = *cc;
2841        bit = 0;
2842        if (len == (caseless & 0xff))
2843          {
2844          bit = caseless >> 8;
2845          c |= bit;
2846          }
2847    
2848        chars[location] = c;
2849        chars[location + 1] = bit;
2850    
2851        len--;
2852        location += 2;
2853        cc++;
2854        }
2855    
2856      if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2857        break;
2858      }
2859    
2860    /* At least two characters are required. */
2861    if (location < 2 * 2)
2862        return FALSE;
2863    
2864    if (firstline)
2865      {
2866      SLJIT_ASSERT(common->first_line_end != 0);
2867      OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2868      OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2869      }
2870    else
2871      OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2872    
2873    start = LABEL();
2874    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2875    
2876    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2877    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2878    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2879    if (chars[1] != 0)
2880      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2881    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2882    if (location > 2 * 2)
2883      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2884    if (chars[3] != 0)
2885      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2886    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2887    if (location > 2 * 2)
2888      {
2889      if (chars[5] != 0)
2890        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2891      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2892      }
2893    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2894    
2895    JUMPHERE(quit);
2896    
2897    if (firstline)
2898      OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2899    else
2900      OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2901    return TRUE;
2902    }
2903    
2904    #undef MAX_N_CHARS
2905    
2906    static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2907  {  {
2908  DEFINE_COMPILER;  DEFINE_COMPILER;
2909  struct sljit_label *start;  struct sljit_label *start;
2910  struct sljit_jump *leave;  struct sljit_jump *quit;
2911  struct sljit_jump *found;  struct sljit_jump *found;
2912  pcre_uint16 oc, bit;  pcre_uchar oc, bit;
2913    
2914  if (firstline)  if (firstline)
2915    {    {
2916    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2917    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2918      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2919    }    }
2920    
2921  start = LABEL();  start = LABEL();
2922  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2923  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2924    
2925  if ((firstbyte & REQ_CASELESS) == 0)  oc = first_char;
2926    found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);  if (caseless)
2927      {
2928      oc = TABLE_GET(first_char, common->fcc, first_char);
2929    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2930      if (first_char > 127 && common->utf)
2931        oc = UCD_OTHERCASE(first_char);
2932    #endif
2933      }
2934    if (first_char == oc)
2935      found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2936  else  else
2937    {    {
2938    firstbyte &= 0xff;    bit = first_char ^ oc;
2939    oc = common->fcc[firstbyte];    if (is_powerof2(bit))
   bit = firstbyte ^ oc;  
   if (ispowerof2(bit))  
2940      {      {
2941      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2942      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2943      }      }
2944    else    else
2945      {      {
2946      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2947      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2948      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2949      COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2950      found = JUMP(SLJIT_C_NOT_ZERO);      found = JUMP(SLJIT_C_NOT_ZERO);
2951      }      }
2952    }    }
2953    
2954  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 #ifdef SUPPORT_UTF8  
 if (common->utf8)  
   {  
   CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
   }  
 #endif  
2955  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
2956  JUMPHERE(found);  JUMPHERE(found);
2957  JUMPHERE(leave);  JUMPHERE(quit);
2958    
2959  if (firstline)  if (firstline)
2960    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2961  }  }
2962    
2963  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
# Line 1767  DEFINE_COMPILER; Line 2966  DEFINE_COMPILER;
2966  struct sljit_label *loop;  struct sljit_label *loop;
2967  struct sljit_jump *lastchar;  struct sljit_jump *lastchar;
2968  struct sljit_jump *firstchar;  struct sljit_jump *firstchar;
2969  struct sljit_jump *leave;  struct sljit_jump *quit;
2970  struct sljit_jump *foundcr = NULL;  struct sljit_jump *foundcr = NULL;
2971  struct sljit_jump *notfoundnl;  struct sljit_jump *notfoundnl;
2972  jump_list *newline = NULL;  jump_list *newline = NULL;
2973    
2974  if (firstline)  if (firstline)
2975    {    {
2976    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2977    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2978      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2979    }    }
2980    
2981  if (common->nltype == NLTYPE_FIXED && common->newline > 255)  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
# Line 1786  if (common->nltype == NLTYPE_FIXED && co Line 2986  if (common->nltype == NLTYPE_FIXED && co
2986    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2987    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2988    
2989    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2990    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2991    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2992    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2993      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2994    #endif
2995    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2996    
2997    loop = LABEL();    loop = LABEL();
2998    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999    leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3000    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3001    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3002    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3003    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3004    
3005    JUMPHERE(leave);    JUMPHERE(quit);
3006    JUMPHERE(firstchar);    JUMPHERE(firstchar);
3007    JUMPHERE(lastchar);    JUMPHERE(lastchar);
3008    
# Line 1823  set_jumps(newline, loop); Line 3026  set_jumps(newline, loop);
3026    
3027  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3028    {    {
3029    leave = JUMP(SLJIT_JUMP);    quit = JUMP(SLJIT_JUMP);
3030    JUMPHERE(foundcr);    JUMPHERE(foundcr);
3031    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3032    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3033    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3034    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3035    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3036      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3037    #endif
3038    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3039    JUMPHERE(notfoundnl);    JUMPHERE(notfoundnl);
3040    JUMPHERE(leave);    JUMPHERE(quit);
3041    }    }
3042  JUMPHERE(lastchar);  JUMPHERE(lastchar);
3043  JUMPHERE(firstchar);  JUMPHERE(firstchar);
3044    
3045  if (firstline)  if (firstline)
3046    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3047  }  }
3048    
3049    static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3050    
3051  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3052  {  {
3053  DEFINE_COMPILER;  DEFINE_COMPILER;
3054  struct sljit_label *start;  struct sljit_label *start;
3055  struct sljit_jump *leave;  struct sljit_jump *quit;
3056  struct sljit_jump *found;  struct sljit_jump *found = NULL;
3057    jump_list *matches = NULL;
3058    pcre_uint8 inverted_start_bits[32];
3059    int i;
3060    #ifndef COMPILE_PCRE8
3061    struct sljit_jump *jump;
3062    #endif
3063    
3064    for (i = 0; i < 32; ++i)
3065      inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3066    
3067  if (firstline)  if (firstline)
3068    {    {
3069    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
3070    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3071      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3072    }    }
3073    
3074  start = LABEL();  start = LABEL();
3075  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3076  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3077  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3078  if (common->utf8)  if (common->utf)
3079    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3080  #endif  #endif
 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);  
 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
 found = JUMP(SLJIT_C_NOT_ZERO);  
3081    
3082  #ifdef SUPPORT_UTF8  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3083  if (common->utf8)    {
3084    #ifndef COMPILE_PCRE8
3085      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3086      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3087      JUMPHERE(jump);
3088    #endif
3089      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3090      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3091      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3092      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3093      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3094      found = JUMP(SLJIT_C_NOT_ZERO);
3095      }
3096    
3097    #ifdef SUPPORT_UTF
3098    if (common->utf)
3099    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3100  #endif  #endif
3101  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3102  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3103  if (common->utf8)  #if defined COMPILE_PCRE8
3104    if (common->utf)
3105    {    {
3106    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3107    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3108    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3109    }    }
3110  #endif  #elif defined COMPILE_PCRE16
3111    if (common->utf)
3112      {
3113      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3114      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3115      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3116      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3117      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3118      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3119      }
3120    #endif /* COMPILE_PCRE[8|16] */
3121    #endif /* SUPPORT_UTF */
3122  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
3123  JUMPHERE(found);  if (found != NULL)
3124  JUMPHERE(leave);    JUMPHERE(found);
3125    if (matches != NULL)
3126      set_jumps(matches, LABEL());
3127    JUMPHERE(quit);
3128    
3129  if (firstline)  if (firstline)
3130    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3131  }  }
3132    
3133  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3134  {  {
3135  DEFINE_COMPILER;  DEFINE_COMPILER;
3136  struct sljit_label *loop;  struct sljit_label *loop;
# Line 1897  struct sljit_jump *alreadyfound; Line 3139  struct sljit_jump *alreadyfound;
3139  struct sljit_jump *found;  struct sljit_jump *found;
3140  struct sljit_jump *foundoc = NULL;  struct sljit_jump *foundoc = NULL;
3141  struct sljit_jump *notfound;  struct sljit_jump *notfound;
3142  pcre_uint16 oc, bit;  pcre_uint32 oc, bit;
3143    
3144  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);  SLJIT_ASSERT(common->req_char_ptr != 0);
3145    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3146  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3147  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3148  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3149    
3150  if (has_firstbyte)  if (has_firstchar)
3151    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3152  else  else
3153    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3154    
3155  loop = LABEL();  loop = LABEL();
3156  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3157    
3158  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3159  if ((reqbyte & REQ_CASELESS) == 0)  oc = req_char;
3160    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);  if (caseless)
3161      {
3162      oc = TABLE_GET(req_char, common->fcc, req_char);
3163    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3164      if (req_char > 127 && common->utf)
3165        oc = UCD_OTHERCASE(req_char);
3166    #endif
3167      }
3168    if (req_char == oc)
3169      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3170  else  else
3171    {    {
3172    reqbyte &= 0xff;    bit = req_char ^ oc;
3173    oc = common->fcc[reqbyte];    if (is_powerof2(bit))
   bit = reqbyte ^ oc;  
   if (ispowerof2(bit))  
3174      {      {
3175      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3176      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3177      }      }
3178    else    else
3179      {      {
3180      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3181      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3182      }      }
3183    }    }
3184  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3185  JUMPTO(SLJIT_JUMP, loop);  JUMPTO(SLJIT_JUMP, loop);
3186    
3187  JUMPHERE(found);  JUMPHERE(found);
3188  if (foundoc)  if (foundoc)
3189    JUMPHERE(foundoc);    JUMPHERE(foundoc);
3190  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3191  JUMPHERE(alreadyfound);  JUMPHERE(alreadyfound);
3192  JUMPHERE(toolong);  JUMPHERE(toolong);
3193  return notfound;  return notfound;
# Line 1949  DEFINE_COMPILER; Line 3199  DEFINE_COMPILER;
3199  struct sljit_jump *jump;  struct sljit_jump *jump;
3200  struct sljit_label *mainloop;  struct sljit_label *mainloop;
3201    
3202  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3203  OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);  OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3204    GET_LOCAL_BASE(TMP3, 0, 0);
3205    
3206  /* Drop frames until we reach STACK_TOP. */  /* Drop frames until we reach STACK_TOP. */
3207  mainloop = LABEL();  mainloop = LABEL();
3208  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3209  jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3210  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);  jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3211  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  
3212  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3213  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3214    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3215    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3216  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
3217    
3218  JUMPHERE(jump);  JUMPHERE(jump);
3219  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  jump = JUMP(SLJIT_C_SIG_LESS);
3220  /* End of dropping frames. */  /* End of dropping frames. */
3221  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3222    
3223  JUMPHERE(jump);  JUMPHERE(jump);
3224  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);  OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3225  /* Set string begin. */  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3226  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3227  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);  
 JUMPTO(SLJIT_JUMP, mainloop);  
   
 JUMPHERE(jump);  
 /* Unknown command. */  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));  
3228  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
3229  }  }
3230    
3231  static void check_wordboundary(compiler_common *common)  static void check_wordboundary(compiler_common *common)
3232  {  {
3233  DEFINE_COMPILER;  DEFINE_COMPILER;
3234  struct sljit_jump *beginend;  struct sljit_jump *skipread;
3235  #ifdef SUPPORT_UTF8  jump_list *skipread_list = NULL;
3236    #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3237  struct sljit_jump *jump;  struct sljit_jump *jump;
3238  #endif  #endif
3239    
3240  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3241    
3242  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3243  /* Get type of the previous char, and put it to LOCALS1. */  /* Get type of the previous char, and put it to LOCALS1. */
3244  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3245  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3246  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3247  beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);  skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3248  skip_char_back(common);  skip_char_back(common);
3249    check_start_used_ptr(common);
3250  read_char(common);  read_char(common);
3251    
3252  /* Testing char type. */  /* Testing char type. */
3253  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3254  if (common->useucp)  if (common->use_ucp)
3255    {    {
3256    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3257    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3258    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3259    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3260    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3261    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3262    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3263    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3264    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3265    JUMPHERE(jump);    JUMPHERE(jump);
3266    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3267    }    }
3268  else  else
3269  #endif  #endif
3270    {    {
3271  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3272      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3273    #elif defined SUPPORT_UTF
3274    /* Here LOCALS1 has already been zeroed. */    /* Here LOCALS1 has already been zeroed. */
3275    jump = NULL;    jump = NULL;
3276    if (common->utf8)    if (common->utf)
3277      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3278  #endif  #endif /* COMPILE_PCRE8 */
3279    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3280    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3281    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3282    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3283  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3284      JUMPHERE(jump);
3285    #elif defined SUPPORT_UTF
3286    if (jump != NULL)    if (jump != NULL)
3287      JUMPHERE(jump);      JUMPHERE(jump);
3288  #endif  #endif /* COMPILE_PCRE8 */
3289    }    }
3290  JUMPHERE(beginend);  JUMPHERE(skipread);
3291    
3292  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3293  beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  check_str_end(common, &skipread_list);
3294  peek_char(common);  peek_char(common);
3295    
3296  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
3297  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3298  if (common->useucp)  if (common->use_ucp)
3299    {    {
3300    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3301    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3302    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3303    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3304    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3305    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3306    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3307    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3308    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3309    JUMPHERE(jump);    JUMPHERE(jump);
3310    }    }
3311  else  else
3312  #endif  #endif
3313    {    {
3314  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3315      /* TMP2 may be destroyed by peek_char. */
3316      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3317      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3318    #elif defined SUPPORT_UTF
3319    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3320    jump = NULL;    jump = NULL;
3321    if (common->utf8)    if (common->utf)
3322      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3323  #endif  #endif
3324    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3325    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3326    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3327  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3328      JUMPHERE(jump);
3329    #elif defined SUPPORT_UTF
3330    if (jump != NULL)    if (jump != NULL)
3331      JUMPHERE(jump);      JUMPHERE(jump);
3332  #endif  #endif /* COMPILE_PCRE8 */
3333    }    }
3334  JUMPHERE(beginend);  set_jumps(skipread_list, LABEL());
3335    
3336  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3337  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3338  }  }
3339    
/*
  range format:

  ranges[0] = number of bit change positions stored (max MAX_RANGE_SIZE, -1 means invalid range).
  ranges[1] = value of the first bit (0 or 1)
  ranges[2] .. ranges[1 + ranges[0]] = positions of the bit changes (where the current bit is not equal to the previous one)
*/
3347    
3348    static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3349    {
3350    DEFINE_COMPILER;
3351    struct sljit_jump *jump;
3352    
3353    if (ranges[0] < 0)
3354      return FALSE;
3355    
3356    switch(ranges[0])
3357      {
3358      case 1:
3359      if (readch)
3360        read_char(common);
3361      add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3362      return TRUE;
3363    
3364      case 2:
3365      if (readch)
3366        read_char(common);
3367      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3368      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3369      return TRUE;
3370    
3371      case 4:
3372      if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3373        {
3374        if (readch)
3375          read_char(common);
3376        if (ranges[1] != 0)
3377          {
3378          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3379          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3380          }
3381        else
3382          {
3383          jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3384          add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3385          JUMPHERE(jump);
3386          }
3387        return TRUE;
3388        }
3389      if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3390        {
3391        if (readch)
3392          read_char(common);
3393        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3394        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3395        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3396        return TRUE;
3397        }
3398      return FALSE;
3399    
3400      default:
3401      return FALSE;
3402      }
3403    }
3404    
3405    static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3406    {
3407    int i, bit, length;
3408    const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3409    
3410    bit = ctypes[0] & flag;
3411    ranges[0] = -1;
3412    ranges[1] = bit != 0 ? 1 : 0;
3413    length = 0;
3414    
3415    for (i = 1; i < 256; i++)
3416      if ((ctypes[i] & flag) != bit)
3417        {
3418        if (length >= MAX_RANGE_SIZE)
3419          return;
3420        ranges[2 + length] = i;
3421        length++;
3422        bit ^= flag;
3423        }
3424    
3425    if (bit != 0)
3426      {
3427      if (length >= MAX_RANGE_SIZE)
3428        return;
3429      ranges[2 + length] = 256;
3430      length++;
3431      }
3432    ranges[0] = length;
3433    }
3434    
3435    static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3436    {
3437    int ranges[2 + MAX_RANGE_SIZE];
3438    pcre_uint8 bit, cbit, all;
3439    int i, byte, length = 0;
3440    
3441    bit = bits[0] & 0x1;
3442    ranges[1] = bit;
3443    /* Can be 0 or 255. */
3444    all = -bit;
3445    
3446    for (i = 0; i < 256; )
3447      {
3448      byte = i >> 3;
3449      if ((i & 0x7) == 0 && bits[byte] == all)
3450        i += 8;
3451      else
3452        {
3453        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3454        if (cbit != bit)
3455          {
3456          if (length >= MAX_RANGE_SIZE)
3457            return FALSE;
3458          ranges[2 + length] = i;
3459          length++;
3460          bit = cbit;
3461          all = -cbit;
3462          }
3463        i++;
3464        }
3465      }
3466    
3467    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3468      {
3469      if (length >= MAX_RANGE_SIZE)
3470        return FALSE;
3471      ranges[2 + length] = 256;
3472      length++;
3473      }
3474    ranges[0] = length;
3475    
3476    return check_ranges(common, ranges, backtracks, FALSE);
3477    }
3478    
/* check_anynewline: emits a helper routine, bracketed by
sljit_emit_fast_enter() and sljit_emit_fast_return(), that tests the character
held in TMP1 against a fixed set of newline code points and accumulates the
result in TMP2. TMP1 is modified by the emitted code as well. */
3479  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
3480  {  {
3481  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3482  DEFINE_COMPILER;  DEFINE_COMPILER;
3483    
3484  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3485    
/* Rebase TMP1 so that 0x0a becomes 0; a single "less or equal" test against
0x0d - 0x0a then covers the whole 0x0a..0x0d run. All later constants are
rebased by 0x0a for the same reason. */
3486  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3487  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3488  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Equality test against 0x85 (rebased). */
3489  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3490  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3491  if (common->utf8)  #ifdef COMPILE_PCRE8
3492    if (common->utf)
3493    {    {
3494    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);  #endif
3495      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps the rebased 0x2028 onto the rebased 0x2029, so one
equality test covers both code points. */
3496    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3497    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3498    #ifdef COMPILE_PCRE8
3499    }    }
3500  #endif  #endif
/* The result of the last equality test is OR-ed into TMP2.
NOTE(review): SLJIT_SET_E on this final operation presumably leaves a flag
for the caller to branch on - confirm against the sljit documentation. */
3501  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3502    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3503  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3504  }  }
3505    
# Line 2106  static void check_hspace(compiler_common Line 3508  static void check_hspace(compiler_common
3508  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3509  DEFINE_COMPILER;  DEFINE_COMPILER;
3510    
3511  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3512    
3513  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3514  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3515  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3516  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3517  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3518  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3519  if (common->utf8)  #ifdef COMPILE_PCRE8
3520    if (common->utf)
3521    {    {
3522    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);  #endif
3523      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3524    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3525    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3526    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3527    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3528    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3529    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3530    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3531    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3532    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3533    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3534    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3535    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3536    #ifdef COMPILE_PCRE8
3537    }    }
3538  #endif  #endif
3539  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */