/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 696 by zherczeg, Sun Sep 18 15:09:49 2011 UTC revision 1051 by zherczeg, Tue Oct 2 08:18:24 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2012
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 52  POSSIBILITY OF SUCH DAMAGE. Line 52  POSSIBILITY OF SUCH DAMAGE.
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55    #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56    #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58    #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
60  #define SLJIT_DEBUG 0  #define SLJIT_DEBUG 0
61    
62  #include "sljit/sljitLir.c"  #include "sljit/sljitLir.c"
63    
64  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65  #error "Unsupported architecture"  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Allocate memory for the regex stack on the real machine stack.
69  #define LOCAL_SPACE_SIZE 32768  Fast, but limited size. */
70    #define MACHINE_STACK_SIZE 32768
71    
72    /* Growth rate for stack allocated by the OS. Should be a multiple
73    of the page size. */
74  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
75    
76  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 79  The code generator follows the recursive Line 85  The code generator follows the recursive
85  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
86  whose execute different commands depending on the result of the condition check.  whose execute different commands depending on the result of the condition check.
87  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
88  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
89    
90    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
91    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
92    
93  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
94  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
95  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
96  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97  branches on the hot path.  branches on the matching path.
98    
99   Greedy star operator (*) :   Greedy star operator (*) :
100     Hot path: match happens.     Matching path: match happens.
101     Fallback path: match failed.     Backtrack path: match failed.
102   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
103     Hot path: no need to perform a match.     Matching path: no need to perform a match.
104     Fallback path: match is required.     Backtrack path: match is required.
105    
106  The following example shows how the code generated for a capturing bracket  The following example shows how the code generated for a capturing bracket
107  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 105  we have the following regular expression Line 111  we have the following regular expression
111    
112  The generated code will be the following:  The generated code will be the following:
113    
114   A hot path   A matching path
115   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
116   B hot path   B matching path
117   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
118   D hot path   D matching path
119   return with successful match   return with successful match
120    
121   D fallback path   D backtrack path
122   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123   B fallback path   B backtrack path
124   C expected path   C expected path
125   jump to D hot path   jump to D matching path
126   C fallback path   C backtrack path
127   A fallback path   A backtrack path
128    
129   Notice, that the order of fallback code paths are the opposite of the fast   Notice, that the order of backtrack code paths are the opposite of the fast
130   code paths. In this way the topmost value on the stack is always belong   code paths. In this way the topmost value on the stack is always belong
131   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
132   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
133   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
134   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
135  */  */
136    
137  /*  /*
138  Saved stack frames:  Saved stack frames:
139    
140  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
141  when the fallback mechanism performed. Because of OP_RECURSE, the locals  when the backtrack mechanism performed. Because of OP_RECURSE, the data
142  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
143  mechanism.  mechanism.
144    
145  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
146  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147    
148  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
149  */  */
150    
151  typedef struct jit_arguments {  typedef struct jit_arguments {
152    /* Pointers first. */    /* Pointers first. */
153    struct sljit_stack *stack;    struct sljit_stack *stack;
154    PCRE_SPTR str;    const pcre_uchar *str;
155    PCRE_SPTR begin;    const pcre_uchar *begin;
156    PCRE_SPTR end;    const pcre_uchar *end;
157    int *offsets;    int *offsets;
158    uschar *ptr;    pcre_uchar *uchar_ptr;
159      pcre_uchar *mark_ptr;
160    /* Everything else after. */    /* Everything else after. */
161    int offsetcount;    int offsetcount;
162    int calllimit;    int calllimit;
163    uschar notbol;    pcre_uint8 notbol;
164    uschar noteol;    pcre_uint8 noteol;
165    uschar notempty;    pcre_uint8 notempty;
166    uschar notempty_atstart;    pcre_uint8 notempty_atstart;
167  } jit_arguments;  } jit_arguments;
168    
169  typedef struct executable_function {  typedef struct executable_functions {
170    void *executable_func;    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171    pcre_jit_callback callback;    PUBL(jit_callback) callback;
172    void *userdata;    void *userdata;
173  } executable_function;    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174    } executable_functions;
175    
176  typedef struct jump_list {  typedef struct jump_list {
177    struct sljit_jump *jump;    struct sljit_jump *jump;
# Line 176  typedef struct stub_list { Line 184  typedef struct stub_list {
184    enum stub_types type;    enum stub_types type;
185    int data;    int data;
186    struct sljit_jump *start;    struct sljit_jump *start;
187    struct sljit_label *leave;    struct sljit_label *quit;
188    struct stub_list *next;    struct stub_list *next;
189  } stub_list;  } stub_list;
190    
191  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192    
193  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
194  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
195  the arguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
196  of its descendants. */  of its descendants. */
197  typedef struct fallback_common {  typedef struct backtrack_common {
198    /* Concatenation stack. */    /* Concatenation stack. */
199    struct fallback_common *prev;    struct backtrack_common *prev;
200    jump_list *nextfallbacks;    jump_list *nextbacktracks;
201    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
202    struct fallback_common *top;    struct backtrack_common *top;
203    jump_list *topfallbacks;    jump_list *topbacktracks;
204    /* Opcode pointer. */    /* Opcode pointer. */
205    uschar *cc;    pcre_uchar *cc;
206  } fallback_common;  } backtrack_common;
207    
208  typedef struct assert_fallback {  typedef struct assert_backtrack {
209    fallback_common common;    backtrack_common common;
210    jump_list *condfailed;    jump_list *condfailed;
211    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 (-1) if a frame is not needed. */
212    int framesize;    int framesize;
213    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
214    int localptr;    int private_data_ptr;
215    /* For iterators. */    /* For iterators. */
216    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
217  } assert_fallback;  } assert_backtrack;
218    
219  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
220    fallback_common common;    backtrack_common common;
221    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
222    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
223    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
224    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
225    /* For greedy ? operator. */    /* For greedy ? operator. */
226    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
227    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
228    union {    union {
229      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
230      jump_list *condfailed;      jump_list *condfailed;
231      assert_fallback *assert;      assert_backtrack *assert;
232      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. -1 if not needed. */
233      int framesize;      int framesize;
234    } u;    } u;
235    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
236    int localptr;    int private_data_ptr;
237  } bracket_fallback;  } bracket_backtrack;
238    
239  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
240    fallback_common common;    backtrack_common common;
241    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
242    int localptr;    int private_data_ptr;
243    /* Reverting stack is needed. */    /* Reverting stack is needed. */
244    int framesize;    int framesize;
245    /* Allocated stack size. */    /* Allocated stack size. */
246    int stacksize;    int stacksize;
247  } bracketpos_fallback;  } bracketpos_backtrack;
248    
249  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
250    fallback_common common;    backtrack_common common;
251    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
252  } braminzero_fallback;  } braminzero_backtrack;
253    
254  typedef struct iterator_fallback {  typedef struct iterator_backtrack {
255    fallback_common common;    backtrack_common common;
256    /* Next iteration. */    /* Next iteration. */
257    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
258  } iterator_fallback;  } iterator_backtrack;
259    
260  typedef struct recurse_entry {  typedef struct recurse_entry {
261    struct recurse_entry *next;    struct recurse_entry *next;
# Line 259  typedef struct recurse_entry { Line 267  typedef struct recurse_entry {
267    int start;    int start;
268  } recurse_entry;  } recurse_entry;
269    
270  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
271    fallback_common common;    backtrack_common common;
272  } recurse_fallback;  } recurse_backtrack;
273    
274    #define MAX_RANGE_SIZE 6
275    
276  typedef struct compiler_common {  typedef struct compiler_common {
277    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
278    uschar *start;    pcre_uchar *start;
279    int localsize;  
280    int *localptrs;    /* Maps private data offset to each opcode. */
281    const uschar *fcc;    int *private_data_ptrs;
282    sljit_w lcc;    /* Tells whether the capturing bracket is optimized. */
283      pcre_uint8 *optimized_cbracket;
284      /* Starting offset of private data for capturing brackets. */
285    int cbraptr;    int cbraptr;
286      /* OVector starting point. Must be divisible by 2. */
287      int ovector_start;
288      /* Last known position of the requested byte. */
289      int req_char_ptr;
290      /* Head of the last recursion. */
291      int recursive_head;
292      /* First inspected character for partial matching. */
293      int start_used_ptr;
294      /* Starting pointer for partial soft matches. */
295      int hit_start;
296      /* End pointer of the first line. */
297      int first_line_end;
298      /* Points to the marked string. */
299      int mark_ptr;
300    
301      /* Flipped and lower case tables. */
302      const pcre_uint8 *fcc;
303      sljit_w lcc;
304      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305      int mode;
306      /* Newline control. */
307    int nltype;    int nltype;
308    int newline;    int newline;
309    int bsr_nltype;    int bsr_nltype;
310      /* Dollar endonly. */
311    int endonly;    int endonly;
312      BOOL has_set_som;
313      /* Tables. */
314    sljit_w ctypes;    sljit_w ctypes;
315      int digits[2 + MAX_RANGE_SIZE];
316      /* Named capturing brackets. */
317      sljit_uw name_table;
318      sljit_w name_count;
319      sljit_w name_entry_size;
320    
321      /* Labels and jump lists. */
322      struct sljit_label *partialmatchlabel;
323      struct sljit_label *quitlabel;
324    struct sljit_label *acceptlabel;    struct sljit_label *acceptlabel;
325    stub_list *stubs;    stub_list *stubs;
326    recurse_entry *entries;    recurse_entry *entries;
327    recurse_entry *currententry;    recurse_entry *currententry;
328      jump_list *partialmatch;
329      jump_list *quit;
330    jump_list *accept;    jump_list *accept;
331    jump_list *calllimit;    jump_list *calllimit;
332    jump_list *stackalloc;    jump_list *stackalloc;
# Line 291  typedef struct compiler_common { Line 338  typedef struct compiler_common {
338    jump_list *casefulcmp;    jump_list *casefulcmp;
339    jump_list *caselesscmp;    jump_list *caselesscmp;
340    BOOL jscript_compat;    BOOL jscript_compat;
341  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
342    BOOL utf8;    BOOL utf;
343  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
344    BOOL useucp;    BOOL use_ucp;
345  #endif  #endif
346    jump_list *utf8readchar;    jump_list *utfreadchar;
347    jump_list *utf8readtype8;  #ifdef COMPILE_PCRE8
348      jump_list *utfreadtype8;
349  #endif  #endif
350    #endif /* SUPPORT_UTF */
351  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
352    jump_list *getucd;    jump_list *getucd;
353  #endif  #endif
# Line 310  typedef struct compare_context { Line 359  typedef struct compare_context {
359    int length;    int length;
360    int sourcereg;    int sourcereg;
361  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
362    int byteptr;    int ucharptr;
363    union {    union {
364      int asint;      sljit_i asint;
365      short asshort;      sljit_uh asushort;
366    #ifdef COMPILE_PCRE8
367      sljit_ub asbyte;      sljit_ub asbyte;
368      sljit_ub asbytes[4];      sljit_ub asuchars[4];
369    #else
370    #ifdef COMPILE_PCRE16
371        sljit_uh asuchars[2];
372    #endif
373    #endif
374    } c;    } c;
375    union {    union {
376      int asint;      sljit_i asint;
377      short asshort;      sljit_uh asushort;
378    #ifdef COMPILE_PCRE8
379      sljit_ub asbyte;      sljit_ub asbyte;
380      sljit_ub asbytes[4];      sljit_ub asuchars[4];
381    #else
382    #ifdef COMPILE_PCRE16
383        sljit_uh asuchars[2];
384    #endif
385    #endif
386    } oc;    } oc;
387  #endif  #endif
388  } compare_context;  } compare_context;
389    
390  enum {  enum {
391    frame_end = 0,    frame_end = 0,
392    frame_setstrbegin = -1    frame_setstrbegin = -1,
393      frame_setmark = -2
394  };  };
395    
396    /* Undefine sljit macros. */
397    #undef CMP
398    
399  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
400  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))
401    
402  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_TEMPORARY_REG1
403  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_TEMPORARY_REG3
404  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_TEMPORARY_EREG2
405  #define STR_PTR       SLJIT_GENERAL_REG1  #define STR_PTR       SLJIT_SAVED_REG1
406  #define STR_END       SLJIT_GENERAL_REG2  #define STR_END       SLJIT_SAVED_REG2
407  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_TEMPORARY_REG2
408  #define STACK_LIMIT   SLJIT_GENERAL_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
409  #define ARGUMENTS     SLJIT_GENERAL_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
410  #define CALL_COUNT    SLJIT_GENERAL_EREG2  #define CALL_COUNT    SLJIT_SAVED_EREG2
411  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
412    
413  /* Locals layout. */  /* Local space layout. */
414  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
415  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_w))
416  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_w))
417  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
418  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_w))
419  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_w))
 /* Head of the saved local variables */  
 #define LOCALS_HEAD      (4 * sizeof(sljit_w))  
 /* Head of the last recursion. */  
 #define RECURSIVE_HEAD   (5 * sizeof(sljit_w))  
420  /* Max limit of recursions. */  /* Max limit of recursions. */
421  #define CALL_LIMIT       (7 * sizeof(sljit_w))  #define CALL_LIMIT       (4 * sizeof(sljit_w))
 /* Last known position of the requested byte. */  
 #define REQ_BYTE_PTR     (8 * sizeof(sljit_w))  
 /* End pointer of the first line. */  
 #define FIRSTLINE_END    (9 * sizeof(sljit_w))  
422  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
423  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
424  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
425  the start pointers when the end of the capturing group has not yet reached. */  the start pointers when the end of the capturing group has not yet reached. */
426  #define OVECTOR_START    (10 * sizeof(sljit_w))  #define OVECTOR_START    (common->ovector_start)
427  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
428  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
429  #define PRIV(cc)         (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
430    
431    #ifdef COMPILE_PCRE8
432    #define MOV_UCHAR  SLJIT_MOV_UB
433    #define MOVU_UCHAR SLJIT_MOVU_UB
434    #else
435    #ifdef COMPILE_PCRE16
436    #define MOV_UCHAR  SLJIT_MOV_UH
437    #define MOVU_UCHAR SLJIT_MOVU_UH
438    #else
439    #error Unsupported compiling mode
440    #endif
441    #endif
442    
443  /* Shortcuts. */  /* Shortcuts. */
444  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 392  the start pointers when the end of the c Line 461  the start pointers when the end of the c
461    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
462  #define COND_VALUE(op, dst, dstw, type) \  #define COND_VALUE(op, dst, dstw, type) \
463    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
464    #define GET_LOCAL_BASE(dst, dstw, offset) \
465      sljit_get_local_base(compiler, (dst), (dstw), (offset))
466    
467  static uschar* bracketend(uschar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
468  {  {
469  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
470  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 404  return cc; Line 475  return cc;
475    
476  /* Functions whose might need modification for all new supported opcodes:  /* Functions whose might need modification for all new supported opcodes:
477   next_opcode   next_opcode
478   get_localspace   get_private_data_length
479   set_localptrs   set_private_data_ptrs
480   get_framesize   get_framesize
481   init_frame   init_frame
482   get_localsize   get_private_data_length_for_copy
483   copy_locals   copy_private_data
484   compile_hotpath   compile_matchingpath
485   compile_fallbackpath   compile_backtrackingpath
486  */  */
487    
488  static uschar *next_opcode(compiler_common *common, uschar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
489  {  {
490  SLJIT_UNUSED_ARG(common);  SLJIT_UNUSED_ARG(common);
491  switch(*cc)  switch(*cc)
# Line 463  switch(*cc) Line 534  switch(*cc)
534    case OP_BRAZERO:    case OP_BRAZERO:
535    case OP_BRAMINZERO:    case OP_BRAMINZERO:
536    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
537      case OP_COMMIT:
538    case OP_FAIL:    case OP_FAIL:
539    case OP_ACCEPT:    case OP_ACCEPT:
540    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
541    case OP_SKIPZERO:    case OP_SKIPZERO:
542    return cc + 1;    return cc + 1;
543    
544      case OP_ANYBYTE:
545    #ifdef SUPPORT_UTF
546      if (common->utf) return NULL;
547    #endif
548      return cc + 1;
549    
550    case OP_CHAR:    case OP_CHAR:
551    case OP_CHARI:    case OP_CHARI:
552    case OP_NOT:    case OP_NOT:
553    case OP_NOTI:    case OP_NOTI:
   
554    case OP_STAR:    case OP_STAR:
555    case OP_MINSTAR:    case OP_MINSTAR:
556    case OP_PLUS:    case OP_PLUS:
# Line 511  switch(*cc) Line 588  switch(*cc)
588    case OP_NOTPOSPLUSI:    case OP_NOTPOSPLUSI:
589    case OP_NOTPOSQUERYI:    case OP_NOTPOSQUERYI:
590    cc += 2;    cc += 2;
591  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
592    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
593  #endif  #endif
594    return cc;    return cc;
595    
# Line 532  switch(*cc) Line 609  switch(*cc)
609    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
610    case OP_NOTEXACTI:    case OP_NOTEXACTI:
611    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
612    cc += 4;    cc += 2 + IMM2_SIZE;
613  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
614    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
615  #endif  #endif
616    return cc;    return cc;
617    
618    case OP_NOTPROP:    case OP_NOTPROP:
619    case OP_PROP:    case OP_PROP:
620      return cc + 1 + 2;
621    
622    case OP_TYPEUPTO:    case OP_TYPEUPTO:
623    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
624    case OP_TYPEEXACT:    case OP_TYPEEXACT:
# Line 547  switch(*cc) Line 626  switch(*cc)
626    case OP_REF:    case OP_REF:
627    case OP_REFI:    case OP_REFI:
628    case OP_CREF:    case OP_CREF:
629      case OP_NCREF:
630      case OP_RREF:
631      case OP_NRREF:
632    case OP_CLOSE:    case OP_CLOSE:
633    cc += 3;    cc += 1 + IMM2_SIZE;
634    return cc;    return cc;
635    
636    case OP_CRRANGE:    case OP_CRRANGE:
637    case OP_CRMINRANGE:    case OP_CRMINRANGE:
638    return cc + 5;    return cc + 1 + 2 * IMM2_SIZE;
639    
640    case OP_CLASS:    case OP_CLASS:
641    case OP_NCLASS:    case OP_NCLASS:
642    return cc + 33;    return cc + 1 + 32 / sizeof(pcre_uchar);
643    
644  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
645    case OP_XCLASS:    case OP_XCLASS:
646    return cc + GET(cc, 1);    return cc + GET(cc, 1);
647  #endif  #endif
# Line 571  switch(*cc) Line 653  switch(*cc)
653    case OP_ASSERTBACK_NOT:    case OP_ASSERTBACK_NOT:
654    case OP_REVERSE:    case OP_REVERSE:
655    case OP_ONCE:    case OP_ONCE:
656      case OP_ONCE_NC:
657    case OP_BRA:    case OP_BRA:
658    case OP_BRAPOS:    case OP_BRAPOS:
659    case OP_COND:    case OP_COND:
# Line 588  switch(*cc) Line 671  switch(*cc)
671    case OP_CBRAPOS:    case OP_CBRAPOS:
672    case OP_SCBRA:    case OP_SCBRA:
673    case OP_SCBRAPOS:    case OP_SCBRAPOS:
674    return cc + 1 + LINK_SIZE + 2;    return cc + 1 + LINK_SIZE + IMM2_SIZE;
675    
676      case OP_MARK:
677      return cc + 1 + 2 + cc[1];
678    
679    default:    default:
680    return NULL;    return NULL;
681    }    }
682  }  }
683    
684  static int get_localspace(compiler_common *common, uschar *cc, uschar *ccend)  #define CASE_ITERATOR_PRIVATE_DATA_1 \
685        case OP_MINSTAR: \
686        case OP_MINPLUS: \
687        case OP_QUERY: \
688        case OP_MINQUERY: \
689        case OP_MINSTARI: \
690        case OP_MINPLUSI: \
691        case OP_QUERYI: \
692        case OP_MINQUERYI: \
693        case OP_NOTMINSTAR: \
694        case OP_NOTMINPLUS: \
695        case OP_NOTQUERY: \
696        case OP_NOTMINQUERY: \
697        case OP_NOTMINSTARI: \
698        case OP_NOTMINPLUSI: \
699        case OP_NOTQUERYI: \
700        case OP_NOTMINQUERYI:
701    
702    #define CASE_ITERATOR_PRIVATE_DATA_2A \
703        case OP_STAR: \
704        case OP_PLUS: \
705        case OP_STARI: \
706        case OP_PLUSI: \
707        case OP_NOTSTAR: \
708        case OP_NOTPLUS: \
709        case OP_NOTSTARI: \
710        case OP_NOTPLUSI:
711    
712    #define CASE_ITERATOR_PRIVATE_DATA_2B \
713        case OP_UPTO: \
714        case OP_MINUPTO: \
715        case OP_UPTOI: \
716        case OP_MINUPTOI: \
717        case OP_NOTUPTO: \
718        case OP_NOTMINUPTO: \
719        case OP_NOTUPTOI: \
720        case OP_NOTMINUPTOI:
721    
722    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
723        case OP_TYPEMINSTAR: \
724        case OP_TYPEMINPLUS: \
725        case OP_TYPEQUERY: \
726        case OP_TYPEMINQUERY:
727    
728    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
729        case OP_TYPESTAR: \
730        case OP_TYPEPLUS:
731    
732    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
733        case OP_TYPEUPTO: \
734        case OP_TYPEMINUPTO:
735    
736    static int get_class_iterator_size(pcre_uchar *cc)
737  {  {
738  int localspace = 0;  switch(*cc)
739  uschar *alternative;    {
740      case OP_CRSTAR:
741      case OP_CRPLUS:
742      return 2;
743    
744      case OP_CRMINSTAR:
745      case OP_CRMINPLUS:
746      case OP_CRQUERY:
747      case OP_CRMINQUERY:
748      return 1;
749    
750      case OP_CRRANGE:
751      case OP_CRMINRANGE:
752      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
753        return 0;
754      return 2;
755    
756      default:
757      return 0;
758      }
759    }
760    
761    static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
762    {
763    int private_data_length = 0;
764    pcre_uchar *alternative;
765    pcre_uchar *name;
766    pcre_uchar *end = NULL;
767    int space, size, bracketlen, i;
768    
769  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
770  while (cc < ccend)  while (cc < ccend)
771    {    {
772      space = 0;
773      size = 0;
774      bracketlen = 0;
775    switch(*cc)    switch(*cc)
776      {      {
777        case OP_SET_SOM:
778        common->has_set_som = TRUE;
779        cc += 1;
780        break;
781    
782        case OP_REF:
783        case OP_REFI:
784        common->optimized_cbracket[GET2(cc, 1)] = 0;
785        cc += 1 + IMM2_SIZE;
786        break;
787    
788      case OP_ASSERT:      case OP_ASSERT:
789      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
790      case OP_ASSERTBACK:      case OP_ASSERTBACK:
791      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
792      case OP_ONCE:      case OP_ONCE:
793        case OP_ONCE_NC:
794      case OP_BRAPOS:      case OP_BRAPOS:
795      case OP_SBRA:      case OP_SBRA:
796      case OP_SBRAPOS:      case OP_SBRAPOS:
797      case OP_SCOND:      private_data_length += sizeof(sljit_w);
798      localspace += sizeof(sljit_w);      bracketlen = 1 + LINK_SIZE;
     cc += 1 + LINK_SIZE;  
799      break;      break;
800    
801      case OP_CBRAPOS:      case OP_CBRAPOS:
802      case OP_SCBRAPOS:      case OP_SCBRAPOS:
803      localspace += sizeof(sljit_w);      private_data_length += sizeof(sljit_w);
804      cc += 1 + LINK_SIZE + 2;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
805        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
806      break;      break;
807    
808      case OP_COND:      case OP_COND:
809      /* Might be a hidden SCOND. */      case OP_SCOND:
810      alternative = cc + GET(cc, 1);      bracketlen = cc[1 + LINK_SIZE];
811      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (bracketlen == OP_CREF)
812        localspace += sizeof(sljit_w);        {
813          bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
814          common->optimized_cbracket[bracketlen] = 0;
815          }
816        else if (bracketlen == OP_NCREF)
817          {
818          bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819          name = (pcre_uchar *)common->name_table;
820          alternative = name;
821          for (i = 0; i < common->name_count; i++)
822            {
823            if (GET2(name, 0) == bracketlen) break;
824            name += common->name_entry_size;
825            }
826          SLJIT_ASSERT(i != common->name_count);
827    
828          for (i = 0; i < common->name_count; i++)
829            {
830            if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
831              common->optimized_cbracket[GET2(alternative, 0)] = 0;
832            alternative += common->name_entry_size;
833            }
834          }
835    
836        if (*cc == OP_COND)
837          {
838          /* Might be a hidden SCOND. */
839          alternative = cc + GET(cc, 1);
840          if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
841            private_data_length += sizeof(sljit_w);
842          }
843        else
844          private_data_length += sizeof(sljit_w);
845        bracketlen = 1 + LINK_SIZE;
846        break;
847    
848        case OP_BRA:
849        bracketlen = 1 + LINK_SIZE;
850        break;
851    
852        case OP_CBRA:
853        case OP_SCBRA:
854        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
855        break;
856    
857        CASE_ITERATOR_PRIVATE_DATA_1
858        space = 1;
859        size = -2;
860        break;
861    
862        CASE_ITERATOR_PRIVATE_DATA_2A
863        space = 2;
864        size = -2;
865        break;
866    
867        CASE_ITERATOR_PRIVATE_DATA_2B
868        space = 2;
869        size = -(2 + IMM2_SIZE);
870        break;
871    
872        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
873        space = 1;
874        size = 1;
875        break;
876    
877        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
878        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
879          space = 2;
880        size = 1;
881        break;
882    
883        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
884        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
885          space = 2;
886        size = 1 + IMM2_SIZE;
887        break;
888    
889        case OP_CLASS:
890        case OP_NCLASS:
891        size += 1 + 32 / sizeof(pcre_uchar);
892        space = get_class_iterator_size(cc + size);
893        break;
894    
895    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
896        case OP_XCLASS:
897        size = GET(cc, 1);
898        space = get_class_iterator_size(cc + size);
899        break;
900    #endif
901    
902        case OP_RECURSE:
903        /* Set its value only once. */
904        if (common->recursive_head == 0)
905          {
906          common->recursive_head = common->ovector_start;
907          common->ovector_start += sizeof(sljit_w);
908          }
909      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
910      break;      break;
911    
912        case OP_MARK:
913        if (common->mark_ptr == 0)
914          {
915          common->mark_ptr = common->ovector_start;
916          common->ovector_start += sizeof(sljit_w);
917          }
918        cc += 1 + 2 + cc[1];
919        break;
920    
921      default:      default:
922      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
923      if (cc == NULL)      if (cc == NULL)
924        return -1;        return -1;
925      break;      break;
926      }      }
927    
928      if (space > 0 && cc >= end)
929        private_data_length += sizeof(sljit_w) * space;
930    
931      if (size != 0)
932        {
933        if (size < 0)
934          {
935          cc += -size;
936    #ifdef SUPPORT_UTF
937          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
938    #endif
939          }
940        else
941          cc += size;
942        }
943    
944      if (bracketlen > 0)
945        {
946        if (cc >= end)
947          {
948          end = bracketend(cc);
949          if (end[-1 - LINK_SIZE] == OP_KET)
950            end = NULL;
951          }
952        cc += bracketlen;
953        }
954    }    }
955  return localspace;  return private_data_length;
956  }  }
957    
958  static void set_localptrs(compiler_common *common, int localptr, uschar *ccend)  static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
959  {  {
960  uschar *cc = common->start;  pcre_uchar *cc = common->start;
961  uschar *alternative;  pcre_uchar *alternative;
962    pcre_uchar *end = NULL;
963    int space, size, bracketlen;
964    
965  while (cc < ccend)  while (cc < ccend)
966    {    {
967      space = 0;
968      size = 0;
969      bracketlen = 0;
970    switch(*cc)    switch(*cc)
971      {      {
972      case OP_ASSERT:      case OP_ASSERT:
# Line 654  while (cc < ccend) Line 974  while (cc < ccend)
974      case OP_ASSERTBACK:      case OP_ASSERTBACK:
975      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
976      case OP_ONCE:      case OP_ONCE:
977        case OP_ONCE_NC:
978      case OP_BRAPOS:      case OP_BRAPOS:
979      case OP_SBRA:      case OP_SBRA:
980      case OP_SBRAPOS:      case OP_SBRAPOS:
981      case OP_SCOND:      case OP_SCOND:
982      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
983      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_w);
984      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
985      break;      break;
986    
987      case OP_CBRAPOS:      case OP_CBRAPOS:
988      case OP_SCBRAPOS:      case OP_SCBRAPOS:
989      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
990      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_w);
991      cc += 1 + LINK_SIZE + 2;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
992      break;      break;
993    
994      case OP_COND:      case OP_COND:
# Line 675  while (cc < ccend) Line 996  while (cc < ccend)
996      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
997      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
998        {        {
999        common->localptrs[cc - common->start] = localptr;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1000        localptr += sizeof(sljit_w);        private_data_ptr += sizeof(sljit_w);
1001        }        }
1002      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1003        break;
1004    
1005        case OP_BRA:
1006        bracketlen = 1 + LINK_SIZE;
1007        break;
1008    
1009        case OP_CBRA:
1010        case OP_SCBRA:
1011        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1012        break;
1013    
1014        CASE_ITERATOR_PRIVATE_DATA_1
1015        space = 1;
1016        size = -2;
1017      break;      break;
1018    
1019        CASE_ITERATOR_PRIVATE_DATA_2A
1020        space = 2;
1021        size = -2;
1022        break;
1023    
1024        CASE_ITERATOR_PRIVATE_DATA_2B
1025        space = 2;
1026        size = -(2 + IMM2_SIZE);
1027        break;
1028    
1029        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1030        space = 1;
1031        size = 1;
1032        break;
1033    
1034        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1035        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1036          space = 2;
1037        size = 1;
1038        break;
1039    
1040        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1041        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1042          space = 2;
1043        size = 1 + IMM2_SIZE;
1044        break;
1045    
1046        case OP_CLASS:
1047        case OP_NCLASS:
1048        size += 1 + 32 / sizeof(pcre_uchar);
1049        space = get_class_iterator_size(cc + size);
1050        break;
1051    
1052    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1053        case OP_XCLASS:
1054        size = GET(cc, 1);
1055        space = get_class_iterator_size(cc + size);
1056        break;
1057    #endif
1058    
1059      default:      default:
1060      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1061      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1062      break;      break;
1063      }      }
1064    
1065      if (space > 0 && cc >= end)
1066        {
1067        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1068        private_data_ptr += sizeof(sljit_w) * space;
1069        }
1070    
1071      if (size != 0)
1072        {
1073        if (size < 0)
1074          {
1075          cc += -size;
1076    #ifdef SUPPORT_UTF
1077          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1078    #endif
1079          }
1080        else
1081          cc += size;
1082        }
1083    
1084      if (bracketlen > 0)
1085        {
1086        if (cc >= end)
1087          {
1088          end = bracketend(cc);
1089          if (end[-1 - LINK_SIZE] == OP_KET)
1090            end = NULL;
1091          }
1092        cc += bracketlen;
1093        }
1094    }    }
1095  }  }
1096    
1097  /* Returns with -1 if no need for frame. */  /* Returns with -1 if no need for frame. */
1098  static int get_framesize(compiler_common *common, uschar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1099  {  {
1100  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc);
 uschar *end;  
1101  int length = 0;  int length = 0;
1102  BOOL possessive = FALSE;  BOOL possessive = FALSE;
1103  BOOL needs_frame = FALSE;  BOOL setsom_found = recursive;
1104  BOOL setsom_found = FALSE;  BOOL setmark_found = recursive;
1105    
1106  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1107    {    {
# Line 711  while (cc < ccend) Line 1115  while (cc < ccend)
1115    switch(*cc)    switch(*cc)
1116      {      {
1117      case OP_SET_SOM:      case OP_SET_SOM:
1118      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1119      if (!setsom_found)      if (!setsom_found)
1120        {        {
1121        length += 2;        length += 2;
1122        setsom_found = TRUE;        setsom_found = TRUE;
1123        }        }
1124      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      cc += 1;
1125      break;      break;
1126    
1127      case OP_ASSERT:      case OP_MARK:
1128      case OP_ASSERT_NOT:      SLJIT_ASSERT(common->mark_ptr != 0);
1129      case OP_ASSERTBACK:      if (!setmark_found)
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     if (needs_frame || length > 0)  
1130        {        {
1131        cc = bracketend(cc);        length += 2;
1132        break;        setmark_found = TRUE;
1133        }        }
1134      /* Check whether a frame must be created. */      cc += 1 + 2 + cc[1];
1135      end = bracketend(cc);      break;
1136      while (cc < end)  
1137        {      case OP_RECURSE:
1138        if (*cc == OP_SET_SOM || *cc == OP_CBRA || *cc == OP_CBRAPOS      if (common->has_set_som && !setsom_found)
1139            || *cc == OP_SCBRA || *cc == OP_SCBRAPOS || *cc == OP_RECURSE)        {
1140          needs_frame = TRUE;        length += 2;
1141        cc = next_opcode(common, cc);        setsom_found = TRUE;
       SLJIT_ASSERT(cc != NULL);  
1142        }        }
1143        if (common->mark_ptr != 0 && !setmark_found)
1144          {
1145          length += 2;
1146          setmark_found = TRUE;
1147          }
1148        cc += 1 + LINK_SIZE;
1149      break;      break;
1150    
1151      case OP_CBRA:      case OP_CBRA:
# Line 747  while (cc < ccend) Line 1153  while (cc < ccend)
1153      case OP_SCBRA:      case OP_SCBRA:
1154      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1155      length += 3;      length += 3;
1156      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1157      break;      break;
1158    
1159      default:      default:
# Line 757  while (cc < ccend) Line 1163  while (cc < ccend)
1163      }      }
1164    
1165  /* Possessive quantifiers can use a special case. */  /* Possessive quantifiers can use a special case. */
1166  if (SLJIT_UNLIKELY(possessive) && !needs_frame && length == 3 + 2)  if (SLJIT_UNLIKELY(possessive) && length == 3)
1167    return -1;    return -1;
1168    
1169  if (length > 0)  if (length > 0)
1170    return length + 2;    return length + 1;
1171  return needs_frame ? 0 : -1;  return -1;
1172  }  }
1173    
1174  static void init_frame(compiler_common *common, uschar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1175  {  {
 /* TMP2 must contain STACK_TOP - (-STACK(stackpos)) */  
1176  DEFINE_COMPILER;  DEFINE_COMPILER;
1177  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc);
1178  BOOL setsom_found = FALSE;  BOOL setsom_found = recursive;
1179    BOOL setmark_found = recursive;
1180  int offset;  int offset;
1181    
1182  if (stackpos < stacktop)  /* >= 1 + shortest item size (2) */
1183    {  SLJIT_UNUSED_ARG(stacktop);
1184    SLJIT_ASSERT(stackpos + 1 == stacktop);  SLJIT_ASSERT(stackpos >= stacktop + 2);
   return;  
   }  
1185    
1186  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD);  
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD, TMP2, 0);  
 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacktop), TMP1, 0);  
   
1187  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1188    cc = next_opcode(common, cc);    cc = next_opcode(common, cc);
1189  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
# Line 791  while (cc < ccend) Line 1191  while (cc < ccend)
1191    switch(*cc)    switch(*cc)
1192      {      {
1193      case OP_SET_SOM:      case OP_SET_SOM:
1194      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1195      if (!setsom_found)      if (!setsom_found)
1196        {        {
1197        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
# Line 801  while (cc < ccend) Line 1201  while (cc < ccend)
1201        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_w);
1202        setsom_found = TRUE;        setsom_found = TRUE;
1203        }        }
1204      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      cc += 1;
1205      break;      break;
1206    
1207      case OP_ASSERT:      case OP_MARK:
1208      case OP_ASSERT_NOT:      SLJIT_ASSERT(common->mark_ptr != 0);
1209      case OP_ASSERTBACK:      if (!setmark_found)
1210      case OP_ASSERTBACK_NOT:        {
1211      case OP_ONCE:        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1212      cc = bracketend(cc);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1213          stackpos += (int)sizeof(sljit_w);
1214          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1215          stackpos += (int)sizeof(sljit_w);
1216          setmark_found = TRUE;
1217          }
1218        cc += 1 + 2 + cc[1];
1219        break;
1220    
1221        case OP_RECURSE:
1222        if (common->has_set_som && !setsom_found)
1223          {
1224          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1225          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1226          stackpos += (int)sizeof(sljit_w);
1227          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1228          stackpos += (int)sizeof(sljit_w);
1229          setsom_found = TRUE;
1230          }
1231        if (common->mark_ptr != 0 && !setmark_found)
1232          {
1233          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1234          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1235          stackpos += (int)sizeof(sljit_w);
1236          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1237          stackpos += (int)sizeof(sljit_w);
1238          setmark_found = TRUE;
1239          }
1240        cc += 1 + LINK_SIZE;
1241      break;      break;
1242    
1243      case OP_CBRA:      case OP_CBRA:
# Line 826  while (cc < ccend) Line 1254  while (cc < ccend)
1254      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1255      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_w);
1256    
1257      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1258      break;      break;
1259    
1260      default:      default:
# Line 836  while (cc < ccend) Line 1264  while (cc < ccend)
1264      }      }
1265    
1266  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1267  SLJIT_ASSERT(stackpos == STACK(stacktop + 1));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1268  }  }
1269    
1270  static SLJIT_INLINE int get_localsize(compiler_common *common, uschar *cc, uschar *ccend)  static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1271  {  {
1272  int localsize = 2;  int private_data_length = 2;
1273  uschar *alternative;  int size;
1274  /* Calculate the sum of the local variables. */  pcre_uchar *alternative;
1275    /* Calculate the sum of the private machine words. */
1276  while (cc < ccend)  while (cc < ccend)
1277    {    {
1278      size = 0;
1279    switch(*cc)    switch(*cc)
1280      {      {
1281      case OP_ASSERT:      case OP_ASSERT:
# Line 853  while (cc < ccend) Line 1283  while (cc < ccend)
1283      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1284      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1285      case OP_ONCE:      case OP_ONCE:
1286        case OP_ONCE_NC:
1287      case OP_BRAPOS:      case OP_BRAPOS:
1288      case OP_SBRA:      case OP_SBRA:
1289      case OP_SBRAPOS:      case OP_SBRAPOS:
1290      case OP_SCOND:      case OP_SCOND:
1291      localsize++;      private_data_length++;
1292      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1293      break;      break;
1294    
1295      case OP_CBRA:      case OP_CBRA:
1296      case OP_SCBRA:      case OP_SCBRA:
1297      localsize++;      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1298      cc += 1 + LINK_SIZE + 2;        private_data_length++;
1299        cc += 1 + LINK_SIZE + IMM2_SIZE;
1300      break;      break;
1301    
1302      case OP_CBRAPOS:      case OP_CBRAPOS:
1303      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1304      localsize += 2;      private_data_length += 2;
1305      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1306      break;      break;
1307    
1308      case OP_COND:      case OP_COND:
1309      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1310      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1311      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1312        localsize++;        private_data_length++;
1313      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1314      break;      break;
1315    
1316        CASE_ITERATOR_PRIVATE_DATA_1
1317        if (PRIVATE_DATA(cc))
1318          private_data_length++;
1319        cc += 2;
1320    #ifdef SUPPORT_UTF
1321        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1322    #endif
1323        break;
1324    
1325        CASE_ITERATOR_PRIVATE_DATA_2A
1326        if (PRIVATE_DATA(cc))
1327          private_data_length += 2;
1328        cc += 2;
1329    #ifdef SUPPORT_UTF
1330        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1331    #endif
1332        break;
1333    
1334        CASE_ITERATOR_PRIVATE_DATA_2B
1335        if (PRIVATE_DATA(cc))
1336          private_data_length += 2;
1337        cc += 2 + IMM2_SIZE;
1338    #ifdef SUPPORT_UTF
1339        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1340    #endif
1341        break;
1342    
1343        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1344        if (PRIVATE_DATA(cc))
1345          private_data_length++;
1346        cc += 1;
1347        break;
1348    
1349        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1350        if (PRIVATE_DATA(cc))
1351          private_data_length += 2;
1352        cc += 1;
1353        break;
1354    
1355        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1356        if (PRIVATE_DATA(cc))
1357          private_data_length += 2;
1358        cc += 1 + IMM2_SIZE;
1359        break;
1360    
1361        case OP_CLASS:
1362        case OP_NCLASS:
1363    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1364        case OP_XCLASS:
1365        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1366    #else
1367        size = 1 + 32 / (int)sizeof(pcre_uchar);
1368    #endif
1369        if (PRIVATE_DATA(cc))
1370          private_data_length += get_class_iterator_size(cc + size);
1371        cc += size;
1372        break;
1373    
1374      default:      default:
1375      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1376      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
# Line 888  while (cc < ccend) Line 1378  while (cc < ccend)
1378      }      }
1379    }    }
1380  SLJIT_ASSERT(cc == ccend);  SLJIT_ASSERT(cc == ccend);
1381  return localsize;  return private_data_length;
1382  }  }
1383    
1384  static void copy_locals(compiler_common *common, uschar *cc, uschar *ccend,  static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1385    BOOL save, int stackptr, int stacktop)    BOOL save, int stackptr, int stacktop)
1386  {  {
1387  DEFINE_COMPILER;  DEFINE_COMPILER;
1388  int srcw[2];  int srcw[2];
1389  int count;  int count, size;
1390  BOOL tmp1next = TRUE;  BOOL tmp1next = TRUE;
1391  BOOL tmp1empty = TRUE;  BOOL tmp1empty = TRUE;
1392  BOOL tmp2empty = TRUE;  BOOL tmp2empty = TRUE;
1393  uschar *alternative;  pcre_uchar *alternative;
1394  enum {  enum {
1395    start,    start,
1396    loop,    loop,
# Line 935  while (status != end) Line 1425  while (status != end)
1425    switch(status)    switch(status)
1426      {      {
1427      case start:      case start:
1428      SLJIT_ASSERT(save);      SLJIT_ASSERT(save && common->recursive_head != 0);
1429      count = 1;      count = 1;
1430      srcw[0] = RECURSIVE_HEAD;      srcw[0] = common->recursive_head;
1431      status = loop;      status = loop;
1432      break;      break;
1433    
# Line 955  while (status != end) Line 1445  while (status != end)
1445        case OP_ASSERTBACK:        case OP_ASSERTBACK:
1446        case OP_ASSERTBACK_NOT:        case OP_ASSERTBACK_NOT:
1447        case OP_ONCE:        case OP_ONCE:
1448          case OP_ONCE_NC:
1449        case OP_BRAPOS:        case OP_BRAPOS:
1450        case OP_SBRA:        case OP_SBRA:
1451        case OP_SBRAPOS:        case OP_SBRAPOS:
1452        case OP_SCOND:        case OP_SCOND:
1453        count = 1;        count = 1;
1454        srcw[0] = PRIV(cc);        srcw[0] = PRIVATE_DATA(cc);
1455        SLJIT_ASSERT(srcw[0] != 0);        SLJIT_ASSERT(srcw[0] != 0);
1456        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1457        break;        break;
1458    
1459        case OP_CBRA:        case OP_CBRA:
1460        case OP_SCBRA:        case OP_SCBRA:
1461        count = 1;        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1462        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));          {
1463        cc += 1 + LINK_SIZE + 2;          count = 1;
1464            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1465            }
1466          cc += 1 + LINK_SIZE + IMM2_SIZE;
1467        break;        break;
1468    
1469        case OP_CBRAPOS:        case OP_CBRAPOS:
1470        case OP_SCBRAPOS:        case OP_SCBRAPOS:
1471        count = 2;        count = 2;
1472          srcw[0] = PRIVATE_DATA(cc);
1473        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1474        srcw[0] = PRIV(cc);        SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1475        SLJIT_ASSERT(srcw[0] != 0);        cc += 1 + LINK_SIZE + IMM2_SIZE;
       cc += 1 + LINK_SIZE + 2;  
1476        break;        break;
1477    
1478        case OP_COND:        case OP_COND:
# Line 987  while (status != end) Line 1481  while (status != end)
1481        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1482          {          {
1483          count = 1;          count = 1;
1484          srcw[0] = PRIV(cc);          srcw[0] = PRIVATE_DATA(cc);
1485          SLJIT_ASSERT(srcw[0] != 0);          SLJIT_ASSERT(srcw[0] != 0);
1486          }          }
1487        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1488        break;        break;
1489    
1490          CASE_ITERATOR_PRIVATE_DATA_1
1491          if (PRIVATE_DATA(cc))
1492            {
1493            count = 1;
1494            srcw[0] = PRIVATE_DATA(cc);
1495            }
1496          cc += 2;
1497    #ifdef SUPPORT_UTF
1498          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1499    #endif
1500          break;
1501    
1502          CASE_ITERATOR_PRIVATE_DATA_2A
1503          if (PRIVATE_DATA(cc))
1504            {
1505            count = 2;
1506            srcw[0] = PRIVATE_DATA(cc);
1507            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1508            }
1509          cc += 2;
1510    #ifdef SUPPORT_UTF
1511          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1512    #endif
1513          break;
1514    
1515          CASE_ITERATOR_PRIVATE_DATA_2B
1516          if (PRIVATE_DATA(cc))
1517            {
1518            count = 2;
1519            srcw[0] = PRIVATE_DATA(cc);
1520            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1521            }
1522          cc += 2 + IMM2_SIZE;
1523    #ifdef SUPPORT_UTF
1524          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1525    #endif
1526          break;
1527    
1528          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1529          if (PRIVATE_DATA(cc))
1530            {
1531            count = 1;
1532            srcw[0] = PRIVATE_DATA(cc);
1533            }
1534          cc += 1;
1535          break;
1536    
1537          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1538          if (PRIVATE_DATA(cc))
1539            {
1540            count = 2;
1541            srcw[0] = PRIVATE_DATA(cc);
1542            srcw[1] = srcw[0] + sizeof(sljit_w);
1543            }
1544          cc += 1;
1545          break;
1546    
1547          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1548          if (PRIVATE_DATA(cc))
1549            {
1550            count = 2;
1551            srcw[0] = PRIVATE_DATA(cc);
1552            srcw[1] = srcw[0] + sizeof(sljit_w);
1553            }
1554          cc += 1 + IMM2_SIZE;
1555          break;
1556    
1557          case OP_CLASS:
1558          case OP_NCLASS:
1559    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1560          case OP_XCLASS:
1561          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1562    #else
1563          size = 1 + 32 / (int)sizeof(pcre_uchar);
1564    #endif
1565          if (PRIVATE_DATA(cc))
1566            switch(get_class_iterator_size(cc + size))
1567              {
1568              case 1:
1569              count = 1;
1570              srcw[0] = PRIVATE_DATA(cc);
1571              break;
1572    
1573              case 2:
1574              count = 2;
1575              srcw[0] = PRIVATE_DATA(cc);
1576              srcw[1] = srcw[0] + sizeof(sljit_w);
1577              break;
1578    
1579              default:
1580              SLJIT_ASSERT_STOP();
1581              break;
1582              }
1583          cc += size;
1584          break;
1585    
1586        default:        default:
1587        cc = next_opcode(common, cc);        cc = next_opcode(common, cc);
1588        SLJIT_ASSERT(cc != NULL);        SLJIT_ASSERT(cc != NULL);
# Line 1095  if (save) Line 1685  if (save)
1685  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1686  }  }
1687    
1688    #undef CASE_ITERATOR_PRIVATE_DATA_1
1689    #undef CASE_ITERATOR_PRIVATE_DATA_2A
1690    #undef CASE_ITERATOR_PRIVATE_DATA_2B
1691    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1692    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1693    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1694    
1695  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1696  {  {
1697  return (value & (value - 1)) == 0;  return (value & (value - 1)) == 0;
# Line 1105  static SLJIT_INLINE void set_jumps(jump_ Line 1702  static SLJIT_INLINE void set_jumps(jump_
1702  while (list)  while (list)
1703    {    {
1704    /* sljit_set_label is clever enough to do nothing    /* sljit_set_label is clever enough to do nothing
1705    if either the jump or the label is NULL */    if either the jump or the label is NULL. */
1706    sljit_set_label(list->jump, label);    sljit_set_label(list->jump, label);
1707    list = list->next;    list = list->next;
1708    }    }
# Line 1132  if (list_item) Line 1729  if (list_item)
1729    list_item->type = type;    list_item->type = type;
1730    list_item->data = data;    list_item->data = data;
1731    list_item->start = start;    list_item->start = start;
1732    list_item->leave = LABEL();    list_item->quit = LABEL();
1733    list_item->next = common->stubs;    list_item->next = common->stubs;
1734    common->stubs = list_item;    common->stubs = list_item;
1735    }    }
# Line 1152  while (list_item) Line 1749  while (list_item)
1749      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1750      break;      break;
1751      }      }
1752    JUMPTO(SLJIT_JUMP, list_item->leave);    JUMPTO(SLJIT_JUMP, list_item->quit);
1753    list_item = list_item->next;    list_item = list_item->next;
1754    }    }
1755  common->stubs = NULL;  common->stubs = NULL;
# Line 1195  struct sljit_label *loop; Line 1792  struct sljit_label *loop;
1792  int i;  int i;
1793  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
1794  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
1795  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1796  if (length < 8)  if (length < 8)
1797    {    {
1798    for (i = 0; i < length; i++)    for (i = 0; i < length; i++)
# Line 1203  if (length < 8) Line 1800  if (length < 8)
1800    }    }
1801  else  else
1802    {    {
1803    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1804    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1805    loop = LABEL();    loop = LABEL();
1806    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
# Line 1219  struct sljit_label *loop; Line 1816  struct sljit_label *loop;
1816  struct sljit_jump *earlyexit;  struct sljit_jump *earlyexit;
1817    
1818  /* At this point we can freely use all registers. */  /* At this point we can freely use all registers. */
1819  OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1820  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1821    
1822  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1823    if (common->mark_ptr != 0)
1824      OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1825  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1826    if (common->mark_ptr != 0)
1827      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1828  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1829  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1830  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);  GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1831  /* Unlikely, but possible */  /* Unlikely, but possible */
1832  earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);  earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1833  loop = LABEL();  loop = LABEL();
1834  OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1835  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));  OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1836  /* Copy the integer value to the output buffer */  /* Copy the integer value to the output buffer */
1837  OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);  #ifdef COMPILE_PCRE16
1838    OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1839    #endif
1840    OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1841  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1842  JUMPTO(SLJIT_C_NOT_ZERO, loop);  JUMPTO(SLJIT_C_NOT_ZERO, loop);
1843  JUMPHERE(earlyexit);  JUMPHERE(earlyexit);
# Line 1241  JUMPHERE(earlyexit); Line 1845  JUMPHERE(earlyexit);
1845  /* Calculate the return value, which is the maximum ovector value. */  /* Calculate the return value, which is the maximum ovector value. */
1846  if (topbracket > 1)  if (topbracket > 1)
1847    {    {
1848    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1849    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1850    
1851    /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */    /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1852    loop = LABEL();    loop = LABEL();
1853    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * sizeof(sljit_w)));    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1854    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1855    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1856    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1857    }    }
1858  else  else
1859    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1860  }  }
1861    
1862  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, uschar* cc)  static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1863    {
1864    DEFINE_COMPILER;
1865    
1866    SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1867    SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1868    
1869    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1870    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1871    OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1872    CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1873    
1874    /* Store match begin and end. */
1875    OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1876    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1877    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1878    OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1879    #ifdef COMPILE_PCRE16
1880    OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1881    #endif
1882    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1883    
1884    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1885    #ifdef COMPILE_PCRE16
1886    OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1887    #endif
1888    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1889    
1890    JUMPTO(SLJIT_JUMP, quit);
1891    }
1892    
1893    static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1894    {
1895    /* May destroy TMP1. */
1896    DEFINE_COMPILER;
1897    struct sljit_jump *jump;
1898    
1899    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1900      {
1901      /* The value of -1 must be kept for start_used_ptr! */
1902      OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1903      /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1904      is not necessary if start_used_ptr == STR_PTR, it does not hurt either. */
1905      jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1906      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1907      JUMPHERE(jump);
1908      }
1909    else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1910      {
1911      jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1912      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1913      JUMPHERE(jump);
1914      }
1915    }
1916    
1917    static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1918  {  {
1919  /* Detects if the character has an othercase. */  /* Detects if the character has an othercase. */
1920  unsigned int c;  unsigned int c;
1921    
1922  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1923  if (common->utf8)  if (common->utf)
1924    {    {
1925    GETCHAR(c, cc);    GETCHAR(c, cc);
1926    if (c > 127)    if (c > 127)
# Line 1272  if (common->utf8) Line 1931  if (common->utf8)
1931      return FALSE;      return FALSE;
1932  #endif  #endif
1933      }      }
1934    #ifndef COMPILE_PCRE8
1935      return common->fcc[c] != c;
1936    #endif
1937    }    }
1938  else  else
1939  #endif  #endif
1940    c = *cc;    c = *cc;
1941  return common->fcc[c] != c;  return MAX_255(c) ? common->fcc[c] != c : FALSE;
1942  }  }
1943    
1944  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1945  {  {
1946  /* Returns with the othercase. */  /* Returns with the othercase. */
1947  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1948  if (common->utf8 && c > 127)  if (common->utf && c > 127)
1949    {    {
1950  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1951    return UCD_OTHERCASE(c);    return UCD_OTHERCASE(c);
# Line 1292  if (common->utf8 && c > 127) Line 1954  if (common->utf8 && c > 127)
1954  #endif  #endif
1955    }    }
1956  #endif  #endif
1957  return common->fcc[c];  return TABLE_GET(c, common->fcc, c);
1958  }  }
1959    
1960  static unsigned int char_get_othercase_bit(compiler_common *common, uschar* cc)  static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1961  {  {
1962  /* Detects if the character and its othercase has only 1 bit difference. */  /* Detects if the character and its othercase has only 1 bit difference. */
1963  unsigned int c, oc, bit;  unsigned int c, oc, bit;
1964  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1965  int n;  int n;
1966  #endif  #endif
1967    
1968  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1969  if (common->utf8)  if (common->utf)
1970    {    {
1971    GETCHAR(c, cc);    GETCHAR(c, cc);
1972    if (c <= 127)    if (c <= 127)
# Line 1321  if (common->utf8) Line 1983  if (common->utf8)
1983  else  else
1984    {    {
1985    c = *cc;    c = *cc;
1986    oc = common->fcc[c];    oc = TABLE_GET(c, common->fcc, c);
1987    }    }
1988  #else  #else
1989  c = *cc;  c = *cc;
1990  oc = common->fcc[c];  oc = TABLE_GET(c, common->fcc, c);
1991  #endif  #endif
1992    
1993  SLJIT_ASSERT(c != oc);  SLJIT_ASSERT(c != oc);
# Line 1339  if (c <= 127 && bit == 0x20) Line 2001  if (c <= 127 && bit == 0x20)
2001  if (!ispowerof2(bit))  if (!ispowerof2(bit))
2002    return 0;    return 0;
2003    
2004  #ifdef SUPPORT_UTF8  #ifdef COMPILE_PCRE8
2005  if (common->utf8 && c > 127)  
2006    #ifdef SUPPORT_UTF
2007    if (common->utf && c > 127)
2008    {    {
2009    n = _pcre_utf8_table4[*cc & 0x3f];    n = GET_EXTRALEN(*cc);
2010    while ((bit & 0x3f) == 0)    while ((bit & 0x3f) == 0)
2011      {      {
2012      n--;      n--;
# Line 1350  if (common->utf8 && c > 127) Line 2014  if (common->utf8 && c > 127)
2014      }      }
2015    return (n << 8) | bit;    return (n << 8) | bit;
2016    }    }
2017  #endif  #endif /* SUPPORT_UTF */
2018  return (0 << 8) | bit;  return (0 << 8) | bit;
2019    
2020    #else /* COMPILE_PCRE8 */
2021    
2022    #ifdef COMPILE_PCRE16
2023    #ifdef SUPPORT_UTF
2024    if (common->utf && c > 65535)
2025      {
2026      if (bit >= (1 << 10))
2027        bit >>= 10;
2028      else
2029        return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2030      }
2031    #endif /* SUPPORT_UTF */
2032    return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2033    #endif /* COMPILE_PCRE16 */
2034    
2035    #endif /* COMPILE_PCRE8 */
2036    }
2037    
2038    static void check_partial(compiler_common *common, BOOL force)
2039    {
2040    /* Checks whether a partial match has occurred. Does not modify registers. */
2041    DEFINE_COMPILER;
2042    struct sljit_jump *jump = NULL;
2043    
2044    SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2045    
2046    if (common->mode == JIT_COMPILE)
2047      return;
2048    
2049    if (!force)
2050      jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2051    else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2052      jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2053    
2054    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2055      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2056    else
2057      {
2058      if (common->partialmatchlabel != NULL)
2059        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2060      else
2061        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2062      }
2063    
2064    if (jump != NULL)
2065      JUMPHERE(jump);
2066  }  }
2067    
2068  static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)  static struct sljit_jump *check_str_end(compiler_common *common)
2069  {  {
2070    /* Does not affect registers. Usually used in a tight spot. */
2071  DEFINE_COMPILER;  DEFINE_COMPILER;
2072  add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));  struct sljit_jump *jump;
2073    struct sljit_jump *nohit;
2074    struct sljit_jump *return_value;
2075    
2076    if (common->mode == JIT_COMPILE)
2077      return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2078    
2079    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2080    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2081      {
2082      nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2083      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2084      JUMPHERE(nohit);
2085      return_value = JUMP(SLJIT_JUMP);
2086      }
2087    else
2088      {
2089      return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2090      if (common->partialmatchlabel != NULL)
2091        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2092      else
2093        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2094      }
2095    JUMPHERE(jump);
2096    return return_value;
2097    }
2098    
2099    static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2100    {
2101    DEFINE_COMPILER;
2102    struct sljit_jump *jump;
2103    
2104    if (common->mode == JIT_COMPILE)
2105      {
2106      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2107      return;
2108      }
2109    
2110    /* Partial matching mode. */
2111    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2112    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2113    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2114      {
2115      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2116      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2117      }
2118    else
2119      {
2120      if (common->partialmatchlabel != NULL)
2121        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2122      else
2123        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2124      }
2125    JUMPHERE(jump);
2126  }  }
2127    
2128  static void read_char(compiler_common *common)  static void read_char(compiler_common *common)
# Line 1365  static void read_char(compiler_common *c Line 2130  static void read_char(compiler_common *c
2130  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, updates STR_PTR.
2131  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2132  DEFINE_COMPILER;  DEFINE_COMPILER;
2133  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2134  struct sljit_jump *jump;  struct sljit_jump *jump;
2135  #endif  #endif
2136    
2137  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2138  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2139  if (common->utf8)  if (common->utf)
2140    {    {
2141    /* Should not found a value between 128 and 192 here. */  #ifdef COMPILE_PCRE8
2142    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 192);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2143    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #else
2144    #ifdef COMPILE_PCRE16
2145      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2146    #endif
2147    #endif /* COMPILE_PCRE8 */
2148      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2149    JUMPHERE(jump);    JUMPHERE(jump);
2150    }    }
2151  #endif  #endif
2152  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2153  }  }
2154    
2155  static void peek_char(compiler_common *common)  static void peek_char(compiler_common *common)
# Line 1387  static void peek_char(compiler_common *c Line 2157  static void peek_char(compiler_common *c
2157  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2158  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2159  DEFINE_COMPILER;  DEFINE_COMPILER;
2160  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2161  struct sljit_jump *jump;  struct sljit_jump *jump;
2162  #endif  #endif
2163    
2164  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2165  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2166  if (common->utf8)  if (common->utf)
2167    {    {
2168    /* Should not found a value between 128 and 192 here. */  #ifdef COMPILE_PCRE8
2169    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 192);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2170    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #else
2171    #ifdef COMPILE_PCRE16
2172      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2173    #endif
2174    #endif /* COMPILE_PCRE8 */
2175      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2176    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2177    JUMPHERE(jump);    JUMPHERE(jump);
2178    }    }
# Line 1408  static void read_char8_type(compiler_com Line 2183  static void read_char8_type(compiler_com
2183  {  {
2184  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2185  DEFINE_COMPILER;  DEFINE_COMPILER;
2186  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2187  struct sljit_jump *jump;  struct sljit_jump *jump;
2188  #endif  #endif
2189    
2190  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2191  if (common->utf8)  if (common->utf)
2192    {    {
2193    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2194    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195    #ifdef COMPILE_PCRE8
2196    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2197    it is a clever early read in most cases. */    it is needed in most cases. */
2198      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2199      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2200      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2201      JUMPHERE(jump);
2202    #else
2203    #ifdef COMPILE_PCRE16
2204      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2205      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2206    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
   /* Should not found a value between 128 and 192 here. */  
   jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 192);  
   add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));  
2207    JUMPHERE(jump);    JUMPHERE(jump);
2208      /* Skip low surrogate if necessary. */
2209      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2210      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2211      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2212      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2213      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2214    #endif
2215    #endif /* COMPILE_PCRE8 */
2216    return;    return;
2217    }    }
2218  #endif  #endif
2219  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2220  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2221  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);  #ifdef COMPILE_PCRE16
2222    /* The ctypes array contains only 256 values. */
2223    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2224    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2225    #endif
2226    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2227    #ifdef COMPILE_PCRE16
2228    JUMPHERE(jump);
2229    #endif
2230  }  }
2231    
2232  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
2233  {  {
2234  /* Goes one character back. Only affects STR_PTR. Does not check begin. */  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2235  DEFINE_COMPILER;  DEFINE_COMPILER;
2236  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2237  struct sljit_label *label;  struct sljit_label *label;
2238    
2239  if (common->utf8)  if (common->utf)
2240    {    {
2241    label = LABEL();    label = LABEL();
2242    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2243    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2244    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2245    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2246    return;    return;
2247    }    }
2248  #endif  #endif
2249  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2250    if (common->utf)
2251      {
2252      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2253      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2254      /* Skip low surrogate if necessary. */
2255      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2256      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2257      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2258      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2259      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2260      return;
2261      }
2262    #endif
2263    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2264  }  }
2265    
2266  static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2267  {  {
2268  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2269  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 1460  DEFINE_COMPILER; Line 2271  DEFINE_COMPILER;
2271  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2272    {    {
2273    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2274    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2275    }    }
2276  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2277    {    {
# Line 1468  else if (nltype == NLTYPE_ANYCRLF) Line 2279  else if (nltype == NLTYPE_ANYCRLF)
2279    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2280    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2281    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2282    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2283    }    }
2284  else  else
2285    {    {
2286    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2287    add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2288    }    }
2289  }  }
2290    
2291  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2292  static void do_utf8readchar(compiler_common *common)  
2293    #ifdef COMPILE_PCRE8
2294    static void do_utfreadchar(compiler_common *common)
2295  {  {
2296  /* Fast decoding an utf8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2297  of the character (>= 192). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2298  DEFINE_COMPILER;  DEFINE_COMPILER;
2299  struct sljit_jump *jump;  struct sljit_jump *jump;
2300    
2301  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2302  /* Searching for the first zero. */  /* Searching for the first zero. */
2303  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2304  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2305  /* 2 byte sequence */  /* Two byte sequence. */
2306  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2307  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2308  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2309  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2310  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2311  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2312  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2313  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2314  JUMPHERE(jump);  JUMPHERE(jump);
2315    
2316  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2317  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2318  /* 3 byte sequence */  /* Three byte sequence. */
2319  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2320  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2321  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2322  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2323  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2324  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2325  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2326  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2327  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2328  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2329  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2330  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2331  JUMPHERE(jump);  JUMPHERE(jump);
2332    
2333  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);  /* Four byte sequence. */
2334  jump = JUMP(SLJIT_C_NOT_ZERO);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
 /* 4 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
2335  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2336  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2337  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2338  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2339  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2340  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
   
 /* 5 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
2341  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2342  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2343  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2344  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2345  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2346  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2347  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2348  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2349  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2350  }  }
2351    
2352  static void do_utf8readtype8(compiler_common *common)  static void do_utfreadtype8(compiler_common *common)
2353  {  {
2354  /* Fast decoding an utf8 character type. TMP2 contains the first byte  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2355  of the character (>= 192) and TMP1 is destroyed. Return value in TMP1. */  of the character (>= 0xc0). Return value in TMP1. */
2356  DEFINE_COMPILER;  DEFINE_COMPILER;
2357  struct sljit_jump *jump;  struct sljit_jump *jump;
2358  struct sljit_jump *compare;  struct sljit_jump *compare;
2359    
2360  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2361    
2362  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2363  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2364  /* 2 byte sequence */  /* Two byte sequence. */
2365  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2366  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2367  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2368  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2369  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
# Line 1590  sljit_emit_fast_return(compiler, RETURN_ Line 2378  sljit_emit_fast_return(compiler, RETURN_
2378  JUMPHERE(jump);  JUMPHERE(jump);
2379    
2380  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
2381  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_utf8_char_sizes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  
2382  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2383  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2384  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2385  }  }
2386    
2387  #endif  #else /* COMPILE_PCRE8 */
2388    
2389    #ifdef COMPILE_PCRE16
2390    static void do_utfreadchar(compiler_common *common)
2391    {
2392    /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2393    of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2394    DEFINE_COMPILER;
2395    struct sljit_jump *jump;
2396    
2397    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2398    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2399    /* Do nothing, only return. */
2400    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2401    
2402    JUMPHERE(jump);
2403    /* Combine two 16 bit characters. */
2404    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2405    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2406    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2407    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2408    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2409    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2410    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2411    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2412    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2413    }
2414    #endif /* COMPILE_PCRE16 */
2415    
2416    #endif /* COMPILE_PCRE8 */
2417    
2418    #endif /* SUPPORT_UTF */
2419    
2420  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2421    
# Line 1613  DEFINE_COMPILER; Line 2431  DEFINE_COMPILER;
2431    
2432  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2433    
2434  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2435  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2436  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_ucd_stage1);  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2437  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2438  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2439  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2440  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_stage2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2441  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2442  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, chartype));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2443  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2444  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2445  }  }
# Line 1635  struct sljit_label *newlinelabel = NULL; Line 2453  struct sljit_label *newlinelabel = NULL;
2453  struct sljit_jump *start;  struct sljit_jump *start;
2454  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
2455  struct sljit_jump *nl = NULL;  struct sljit_jump *nl = NULL;
2456    #ifdef SUPPORT_UTF
2457    struct sljit_jump *singlechar;
2458    #endif
2459  jump_list *newline = NULL;  jump_list *newline = NULL;
2460  BOOL newlinecheck = FALSE;  BOOL newlinecheck = FALSE;
2461  BOOL readbyte = FALSE;  BOOL readuchar = FALSE;
2462    
2463  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2464      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
# Line 1646  if (!(hascrorlf || firstline) && (common Line 2467  if (!(hascrorlf || firstline) && (common
2467  if (firstline)  if (firstline)
2468    {    {
2469    /* Search for the end of the first line. */    /* Search for the end of the first line. */
2470    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2471    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2472    
2473    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2474      {      {
2475      mainloop = LABEL();      mainloop = LABEL();
2476      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2478      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2479      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2480      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2481      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2482      OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1);      JUMPHERE(end);
2483        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2484      }      }
2485    else    else
2486      {      {
2487      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2488      mainloop = LABEL();      mainloop = LABEL();
2489      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2490      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2491      read_char(common);      read_char(common);
2492      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2493      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2494      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      JUMPHERE(end);
2495        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2496      set_jumps(newline, LABEL());      set_jumps(newline, LABEL());
2497      }      }
2498    
2499    JUMPHERE(end);    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  
2500    }    }
2501    
2502  start = JUMP(SLJIT_JUMP);  start = JUMP(SLJIT_JUMP);
# Line 1682  start = JUMP(SLJIT_JUMP); Line 2504  start = JUMP(SLJIT_JUMP);
2504  if (newlinecheck)  if (newlinecheck)
2505    {    {
2506    newlinelabel = LABEL();    newlinelabel = LABEL();
2507    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2508    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2509    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2510    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2511    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2512    #ifdef COMPILE_PCRE16
2513      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2514    #endif
2515    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2516    nl = JUMP(SLJIT_JUMP);    nl = JUMP(SLJIT_JUMP);
2517    }    }
# Line 1694  if (newlinecheck) Line 2519  if (newlinecheck)
2519  mainloop = LABEL();  mainloop = LABEL();
2520    
2521  /* Increasing the STR_PTR here requires one less jump in the most common case. */  /* Increasing the STR_PTR here requires one less jump in the most common case. */
2522  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2523  if (common->utf8) readbyte = TRUE;  if (common->utf) readuchar = TRUE;
2524  #endif  #endif
2525  if (newlinecheck) readbyte = TRUE;  if (newlinecheck) readuchar = TRUE;
2526    
2527  if (readbyte)  if (readuchar)
2528    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2529    
2530  if (newlinecheck)  if (newlinecheck)
2531    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2532    
2533  #ifdef SUPPORT_UTF8  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2534  if (common->utf8)  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2535    if (common->utf)
2536    {    {
2537    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes);    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2538      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2539    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2540      JUMPHERE(singlechar);
2541      }
2542    #endif
2543    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2544    if (common->utf)
2545      {
2546      singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2547      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2548      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2549      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2550      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2551      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2552      JUMPHERE(singlechar);
2553    }    }
 else  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 #else  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
2554  #endif  #endif
2555  JUMPHERE(start);  JUMPHERE(start);
2556    
# Line 1727  if (newlinecheck) Line 2563  if (newlinecheck)
2563  return mainloop;  return mainloop;
2564  }  }
2565    
2566  static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)  #define MAX_N_CHARS 3
2567    
2568    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2569    {
2570    DEFINE_COMPILER;
2571    struct sljit_label *start;
2572    struct sljit_jump *quit;
2573    pcre_int32 chars[MAX_N_CHARS * 2];
2574    pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2575    int location = 0;
2576    pcre_int32 len, c, bit, caseless;
2577    int must_stop;
2578    
2579    /* We do not support alternatives now. */
2580    if (*(common->start + GET(common->start, 1)) == OP_ALT)
2581      return FALSE;
2582    
2583    while (TRUE)
2584      {
2585      caseless = 0;
2586      must_stop = 1;
2587      switch(*cc)
2588        {
2589        case OP_CHAR:
2590        must_stop = 0;
2591        cc++;
2592        break;
2593    
2594        case OP_CHARI:
2595        caseless = 1;
2596        must_stop = 0;
2597        cc++;
2598        break;
2599    
2600        case OP_SOD:
2601        case OP_SOM:
2602        case OP_SET_SOM:
2603        case OP_NOT_WORD_BOUNDARY:
2604        case OP_WORD_BOUNDARY:
2605        case OP_EODN:
2606        case OP_EOD:
2607        case OP_CIRC:
2608        case OP_CIRCM:
2609        case OP_DOLL:
2610        case OP_DOLLM:
2611        /* Zero width assertions. */
2612        cc++;
2613        continue;
2614    
2615        case OP_PLUS:
2616        case OP_MINPLUS:
2617        case OP_POSPLUS:
2618        cc++;
2619        break;
2620    
2621        case OP_EXACT:
2622        cc += 1 + IMM2_SIZE;
2623        break;
2624    
2625        case OP_PLUSI:
2626        case OP_MINPLUSI:
2627        case OP_POSPLUSI:
2628        caseless = 1;
2629        cc++;
2630        break;
2631    
2632        case OP_EXACTI:
2633        caseless = 1;
2634        cc += 1 + IMM2_SIZE;
2635        break;
2636    
2637        default:
2638        must_stop = 2;
2639        break;
2640        }
2641    
2642      if (must_stop == 2)
2643          break;
2644    
2645      len = 1;
2646    #ifdef SUPPORT_UTF
2647      if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2648    #endif
2649    
2650      if (caseless && char_has_othercase(common, cc))
2651        {
2652        caseless = char_get_othercase_bit(common, cc);
2653        if (caseless == 0)
2654          return FALSE;
2655    #ifdef COMPILE_PCRE8
2656        caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2657    #else
2658        if ((caseless & 0x100) != 0)
2659          caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2660        else
2661          caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2662    #endif
2663        }
2664      else
2665        caseless = 0;
2666    
2667      while (len > 0 && location < MAX_N_CHARS * 2)
2668        {
2669        c = *cc;
2670        bit = 0;
2671        if (len == (caseless & 0xff))
2672          {
2673          bit = caseless >> 8;
2674          c |= bit;
2675          }
2676    
2677        chars[location] = c;
2678        chars[location + 1] = bit;
2679    
2680        len--;
2681        location += 2;
2682        cc++;
2683        }
2684    
2685      if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2686        break;
2687      }
2688    
2689    /* At least two characters are required. */
2690    if (location < 2 * 2)
2691        return FALSE;
2692    
2693    if (firstline)
2694      {
2695      SLJIT_ASSERT(common->first_line_end != 0);
2696      OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2697      OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, (location >> 1) - 1);
2698      }
2699    else
2700      OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
2701    
2702    start = LABEL();
2703    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2704    
2705    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2706    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2707    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2708    if (chars[1] != 0)
2709      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2710    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2711    if (location > 2 * 2)
2712      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2713    if (chars[3] != 0)
2714      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2715    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2716    if (location > 2 * 2)
2717      {
2718      if (chars[5] != 0)
2719        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2720      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2721      }
2722    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2723    
2724    JUMPHERE(quit);
2725    
2726    if (firstline)
2727      OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2728    else
2729      OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
2730    return TRUE;
2731    }
2732    
2733    #undef MAX_N_CHARS
2734    
2735    static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2736  {  {
2737  DEFINE_COMPILER;  DEFINE_COMPILER;
2738  struct sljit_label *start;  struct sljit_label *start;
2739  struct sljit_jump *leave;  struct sljit_jump *quit;
2740  struct sljit_jump *found;  struct sljit_jump *found;
2741  pcre_uint16 oc, bit;  pcre_uchar oc, bit;
2742    
2743  if (firstline)  if (firstline)
2744    {    {
2745    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2746    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2747      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2748    }    }
2749    
2750  start = LABEL();  start = LABEL();
2751  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2752  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2753    
2754  if ((firstbyte & REQ_CASELESS) == 0)  oc = first_char;
2755    found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);  if (caseless)
2756      {
2757      oc = TABLE_GET(first_char, common->fcc, first_char);
2758    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2759      if (first_char > 127 && common->utf)
2760        oc = UCD_OTHERCASE(first_char);
2761    #endif
2762      }
2763    if (first_char == oc)
2764      found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2765  else  else
2766    {    {
2767    firstbyte &= 0xff;    bit = first_char ^ oc;
   oc = common->fcc[firstbyte];  
   bit = firstbyte ^ oc;  
2768    if (ispowerof2(bit))    if (ispowerof2(bit))
2769      {      {
2770      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2771      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2772      }      }
2773    else    else
2774      {      {
2775      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2776      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2777      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2778      COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);      COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
# Line 1767  else Line 2780  else
2780      }      }
2781    }    }
2782    
2783  #ifdef SUPPORT_UTF8  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 if (common->utf8)  
   {  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
   }  
 else  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 #else  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 #endif  
2784  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
2785  JUMPHERE(found);  JUMPHERE(found);
2786  JUMPHERE(leave);  JUMPHERE(quit);
2787    
2788  if (firstline)  if (firstline)
2789    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2790  }  }
2791    
2792  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
# Line 1792  DEFINE_COMPILER; Line 2795  DEFINE_COMPILER;
2795  struct sljit_label *loop;  struct sljit_label *loop;
2796  struct sljit_jump *lastchar;  struct sljit_jump *lastchar;
2797  struct sljit_jump *firstchar;  struct sljit_jump *firstchar;
2798  struct sljit_jump *leave;  struct sljit_jump *quit;
2799  struct sljit_jump *foundcr = NULL;  struct sljit_jump *foundcr = NULL;
2800  struct sljit_jump *notfoundnl;  struct sljit_jump *notfoundnl;
2801  jump_list *newline = NULL;  jump_list *newline = NULL;
2802    
2803  if (firstline)  if (firstline)
2804    {    {
2805    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2806    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2807      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2808    }    }
2809    
2810  if (common->nltype == NLTYPE_FIXED && common->newline > 255)  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
# Line 1811  if (common->nltype == NLTYPE_FIXED && co Line 2815  if (common->nltype == NLTYPE_FIXED && co
2815    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2816    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2817    
2818    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2819    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2820    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2821    #ifdef COMPILE_PCRE16
2822      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2823    #endif
2824    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2825    
2826    loop = LABEL();    loop = LABEL();
2827    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828    leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2829    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2830    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2831    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2832    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2833    
2834    JUMPHERE(leave);    JUMPHERE(quit);
2835    JUMPHERE(firstchar);    JUMPHERE(firstchar);
2836    JUMPHERE(lastchar);    JUMPHERE(lastchar);
2837    
# Line 1848  set_jumps(newline, loop); Line 2855  set_jumps(newline, loop);
2855    
2856  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2857    {    {
2858    leave = JUMP(SLJIT_JUMP);    quit = JUMP(SLJIT_JUMP);
2859    JUMPHERE(foundcr);    JUMPHERE(foundcr);
2860    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2861    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2862    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2863    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2864    #ifdef COMPILE_PCRE16
2865      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2866    #endif
2867    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2868    JUMPHERE(notfoundnl);    JUMPHERE(notfoundnl);
2869    JUMPHERE(leave);    JUMPHERE(quit);
2870    }    }
2871  JUMPHERE(lastchar);  JUMPHERE(lastchar);
2872  JUMPHERE(firstchar);  JUMPHERE(firstchar);
2873    
2874  if (firstline)  if (firstline)
2875    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2876  }  }
2877    
2878  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2879  {  {
2880  DEFINE_COMPILER;  DEFINE_COMPILER;
2881  struct sljit_label *start;  struct sljit_label *start;
2882  struct sljit_jump *leave;  struct sljit_jump *quit;
2883  struct sljit_jump *found;  struct sljit_jump *found;
2884    #ifndef COMPILE_PCRE8
2885    struct sljit_jump *jump;
2886    #endif
2887    
2888  if (firstline)  if (firstline)
2889    {    {
2890    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2891    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2892      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2893    }    }
2894    
2895  start = LABEL();  start = LABEL();
2896  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2897  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2898  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2899  if (common->utf8)  if (common->utf)
2900    OP1(SLJIT_MOV_UB, TMP3, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2901    #endif
2902    #ifndef COMPILE_PCRE8
2903    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2904    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2905    JUMPHERE(jump);
2906  #endif  #endif
2907  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2908  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
# Line 1892  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TM Line 2911  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TM
2911  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2912  found = JUMP(SLJIT_C_NOT_ZERO);  found = JUMP(SLJIT_C_NOT_ZERO);
2913    
2914  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2915  if (common->utf8)  if (common->utf)
2916    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2917  else  #endif
2918    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2919  #else  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2920  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  if (common->utf)
2921      {
2922      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2923      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2924      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2925      }
2926    #endif
2927    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2928    if (common->utf)
2929      {
2930      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2931      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2932      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2933      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2934      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2935      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2936      }
2937  #endif  #endif
2938  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
2939  JUMPHERE(found);  JUMPHERE(found);
2940  JUMPHERE(leave);  JUMPHERE(quit);
2941    
2942  if (firstline)  if (firstline)
2943    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2944  }  }
2945    
2946  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2947  {  {
2948  DEFINE_COMPILER;  DEFINE_COMPILER;
2949  struct sljit_label *loop;  struct sljit_label *loop;
# Line 1917  struct sljit_jump *alreadyfound; Line 2952  struct sljit_jump *alreadyfound;
2952  struct sljit_jump *found;  struct sljit_jump *found;
2953  struct sljit_jump *foundoc = NULL;  struct sljit_jump *foundoc = NULL;
2954  struct sljit_jump *notfound;  struct sljit_jump *notfound;
2955  pcre_uint16 oc, bit;  pcre_uchar oc, bit;
2956    
2957  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);  SLJIT_ASSERT(common->req_char_ptr != 0);
2958    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2959  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2960  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2961  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2962    
2963  if (has_firstbyte)  if (has_firstchar)
2964    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2965  else  else
2966    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2967    
2968  loop = LABEL();  loop = LABEL();
2969  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2970    
2971  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2972  if ((reqbyte & REQ_CASELESS) == 0)  oc = req_char;
2973    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);  if (caseless)
2974      {
2975      oc = TABLE_GET(req_char, common->fcc, req_char);
2976    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2977      if (req_char > 127 && common->utf)
2978        oc = UCD_OTHERCASE(req_char);
2979    #endif
2980      }
2981    if (req_char == oc)
2982      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2983  else  else
2984    {    {
2985    reqbyte &= 0xff;    bit = req_char ^ oc;
   oc = common->fcc[reqbyte];  
   bit = reqbyte ^ oc;  
2986    if (ispowerof2(bit))    if (ispowerof2(bit))
2987      {      {
2988      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2989      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2990      }      }
2991    else    else
2992      {      {
2993      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2994      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2995      }      }
2996    }    }
2997  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2998  JUMPTO(SLJIT_JUMP, loop);  JUMPTO(SLJIT_JUMP, loop);
2999    
3000  JUMPHERE(found);  JUMPHERE(found);
3001  if (foundoc)  if (foundoc)
3002    JUMPHERE(foundoc);    JUMPHERE(foundoc);
3003  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3004  JUMPHERE(alreadyfound);  JUMPHERE(alreadyfound);
3005  JUMPHERE(toolong);  JUMPHERE(toolong);
3006  return notfound;  return notfound;
# Line 1966  return notfound; Line 3009  return notfound;
3009  static void do_revertframes(compiler_common *common)  static void do_revertframes(compiler_common *common)
3010  {  {
3011  DEFINE_COMPILER;  DEFINE_COMPILER;
 struct sljit_jump *earlyexit;  
3012  struct sljit_jump *jump;  struct sljit_jump *jump;
3013  struct sljit_label *mainloop;  struct sljit_label *mainloop;
3014    
3015  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3016  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD);  OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3017    GET_LOCAL_BASE(TMP3, 0, 0);
3018    
3019  /* Drop frames until we reach STACK_TOP. */  /* Drop frames until we reach STACK_TOP. */
 earlyexit = CMP(SLJIT_C_LESS, TMP1, 0, STACK_TOP, 0);  
3020  mainloop = LABEL();  mainloop = LABEL();
3021  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3022  jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3023  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3024  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3025  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3026  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
# Line 1987  JUMPTO(SLJIT_JUMP, mainloop); Line 3029  JUMPTO(SLJIT_JUMP, mainloop);
3029  JUMPHERE(jump);  JUMPHERE(jump);
3030  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3031  /* End of dropping frames. */  /* End of dropping frames. */
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD, TMP1, 0);  
 CMPTO(SLJIT_C_GREATER_EQUAL, TMP1, 0, STACK_TOP, 0, mainloop);  
 JUMPHERE(earlyexit);  
3032  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3033    
3034  JUMPHERE(jump);  JUMPHERE(jump);
# Line 2002  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_R Line 3040  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_R
3040  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
3041    
3042  JUMPHERE(jump);  JUMPHERE(jump);
3043    if (common->mark_ptr != 0)
3044      {
3045      jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3046      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3047      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3048      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3049      JUMPTO(SLJIT_JUMP, mainloop);
3050    
3051      JUMPHERE(jump);
3052      }
3053    
3054  /* Unknown command. */  /* Unknown command. */
3055  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3056  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
# Line 2010  JUMPTO(SLJIT_JUMP, mainloop); Line 3059  JUMPTO(SLJIT_JUMP, mainloop);
3059  static void check_wordboundary(compiler_common *common)  static void check_wordboundary(compiler_common *common)
3060  {  {
3061  DEFINE_COMPILER;  DEFINE_COMPILER;
3062  struct sljit_jump *beginend;  struct sljit_jump *skipread;
3063  #ifdef SUPPORT_UTF8  #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3064  struct sljit_jump *jump;  struct sljit_jump *jump;
3065  #endif  #endif
3066    
3067  SLJIT_ASSERT(ctype_word == 0x10);  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3068    
3069  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3070  /* Get type of the previous char, and put it to LOCALS1. */  /* Get type of the previous char, and put it to LOCALS1. */
3071  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3072  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3073  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3074  beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);  skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3075  skip_char_back(common);  skip_char_back(common);
3076    check_start_used_ptr(common);
3077  read_char(common);  read_char(common);
3078    
3079  /* Testing char type. */  /* Testing char type. */
3080  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3081  if (common->useucp)  if (common->use_ucp)
3082    {    {
3083    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3084    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
# Line 2045  if (common->useucp) Line 3095  if (common->useucp)
3095  else  else
3096  #endif  #endif
3097    {    {
3098  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3099      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3100    #elif defined SUPPORT_UTF
3101    /* Here LOCALS1 has already been zeroed. */    /* Here LOCALS1 has already been zeroed. */
3102    jump = NULL;    jump = NULL;
3103    if (common->utf8)    if (common->utf)
3104      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3105  #endif  #endif /* COMPILE_PCRE8 */
3106    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3107    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3108    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3109    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3110  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3111      JUMPHERE(jump);
3112    #elif defined SUPPORT_UTF
3113    if (jump != NULL)    if (jump != NULL)
3114      JUMPHERE(jump);      JUMPHERE(jump);
3115  #endif  #endif /* COMPILE_PCRE8 */
3116    }    }
3117  JUMPHERE(beginend);  JUMPHERE(skipread);
3118    
3119  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3120  beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  skipread = check_str_end(common);
3121  peek_char(common);  peek_char(common);
3122    
3123  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
3124  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3125  if (common->useucp)  if (common->use_ucp)
3126    {    {
3127    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3128    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
# Line 2084  if (common->useucp) Line 3138  if (common->useucp)
3138  else  else
3139  #endif  #endif
3140    {    {
3141  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3142      /* TMP2 may be destroyed by peek_char. */
3143      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3144      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3145    #elif defined SUPPORT_UTF
3146    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3147    jump = NULL;    jump = NULL;
3148    if (common->utf8)    if (common->utf)
3149      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3150  #endif  #endif
3151    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3152    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3153    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3154  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3155      JUMPHERE(jump);
3156    #elif defined SUPPORT_UTF
3157    if (jump != NULL)    if (jump != NULL)
3158      JUMPHERE(jump);      JUMPHERE(jump);
3159  #endif  #endif /* COMPILE_PCRE8 */
3160    }    }
3161  JUMPHERE(beginend);  JUMPHERE(skipread);
3162    
3163  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3164  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3165  }  }
3166    
3167    /*
3168      range format:
3169    
3170      ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3171      ranges[1] = first bit (0 or 1)
3172      ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3173    */
3174    
3175    static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3176    {
3177    DEFINE_COMPILER;
3178    struct sljit_jump *jump;
3179    
3180    if (ranges[0] < 0)
3181      return FALSE;
3182    
3183    switch(ranges[0])
3184      {
3185      case 1:
3186      if (readch)
3187        read_char(common);
3188      add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3189      return TRUE;
3190    
3191      case 2:
3192      if (readch)
3193        read_char(common);
3194      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3195      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3196      return TRUE;
3197    
3198      case 4:
3199      if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3200        {
3201        if (readch)
3202          read_char(common);
3203        if (ranges[1] != 0)
3204          {
3205          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3206          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3207          }
3208        else
3209          {
3210          jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3211          add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3212          JUMPHERE(jump);
3213          }
3214        return TRUE;
3215        }
3216      if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3217        {
3218        if (readch)
3219          read_char(common);
3220        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3221        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3222        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3223        return TRUE;
3224        }
3225      return FALSE;
3226    
3227      default:
3228      return FALSE;
3229      }
3230    }
3231    
3232    static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3233    {
3234    int i, bit, length;
3235    const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3236    
3237    bit = ctypes[0] & flag;
3238    ranges[0] = -1;
3239    ranges[1] = bit != 0 ? 1 : 0;
3240    length = 0;
3241    
3242    for (i = 1; i < 256; i++)
3243      if ((ctypes[i] & flag) != bit)
3244        {
3245        if (length >= MAX_RANGE_SIZE)
3246          return;
3247        ranges[2 + length] = i;
3248        length++;
3249        bit ^= flag;
3250        }
3251    
3252    if (bit != 0)
3253      {
3254      if (length >= MAX_RANGE_SIZE)
3255        return;
3256      ranges[2 + length] = 256;
3257      length++;
3258      }
3259    ranges[0] = length;
3260    }
3261    
3262    static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3263    {
3264    int ranges[2 + MAX_RANGE_SIZE];
3265    pcre_uint8 bit, cbit, all;
3266    int i, byte, length = 0;
3267    
3268    bit = bits[0] & 0x1;
3269    ranges[1] = bit;
3270    /* Can be 0 or 255. */
3271    all = -bit;
3272    
3273    for (i = 0; i < 256; )
3274      {
3275      byte = i >> 3;
3276      if ((i & 0x7) == 0 && bits[byte] == all)
3277        i += 8;
3278      else
3279        {
3280        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3281        if (cbit != bit)
3282          {
3283          if (length >= MAX_RANGE_SIZE)
3284            return FALSE;
3285          ranges[2 + length] = i;
3286          length++;
3287          bit = cbit;
3288          all = -cbit;
3289          }
3290        i++;
3291        }
3292      }
3293    
3294    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3295      {
3296      if (length >= MAX_RANGE_SIZE)
3297        return FALSE;
3298      ranges[2 + length] = 256;
3299      length++;
3300      }
3301    ranges[0] = length;
3302    
3303    return check_ranges(common, ranges, backtracks, FALSE);
3304    }
3305    
3306  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
3307  {  {
3308  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3309  DEFINE_COMPILER;  DEFINE_COMPILER;
3310    
3311  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3312    
3313  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3314  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3315  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3316  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3317  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3318  if (common->utf8)  #ifdef COMPILE_PCRE8
3319    if (common->utf)
3320    {    {
3321    #endif
3322    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3323    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3324    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3325    #ifdef COMPILE_PCRE8
3326    }    }
3327  #endif  #endif
3328    #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3329  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3330  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3331  }  }
# Line 2132  static void check_hspace(compiler_common Line 3335  static void check_hspace(compiler_common
3335  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3336  DEFINE_COMPILER;  DEFINE_COMPILER;
3337    
3338  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3339    
3340  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3341  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3342  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3343  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3344  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3345  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3346  if (common->utf8)  #ifdef COMPILE_PCRE8
3347    if (common->utf)
3348    {    {
3349    #endif
3350    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3351    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3352    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
# Line 2155  if (common->utf8) Line 3360  if (common->utf8)
3360    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3361    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3362    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3363    #ifdef COMPILE_PCRE8
3364    }    }
3365  #endif  #endif
3366    #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3367  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3368    
3369  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
# Line 2167  static void check_vspace(compiler_common Line 3374  static void check_vspace(compiler_common
3374  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3375  DEFINE_COMPILER;  DEFINE_COMPILER;
3376    
3377  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3378    
3379  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3380  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3381  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3382  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3383  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3384  if (common->utf8)  #ifdef COMPILE_PCRE8
3385    if (common->utf)
3386    {    {
3387    #endif
3388    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3389    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3390    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3391    #ifdef COMPILE_PCRE8
3392    }    }
3393  #endif  #endif
3394    #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3395  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3396    
3397  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
# Line 2195  DEFINE_COMPILER; Line 3406  DEFINE_COMPILER;
3406  struct sljit_jump *jump;  struct sljit_jump *jump;
3407  struct sljit_label *label;  struct sljit_label *label;
3408    
3409  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3410  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3411  OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);  OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3412  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3413  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3414  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3415    
3416  label = LABEL();  label = LABEL();
3417  OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);  OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3418  OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3419  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3420  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3421  JUMPTO(SLJIT_C_NOT_ZERO, label);  JUMPTO(SLJIT_C_NOT_ZERO, label);
3422    
3423  JUMPHERE(jump);  JUMPHERE(jump);
3424  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3425  OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);  OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3426  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3427  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
# Line 2224  DEFINE_COMPILER; Line 3435  DEFINE_COMPILER;
3435  struct sljit_jump *jump;  struct sljit_jump *jump;
3436  struct sljit_label *label;  struct sljit_label *label;
3437    
3438  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3439  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3440    
3441  OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);  OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3442  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3443  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3444  OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);  OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3445  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3446  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3447    
3448  label = LABEL();  label = LABEL();
3449  OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);  OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3450  OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3451    #ifndef COMPILE_PCRE8
3452    jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3453    #endif
3454  OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);  OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3455    #ifndef COMPILE_PCRE8
3456    JUMPHERE(jump);
3457    jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3458    #endif
3459  OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);  OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3460    #ifndef COMPILE_PCRE8
3461    JUMPHERE(jump);
3462    #endif
3463  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3464  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3465  JUMPTO(SLJIT_C_NOT_ZERO, label);  JUMPTO(SLJIT_C_NOT_ZERO, label);
3466    
3467  JUMPHERE(jump);  JUMPHERE(jump);
3468  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3469  OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);  OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3470  OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3471  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
# Line 2255  sljit_emit_fast_return(compiler, RETURN_ Line 3476  sljit_emit_fast_return(compiler, RETURN_
3476  #undef CHAR1  #undef CHAR1
3477  #undef CHAR2  #undef CHAR2
3478    
3479  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined SUPPORT_UCP
 #ifdef SUPPORT_UCP  
3480    
3481  static uschar * SLJIT_CALL do_utf8caselesscmp(uschar *src1, jit_arguments *args, uschar *end1)  static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3482  {  {
3483  /* This function would be ineffective to do in JIT level. */  /* This function would be ineffective to do in JIT level. */
3484  int c1, c2;  pcre_uint32 c1, c2;
3485  uschar *src2 = args->ptr;  const pcre_uchar *src2 = args->uchar_ptr;
3486  uschar *end2 = (uschar*)args->end;  const pcre_uchar *end2 = args->end;
3487    const ucd_record *ur;
3488    const pcre_uint32 *pp;
3489    
3490  while (src1 < end1)  while (src1 < end1)
3491    {    {
3492    if (src2 >= end2)    if (src2 >= end2)
3493      return 0;      return (pcre_uchar*)1;
3494    GETCHARINC(c1, src1);    GETCHARINC(c1, src1);
3495    GETCHARINC(c2, src2);    GETCHARINC(c2, src2);
3496    if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return 0;    ur = GET_UCD(c2);
3497      if (c1 != c2 && c1 != c2 + ur->other_case)
3498        {
3499        pp = PRIV(ucd_caseless_sets) + ur->caseset;
3500        for (;;)
3501          {
3502          if (c1 < *pp) return NULL;
3503          if (c1 == *pp++) break;
3504          }
3505        }
3506    }    }
3507  return src2;  return src2;
3508  }  }
3509    
3510  #endif  #endif /* SUPPORT_UTF && SUPPORT_UCP */
 #endif  
3511    
3512  static uschar *byte_sequence_compare(compiler_common *common, BOOL caseless, uschar *cc,  static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3513      compare_context* context, jump_list **fallbacks)      compare_context* context, jump_list **backtracks)
3514  {  {
3515  DEFINE_COMPILER;  DEFINE_COMPILER;
3516  unsigned int othercasebit = 0;  unsigned int othercasebit = 0;
3517  uschar *othercasebyte = NULL;  pcre_uchar *othercasechar = NULL;
3518  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3519  int utf8length;  int utflength;
3520  #endif  #endif
3521    
3522  if (caseless && char_has_othercase(common, cc))  if (caseless && char_has_othercase(common, cc))
# Line 2294  if (caseless && char_has_othercase(commo Line 3524  if (caseless && char_has_othercase(commo
3524    othercasebit = char_get_othercase_bit(common, cc);    othercasebit = char_get_othercase_bit(common, cc);
3525    SLJIT_ASSERT(othercasebit);    SLJIT_ASSERT(othercasebit);
3526    /* Extracting bit difference info. */    /* Extracting bit difference info. */
3527    othercasebyte = cc + (othercasebit >> 8);  #ifdef COMPILE_PCRE8
3528      othercasechar = cc + (othercasebit >> 8);
3529    othercasebit &= 0xff;    othercasebit &= 0xff;
3530    #else
3531    #ifdef COMPILE_PCRE16
3532      othercasechar = cc + (othercasebit >> 9);
3533      if ((othercasebit & 0x100) != 0)
3534        othercasebit = (othercasebit & 0xff) << 8;
3535      else
3536        othercasebit &= 0xff;
3537    #endif
3538    #endif
3539    }    }
3540    
3541  if (context->sourcereg == -1)  if (context->sourcereg == -1)
3542    {    {
3543    #ifdef COMPILE_PCRE8
3544  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3545    if (context->length >= 4)    if (context->length >= 4)
3546      OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3547    else if (context->length >= 2)    else if (context->length >= 2)
3548      OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3549    else    else
3550  #endif  #endif
3551      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3552    #else
3553    #ifdef COMPILE_PCRE16
3554    #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3555      if (context->length >= 4)
3556        OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3557      else
3558    #endif
3559        OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560    #endif
3561    #endif /* COMPILE_PCRE8 */
3562    context->sourcereg = TMP2;    context->sourcereg = TMP2;
3563    }    }
3564    
3565  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3566  utf8length = 1;  utflength = 1;
3567  if (common->utf8 && *cc >= 0xc0)  if (common->utf && HAS_EXTRALEN(*cc))
3568    utf8length += _pcre_utf8_table4[*cc & 0x3f];    utflength += GET_EXTRALEN(*cc);
3569    
3570  do  do
3571    {    {
3572  #endif  #endif
3573    
3574    context->length--;    context->length -= IN_UCHARS(1);
3575  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3576    
3577    /* Unaligned read is supported. */    /* Unaligned read is supported. */
3578    if (othercasebit != 0 && othercasebyte == cc)    if (othercasebit != 0 && othercasechar == cc)
3579      {      {
3580      context->c.asbytes[context->byteptr] = *cc | othercasebit;      context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3581      context->oc.asbytes[context->byteptr] = othercasebit;      context->oc.asuchars[context->ucharptr] = othercasebit;
3582      }      }
3583    else    else
3584      {      {
3585      context->c.asbytes[context->byteptr] = *cc;      context->c.asuchars[context->ucharptr] = *cc;
3586      context->oc.asbytes[context->byteptr] = 0;      context->oc.asuchars[context->ucharptr] = 0;
3587      }      }
3588    context->byteptr++;    context->ucharptr++;
3589    
3590    if (context->byteptr >= 4 || context->length == 0 || (context->byteptr == 2 && context->length == 1))  #ifdef COMPILE_PCRE8
3591      if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3592    #else
3593      if (context->ucharptr >= 2 || context->length == 0)
3594    #endif
3595      {      {
3596      if (context->length >= 4)      if (context->length >= 4)
3597        OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);        OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3598    #ifdef COMPILE_PCRE8
3599      else if (context->length >= 2)      else if (context->length >= 2)
3600        OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);        OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3601      else if (context->length >= 1)      else if (context->length >= 1)
3602        OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);        OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3603    #else
3604        else if (context->length >= 2)
3605          OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3606    #endif
3607      context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;      context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3608    
3609      switch(context->byteptr)      switch(context->ucharptr)
3610        {        {
3611        case 4:        case 4 / sizeof(pcre_uchar):
3612        if (context->oc.asint != 0)        if (context->oc.asint != 0)
3613          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3614        add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3615        break;        break;
3616    
3617        case 2:        case 2 / sizeof(pcre_uchar):
3618        if (context->oc.asshort != 0)        if (context->oc.asushort != 0)
3619          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3620        add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3621        break;        break;
3622    
3623    #ifdef COMPILE_PCRE8
3624        case 1:        case 1:
3625        if (context->oc.asbyte != 0)        if (context->oc.asbyte != 0)
3626          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3627        add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3628        break;        break;
3629    #endif
3630    
3631        default:        default:
3632        SLJIT_ASSERT_STOP();        SLJIT_ASSERT_STOP();
3633        break;        break;
3634        }        }
3635      context->byteptr = 0;      context->ucharptr = 0;
3636      }      }
3637    
3638  #else  #else
3639    
3640    /* Unaligned read is unsupported. */    /* Unaligned read is unsupported. */
3641    #ifdef COMPILE_PCRE8
3642    if (context->length > 0)    if (context->length > 0)
3643      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3644    #else
3645      if (context->length > 0)
3646        OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3647    #endif
3648    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3649    
3650    if (othercasebit != 0 && othercasebyte == cc)    if (othercasebit != 0 && othercasechar == cc)
3651      {      {
3652      OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);      OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3653      add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3654      }      }
3655    else    else
3656      add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3657    
3658  #endif  #endif
3659    
3660    cc++;    cc++;
3661  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3662    utf8length--;    utflength--;
3663    }    }
3664  while (utf8length > 0);  while (utflength > 0);
3665  #endif  #endif
3666    
3667  return cc;  return cc;
3668  }  }
3669    
3670  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3671    
3672  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
3673    if ((value) != typeoffset) \    if ((value) != typeoffset) \
# Line 2422  return cc; Line 3689  return cc;
3689      } \      } \
3690    charoffset = (value);    charoffset = (value);
3691    
3692  static void compile_xclass_hotpath(compiler_common *common, uschar *cc, jump_list **fallbacks)  static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3693  {  {
3694  DEFINE_COMPILER;  DEFINE_COMPILER;
3695  jump_list *found = NULL;  jump_list *found = NULL;
3696  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3697  unsigned int c;  pcre_int32 c, charoffset;
3698  int compares;  const pcre_uint32 *other_cases;
3699  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
3700  uschar *ccbegin;  pcre_uchar *ccbegin;
3701    int compares, invertcmp, numberofcmps;
3702  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3703  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3704  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
3705  int typereg = TMP1, scriptreg = TMP1, typeoffset;  int typereg = TMP1, scriptreg = TMP1;
3706    pcre_int32 typeoffset;
3707  #endif  #endif
 int charoffset, invertcmp, numberofcmps;  
3708    
3709  /* Although SUPPORT_UTF8 must be defined, we are not necessary in utf8 mode. */  /* Although SUPPORT_UTF must be defined, we are
3710  check_input_end(common, fallbacks);     not necessary in utf mode even in 8 bit mode. */
3711    detect_partial_match(common, backtracks);
3712  read_char(common);  read_char(common);
3713    
3714  if ((*cc++ & XCL_MAP) != 0)  if ((*cc++ & XCL_MAP) != 0)
3715    {    {
3716    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3717    if (common->utf8)  #ifndef COMPILE_PCRE8
3718      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3719    #elif defined SUPPORT_UTF
3720      if (common->utf)
3721      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3722    #endif
3723    
3724    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3725    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);      {
3726    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3727    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3728    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3729    add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3730        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3731        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3732        }
3733    
3734    if (common->utf8)  #ifndef COMPILE_PCRE8
3735      JUMPHERE(jump);
3736    #elif defined SUPPORT_UTF
3737      if (common->utf)
3738      JUMPHERE(jump);      JUMPHERE(jump);
3739    #endif
3740    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3741  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3742    charsaved = TRUE;    charsaved = TRUE;
3743  #endif  #endif
3744    cc += 32;    cc += 32 / sizeof(pcre_uchar);
3745    }    }
3746    
3747  /* Scanning the necessary info. */  /* Scanning the necessary info. */
# Line 2473  while (*cc != XCL_END) Line 3753  while (*cc != XCL_END)
3753    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
3754      {      {
3755      cc += 2;      cc += 2;
3756  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3757      if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3758  #endif  #endif
3759  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3760      needschar = TRUE;      needschar = TRUE;
# Line 2483  while (*cc != XCL_END) Line 3763  while (*cc != XCL_END)
3763    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
3764      {      {
3765      cc += 2;      cc += 2;
3766  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3767      if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3768  #endif  #endif
3769      cc++;      cc++;
3770  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3771      if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3772  #endif  #endif
3773  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3774      needschar = TRUE;      needschar = TRUE;
# Line 2522  while (*cc != XCL_END) Line 3802  while (*cc != XCL_END)
3802        needschar = TRUE;        needschar = TRUE;
3803        break;        break;
3804    
3805          case PT_CLIST:
3806          needschar = TRUE;
3807          break;
3808    
3809        default:        default:
3810        SLJIT_ASSERT_STOP();        SLJIT_ASSERT_STOP();
3811        break;        break;
# Line 2558  if (needstype || needsscript) Line 3842  if (needstype || needsscript)
3842      {      {
3843      if (scriptreg == TMP1)      if (scriptreg == TMP1)
3844        {        {
3845        OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script));        OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3846        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3847        }        }
3848      else      else
3849        {        {
3850        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3851        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script));        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3852        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3853        }        }
3854      }      }
# Line 2582  typeoffset = 0; Line 3866  typeoffset = 0;
3866  while (*cc != XCL_END)  while (*cc != XCL_END)
3867    {    {
3868    compares--;    compares--;
3869    invertcmp = (compares == 0 && list != fallbacks);    invertcmp = (compares == 0 && list != backtracks);
3870    jump = NULL;    jump = NULL;
3871    
3872    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
3873      {      {
3874      cc ++;      cc ++;
3875  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3876      if (common->utf8)      if (common->utf)
3877        {        {
3878        GETCHARINC(c, cc);        GETCHARINC(c, cc);
3879        }        }
# Line 2619  while (*cc != XCL_END) Line 3903  while (*cc != XCL_END)
3903    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
3904      {      {
3905      cc ++;      cc ++;
3906  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3907      if (common->utf8)      if (common->utf)
3908        {        {
3909        GETCHARINC(c, cc);        GETCHARINC(c, cc);
3910        }        }
# Line 2628  while (*cc != XCL_END) Line 3912  while (*cc != XCL_END)
3912  #endif  #endif
3913        c = *cc++;        c = *cc++;
3914      SET_CHAR_OFFSET(c);      SET_CHAR_OFFSET(c);
3915  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3916      if (common->utf8)      if (common->utf)
3917        {        {
3918        GETCHARINC(c, cc);        GETCHARINC(c, cc);
3919        }        }
# Line 2664  while (*cc != XCL_END) Line 3948  while (*cc != XCL_END)
3948      switch(*cc)      switch(*cc)
3949        {        {
3950        case PT_ANY:        case PT_ANY:
3951        if (list != fallbacks)        if (list != backtracks)
3952          {          {
3953          if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))          if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3954            continue;            continue;
# Line 2685  while (*cc != XCL_END) Line 3969  while (*cc != XCL_END)
3969        break;        break;
3970