/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 696 by zherczeg, Sun Sep 18 15:09:49 2011 UTC revision 1014 by zherczeg, Sun Aug 26 04:53:49 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11    The machine code generator part (this module) was written by Zoltan Herczeg    The machine code generator part (this module) was written by Zoltan Herczeg
12                        Copyright (c) 2010-2011                        Copyright (c) 2010-2012
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 52  POSSIBILITY OF SUCH DAMAGE. Line 52  POSSIBILITY OF SUCH DAMAGE.
52  we just include it. This way we don't need to touch the build  we just include it. This way we don't need to touch the build
53  system files. */  system files. */
54    
55    #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56    #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57  #define SLJIT_CONFIG_AUTO 1  #define SLJIT_CONFIG_AUTO 1
58    #define SLJIT_CONFIG_STATIC 1
59  #define SLJIT_VERBOSE 0  #define SLJIT_VERBOSE 0
60  #define SLJIT_DEBUG 0  #define SLJIT_DEBUG 0
61    
62  #include "sljit/sljitLir.c"  #include "sljit/sljitLir.c"
63    
64  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED  #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65  #error "Unsupported architecture"  #error Unsupported architecture
66  #endif  #endif
67    
68  /* Allocate memory on the stack. Fast, but limited size. */  /* Allocate memory for the regex stack on the real machine stack.
69  #define LOCAL_SPACE_SIZE 32768  Fast, but limited size. */
70    #define MACHINE_STACK_SIZE 32768
71    
72    /* Growth rate for stack allocated by the OS. Should be a multiple
73    of the page size. */
74  #define STACK_GROWTH_RATE 8192  #define STACK_GROWTH_RATE 8192
75    
76  /* Enable to check that the allocation could destroy temporaries. */  /* Enable to check that the allocation could destroy temporaries. */
# Line 79  The code generator follows the recursive Line 85  The code generator follows the recursive
85  expressions. The basic blocks of regular expressions are condition checkers  expressions. The basic blocks of regular expressions are condition checkers
86  which execute different commands depending on the result of the condition check.  which execute different commands depending on the result of the condition check.
87  The relationship between the operators can be horizontal (concatenation) and  The relationship between the operators can be horizontal (concatenation) and
88  vertical (sub-expression) (See struct fallback_common for more details).  vertical (sub-expression) (See struct backtrack_common for more details).
89    
90    'ab' - 'a' and 'b' regexps are concatenated    'ab' - 'a' and 'b' regexps are concatenated
91    'a+' - 'a' is the sub-expression of the '+' operator    'a+' - 'a' is the sub-expression of the '+' operator
92    
93  The condition checkers are boolean (true/false) checkers. Machine code is generated  The condition checkers are boolean (true/false) checkers. Machine code is generated
94  for the checker itself and for the actions depending on the result of the checker.  for the checker itself and for the actions depending on the result of the checker.
95  The 'true' case is called as the hot path (expected path), and the other is called as  The 'true' case is called as the matching path (expected path), and the other is called as
96  the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken  the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97  branches on the hot path.  branches on the matching path.
98    
99   Greedy star operator (*) :   Greedy star operator (*) :
100     Hot path: match happens.     Matching path: match happens.
101     Fallback path: match failed.     Backtrack path: match failed.
102   Non-greedy star operator (*?) :   Non-greedy star operator (*?) :
103     Hot path: no need to perform a match.     Matching path: no need to perform a match.
104     Fallback path: match is required.     Backtrack path: match is required.
105    
106  The following example shows the code generated for a capturing bracket  The following example shows the code generated for a capturing bracket
107  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and  with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
# Line 105  we have the following regular expression Line 111  we have the following regular expression
111    
112  The generated code will be the following:  The generated code will be the following:
113    
114   A hot path   A matching path
115   '(' hot path (pushing arguments to the stack)   '(' matching path (pushing arguments to the stack)
116   B hot path   B matching path
117   ')' hot path (pushing arguments to the stack)   ')' matching path (pushing arguments to the stack)
118   D hot path   D matching path
119   return with successful match   return with successful match
120    
121   D fallback path   D backtrack path
122   ')' fallback path (If we arrived from "C" jump to the fallback of "C")   ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123   B fallback path   B backtrack path
124   C expected path   C expected path
125   jump to D hot path   jump to D matching path
126   C fallback path   C backtrack path
127   A fallback path   A backtrack path
128    
129   Notice that the order of fallback code paths is the opposite of the fast   Notice that the order of backtrack code paths is the opposite of the fast
130   code paths. In this way the topmost value on the stack always belongs   code paths. In this way the topmost value on the stack always belongs
131   to the current fallback code path. The fallback code path must check   to the current backtrack code path. The backtrack path must check
132   whether there is a next alternative. If so, it needs to jump back to   whether there is a next alternative. If so, it needs to jump back to
133   the hot path eventually. Otherwise it needs to clear out its own stack   the matching path eventually. Otherwise it needs to clear out its own stack
134   frame and continue the execution on the fallback code paths.   frame and continue the execution on the backtrack code paths.
135  */  */
136    
137  /*  /*
138  Saved stack frames:  Saved stack frames:
139    
140  Atomic blocks and asserts require reloading the values of local variables  Atomic blocks and asserts require reloading the values of private data
141  when the fallback mechanism is performed. Because of OP_RECURSE, the locals  when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142  are not necessarily known at compile time, thus we need a dynamic restore  are not necessarily known at compile time, thus we need a dynamic restore
143  mechanism.  mechanism.
144    
145  The stack frames are stored in a chain list, and have the following format:  The stack frames are stored in a chain list, and have the following format:
146  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147    
148  Thus we can restore the locals to a particular point in the stack.  Thus we can restore the private data to a particular point in the stack.
149  */  */
150    
151  typedef struct jit_arguments {  typedef struct jit_arguments {
152    /* Pointers first. */    /* Pointers first. */
153    struct sljit_stack *stack;    struct sljit_stack *stack;
154    PCRE_SPTR str;    const pcre_uchar *str;
155    PCRE_SPTR begin;    const pcre_uchar *begin;
156    PCRE_SPTR end;    const pcre_uchar *end;
157    int *offsets;    int *offsets;
158    uschar *ptr;    pcre_uchar *uchar_ptr;
159      pcre_uchar *mark_ptr;
160    /* Everything else after. */    /* Everything else after. */
161    int offsetcount;    int offsetcount;
162    int calllimit;    int calllimit;
163    uschar notbol;    pcre_uint8 notbol;
164    uschar noteol;    pcre_uint8 noteol;
165    uschar notempty;    pcre_uint8 notempty;
166    uschar notempty_atstart;    pcre_uint8 notempty_atstart;
167  } jit_arguments;  } jit_arguments;
168    
169  typedef struct executable_function {  typedef struct executable_functions {
170    void *executable_func;    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171    pcre_jit_callback callback;    PUBL(jit_callback) callback;
172    void *userdata;    void *userdata;
173  } executable_function;    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174    } executable_functions;
175    
176  typedef struct jump_list {  typedef struct jump_list {
177    struct sljit_jump *jump;    struct sljit_jump *jump;
# Line 176  typedef struct stub_list { Line 184  typedef struct stub_list {
184    enum stub_types type;    enum stub_types type;
185    int data;    int data;
186    struct sljit_jump *start;    struct sljit_jump *start;
187    struct sljit_label *leave;    struct sljit_label *quit;
188    struct stub_list *next;    struct stub_list *next;
189  } stub_list;  } stub_list;
190    
191  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);  typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192    
193  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
194  code generator. It is allocated by compile_hotpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
195  the arguments for compile_fallbackpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
196  of its descendants. */  of its descendants. */
197  typedef struct fallback_common {  typedef struct backtrack_common {
198    /* Concatenation stack. */    /* Concatenation stack. */
199    struct fallback_common *prev;    struct backtrack_common *prev;
200    jump_list *nextfallbacks;    jump_list *nextbacktracks;
201    /* Internal stack (for component operators). */    /* Internal stack (for component operators). */
202    struct fallback_common *top;    struct backtrack_common *top;
203    jump_list *topfallbacks;    jump_list *topbacktracks;
204    /* Opcode pointer. */    /* Opcode pointer. */
205    uschar *cc;    pcre_uchar *cc;
206  } fallback_common;  } backtrack_common;
207    
208  typedef struct assert_fallback {  typedef struct assert_backtrack {
209    fallback_common common;    backtrack_common common;
210    jump_list *condfailed;    jump_list *condfailed;
211    /* Less than 0 (-1) if a frame is not needed. */    /* Less than 0 (-1) if a frame is not needed. */
212    int framesize;    int framesize;
213    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
214    int localptr;    int private_data_ptr;
215    /* For iterators. */    /* For iterators. */
216    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
217  } assert_fallback;  } assert_backtrack;
218    
219  typedef struct bracket_fallback {  typedef struct bracket_backtrack {
220    fallback_common common;    backtrack_common common;
221    /* Where to continue if an alternative is successfully matched. */    /* Where to continue if an alternative is successfully matched. */
222    struct sljit_label *althotpath;    struct sljit_label *alternative_matchingpath;
223    /* For rmin and rmax iterators. */    /* For rmin and rmax iterators. */
224    struct sljit_label *recursivehotpath;    struct sljit_label *recursive_matchingpath;
225    /* For greedy ? operator. */    /* For greedy ? operator. */
226    struct sljit_label *zerohotpath;    struct sljit_label *zero_matchingpath;
227    /* Contains the branches of a failed condition. */    /* Contains the branches of a failed condition. */
228    union {    union {
229      /* Both for OP_COND, OP_SCOND. */      /* Both for OP_COND, OP_SCOND. */
230      jump_list *condfailed;      jump_list *condfailed;
231      assert_fallback *assert;      assert_backtrack *assert;
232      /* For OP_ONCE. -1 if not needed. */      /* For OP_ONCE. -1 if not needed. */
233      int framesize;      int framesize;
234    } u;    } u;
235    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
236    int localptr;    int private_data_ptr;
237  } bracket_fallback;  } bracket_backtrack;
238    
239  typedef struct bracketpos_fallback {  typedef struct bracketpos_backtrack {
240    fallback_common common;    backtrack_common common;
241    /* Points to our private memory word on the stack. */    /* Points to our private memory word on the stack. */
242    int localptr;    int private_data_ptr;
243    /* Reverting stack is needed. */    /* Reverting stack is needed. */
244    int framesize;    int framesize;
245    /* Allocated stack size. */    /* Allocated stack size. */
246    int stacksize;    int stacksize;
247  } bracketpos_fallback;  } bracketpos_backtrack;
248    
249  typedef struct braminzero_fallback {  typedef struct braminzero_backtrack {
250    fallback_common common;    backtrack_common common;
251    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
252  } braminzero_fallback;  } braminzero_backtrack;
253    
254  typedef struct iterator_fallback {  typedef struct iterator_backtrack {
255    fallback_common common;    backtrack_common common;
256    /* Next iteration. */    /* Next iteration. */
257    struct sljit_label *hotpath;    struct sljit_label *matchingpath;
258  } iterator_fallback;  } iterator_backtrack;
259    
260  typedef struct recurse_entry {  typedef struct recurse_entry {
261    struct recurse_entry *next;    struct recurse_entry *next;
# Line 259  typedef struct recurse_entry { Line 267  typedef struct recurse_entry {
267    int start;    int start;
268  } recurse_entry;  } recurse_entry;
269    
270  typedef struct recurse_fallback {  typedef struct recurse_backtrack {
271    fallback_common common;    backtrack_common common;
272  } recurse_fallback;  } recurse_backtrack;
273    
274    #define MAX_RANGE_SIZE 6
275    
276  typedef struct compiler_common {  typedef struct compiler_common {
277    struct sljit_compiler *compiler;    struct sljit_compiler *compiler;
278    uschar *start;    pcre_uchar *start;
279    int localsize;  
280    int *localptrs;    /* Maps private data offset to each opcode. */
281    const uschar *fcc;    int *private_data_ptrs;
282    sljit_w lcc;    /* Tells whether the capturing bracket is optimized. */
283      pcre_uint8 *optimized_cbracket;
284      /* Starting offset of private data for capturing brackets. */
285    int cbraptr;    int cbraptr;
286      /* OVector starting point. Must be divisible by 2. */
287      int ovector_start;
288      /* Last known position of the requested byte. */
289      int req_char_ptr;
290      /* Head of the last recursion. */
291      int recursive_head;
292      /* First inspected character for partial matching. */
293      int start_used_ptr;
294      /* Starting pointer for partial soft matches. */
295      int hit_start;
296      /* End pointer of the first line. */
297      int first_line_end;
298      /* Points to the marked string. */
299      int mark_ptr;
300    
301      /* Flipped and lower case tables. */
302      const pcre_uint8 *fcc;
303      sljit_w lcc;
304      /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305      int mode;
306      /* Newline control. */
307    int nltype;    int nltype;
308    int newline;    int newline;
309    int bsr_nltype;    int bsr_nltype;
310      /* Dollar endonly. */
311    int endonly;    int endonly;
312      BOOL has_set_som;
313      /* Tables. */
314    sljit_w ctypes;    sljit_w ctypes;
315      int digits[2 + MAX_RANGE_SIZE];
316      /* Named capturing brackets. */
317      sljit_uw name_table;
318      sljit_w name_count;
319      sljit_w name_entry_size;
320    
321      /* Labels and jump lists. */
322      struct sljit_label *partialmatchlabel;
323      struct sljit_label *quitlabel;
324    struct sljit_label *acceptlabel;    struct sljit_label *acceptlabel;
325    stub_list *stubs;    stub_list *stubs;
326    recurse_entry *entries;    recurse_entry *entries;
327    recurse_entry *currententry;    recurse_entry *currententry;
328      jump_list *partialmatch;
329      jump_list *quit;
330    jump_list *accept;    jump_list *accept;
331    jump_list *calllimit;    jump_list *calllimit;
332    jump_list *stackalloc;    jump_list *stackalloc;
# Line 291  typedef struct compiler_common { Line 338  typedef struct compiler_common {
338    jump_list *casefulcmp;    jump_list *casefulcmp;
339    jump_list *caselesscmp;    jump_list *caselesscmp;
340    BOOL jscript_compat;    BOOL jscript_compat;
341  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
342    BOOL utf8;    BOOL utf;
343  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
344    BOOL useucp;    BOOL use_ucp;
345  #endif  #endif
346    jump_list *utf8readchar;    jump_list *utfreadchar;
347    jump_list *utf8readtype8;  #ifdef COMPILE_PCRE8
348      jump_list *utfreadtype8;
349  #endif  #endif
350    #endif /* SUPPORT_UTF */
351  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
352    jump_list *getucd;    jump_list *getucd;
353  #endif  #endif
# Line 310  typedef struct compare_context { Line 359  typedef struct compare_context {
359    int length;    int length;
360    int sourcereg;    int sourcereg;
361  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
362    int byteptr;    int ucharptr;
363    union {    union {
364      int asint;      sljit_i asint;
365      short asshort;      sljit_uh asushort;
366    #ifdef COMPILE_PCRE8
367      sljit_ub asbyte;      sljit_ub asbyte;
368      sljit_ub asbytes[4];      sljit_ub asuchars[4];
369    #else
370    #ifdef COMPILE_PCRE16
371        sljit_uh asuchars[2];
372    #endif
373    #endif
374    } c;    } c;
375    union {    union {
376      int asint;      sljit_i asint;
377      short asshort;      sljit_uh asushort;
378    #ifdef COMPILE_PCRE8
379      sljit_ub asbyte;      sljit_ub asbyte;
380      sljit_ub asbytes[4];      sljit_ub asuchars[4];
381    #else
382    #ifdef COMPILE_PCRE16
383        sljit_uh asuchars[2];
384    #endif
385    #endif
386    } oc;    } oc;
387  #endif  #endif
388  } compare_context;  } compare_context;
389    
390  enum {  enum {
391    frame_end = 0,    frame_end = 0,
392    frame_setstrbegin = -1    frame_setstrbegin = -1,
393      frame_setmark = -2
394  };  };
395    
396    /* Undefine sljit macros. */
397    #undef CMP
398    
399  /* Used for accessing the elements of the stack. */  /* Used for accessing the elements of the stack. */
400  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))  #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))
401    
402  #define TMP1          SLJIT_TEMPORARY_REG1  #define TMP1          SLJIT_TEMPORARY_REG1
403  #define TMP2          SLJIT_TEMPORARY_REG3  #define TMP2          SLJIT_TEMPORARY_REG3
404  #define TMP3          SLJIT_TEMPORARY_EREG2  #define TMP3          SLJIT_TEMPORARY_EREG2
405  #define STR_PTR       SLJIT_GENERAL_REG1  #define STR_PTR       SLJIT_SAVED_REG1
406  #define STR_END       SLJIT_GENERAL_REG2  #define STR_END       SLJIT_SAVED_REG2
407  #define STACK_TOP     SLJIT_TEMPORARY_REG2  #define STACK_TOP     SLJIT_TEMPORARY_REG2
408  #define STACK_LIMIT   SLJIT_GENERAL_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
409  #define ARGUMENTS     SLJIT_GENERAL_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
410  #define CALL_COUNT    SLJIT_GENERAL_EREG2  #define CALL_COUNT    SLJIT_SAVED_EREG2
411  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
412    
413  /* Locals layout. */  /* Local space layout. */
414  /* These two locals can be used by the current opcode. */  /* These two locals can be used by the current opcode. */
415  #define LOCALS0          (0 * sizeof(sljit_w))  #define LOCALS0          (0 * sizeof(sljit_w))
416  #define LOCALS1          (1 * sizeof(sljit_w))  #define LOCALS1          (1 * sizeof(sljit_w))
417  /* Two local variables for possessive quantifiers (char1 cannot use them). */  /* Two local variables for possessive quantifiers (char1 cannot use them). */
418  #define POSSESSIVE0      (2 * sizeof(sljit_w))  #define POSSESSIVE0      (2 * sizeof(sljit_w))
419  #define POSSESSIVE1      (3 * sizeof(sljit_w))  #define POSSESSIVE1      (3 * sizeof(sljit_w))
 /* Head of the saved local variables */  
 #define LOCALS_HEAD      (4 * sizeof(sljit_w))  
 /* Head of the last recursion. */  
 #define RECURSIVE_HEAD   (5 * sizeof(sljit_w))  
420  /* Max limit of recursions. */  /* Max limit of recursions. */
421  #define CALL_LIMIT       (7 * sizeof(sljit_w))  #define CALL_LIMIT       (4 * sizeof(sljit_w))
 /* Last known position of the requested byte. */  
 #define REQ_BYTE_PTR     (8 * sizeof(sljit_w))  
 /* End pointer of the first line. */  
 #define FIRSTLINE_END    (9 * sizeof(sljit_w))  
422  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
423  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
424  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
425  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
426  #define OVECTOR_START    (10 * sizeof(sljit_w))  #define OVECTOR_START    (common->ovector_start)
427  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
428  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))  #define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
429  #define PRIV(cc)         (common->localptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
430    
431    #ifdef COMPILE_PCRE8
432    #define MOV_UCHAR  SLJIT_MOV_UB
433    #define MOVU_UCHAR SLJIT_MOVU_UB
434    #else
435    #ifdef COMPILE_PCRE16
436    #define MOV_UCHAR  SLJIT_MOV_UH
437    #define MOVU_UCHAR SLJIT_MOVU_UH
438    #else
439    #error Unsupported compiling mode
440    #endif
441    #endif
442    
443  /* Shortcuts. */  /* Shortcuts. */
444  #define DEFINE_COMPILER \  #define DEFINE_COMPILER \
# Line 392  the start pointers when the end of the c Line 461  the start pointers when the end of the c
461    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))    sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
462  #define COND_VALUE(op, dst, dstw, type) \  #define COND_VALUE(op, dst, dstw, type) \
463    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))    sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
464    #define GET_LOCAL_BASE(dst, dstw, offset) \
465      sljit_get_local_base(compiler, (dst), (dstw), (offset))
466    
467  static uschar* bracketend(uschar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
468  {  {
469  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
470  do cc += GET(cc, 1); while (*cc == OP_ALT);  do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 404  return cc; Line 475  return cc;
475    
476  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
477   next_opcode   next_opcode
478   get_localspace   get_private_data_length
479   set_localptrs   set_private_data_ptrs
480   get_framesize   get_framesize
481   init_frame   init_frame
482   get_localsize   get_private_data_length_for_copy
483   copy_locals   copy_private_data
484   compile_hotpath   compile_matchingpath
485   compile_fallbackpath   compile_backtrackingpath
486  */  */
487    
488  static uschar *next_opcode(compiler_common *common, uschar *cc)  static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
489  {  {
490  SLJIT_UNUSED_ARG(common);  SLJIT_UNUSED_ARG(common);
491  switch(*cc)  switch(*cc)
# Line 463  switch(*cc) Line 534  switch(*cc)
534    case OP_BRAZERO:    case OP_BRAZERO:
535    case OP_BRAMINZERO:    case OP_BRAMINZERO:
536    case OP_BRAPOSZERO:    case OP_BRAPOSZERO:
537      case OP_COMMIT:
538    case OP_FAIL:    case OP_FAIL:
539    case OP_ACCEPT:    case OP_ACCEPT:
540    case OP_ASSERT_ACCEPT:    case OP_ASSERT_ACCEPT:
541    case OP_SKIPZERO:    case OP_SKIPZERO:
542    return cc + 1;    return cc + 1;
543    
544      case OP_ANYBYTE:
545    #ifdef SUPPORT_UTF
546      if (common->utf) return NULL;
547    #endif
548      return cc + 1;
549    
550    case OP_CHAR:    case OP_CHAR:
551    case OP_CHARI:    case OP_CHARI:
552    case OP_NOT:    case OP_NOT:
553    case OP_NOTI:    case OP_NOTI:
   
554    case OP_STAR:    case OP_STAR:
555    case OP_MINSTAR:    case OP_MINSTAR:
556    case OP_PLUS:    case OP_PLUS:
# Line 511  switch(*cc) Line 588  switch(*cc)
588    case OP_NOTPOSPLUSI:    case OP_NOTPOSPLUSI:
589    case OP_NOTPOSQUERYI:    case OP_NOTPOSQUERYI:
590    cc += 2;    cc += 2;
591  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
592    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
593  #endif  #endif
594    return cc;    return cc;
595    
# Line 532  switch(*cc) Line 609  switch(*cc)
609    case OP_NOTMINUPTOI:    case OP_NOTMINUPTOI:
610    case OP_NOTEXACTI:    case OP_NOTEXACTI:
611    case OP_NOTPOSUPTOI:    case OP_NOTPOSUPTOI:
612    cc += 4;    cc += 2 + IMM2_SIZE;
613  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
614    if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
615  #endif  #endif
616    return cc;    return cc;
617    
618    case OP_NOTPROP:    case OP_NOTPROP:
619    case OP_PROP:    case OP_PROP:
620      return cc + 1 + 2;
621    
622    case OP_TYPEUPTO:    case OP_TYPEUPTO:
623    case OP_TYPEMINUPTO:    case OP_TYPEMINUPTO:
624    case OP_TYPEEXACT:    case OP_TYPEEXACT:
# Line 547  switch(*cc) Line 626  switch(*cc)
626    case OP_REF:    case OP_REF:
627    case OP_REFI:    case OP_REFI:
628    case OP_CREF:    case OP_CREF:
629      case OP_NCREF:
630      case OP_RREF:
631      case OP_NRREF:
632    case OP_CLOSE:    case OP_CLOSE:
633    cc += 3;    cc += 1 + IMM2_SIZE;
634    return cc;    return cc;
635    
636    case OP_CRRANGE:    case OP_CRRANGE:
637    case OP_CRMINRANGE:    case OP_CRMINRANGE:
638    return cc + 5;    return cc + 1 + 2 * IMM2_SIZE;
639    
640    case OP_CLASS:    case OP_CLASS:
641    case OP_NCLASS:    case OP_NCLASS:
642    return cc + 33;    return cc + 1 + 32 / sizeof(pcre_uchar);
643    
644  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
645    case OP_XCLASS:    case OP_XCLASS:
646    return cc + GET(cc, 1);    return cc + GET(cc, 1);
647  #endif  #endif
# Line 571  switch(*cc) Line 653  switch(*cc)
653    case OP_ASSERTBACK_NOT:    case OP_ASSERTBACK_NOT:
654    case OP_REVERSE:    case OP_REVERSE:
655    case OP_ONCE:    case OP_ONCE:
656      case OP_ONCE_NC:
657    case OP_BRA:    case OP_BRA:
658    case OP_BRAPOS:    case OP_BRAPOS:
659    case OP_COND:    case OP_COND:
# Line 588  switch(*cc) Line 671  switch(*cc)
671    case OP_CBRAPOS:    case OP_CBRAPOS:
672    case OP_SCBRA:    case OP_SCBRA:
673    case OP_SCBRAPOS:    case OP_SCBRAPOS:
674    return cc + 1 + LINK_SIZE + 2;    return cc + 1 + LINK_SIZE + IMM2_SIZE;
675    
676      case OP_MARK:
677      return cc + 1 + 2 + cc[1];
678    
679    default:    default:
680    return NULL;    return NULL;
681    }    }
682  }  }
683    
684  static int get_localspace(compiler_common *common, uschar *cc, uschar *ccend)  #define CASE_ITERATOR_PRIVATE_DATA_1 \
685        case OP_MINSTAR: \
686        case OP_MINPLUS: \
687        case OP_QUERY: \
688        case OP_MINQUERY: \
689        case OP_MINSTARI: \
690        case OP_MINPLUSI: \
691        case OP_QUERYI: \
692        case OP_MINQUERYI: \
693        case OP_NOTMINSTAR: \
694        case OP_NOTMINPLUS: \
695        case OP_NOTQUERY: \
696        case OP_NOTMINQUERY: \
697        case OP_NOTMINSTARI: \
698        case OP_NOTMINPLUSI: \
699        case OP_NOTQUERYI: \
700        case OP_NOTMINQUERYI:
701    
702    #define CASE_ITERATOR_PRIVATE_DATA_2A \
703        case OP_STAR: \
704        case OP_PLUS: \
705        case OP_STARI: \
706        case OP_PLUSI: \
707        case OP_NOTSTAR: \
708        case OP_NOTPLUS: \
709        case OP_NOTSTARI: \
710        case OP_NOTPLUSI:
711    
712    #define CASE_ITERATOR_PRIVATE_DATA_2B \
713        case OP_UPTO: \
714        case OP_MINUPTO: \
715        case OP_UPTOI: \
716        case OP_MINUPTOI: \
717        case OP_NOTUPTO: \
718        case OP_NOTMINUPTO: \
719        case OP_NOTUPTOI: \
720        case OP_NOTMINUPTOI:
721    
722    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
723        case OP_TYPEMINSTAR: \
724        case OP_TYPEMINPLUS: \
725        case OP_TYPEQUERY: \
726        case OP_TYPEMINQUERY:
727    
728    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
729        case OP_TYPESTAR: \
730        case OP_TYPEPLUS:
731    
732    #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
733        case OP_TYPEUPTO: \
734        case OP_TYPEMINUPTO:
735    
736    static int get_class_iterator_size(pcre_uchar *cc)
737    {
738    switch(*cc)
739      {
740      case OP_CRSTAR:
741      case OP_CRPLUS:
742      return 2;
743    
744      case OP_CRMINSTAR:
745      case OP_CRMINPLUS:
746      case OP_CRQUERY:
747      case OP_CRMINQUERY:
748      return 1;
749    
750      case OP_CRRANGE:
751      case OP_CRMINRANGE:
752      if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
753        return 0;
754      return 2;
755    
756      default:
757      return 0;
758      }
759    }
760    
761    static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
762  {  {
763  int localspace = 0;  int private_data_length = 0;
764  uschar *alternative;  pcre_uchar *alternative;
765    pcre_uchar *name;
766    pcre_uchar *end = NULL;
767    int space, size, bracketlen, i;
768    
769  /* Calculate important variables (like stack size) and check whether all opcodes are supported. */  /* Calculate important variables (like stack size) and check whether all opcodes are supported. */
770  while (cc < ccend)  while (cc < ccend)
771    {    {
772      space = 0;
773      size = 0;
774      bracketlen = 0;
775    switch(*cc)    switch(*cc)
776      {      {
777        case OP_SET_SOM:
778        common->has_set_som = TRUE;
779        cc += 1;
780        break;
781    
782        case OP_REF:
783        case OP_REFI:
784        common->optimized_cbracket[GET2(cc, 1)] = 0;
785        cc += 1 + IMM2_SIZE;
786        break;
787    
788      case OP_ASSERT:      case OP_ASSERT:
789      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
790      case OP_ASSERTBACK:      case OP_ASSERTBACK:
791      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
792      case OP_ONCE:      case OP_ONCE:
793        case OP_ONCE_NC:
794      case OP_BRAPOS:      case OP_BRAPOS:
795      case OP_SBRA:      case OP_SBRA:
796      case OP_SBRAPOS:      case OP_SBRAPOS:
797      case OP_SCOND:      private_data_length += sizeof(sljit_w);
798      localspace += sizeof(sljit_w);      bracketlen = 1 + LINK_SIZE;
     cc += 1 + LINK_SIZE;  
799      break;      break;
800    
801      case OP_CBRAPOS:      case OP_CBRAPOS:
802      case OP_SCBRAPOS:      case OP_SCBRAPOS:
803      localspace += sizeof(sljit_w);      private_data_length += sizeof(sljit_w);
804      cc += 1 + LINK_SIZE + 2;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
805        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
806      break;      break;
807    
808      case OP_COND:      case OP_COND:
809      /* Might be a hidden SCOND. */      case OP_SCOND:
810      alternative = cc + GET(cc, 1);      bracketlen = cc[1 + LINK_SIZE];
811      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (bracketlen == OP_CREF)
812        localspace += sizeof(sljit_w);        {
813          bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
814          common->optimized_cbracket[bracketlen] = 0;
815          }
816        else if (bracketlen == OP_NCREF)
817          {
818          bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819          name = (pcre_uchar *)common->name_table;
820          alternative = name;
821          for (i = 0; i < common->name_count; i++)
822            {
823            if (GET2(name, 0) == bracketlen) break;
824            name += common->name_entry_size;
825            }
826          SLJIT_ASSERT(i != common->name_count);
827    
828          for (i = 0; i < common->name_count; i++)
829            {
830            if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
831              common->optimized_cbracket[GET2(alternative, 0)] = 0;
832            alternative += common->name_entry_size;
833            }
834          }
835    
836        if (*cc == OP_COND)
837          {
838          /* Might be a hidden SCOND. */
839          alternative = cc + GET(cc, 1);
840          if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
841            private_data_length += sizeof(sljit_w);
842          }
843        else
844          private_data_length += sizeof(sljit_w);
845        bracketlen = 1 + LINK_SIZE;
846        break;
847    
848        case OP_BRA:
849        bracketlen = 1 + LINK_SIZE;
850        break;
851    
852        case OP_CBRA:
853        case OP_SCBRA:
854        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
855        break;
856    
857        CASE_ITERATOR_PRIVATE_DATA_1
858        space = 1;
859        size = -2;
860        break;
861    
862        CASE_ITERATOR_PRIVATE_DATA_2A
863        space = 2;
864        size = -2;
865        break;
866    
867        CASE_ITERATOR_PRIVATE_DATA_2B
868        space = 2;
869        size = -(2 + IMM2_SIZE);
870        break;
871    
872        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
873        space = 1;
874        size = 1;
875        break;
876    
877        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
878        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
879          space = 2;
880        size = 1;
881        break;
882    
883        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
884        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
885          space = 2;
886        size = 1 + IMM2_SIZE;
887        break;
888    
889        case OP_CLASS:
890        case OP_NCLASS:
891        size += 1 + 32 / sizeof(pcre_uchar);
892        space = get_class_iterator_size(cc + size);
893        break;
894    
895    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
896        case OP_XCLASS:
897        size = GET(cc, 1);
898        space = get_class_iterator_size(cc + size);
899        break;
900    #endif
901    
902        case OP_RECURSE:
903        /* Set its value only once. */
904        if (common->recursive_head == 0)
905          {
906          common->recursive_head = common->ovector_start;
907          common->ovector_start += sizeof(sljit_w);
908          }
909      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
910      break;      break;
911    
912        case OP_MARK:
913        if (common->mark_ptr == 0)
914          {
915          common->mark_ptr = common->ovector_start;
916          common->ovector_start += sizeof(sljit_w);
917          }
918        cc += 1 + 2 + cc[1];
919        break;
920    
921      default:      default:
922      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
923      if (cc == NULL)      if (cc == NULL)
924        return -1;        return -1;
925      break;      break;
926      }      }
927    
928      if (space > 0 && cc >= end)
929        private_data_length += sizeof(sljit_w) * space;
930    
931      if (size != 0)
932        {
933        if (size < 0)
934          {
935          cc += -size;
936    #ifdef SUPPORT_UTF
937          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
938    #endif
939          }
940        else
941          cc += size;
942        }
943    
944      if (bracketlen > 0)
945        {
946        if (cc >= end)
947          {
948          end = bracketend(cc);
949          if (end[-1 - LINK_SIZE] == OP_KET)
950            end = NULL;
951          }
952        cc += bracketlen;
953        }
954    }    }
955  return localspace;  return private_data_length;
956  }  }
957    
958  static void set_localptrs(compiler_common *common, int localptr, uschar *ccend)  static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
959  {  {
960  uschar *cc = common->start;  pcre_uchar *cc = common->start;
961  uschar *alternative;  pcre_uchar *alternative;
962    pcre_uchar *end = NULL;
963    int space, size, bracketlen;
964    
965  while (cc < ccend)  while (cc < ccend)
966    {    {
967      space = 0;
968      size = 0;
969      bracketlen = 0;
970    switch(*cc)    switch(*cc)
971      {      {
972      case OP_ASSERT:      case OP_ASSERT:
# Line 654  while (cc < ccend) Line 974  while (cc < ccend)
974      case OP_ASSERTBACK:      case OP_ASSERTBACK:
975      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
976      case OP_ONCE:      case OP_ONCE:
977        case OP_ONCE_NC:
978      case OP_BRAPOS:      case OP_BRAPOS:
979      case OP_SBRA:      case OP_SBRA:
980      case OP_SBRAPOS:      case OP_SBRAPOS:
981      case OP_SCOND:      case OP_SCOND:
982      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
983      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_w);
984      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
985      break;      break;
986    
987      case OP_CBRAPOS:      case OP_CBRAPOS:
988      case OP_SCBRAPOS:      case OP_SCBRAPOS:
989      common->localptrs[cc - common->start] = localptr;      common->private_data_ptrs[cc - common->start] = private_data_ptr;
990      localptr += sizeof(sljit_w);      private_data_ptr += sizeof(sljit_w);
991      cc += 1 + LINK_SIZE + 2;      bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
992      break;      break;
993    
994      case OP_COND:      case OP_COND:
# Line 675  while (cc < ccend) Line 996  while (cc < ccend)
996      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
997      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
998        {        {
999        common->localptrs[cc - common->start] = localptr;        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1000        localptr += sizeof(sljit_w);        private_data_ptr += sizeof(sljit_w);
1001        }        }
1002      cc += 1 + LINK_SIZE;      bracketlen = 1 + LINK_SIZE;
1003        break;
1004    
1005        case OP_BRA:
1006        bracketlen = 1 + LINK_SIZE;
1007        break;
1008    
1009        case OP_CBRA:
1010        case OP_SCBRA:
1011        bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1012        break;
1013    
1014        CASE_ITERATOR_PRIVATE_DATA_1
1015        space = 1;
1016        size = -2;
1017        break;
1018    
1019        CASE_ITERATOR_PRIVATE_DATA_2A
1020        space = 2;
1021        size = -2;
1022        break;
1023    
1024        CASE_ITERATOR_PRIVATE_DATA_2B
1025        space = 2;
1026        size = -(2 + IMM2_SIZE);
1027        break;
1028    
1029        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1030        space = 1;
1031        size = 1;
1032        break;
1033    
1034        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1035        if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1036          space = 2;
1037        size = 1;
1038        break;
1039    
1040        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1041        if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1042          space = 2;
1043        size = 1 + IMM2_SIZE;
1044      break;      break;
1045    
1046        case OP_CLASS:
1047        case OP_NCLASS:
1048        size += 1 + 32 / sizeof(pcre_uchar);
1049        space = get_class_iterator_size(cc + size);
1050        break;
1051    
1052    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1053        case OP_XCLASS:
1054        size = GET(cc, 1);
1055        space = get_class_iterator_size(cc + size);
1056        break;
1057    #endif
1058    
1059      default:      default:
1060      cc = next_opcode(common, cc);      cc = next_opcode(common, cc);
1061      SLJIT_ASSERT(cc != NULL);      SLJIT_ASSERT(cc != NULL);
1062      break;      break;
1063      }      }
1064    
1065      if (space > 0 && cc >= end)
1066        {
1067        common->private_data_ptrs[cc - common->start] = private_data_ptr;
1068        private_data_ptr += sizeof(sljit_w) * space;
1069        }
1070    
1071      if (size != 0)
1072        {
1073        if (size < 0)
1074          {
1075          cc += -size;
1076    #ifdef SUPPORT_UTF
1077          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1078    #endif
1079          }
1080        else
1081          cc += size;
1082        }
1083    
1084      if (bracketlen > 0)
1085        {
1086        if (cc >= end)
1087          {
1088          end = bracketend(cc);
1089          if (end[-1 - LINK_SIZE] == OP_KET)
1090            end = NULL;
1091          }
1092        cc += bracketlen;
1093        }
1094    }    }
1095  }  }
1096    
1097  /* Returns with -1 if no need for frame. */  /* Returns with -1 if no need for frame. */
1098  static int get_framesize(compiler_common *common, uschar *cc, BOOL recursive)  static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1099  {  {
1100  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc);
 uschar *end;  
1101  int length = 0;  int length = 0;
1102  BOOL possessive = FALSE;  BOOL possessive = FALSE;
1103  BOOL needs_frame = FALSE;  BOOL setsom_found = recursive;
1104  BOOL setsom_found = FALSE;  BOOL setmark_found = recursive;
1105    
1106  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1107    {    {
# Line 711  while (cc < ccend) Line 1115  while (cc < ccend)
1115    switch(*cc)    switch(*cc)
1116      {      {
1117      case OP_SET_SOM:      case OP_SET_SOM:
1118      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1119      if (!setsom_found)      if (!setsom_found)
1120        {        {
1121        length += 2;        length += 2;
1122        setsom_found = TRUE;        setsom_found = TRUE;
1123        }        }
1124      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      cc += 1;
1125      break;      break;
1126    
1127      case OP_ASSERT:      case OP_MARK:
1128      case OP_ASSERT_NOT:      SLJIT_ASSERT(common->mark_ptr != 0);
1129      case OP_ASSERTBACK:      if (!setmark_found)
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     if (needs_frame || length > 0)  
1130        {        {
1131        cc = bracketend(cc);        length += 2;
1132        break;        setmark_found = TRUE;
1133        }        }
1134      /* Check whether a frame must be created. */      cc += 1 + 2 + cc[1];
1135      end = bracketend(cc);      break;
1136      while (cc < end)  
1137        {      case OP_RECURSE:
1138        if (*cc == OP_SET_SOM || *cc == OP_CBRA || *cc == OP_CBRAPOS      if (common->has_set_som && !setsom_found)
1139            || *cc == OP_SCBRA || *cc == OP_SCBRAPOS || *cc == OP_RECURSE)        {
1140          needs_frame = TRUE;        length += 2;
1141        cc = next_opcode(common, cc);        setsom_found = TRUE;
1142        SLJIT_ASSERT(cc != NULL);        }
1143        if (common->mark_ptr != 0 && !setmark_found)
1144          {
1145          length += 2;
1146          setmark_found = TRUE;
1147        }        }
1148        cc += 1 + LINK_SIZE;
1149      break;      break;
1150    
1151      case OP_CBRA:      case OP_CBRA:
# Line 747  while (cc < ccend) Line 1153  while (cc < ccend)
1153      case OP_SCBRA:      case OP_SCBRA:
1154      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1155      length += 3;      length += 3;
1156      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1157      break;      break;
1158    
1159      default:      default:
# Line 757  while (cc < ccend) Line 1163  while (cc < ccend)
1163      }      }
1164    
1165  /* Possessive quantifiers can use a special case. */  /* Possessive quantifiers can use a special case. */
1166  if (SLJIT_UNLIKELY(possessive) && !needs_frame && length == 3 + 2)  if (SLJIT_UNLIKELY(possessive) && length == 3)
1167    return -1;    return -1;
1168    
1169  if (length > 0)  if (length > 0)
1170    return length + 2;    return length + 1;
1171  return needs_frame ? 0 : -1;  return -1;
1172  }  }
1173    
1174  static void init_frame(compiler_common *common, uschar *cc, int stackpos, int stacktop, BOOL recursive)  static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1175  {  {
 /* TMP2 must contain STACK_TOP - (-STACK(stackpos)) */  
1176  DEFINE_COMPILER;  DEFINE_COMPILER;
1177  uschar *ccend = bracketend(cc);  pcre_uchar *ccend = bracketend(cc);
1178  BOOL setsom_found = FALSE;  BOOL setsom_found = recursive;
1179    BOOL setmark_found = recursive;
1180  int offset;  int offset;
1181    
1182  if (stackpos < stacktop)  /* >= 1 + shortest item size (2) */
1183    {  SLJIT_UNUSED_ARG(stacktop);
1184    SLJIT_ASSERT(stackpos + 1 == stacktop);  SLJIT_ASSERT(stackpos >= stacktop + 2);
   return;  
   }  
1185    
1186  stackpos = STACK(stackpos);  stackpos = STACK(stackpos);
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD);  
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD, TMP2, 0);  
 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacktop), TMP1, 0);  
   
1187  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1188    cc = next_opcode(common, cc);    cc = next_opcode(common, cc);
1189  SLJIT_ASSERT(cc != NULL);  SLJIT_ASSERT(cc != NULL);
# Line 791  while (cc < ccend) Line 1191  while (cc < ccend)
1191    switch(*cc)    switch(*cc)
1192      {      {
1193      case OP_SET_SOM:      case OP_SET_SOM:
1194      case OP_RECURSE:      SLJIT_ASSERT(common->has_set_som);
1195      if (!setsom_found)      if (!setsom_found)
1196        {        {
1197        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
# Line 801  while (cc < ccend) Line 1201  while (cc < ccend)
1201        stackpos += (int)sizeof(sljit_w);        stackpos += (int)sizeof(sljit_w);
1202        setsom_found = TRUE;        setsom_found = TRUE;
1203        }        }
1204      cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;      cc += 1;
1205      break;      break;
1206    
1207      case OP_ASSERT:      case OP_MARK:
1208      case OP_ASSERT_NOT:      SLJIT_ASSERT(common->mark_ptr != 0);
1209      case OP_ASSERTBACK:      if (!setmark_found)
1210      case OP_ASSERTBACK_NOT:        {
1211      case OP_ONCE:        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1212      cc = bracketend(cc);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1213          stackpos += (int)sizeof(sljit_w);
1214          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1215          stackpos += (int)sizeof(sljit_w);
1216          setmark_found = TRUE;
1217          }
1218        cc += 1 + 2 + cc[1];
1219        break;
1220    
1221        case OP_RECURSE:
1222        if (common->has_set_som && !setsom_found)
1223          {
1224          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1225          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1226          stackpos += (int)sizeof(sljit_w);
1227          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1228          stackpos += (int)sizeof(sljit_w);
1229          setsom_found = TRUE;
1230          }
1231        if (common->mark_ptr != 0 && !setmark_found)
1232          {
1233          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1234          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1235          stackpos += (int)sizeof(sljit_w);
1236          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1237          stackpos += (int)sizeof(sljit_w);
1238          setmark_found = TRUE;
1239          }
1240        cc += 1 + LINK_SIZE;
1241      break;      break;
1242    
1243      case OP_CBRA:      case OP_CBRA:
# Line 826  while (cc < ccend) Line 1254  while (cc < ccend)
1254      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1255      stackpos += (int)sizeof(sljit_w);      stackpos += (int)sizeof(sljit_w);
1256    
1257      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1258      break;      break;
1259    
1260      default:      default:
# Line 836  while (cc < ccend) Line 1264  while (cc < ccend)
1264      }      }
1265    
1266  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1267  SLJIT_ASSERT(stackpos == STACK(stacktop + 1));  SLJIT_ASSERT(stackpos == STACK(stacktop));
1268  }  }
1269    
1270  static SLJIT_INLINE int get_localsize(compiler_common *common, uschar *cc, uschar *ccend)  static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1271  {  {
1272  int localsize = 2;  int private_data_length = 2;
1273  uschar *alternative;  int size;
1274  /* Calculate the sum of the local variables. */  pcre_uchar *alternative;
1275    /* Calculate the sum of the private machine words. */
1276  while (cc < ccend)  while (cc < ccend)
1277    {    {
1278      size = 0;
1279    switch(*cc)    switch(*cc)
1280      {      {
1281      case OP_ASSERT:      case OP_ASSERT:
# Line 853  while (cc < ccend) Line 1283  while (cc < ccend)
1283      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1284      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1285      case OP_ONCE:      case OP_ONCE:
1286        case OP_ONCE_NC:
1287      case OP_BRAPOS:      case OP_BRAPOS:
1288      case OP_SBRA:      case OP_SBRA:
1289      case OP_SBRAPOS:      case OP_SBRAPOS:
1290      case OP_SCOND:      case OP_SCOND:
1291      localsize++;      private_data_length++;
1292      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1293      break;      break;
1294    
1295      case OP_CBRA:      case OP_CBRA:
1296      case OP_SCBRA:      case OP_SCBRA:
1297      localsize++;      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1298      cc += 1 + LINK_SIZE + 2;        private_data_length++;
1299        cc += 1 + LINK_SIZE + IMM2_SIZE;
1300      break;      break;
1301    
1302      case OP_CBRAPOS:      case OP_CBRAPOS:
1303      case OP_SCBRAPOS:      case OP_SCBRAPOS:
1304      localsize += 2;      private_data_length += 2;
1305      cc += 1 + LINK_SIZE + 2;      cc += 1 + LINK_SIZE + IMM2_SIZE;
1306      break;      break;
1307    
1308      case OP_COND:      case OP_COND:
1309      /* Might be a hidden SCOND. */      /* Might be a hidden SCOND. */
1310      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
1311      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1312        localsize++;        private_data_length++;
1313      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
1314      break;      break;
1315    
1316      default:      CASE_ITERATOR_PRIVATE_DATA_1
1317      cc = next_opcode(common, cc);      if (PRIVATE_DATA(cc))
1318      SLJIT_ASSERT(cc != NULL);        private_data_length++;
1319        cc += 2;
1320    #ifdef SUPPORT_UTF
1321        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1322    #endif
1323      break;      break;
     }  
   }  
 SLJIT_ASSERT(cc == ccend);  
 return localsize;  
 }  
1324    
1325  static void copy_locals(compiler_common *common, uschar *cc, uschar *ccend,      CASE_ITERATOR_PRIVATE_DATA_2A
1326    BOOL save, int stackptr, int stacktop)      if (PRIVATE_DATA(cc))
1327  {        private_data_length += 2;
1328  DEFINE_COMPILER;      cc += 2;
1329  int srcw[2];  #ifdef SUPPORT_UTF
1330  int count;      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1331  BOOL tmp1next = TRUE;  #endif
1332  BOOL tmp1empty = TRUE;      break;
1333  BOOL tmp2empty = TRUE;  
1334  uschar *alternative;      CASE_ITERATOR_PRIVATE_DATA_2B
1335  enum {      if (PRIVATE_DATA(cc))
1336    start,        private_data_length += 2;
1337    loop,      cc += 2 + IMM2_SIZE;
1338    #ifdef SUPPORT_UTF
1339        if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1340    #endif
1341        break;
1342    
1343        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1344        if (PRIVATE_DATA(cc))
1345          private_data_length++;
1346        cc += 1;
1347        break;
1348    
1349        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1350        if (PRIVATE_DATA(cc))
1351          private_data_length += 2;
1352        cc += 1;
1353        break;
1354    
1355        CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1356        if (PRIVATE_DATA(cc))
1357          private_data_length += 2;
1358        cc += 1 + IMM2_SIZE;
1359        break;
1360    
1361        case OP_CLASS:
1362        case OP_NCLASS:
1363    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1364        case OP_XCLASS:
1365        size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1366    #else
1367        size = 1 + 32 / (int)sizeof(pcre_uchar);
1368    #endif
1369        if (PRIVATE_DATA(cc))
1370          private_data_length += get_class_iterator_size(cc + size);
1371        cc += size;
1372        break;
1373    
1374        default:
1375        cc = next_opcode(common, cc);
1376        SLJIT_ASSERT(cc != NULL);
1377        break;
1378        }
1379      }
1380    SLJIT_ASSERT(cc == ccend);
1381    return private_data_length;
1382    }
1383    
1384    static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1385      BOOL save, int stackptr, int stacktop)
1386    {
1387    DEFINE_COMPILER;
1388    int srcw[2];
1389    int count, size;
1390    BOOL tmp1next = TRUE;
1391    BOOL tmp1empty = TRUE;
1392    BOOL tmp2empty = TRUE;
1393    pcre_uchar *alternative;
1394    enum {
1395      start,
1396      loop,
1397    end    end
1398  } status;  } status;
1399    
# Line 935  while (status != end) Line 1425  while (status != end)
1425    switch(status)    switch(status)
1426      {      {
1427      case start:      case start:
1428      SLJIT_ASSERT(save);      SLJIT_ASSERT(save && common->recursive_head != 0);
1429      count = 1;      count = 1;
1430      srcw[0] = RECURSIVE_HEAD;      srcw[0] = common->recursive_head;
1431      status = loop;      status = loop;
1432      break;      break;
1433    
# Line 955  while (status != end) Line 1445  while (status != end)
1445        case OP_ASSERTBACK:        case OP_ASSERTBACK:
1446        case OP_ASSERTBACK_NOT:        case OP_ASSERTBACK_NOT:
1447        case OP_ONCE:        case OP_ONCE:
1448          case OP_ONCE_NC:
1449        case OP_BRAPOS:        case OP_BRAPOS:
1450        case OP_SBRA:        case OP_SBRA:
1451        case OP_SBRAPOS:        case OP_SBRAPOS:
1452        case OP_SCOND:        case OP_SCOND:
1453        count = 1;        count = 1;
1454        srcw[0] = PRIV(cc);        srcw[0] = PRIVATE_DATA(cc);
1455        SLJIT_ASSERT(srcw[0] != 0);        SLJIT_ASSERT(srcw[0] != 0);
1456        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1457        break;        break;
1458    
1459        case OP_CBRA:        case OP_CBRA:
1460        case OP_SCBRA:        case OP_SCBRA:
1461        count = 1;        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1462        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));          {
1463        cc += 1 + LINK_SIZE + 2;          count = 1;
1464            srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1465            }
1466          cc += 1 + LINK_SIZE + IMM2_SIZE;
1467        break;        break;
1468    
1469        case OP_CBRAPOS:        case OP_CBRAPOS:
1470        case OP_SCBRAPOS:        case OP_SCBRAPOS:
1471        count = 2;        count = 2;
1472          srcw[0] = PRIVATE_DATA(cc);
1473        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));        srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1474        srcw[0] = PRIV(cc);        SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1475        SLJIT_ASSERT(srcw[0] != 0);        cc += 1 + LINK_SIZE + IMM2_SIZE;
       cc += 1 + LINK_SIZE + 2;  
1476        break;        break;
1477    
1478        case OP_COND:        case OP_COND:
# Line 987  while (status != end) Line 1481  while (status != end)
1481        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)        if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1482          {          {
1483          count = 1;          count = 1;
1484          srcw[0] = PRIV(cc);          srcw[0] = PRIVATE_DATA(cc);
1485          SLJIT_ASSERT(srcw[0] != 0);          SLJIT_ASSERT(srcw[0] != 0);
1486          }          }
1487        cc += 1 + LINK_SIZE;        cc += 1 + LINK_SIZE;
1488        break;        break;
1489    
1490          CASE_ITERATOR_PRIVATE_DATA_1
1491          if (PRIVATE_DATA(cc))
1492            {
1493            count = 1;
1494            srcw[0] = PRIVATE_DATA(cc);
1495            }
1496          cc += 2;
1497    #ifdef SUPPORT_UTF
1498          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1499    #endif
1500          break;
1501    
1502          CASE_ITERATOR_PRIVATE_DATA_2A
1503          if (PRIVATE_DATA(cc))
1504            {
1505            count = 2;
1506            srcw[0] = PRIVATE_DATA(cc);
1507            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1508            }
1509          cc += 2;
1510    #ifdef SUPPORT_UTF
1511          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1512    #endif
1513          break;
1514    
1515          CASE_ITERATOR_PRIVATE_DATA_2B
1516          if (PRIVATE_DATA(cc))
1517            {
1518            count = 2;
1519            srcw[0] = PRIVATE_DATA(cc);
1520            srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1521            }
1522          cc += 2 + IMM2_SIZE;
1523    #ifdef SUPPORT_UTF
1524          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1525    #endif
1526          break;
1527    
1528          CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1529          if (PRIVATE_DATA(cc))
1530            {
1531            count = 1;
1532            srcw[0] = PRIVATE_DATA(cc);
1533            }
1534          cc += 1;
1535          break;
1536    
1537          CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1538          if (PRIVATE_DATA(cc))
1539            {
1540            count = 2;
1541            srcw[0] = PRIVATE_DATA(cc);
1542            srcw[1] = srcw[0] + sizeof(sljit_w);
1543            }
1544          cc += 1;
1545          break;
1546    
1547          CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1548          if (PRIVATE_DATA(cc))
1549            {
1550            count = 2;
1551            srcw[0] = PRIVATE_DATA(cc);
1552            srcw[1] = srcw[0] + sizeof(sljit_w);
1553            }
1554          cc += 1 + IMM2_SIZE;
1555          break;
1556    
1557          case OP_CLASS:
1558          case OP_NCLASS:
1559    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1560          case OP_XCLASS:
1561          size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1562    #else
1563          size = 1 + 32 / (int)sizeof(pcre_uchar);
1564    #endif
1565          if (PRIVATE_DATA(cc))
1566            switch(get_class_iterator_size(cc + size))
1567              {
1568              case 1:
1569              count = 1;
1570              srcw[0] = PRIVATE_DATA(cc);
1571              break;
1572    
1573              case 2:
1574              count = 2;
1575              srcw[0] = PRIVATE_DATA(cc);
1576              srcw[1] = srcw[0] + sizeof(sljit_w);
1577              break;
1578    
1579              default:
1580              SLJIT_ASSERT_STOP();
1581              break;
1582              }
1583          cc += size;
1584          break;
1585    
1586        default:        default:
1587        cc = next_opcode(common, cc);        cc = next_opcode(common, cc);
1588        SLJIT_ASSERT(cc != NULL);        SLJIT_ASSERT(cc != NULL);
# Line 1095  if (save) Line 1685  if (save)
1685  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1686  }  }
1687    
1688    #undef CASE_ITERATOR_PRIVATE_DATA_1
1689    #undef CASE_ITERATOR_PRIVATE_DATA_2A
1690    #undef CASE_ITERATOR_PRIVATE_DATA_2B
1691    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1692    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1693    #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1694    
1695  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)  static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1696  {  {
1697  return (value & (value - 1)) == 0;  return (value & (value - 1)) == 0;
# Line 1105  static SLJIT_INLINE void set_jumps(jump_ Line 1702  static SLJIT_INLINE void set_jumps(jump_
1702  while (list)  while (list)
1703    {    {
1704    /* sljit_set_label is clever enough to do nothing    /* sljit_set_label is clever enough to do nothing
1705    if either the jump or the label is NULL */    if either the jump or the label is NULL. */
1706    sljit_set_label(list->jump, label);    sljit_set_label(list->jump, label);
1707    list = list->next;    list = list->next;
1708    }    }
# Line 1132  if (list_item) Line 1729  if (list_item)
1729    list_item->type = type;    list_item->type = type;
1730    list_item->data = data;    list_item->data = data;
1731    list_item->start = start;    list_item->start = start;
1732    list_item->leave = LABEL();    list_item->quit = LABEL();
1733    list_item->next = common->stubs;    list_item->next = common->stubs;
1734    common->stubs = list_item;    common->stubs = list_item;
1735    }    }
# Line 1152  while (list_item) Line 1749  while (list_item)
1749      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));      add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1750      break;      break;
1751      }      }
1752    JUMPTO(SLJIT_JUMP, list_item->leave);    JUMPTO(SLJIT_JUMP, list_item->quit);
1753    list_item = list_item->next;    list_item = list_item->next;
1754    }    }
1755  common->stubs = NULL;  common->stubs = NULL;
# Line 1195  struct sljit_label *loop; Line 1792  struct sljit_label *loop;
1792  int i;  int i;
1793  /* At this point we can freely use all temporary registers. */  /* At this point we can freely use all temporary registers. */
1794  /* TMP1 returns with begin - 1. */  /* TMP1 returns with begin - 1. */
1795  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1796  if (length < 8)  if (length < 8)
1797    {    {
1798    for (i = 0; i < length; i++)    for (i = 0; i < length; i++)
# Line 1203  if (length < 8) Line 1800  if (length < 8)
1800    }    }
1801  else  else
1802    {    {
1803    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1804    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1805    loop = LABEL();    loop = LABEL();
1806    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);    OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
# Line 1219  struct sljit_label *loop; Line 1816  struct sljit_label *loop;
1816  struct sljit_jump *earlyexit;  struct sljit_jump *earlyexit;
1817    
1818  /* At this point we can freely use all registers. */  /* At this point we can freely use all registers. */
1819  OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1820  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1821    
1822  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1823    if (common->mark_ptr != 0)
1824      OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1825  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));  OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1826    if (common->mark_ptr != 0)
1827      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1828  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1829  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1830  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);  GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1831  /* Unlikely, but possible */  /* Unlikely, but possible */
1832  earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);  earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1833  loop = LABEL();  loop = LABEL();
1834  OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);  OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1835  OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));  OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1836  /* Copy the integer value to the output buffer */  /* Copy the integer value to the output buffer */
1837  OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);  #ifdef COMPILE_PCRE16
1838    OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1839    #endif
1840    OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1841  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1842  JUMPTO(SLJIT_C_NOT_ZERO, loop);  JUMPTO(SLJIT_C_NOT_ZERO, loop);
1843  JUMPHERE(earlyexit);  JUMPHERE(earlyexit);
# Line 1241  JUMPHERE(earlyexit); Line 1845  JUMPHERE(earlyexit);
1845  /* Calculate the return value, which is the maximum ovector value. */  /* Calculate the return value, which is the maximum ovector value. */
1846  if (topbracket > 1)  if (topbracket > 1)
1847    {    {
1848    OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1849    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1850    
1851    /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */    /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1852    loop = LABEL();    loop = LABEL();
1853    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * sizeof(sljit_w)));    OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1854    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1855    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);    CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1856    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1857    }    }
1858  else  else
1859    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1860  }  }
1861    
1862  static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, uschar* cc)  static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1863    {
1864    DEFINE_COMPILER;
1865    
1866    SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1867    SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1868    
1869    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1870    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1871    OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1872    CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1873    
1874    /* Store match begin and end. */
1875    OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1876    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1877    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1878    OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1879    #ifdef COMPILE_PCRE16
1880    OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1881    #endif
1882    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1883    
1884    OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1885    #ifdef COMPILE_PCRE16
1886    OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1887    #endif
1888    OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1889    
1890    JUMPTO(SLJIT_JUMP, quit);
1891    }
1892    
1893    static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1894    {
1895    /* May destroy TMP1. */
1896    DEFINE_COMPILER;
1897    struct sljit_jump *jump;
1898    
1899    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1900      {
1901      /* The value of -1 must be kept for start_used_ptr! */
1902      OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1903      /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1904      is not necessary if start_used_ptr == STR_PTR, it does not hurt either. */
1905      jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1906      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1907      JUMPHERE(jump);
1908      }
1909    else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1910      {
1911      jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1912      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1913      JUMPHERE(jump);
1914      }
1915    }
1916    
1917    static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1918  {  {
1919  /* Detects if the character has an othercase. */  /* Detects if the character has an othercase. */
1920  unsigned int c;  unsigned int c;
1921    
1922  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1923  if (common->utf8)  if (common->utf)
1924    {    {
1925    GETCHAR(c, cc);    GETCHAR(c, cc);
1926    if (c > 127)    if (c > 127)
# Line 1272  if (common->utf8) Line 1931  if (common->utf8)
1931      return FALSE;      return FALSE;
1932  #endif  #endif
1933      }      }
1934    #ifndef COMPILE_PCRE8
1935      return common->fcc[c] != c;
1936    #endif
1937    }    }
1938  else  else
1939  #endif  #endif
1940    c = *cc;    c = *cc;
1941  return common->fcc[c] != c;  return MAX_255(c) ? common->fcc[c] != c : FALSE;
1942  }  }
1943    
1944  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)  static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1945  {  {
1946  /* Returns with the othercase. */  /* Returns with the othercase. */
1947  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1948  if (common->utf8 && c > 127)  if (common->utf && c > 127)
1949    {    {
1950  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1951    return UCD_OTHERCASE(c);    return UCD_OTHERCASE(c);
# Line 1292  if (common->utf8 && c > 127) Line 1954  if (common->utf8 && c > 127)
1954  #endif  #endif
1955    }    }
1956  #endif  #endif
1957  return common->fcc[c];  return TABLE_GET(c, common->fcc, c);
1958  }  }
1959    
1960  static unsigned int char_get_othercase_bit(compiler_common *common, uschar* cc)  static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1961  {  {
1962  /* Detects if the character and its othercase have only 1 bit difference. */  /* Detects if the character and its othercase have only 1 bit difference. */
1963  unsigned int c, oc, bit;  unsigned int c, oc, bit;
1964  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1965  int n;  int n;
1966  #endif  #endif
1967    
1968  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1969  if (common->utf8)  if (common->utf)
1970    {    {
1971    GETCHAR(c, cc);    GETCHAR(c, cc);
1972    if (c <= 127)    if (c <= 127)
# Line 1321  if (common->utf8) Line 1983  if (common->utf8)
1983  else  else
1984    {    {
1985    c = *cc;    c = *cc;
1986    oc = common->fcc[c];    oc = TABLE_GET(c, common->fcc, c);
1987    }    }
1988  #else  #else
1989  c = *cc;  c = *cc;
1990  oc = common->fcc[c];  oc = TABLE_GET(c, common->fcc, c);
1991  #endif  #endif
1992    
1993  SLJIT_ASSERT(c != oc);  SLJIT_ASSERT(c != oc);
# Line 1339  if (c <= 127 && bit == 0x20) Line 2001  if (c <= 127 && bit == 0x20)
2001  if (!ispowerof2(bit))  if (!ispowerof2(bit))
2002    return 0;    return 0;
2003    
2004  #ifdef SUPPORT_UTF8  #ifdef COMPILE_PCRE8
2005  if (common->utf8 && c > 127)  
2006    #ifdef SUPPORT_UTF
2007    if (common->utf && c > 127)
2008    {    {
2009    n = _pcre_utf8_table4[*cc & 0x3f];    n = GET_EXTRALEN(*cc);
2010    while ((bit & 0x3f) == 0)    while ((bit & 0x3f) == 0)
2011      {      {
2012      n--;      n--;
# Line 1350  if (common->utf8 && c > 127) Line 2014  if (common->utf8 && c > 127)
2014      }      }
2015    return (n << 8) | bit;    return (n << 8) | bit;
2016    }    }
2017  #endif  #endif /* SUPPORT_UTF */
2018  return (0 << 8) | bit;  return (0 << 8) | bit;
2019    
2020    #else /* COMPILE_PCRE8 */
2021    
2022    #ifdef COMPILE_PCRE16
2023    #ifdef SUPPORT_UTF
2024    if (common->utf && c > 65535)
2025      {
2026      if (bit >= (1 << 10))
2027        bit >>= 10;
2028      else
2029        return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2030      }
2031    #endif /* SUPPORT_UTF */
2032    return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2033    #endif /* COMPILE_PCRE16 */
2034    
2035    #endif /* COMPILE_PCRE8 */
2036  }  }
2037    
2038  static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)  static void check_partial(compiler_common *common, BOOL force)
2039  {  {
2040    /* Checks whether a partial match has occurred. Does not modify registers. */
2041  DEFINE_COMPILER;  DEFINE_COMPILER;
2042  add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));  struct sljit_jump *jump = NULL;
2043    
2044    SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2045    
2046    if (common->mode == JIT_COMPILE)
2047      return;
2048    
2049    if (!force)
2050      jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2051    else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2052      jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2053    
2054    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2055      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2056    else
2057      {
2058      if (common->partialmatchlabel != NULL)
2059        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2060      else
2061        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2062      }
2063    
2064    if (jump != NULL)
2065      JUMPHERE(jump);
2066    }
2067    
2068    static struct sljit_jump *check_str_end(compiler_common *common)
2069    {
2070    /* Does not affect registers. Usually used in a tight spot. */
2071    DEFINE_COMPILER;
2072    struct sljit_jump *jump;
2073    struct sljit_jump *nohit;
2074    struct sljit_jump *return_value;
2075    
2076    if (common->mode == JIT_COMPILE)
2077      return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2078    
2079    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2080    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2081      {
2082      nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2083      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2084      JUMPHERE(nohit);
2085      return_value = JUMP(SLJIT_JUMP);
2086      }
2087    else
2088      {
2089      return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2090      if (common->partialmatchlabel != NULL)
2091        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2092      else
2093        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2094      }
2095    JUMPHERE(jump);
2096    return return_value;
2097    }
2098    
2099    static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2100    {
2101    DEFINE_COMPILER;
2102    struct sljit_jump *jump;
2103    
2104    if (common->mode == JIT_COMPILE)
2105      {
2106      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2107      return;
2108      }
2109    
2110    /* Partial matching mode. */
2111    jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2112    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2113    if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2114      {
2115      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2116      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2117      }
2118    else
2119      {
2120      if (common->partialmatchlabel != NULL)
2121        JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2122      else
2123        add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2124      }
2125    JUMPHERE(jump);
2126  }  }
2127    
2128  static void read_char(compiler_common *common)  static void read_char(compiler_common *common)
# Line 1365  static void read_char(compiler_common *c Line 2130  static void read_char(compiler_common *c
2130  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, updates STR_PTR.
2131  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2132  DEFINE_COMPILER;  DEFINE_COMPILER;
2133  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2134  struct sljit_jump *jump;  struct sljit_jump *jump;
2135  #endif  #endif
2136    
2137  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2138  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2139  if (common->utf8)  if (common->utf)
2140    {    {
2141    /* Should not found a value between 128 and 192 here. */  #ifdef COMPILE_PCRE8
2142    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 192);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2143    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #else
2144    #ifdef COMPILE_PCRE16
2145      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2146    #endif
2147    #endif /* COMPILE_PCRE8 */
2148      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2149    JUMPHERE(jump);    JUMPHERE(jump);
2150    }    }
2151  #endif  #endif
2152  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2153  }  }
2154    
2155  static void peek_char(compiler_common *common)  static void peek_char(compiler_common *common)
# Line 1387  static void peek_char(compiler_common *c Line 2157  static void peek_char(compiler_common *c
2157  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2158  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2159  DEFINE_COMPILER;  DEFINE_COMPILER;
2160  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2161  struct sljit_jump *jump;  struct sljit_jump *jump;
2162  #endif  #endif
2163    
2164  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2165  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2166  if (common->utf8)  if (common->utf)
2167    {    {
2168    /* Should not found a value between 128 and 192 here. */  #ifdef COMPILE_PCRE8
2169    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 192);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2170    add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));  #else
2171    #ifdef COMPILE_PCRE16
2172      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2173    #endif
2174    #endif /* COMPILE_PCRE8 */
2175      add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2176    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2177    JUMPHERE(jump);    JUMPHERE(jump);
2178    }    }
# Line 1408  static void read_char8_type(compiler_com Line 2183  static void read_char8_type(compiler_com
2183  {  {
2184  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2185  DEFINE_COMPILER;  DEFINE_COMPILER;
2186  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2187  struct sljit_jump *jump;  struct sljit_jump *jump;
2188  #endif  #endif
2189    
2190  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2191  if (common->utf8)  if (common->utf)
2192    {    {
2193    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2194    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195    #ifdef COMPILE_PCRE8
2196    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2197    it is a clever early read in most cases. */    it is needed in most cases. */
2198      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2199      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2200      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2201      JUMPHERE(jump);
2202    #else
2203    #ifdef COMPILE_PCRE16
2204      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2205      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2206    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
   /* Should not found a value between 128 and 192 here. */  
   jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 192);  
   add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));  
2207    JUMPHERE(jump);    JUMPHERE(jump);
2208      /* Skip low surrogate if necessary. */
2209      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2210      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2211      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2212      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2213      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2214    #endif
2215    #endif /* COMPILE_PCRE8 */
2216    return;    return;
2217    }    }
2218  #endif  #endif
2219  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2220  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2221  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);  #ifdef COMPILE_PCRE16
2222    /* The ctypes array contains only 256 values. */
2223    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2224    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2225    #endif
2226    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2227    #ifdef COMPILE_PCRE16
2228    JUMPHERE(jump);
2229    #endif
2230  }  }
2231    
2232  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
2233  {  {
2234  /* Goes one character back. Only affects STR_PTR. Does not check begin. */  /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2235  DEFINE_COMPILER;  DEFINE_COMPILER;
2236  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2237  struct sljit_label *label;  struct sljit_label *label;
2238    
2239  if (common->utf8)  if (common->utf)
2240    {    {
2241    label = LABEL();    label = LABEL();
2242    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2243    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2244    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2245    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2246    return;    return;
2247    }    }
2248  #endif  #endif
2249  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2250    if (common->utf)
2251      {
2252      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2253      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2254      /* Skip low surrogate if necessary. */
2255      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2256      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2257      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2258      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2259      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2260      return;
2261      }
2262    #endif
2263    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2264  }  }
2265    
2266  static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2267  {  {
2268  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2269  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 1460  DEFINE_COMPILER; Line 2271  DEFINE_COMPILER;
2271  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2272    {    {
2273    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2274    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2275    }    }
2276  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2277    {    {
# Line 1468  else if (nltype == NLTYPE_ANYCRLF) Line 2279  else if (nltype == NLTYPE_ANYCRLF)
2279    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2280    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2281    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2282    add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2283    }    }
2284  else  else
2285    {    {
2286    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2287    add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2288    }    }
2289  }  }
2290    
2291  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2292  static void do_utf8readchar(compiler_common *common)  
2293    #ifdef COMPILE_PCRE8
2294    static void do_utfreadchar(compiler_common *common)
2295  {  {
2296  /* Fast decoding an utf8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2297  of the character (>= 192). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2298  DEFINE_COMPILER;  DEFINE_COMPILER;
2299  struct sljit_jump *jump;  struct sljit_jump *jump;
2300    
2301  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2302  /* Searching for the first zero. */  /* Searching for the first zero. */
2303  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2304  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2305  /* 2 byte sequence */  /* Two byte sequence. */
2306  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2307  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2308  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2309  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2310  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2311  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2312  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2313  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2314  JUMPHERE(jump);  JUMPHERE(jump);
2315    
2316  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2317  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2318  /* 3 byte sequence */  /* Three byte sequence. */
2319  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2320  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2321  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2322  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2323  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2324  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2325  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2326  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2327  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2328  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2329  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2330  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2331  JUMPHERE(jump);  JUMPHERE(jump);
2332    
2333  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);  /* Four byte sequence. */
2334  jump = JUMP(SLJIT_C_NOT_ZERO);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
 /* 4 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
2335  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2336  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2337  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2338  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2339  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2340  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2341  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2342  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2343  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2344  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2345  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2346  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2347  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2348  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
   
 /* 5 byte sequence */  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);  
2349  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2350  }  }
2351    
2352  static void do_utf8readtype8(compiler_common *common)  static void do_utfreadtype8(compiler_common *common)
2353  {  {
2354  /* Fast decoding an utf8 character type. TMP2 contains the first byte  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2355  of the character (>= 192) and TMP1 is destroyed. Return value in TMP1. */  of the character (>= 0xc0). Return value in TMP1. */
2356  DEFINE_COMPILER;  DEFINE_COMPILER;
2357  struct sljit_jump *jump;  struct sljit_jump *jump;
2358  struct sljit_jump *compare;  struct sljit_jump *compare;
2359    
2360  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2361    
2362  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2363  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2364  /* 2 byte sequence */  /* Two byte sequence. */
2365  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2366  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2367  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2368  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2369  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
# Line 1590  sljit_emit_fast_return(compiler, RETURN_ Line 2378  sljit_emit_fast_return(compiler, RETURN_
2378  JUMPHERE(jump);  JUMPHERE(jump);
2379    
2380  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
2381  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_utf8_char_sizes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  
2382  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2383  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2384  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2385  }  }
2386    
2387  #endif  #else /* COMPILE_PCRE8 */
2388    
2389    #ifdef COMPILE_PCRE16
2390    static void do_utfreadchar(compiler_common *common)
2391    {
2392    /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2393    of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
        /* Shape of the emitted helper: a fast-call entry, one compare-and-branch
        of TMP1 against 0xdc00, an early fast return on the fall-through path,
        and a combining path that ends in a second fast return. STR_PTR is
        changed only on the combining path. */
2394    DEFINE_COMPILER;
2395    struct sljit_jump *jump;
2396    
2397    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
        /* The branch is taken when TMP1 < 0xdc00; together with the entry
        condition stated above (TMP1 >= 0xd800) that is the range
        0xd800..0xdbff. Otherwise execution falls through to the return below
        with TMP1 unchanged.
        NOTE(review): TMP2 is not written on the fall-through path although the
        header promises a length in TMP2 - confirm callers do not rely on it. */
2398    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2399    /* Do nothing, only return. */
2400    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2401    
2402    JUMPHERE(jump);
2403    /* Combine two 16 bit characters. */
        /* The second unit is read one unit past STR_PTR, after which STR_PTR is
        moved forward by one unit. Presumably STR_PTR still addresses the first
        unit on entry - confirm against the callers. */
2404    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2405    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
        /* TMP1 = ((TMP1 & 0x3ff) << 10) | (TMP2 & 0x3ff). TMP2 is then reloaded
        with the constant IN_UCHARS(1) as the second return value, and 0x10000
        is added to TMP1 last. */
2406    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2407    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2408    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2409    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2410    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2411    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2412    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2413    }
2414    #endif /* COMPILE_PCRE16 */
2415    
2416    #endif /* COMPILE_PCRE8 */
2417    
2418    #endif /* SUPPORT_UTF */
2419    
2420  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2421    
# Line 1613  DEFINE_COMPILER; Line 2431  DEFINE_COMPILER;
2431    
2432  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);  SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2433    
2434  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2435  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2436  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_ucd_stage1);  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2437  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2438  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2439  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2440  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_stage2);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2441  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);  OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2442  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, chartype));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2443  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2444  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2445  }  }
# Line 1635  struct sljit_label *newlinelabel = NULL; Line 2453  struct sljit_label *newlinelabel = NULL;
2453  struct sljit_jump *start;  struct sljit_jump *start;
2454  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
2455  struct sljit_jump *nl = NULL;  struct sljit_jump *nl = NULL;
2456    #ifdef SUPPORT_UTF
2457    struct sljit_jump *singlechar;
2458    #endif
2459  jump_list *newline = NULL;  jump_list *newline = NULL;
2460  BOOL newlinecheck = FALSE;  BOOL newlinecheck = FALSE;
2461  BOOL readbyte = FALSE;  BOOL readuchar = FALSE;
2462    
2463  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2464      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
# Line 1646  if (!(hascrorlf || firstline) && (common Line 2467  if (!(hascrorlf || firstline) && (common
2467  if (firstline)  if (firstline)
2468    {    {
2469    /* Search for the end of the first line. */    /* Search for the end of the first line. */
2470    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2471    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2472    
2473    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2474      {      {
2475      mainloop = LABEL();      mainloop = LABEL();
2476      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2478      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2479      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2480      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2481      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2482      OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1);      JUMPHERE(end);
2483        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2484      }      }
2485    else    else
2486      {      {
2487      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);      end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2488      mainloop = LABEL();      mainloop = LABEL();
2489      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2490      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2491      read_char(common);      read_char(common);
2492      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2493      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2494      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);      JUMPHERE(end);
2495        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2496      set_jumps(newline, LABEL());      set_jumps(newline, LABEL());
2497      }      }
2498    
2499    JUMPHERE(end);    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  
2500    }    }
2501    
2502  start = JUMP(SLJIT_JUMP);  start = JUMP(SLJIT_JUMP);
# Line 1682  start = JUMP(SLJIT_JUMP); Line 2504  start = JUMP(SLJIT_JUMP);
2504  if (newlinecheck)  if (newlinecheck)
2505    {    {
2506    newlinelabel = LABEL();    newlinelabel = LABEL();
2507    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2508    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2509    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2510    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2511    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2512    #ifdef COMPILE_PCRE16
2513      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2514    #endif
2515    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2516    nl = JUMP(SLJIT_JUMP);    nl = JUMP(SLJIT_JUMP);
2517    }    }
# Line 1694  if (newlinecheck) Line 2519  if (newlinecheck)
2519  mainloop = LABEL();  mainloop = LABEL();
2520    
2521  /* Increasing the STR_PTR here requires one less jump in the most common case. */  /* Increasing the STR_PTR here requires one less jump in the most common case. */
2522  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2523  if (common->utf8) readbyte = TRUE;  if (common->utf) readuchar = TRUE;
2524  #endif  #endif
2525  if (newlinecheck) readbyte = TRUE;  if (newlinecheck) readuchar = TRUE;
2526    
2527  if (readbyte)  if (readuchar)
2528    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2529    
2530  if (newlinecheck)  if (newlinecheck)
2531    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);    CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2532    
2533  #ifdef SUPPORT_UTF8  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2534  if (common->utf8)  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2535    if (common->utf)
2536    {    {
2537    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes);    singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2538      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2539    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2540      JUMPHERE(singlechar);
2541      }
2542    #endif
2543    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2544    if (common->utf)
2545      {
2546      singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2547      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2548      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2549      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2550      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2551      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2552      JUMPHERE(singlechar);
2553    }    }
 else  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 #else  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
2554  #endif  #endif
2555  JUMPHERE(start);  JUMPHERE(start);
2556    
# Line 1727  if (newlinecheck) Line 2563  if (newlinecheck)
2563  return mainloop;  return mainloop;
2564  }  }
2565    
2566  static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)  static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2567  {  {
        /* Emits a scan loop that moves STR_PTR forward until the two code units
        at STR_PTR equal two code units gathered from the leading opcodes of
        the pattern, or until the (temporarily lowered) STR_END is reached.

        The first part runs at compile time: it walks the pattern opcodes from
        common->start + 1 + IMM2_SIZE and stores two code units, each with its
        case mask, into chars[]. Every "return FALSE" happens before any
        instruction is emitted; when two units were collected the loop is
        emitted and TRUE is returned.

        In this side-by-side listing, left-column text that differs from the
        right column is the removed fast_forward_first_byte code of the older
        revision. */
2568  DEFINE_COMPILER;  DEFINE_COMPILER;
2569  struct sljit_label *start;  struct sljit_label *start;
2570  struct sljit_jump *leave;  struct sljit_jump *quit;
2571  struct sljit_jump *found;  struct sljit_jump *found;
2572  pcre_uint16 oc, bit;  pcre_int32 chars[4];
2573    pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2574    int location = 0;
2575    pcre_int32 len, c, bit, caseless;
2576    BOOL must_end;
2577    
        /* pair overlays two code units (asuchars) on one integer of twice their
        width (ascombined); it is used below to build the immediates for the
        combined two-unit value held in TMP1. */
2578    #ifdef COMPILE_PCRE8
2579    union {
2580        sljit_uh ascombined;
2581        sljit_ub asuchars[2];
2582    } pair;
2583    #else
2584    union {
2585        sljit_ui ascombined;
2586        sljit_uh asuchars[2];
2587    } pair;
2588    #endif
2589    
        /* GET(common->start, 1) is used as an offset from the pattern start;
        when the opcode found there is OP_ALT the optimisation is not attempted.
        Presumably this means the outermost group has several alternatives -
        confirm. */
2590    if (*(common->start + GET(common->start, 1)) == OP_ALT)
2591      return FALSE;
2592    
2593    while (TRUE)
2594      {
2595      caseless = 0;
2596      must_end = TRUE;
        /* Each case steps cc over the opcode (plus IMM2_SIZE for EXACT and
        EXACTI). Only OP_CHAR and OP_CHARI clear must_end; the zero width
        assertions are skipped with continue; any other opcode aborts. */
2597      switch(*cc)
2598        {
2599        case OP_CHAR:
2600        must_end = FALSE;
2601        cc++;
2602        break;
2603    
2604        case OP_CHARI:
2605        caseless = 1;
2606        must_end = FALSE;
2607        cc++;
2608        break;
2609    
2610        case OP_SOD:
2611        case OP_SOM:
2612        case OP_SET_SOM:
2613        case OP_NOT_WORD_BOUNDARY:
2614        case OP_WORD_BOUNDARY:
2615        case OP_EODN:
2616        case OP_EOD:
2617        case OP_CIRC:
2618        case OP_CIRCM:
2619        case OP_DOLL:
2620        case OP_DOLLM:
2621        /* Zero width assertions. */
2622        cc++;
2623        continue;
2624    
2625        case OP_PLUS:
2626        case OP_MINPLUS:
2627        case OP_POSPLUS:
2628        cc++;
2629        break;
2630    
2631        case OP_EXACT:
2632        cc += 1 + IMM2_SIZE;
2633        break;
2634    
2635        case OP_PLUSI:
2636        case OP_MINPLUSI:
2637        case OP_POSPLUSI:
2638        caseless = 1;
2639        cc++;
2640        break;
2641    
2642        case OP_EXACTI:
2643        caseless = 1;
2644        cc += 1 + IMM2_SIZE;
2645        break;
2646    
2647        default:
2648        return FALSE;
2649        }
2650    
        /* len counts the code units of the character at cc: one, plus
        GET_EXTRALEN(cc[0]) when common->utf is set and HAS_EXTRALEN(cc[0])
        holds. */
2651      len = 1;
2652    #ifdef SUPPORT_UTF
2653      if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2654    #endif
2655    
        /* For a caseless character that has another case, caseless is repacked
        as (mask << 8, or << 16 when bit 0x100 is set in the non-8-bit build)
        | index, with index = len minus the upper bits of the value returned by
        char_get_othercase_bit. The copy loop below applies the mask to the
        unit for which len equals that index. A zero result from
        char_get_othercase_bit aborts the optimisation. */
2656      if (caseless && char_has_othercase(common, cc))
2657        {
2658        caseless = char_get_othercase_bit(common, cc);
2659        if (caseless == 0)
2660          return FALSE;
2661    #ifdef COMPILE_PCRE8
2662        caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2663    #else
2664        if ((caseless & 0x100) != 0)
2665          caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2666        else
2667          caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2668    #endif
2669        }
2670      else
2671        caseless = 0;
2672    
        /* chars[location] receives the code unit with the mask OR-ed in,
        chars[location + 1] the mask itself. location grows by two per unit,
        so 2 * 2 means both units are known. */
2673      while (len > 0 && location < 2 * 2)
2674        {
2675        c = *cc;
2676        bit = 0;
2677        if (len == (caseless & 0xff))
2678          {
2679          bit = caseless >> 8;
2680          c |= bit;
2681          }
2682    
2683        chars[location] = c;
2684        chars[location + 1] = bit;
2685    
2686        len--;
2687        location += 2;
2688        cc++;
2689        }
2690    
        /* Finished once two units are stored; otherwise go on with the next
        opcode unless the current one left must_end set. */
2691      if (location == 2 * 2)
2692        break;
2693      else if (must_end)
2694        return FALSE;
2695      }
2696    
        /* Lower STR_END by one for the duration of the loop. With firstline the
        original STR_END is kept in TMP3 and the limit is taken from the
        first_line_end local instead.
        NOTE(review): the amount is a literal 1, while the STR_PTR step in this
        function uses IN_UCHARS(1) - confirm this is intended for the 16-bit
        build. */
2697  if (firstline)  if (firstline)
2698    {    {
2699    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2700    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2701      OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2702    }    }
2703    else
2704      OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2705    
        /* Loop head: leave through quit when STR_PTR >= STR_END. */
2706  start = LABEL();  start = LABEL();
2707  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
        /* Fetch two adjacent code units into TMP1: a single double-width load
        when SLJIT_UNALIGNED is set, otherwise two unit loads merged by shift
        and OR, with the load order selected by SLJIT_BIG_ENDIAN. */
2708  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2709    #ifdef COMPILE_PCRE8
2710    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2711    #else /* COMPILE_PCRE8 */
2712    OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2713    #endif
2714    
2715    #else /* SLJIT_UNALIGNED */
2716    
2717    #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2718    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2719    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2720    #else /* SLJIT_BIG_ENDIAN */
2721    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2722    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2723    #endif /* SLJIT_BIG_ENDIAN */
2724    
2725    #ifdef COMPILE_PCRE8
2726    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2727    #else /* COMPILE_PCRE8 */
2728    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2729    #endif
2730    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2731    
2732    #endif
2733    
        /* When either unit carries a case mask, OR both masks into TMP1; the
        expected values in chars[0] and chars[2] already have the same bits
        set. */
2734    if (chars[1] != 0 || chars[3] != 0)
2735      {
2736      pair.asuchars[0] = chars[1];
2737      pair.asuchars[1] = chars[3];
2738      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2739      }
2740    
        /* Compare against both expected units at once; on a mismatch advance
        STR_PTR by one code unit and jump back to start. */
2741    pair.asuchars[0] = chars[0];
2742    pair.asuchars[1] = chars[2];
2743    found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2744    
2745    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2746    JUMPTO(SLJIT_JUMP, start);
2747    JUMPHERE(found);
2748    JUMPHERE(quit);
2749    
        /* Undo the STR_END adjustment made before the loop. */
2750  if ((firstbyte & REQ_CASELESS) == 0)  if (firstline)
2751    found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2752  else  else
2753      OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2754    return TRUE;
2755    }
2756    
2757    static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2758    {
2759    DEFINE_COMPILER;
2760    struct sljit_label *start;
2761    struct sljit_jump *quit;
2762    struct sljit_jump *found;
2763    pcre_uchar oc, bit;
2764    
2765    if (firstline)
2766    {    {
2767    firstbyte &= 0xff;    SLJIT_ASSERT(common->first_line_end != 0);
2768    oc = common->fcc[firstbyte];    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2769    bit = firstbyte ^ oc;    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2770      }
2771    
2772    start = LABEL();
2773    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2774    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2775    
2776    oc = first_char;
2777    if (caseless)
2778      {
2779      oc = TABLE_GET(first_char, common->fcc, first_char);
2780    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2781      if (first_char > 127 && common->utf)
2782        oc = UCD_OTHERCASE(first_char);
2783    #endif
2784      }
2785    if (first_char == oc)
2786      found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2787    else
2788      {
2789      bit = first_char ^ oc;
2790    if (ispowerof2(bit))    if (ispowerof2(bit))
2791      {      {
2792      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2793      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2794      }      }
2795    else    else
2796      {      {
2797      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2798      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);      COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2799      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2800      COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);      COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
# Line 1767  else Line 2802  else
2802      }      }
2803    }    }
2804    
2805  #ifdef SUPPORT_UTF8  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 if (common->utf8)  
   {  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
   }  
 else  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 #else  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  
 #endif  
2806  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
2807  JUMPHERE(found);  JUMPHERE(found);
2808  JUMPHERE(leave);  JUMPHERE(quit);
2809    
2810  if (firstline)  if (firstline)
2811    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2812  }  }
2813    
2814  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)  static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
# Line 1792  DEFINE_COMPILER; Line 2817  DEFINE_COMPILER;
2817  struct sljit_label *loop;  struct sljit_label *loop;
2818  struct sljit_jump *lastchar;  struct sljit_jump *lastchar;
2819  struct sljit_jump *firstchar;  struct sljit_jump *firstchar;
2820  struct sljit_jump *leave;  struct sljit_jump *quit;
2821  struct sljit_jump *foundcr = NULL;  struct sljit_jump *foundcr = NULL;
2822  struct sljit_jump *notfoundnl;  struct sljit_jump *notfoundnl;
2823  jump_list *newline = NULL;  jump_list *newline = NULL;
2824    
2825  if (firstline)  if (firstline)
2826    {    {
2827    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2828    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2829      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2830    }    }
2831    
2832  if (common->nltype == NLTYPE_FIXED && common->newline > 255)  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
# Line 1811  if (common->nltype == NLTYPE_FIXED && co Line 2837  if (common->nltype == NLTYPE_FIXED && co
2837    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2838    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);    firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2839    
2840    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2841    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2842    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2843    #ifdef COMPILE_PCRE16
2844      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2845    #endif
2846    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2847    
2848    loop = LABEL();    loop = LABEL();
2849    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2850    leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2851    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2852    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2853    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2854    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2855    
2856    JUMPHERE(leave);    JUMPHERE(quit);
2857    JUMPHERE(firstchar);    JUMPHERE(firstchar);
2858    JUMPHERE(lastchar);    JUMPHERE(lastchar);
2859    
# Line 1848  set_jumps(newline, loop); Line 2877  set_jumps(newline, loop);
2877    
2878  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2879    {    {
2880    leave = JUMP(SLJIT_JUMP);    quit = JUMP(SLJIT_JUMP);
2881    JUMPHERE(foundcr);    JUMPHERE(foundcr);
2882    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);    notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2883    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2884    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2885    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2886    #ifdef COMPILE_PCRE16
2887      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2888    #endif
2889    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2890    JUMPHERE(notfoundnl);    JUMPHERE(notfoundnl);
2891    JUMPHERE(leave);    JUMPHERE(quit);
2892    }    }
2893  JUMPHERE(lastchar);  JUMPHERE(lastchar);
2894  JUMPHERE(firstchar);  JUMPHERE(firstchar);
2895    
2896  if (firstline)  if (firstline)
2897    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2898  }  }
2899    
2900  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2901  {  {
2902  DEFINE_COMPILER;  DEFINE_COMPILER;
2903  struct sljit_label *start;  struct sljit_label *start;
2904  struct sljit_jump *leave;  struct sljit_jump *quit;
2905  struct sljit_jump *found;  struct sljit_jump *found;
2906    #ifndef COMPILE_PCRE8
2907    struct sljit_jump *jump;
2908    #endif
2909    
2910  if (firstline)  if (firstline)
2911    {    {
2912    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);    SLJIT_ASSERT(common->first_line_end != 0);
2913    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);    OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2914      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2915    }    }
2916    
2917  start = LABEL();  start = LABEL();
2918  leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2919  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2920  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2921  if (common->utf8)  if (common->utf)
2922    OP1(SLJIT_MOV_UB, TMP3, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2923    #endif
2924    #ifndef COMPILE_PCRE8
2925    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2926    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2927    JUMPHERE(jump);
2928  #endif  #endif
2929  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2930  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
# Line 1892  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TM Line 2933  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TM
2933  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2934  found = JUMP(SLJIT_C_NOT_ZERO);  found = JUMP(SLJIT_C_NOT_ZERO);
2935    
2936  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2937  if (common->utf8)  if (common->utf)
2938    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2939  else  #endif
2940    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2941  #else  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2942  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  if (common->utf)
2943      {
2944      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2945      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2946      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2947      }
2948    #endif
2949    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2950    if (common->utf)
2951      {
2952      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2953      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2954      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2955      COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2956      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2957      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2958      }
2959  #endif  #endif
2960  JUMPTO(SLJIT_JUMP, start);  JUMPTO(SLJIT_JUMP, start);
2961  JUMPHERE(found);  JUMPHERE(found);
2962  JUMPHERE(leave);  JUMPHERE(quit);
2963    
2964  if (firstline)  if (firstline)
2965    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);    OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2966  }  }
2967    
2968  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)  static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2969  {  {
2970  DEFINE_COMPILER;  DEFINE_COMPILER;
2971  struct sljit_label *loop;  struct sljit_label *loop;
# Line 1917  struct sljit_jump *alreadyfound; Line 2974  struct sljit_jump *alreadyfound;
2974  struct sljit_jump *found;  struct sljit_jump *found;
2975  struct sljit_jump *foundoc = NULL;  struct sljit_jump *foundoc = NULL;
2976  struct sljit_jump *notfound;  struct sljit_jump *notfound;
2977  pcre_uint16 oc, bit;  pcre_uchar oc, bit;
2978    
2979  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);  SLJIT_ASSERT(common->req_char_ptr != 0);
2980    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2981  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2982  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);  toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2983  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);  alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2984    
2985  if (has_firstbyte)  if (has_firstchar)
2986    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2987  else  else
2988    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2989    
2990  loop = LABEL();  loop = LABEL();
2991  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);  notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2992    
2993  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2994  if ((reqbyte & REQ_CASELESS) == 0)  oc = req_char;
2995    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);  if (caseless)
2996      {
2997      oc = TABLE_GET(req_char, common->fcc, req_char);
2998    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2999      if (req_char > 127 && common->utf)
3000        oc = UCD_OTHERCASE(req_char);
3001    #endif
3002      }
3003    if (req_char == oc)
3004      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3005  else  else
3006    {    {
3007    reqbyte &= 0xff;    bit = req_char ^ oc;
   oc = common->fcc[reqbyte];  
   bit = reqbyte ^ oc;  
3008    if (ispowerof2(bit))    if (ispowerof2(bit))
3009      {      {
3010      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3011      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3012      }      }
3013    else    else
3014      {      {
3015      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);      found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3016      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);      foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3017      }      }
3018    }    }
3019  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3020  JUMPTO(SLJIT_JUMP, loop);  JUMPTO(SLJIT_JUMP, loop);
3021    
3022  JUMPHERE(found);  JUMPHERE(found);
3023  if (foundoc)  if (foundoc)
3024    JUMPHERE(foundoc);    JUMPHERE(foundoc);
3025  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3026  JUMPHERE(alreadyfound);  JUMPHERE(alreadyfound);
3027  JUMPHERE(toolong);  JUMPHERE(toolong);
3028  return notfound;  return notfound;
# Line 1966  return notfound; Line 3031  return notfound;
3031  static void do_revertframes(compiler_common *common)  static void do_revertframes(compiler_common *common)
3032  {  {
3033  DEFINE_COMPILER;  DEFINE_COMPILER;
 struct sljit_jump *earlyexit;  
3034  struct sljit_jump *jump;  struct sljit_jump *jump;
3035  struct sljit_label *mainloop;  struct sljit_label *mainloop;
3036    
3037  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3038  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD);  OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3039    GET_LOCAL_BASE(TMP3, 0, 0);
3040    
3041  /* Drop frames until we reach STACK_TOP. */  /* Drop frames until we reach STACK_TOP. */
 earlyexit = CMP(SLJIT_C_LESS, TMP1, 0, STACK_TOP, 0);  
3042  mainloop = LABEL();  mainloop = LABEL();
3043  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3044  jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3045  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3046  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3047  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3048  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
# Line 1987  JUMPTO(SLJIT_JUMP, mainloop); Line 3051  JUMPTO(SLJIT_JUMP, mainloop);
3051  JUMPHERE(jump);  JUMPHERE(jump);
3052  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);  jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3053  /* End of dropping frames. */  /* End of dropping frames. */
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));  
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS_HEAD, TMP1, 0);  
 CMPTO(SLJIT_C_GREATER_EQUAL, TMP1, 0, STACK_TOP, 0, mainloop);  
 JUMPHERE(earlyexit);  
3054  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3055    
3056  JUMPHERE(jump);  JUMPHERE(jump);
# Line 2002  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_R Line 3062  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_R
3062  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
3063    
3064  JUMPHERE(jump);  JUMPHERE(jump);
3065    if (common->mark_ptr != 0)
3066      {
3067      jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3068      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3069      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3070      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3071      JUMPTO(SLJIT_JUMP, mainloop);
3072    
3073      JUMPHERE(jump);
3074      }
3075    
3076  /* Unknown command. */  /* Unknown command. */
3077  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3078  JUMPTO(SLJIT_JUMP, mainloop);  JUMPTO(SLJIT_JUMP, mainloop);
# Line 2010  JUMPTO(SLJIT_JUMP, mainloop); Line 3081  JUMPTO(SLJIT_JUMP, mainloop);
3081  static void check_wordboundary(compiler_common *common)  static void check_wordboundary(compiler_common *common)
3082  {  {
3083  DEFINE_COMPILER;  DEFINE_COMPILER;
3084  struct sljit_jump *beginend;  struct sljit_jump *skipread;
3085  #ifdef SUPPORT_UTF8  #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3086  struct sljit_jump *jump;  struct sljit_jump *jump;
3087  #endif  #endif
3088    
3089  SLJIT_ASSERT(ctype_word == 0x10);  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3090    
3091  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3092  /* Get type of the previous char, and put it to LOCALS1. */  /* Get type of the previous char, and put it to LOCALS1. */
3093  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3094  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3095  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3096  beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);  skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3097  skip_char_back(common);  skip_char_back(common);
3098    check_start_used_ptr(common);
3099  read_char(common);  read_char(common);
3100    
3101  /* Testing char type. */  /* Testing char type. */
3102  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3103  if (common->useucp)  if (common->use_ucp)
3104    {    {
3105    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3106    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
# Line 2045  if (common->useucp) Line 3117  if (common->useucp)
3117  else  else
3118  #endif  #endif
3119    {    {
3120  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3121      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3122    #elif defined SUPPORT_UTF
3123    /* Here LOCALS1 has already been zeroed. */    /* Here LOCALS1 has already been zeroed. */
3124    jump = NULL;    jump = NULL;
3125    if (common->utf8)    if (common->utf)
3126      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3127  #endif  #endif /* COMPILE_PCRE8 */
3128    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3129    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3130    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3131    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3132  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3133      JUMPHERE(jump);
3134    #elif defined SUPPORT_UTF
3135    if (jump != NULL)    if (jump != NULL)
3136      JUMPHERE(jump);      JUMPHERE(jump);
3137  #endif  #endif /* COMPILE_PCRE8 */
3138    }    }
3139  JUMPHERE(beginend);  JUMPHERE(skipread);
3140    
3141  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3142  beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  skipread = check_str_end(common);
3143  peek_char(common);  peek_char(common);
3144    
3145  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
3146  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3147  if (common->useucp)  if (common->use_ucp)
3148    {    {
3149    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3150    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
# Line 2084  if (common->useucp) Line 3160  if (common->useucp)
3160  else  else
3161  #endif  #endif
3162    {    {
3163  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3164      /* TMP2 may be destroyed by peek_char. */
3165      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3166      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3167    #elif defined SUPPORT_UTF
3168    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3169    jump = NULL;    jump = NULL;
3170    if (common->utf8)    if (common->utf)
3171      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3172  #endif  #endif
3173    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3174    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3175    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3176  #ifdef SUPPORT_UTF8  #ifndef COMPILE_PCRE8
3177      JUMPHERE(jump);
3178    #elif defined SUPPORT_UTF
3179    if (jump != NULL)    if (jump != NULL)
3180      JUMPHERE(jump);      JUMPHERE(jump);
3181  #endif  #endif /* COMPILE_PCRE8 */
3182      }
3183    JUMPHERE(skipread);
3184    
3185    OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3186    sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3187    }
3188    
3189    /*
3190      range format:
3191    
3192      ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3193      ranges[1] = first bit (0 or 1)
3194      ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3195    */
3196    
3197    static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3198    {
3199    DEFINE_COMPILER;
3200    struct sljit_jump *jump;
3201    
3202    if (ranges[0] < 0)
3203      return FALSE;
3204    
3205    switch(ranges[0])
3206      {
3207      case 1:
3208      if (readch)
3209        read_char(common);
3210      add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3211      return TRUE;
3212    
3213      case 2:
3214      if (readch)
3215        read_char(common);
3216      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3217      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3218      return TRUE;
3219    
3220      case 4:
3221      if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3222        {
3223        if (readch)
3224          read_char(common);
3225        if (ranges[1] != 0)
3226          {
3227          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3228          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3229          }
3230        else
3231          {
3232          jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3233          add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3234          JUMPHERE(jump);
3235          }
3236        return TRUE;
3237        }
3238      if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3239        {
3240        if (readch)
3241          read_char(common);
3242        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3243        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3244        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3245        return TRUE;
3246        }
3247      return FALSE;
3248    
3249      default:
3250      return FALSE;
3251      }
3252    }
3253    
3254    static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3255    {
3256    int i, bit, length;
3257    const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3258    
3259    bit = ctypes[0] & flag;
3260    ranges[0] = -1;
3261    ranges[1] = bit != 0 ? 1 : 0;
3262    length = 0;
3263    
3264    for (i = 1; i < 256; i++)
3265      if ((ctypes[i] & flag) != bit)
3266        {
3267        if (length >= MAX_RANGE_SIZE)
3268          return;
3269        ranges[2 + length] = i;
3270        length++;
3271        bit ^= flag;
3272        }
3273    
3274    if (bit != 0)
3275      {
3276      if (length >= MAX_RANGE_SIZE)
3277        return;
3278      ranges[2 + length] = 256;
3279      length++;
3280      }
3281    ranges[0] = length;
3282    }
3283    
3284    static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3285    {
3286    int ranges[2 + MAX_RANGE_SIZE];
3287    pcre_uint8 bit, cbit, all;
3288    int i, byte, length = 0;
3289    
3290    bit = bits[0] & 0x1;
3291    ranges[1] = bit;
3292    /* Can be 0 or 255. */
3293    all = -bit;
3294    
3295    for (i = 0; i < 256; )
3296      {
3297      byte = i >> 3;
3298      if ((i & 0x7) == 0 && bits[byte] == all)
3299        i += 8;
3300      else
3301        {
3302        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3303        if (cbit != bit)
3304          {
3305          if (length >= MAX_RANGE_SIZE)
3306            return FALSE;
3307          ranges[2 + length] = i;
3308          length++;
3309          bit = cbit;
3310          all = -cbit;
3311          }
3312        i++;
3313        }
3314      }
3315    
3316    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3317      {
3318      if (length >= MAX_RANGE_SIZE)
3319        return FALSE;
3320      ranges[2 + length] = 256;
3321      length++;
3322    }    }
3323  JUMPHERE(beginend);  ranges[0] = length;
3324    
3325  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);  return check_ranges(common, ranges, backtracks, FALSE);
 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  
3326  }  }
3327    
3328  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 2109  static void check_anynewline(compiler_co Line 3330  static void check_anynewline(compiler_co
3330  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3331  DEFINE_COMPILER;  DEFINE_COMPILER;
3332    
3333  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3334    
3335  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3336  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3337  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3338  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3339  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3340  if (common->utf8)  #ifdef COMPILE_PCRE8
3341    if (common->utf)
3342    {    {
3343    #endif
3344    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3345    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3346    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3347    #ifdef COMPILE_PCRE8
3348    }    }
3349  #endif  #endif
3350    #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3351  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3352  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3353  }  }
# Line 2132  static void check_hspace(compiler_common Line 3357  static void check_hspace(compiler_common
3357  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3358  DEFINE_COMPILER;  DEFINE_COMPILER;
3359    
3360  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3361    
3362  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3363  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3364  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3365  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3366  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3367  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3368  if (common->utf8)  #ifdef COMPILE_PCRE8
3369    if (common->utf)
3370    {    {
3371    #endif
3372    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3373    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3374    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
# Line 2155  if (common->utf8) Line 3382  if (common->utf8)
3382    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3383    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3384    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3385    #ifdef COMPILE_PCRE8
3386    }    }
3387  #endif  #endif
3388    #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3389  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3390    
3391  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
# Line 2167  static void check_vspace(compiler_common Line 3396  static void check_vspace(compiler_common
3396  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3397  DEFINE_COMPILER;  DEFINE_COMPILER;
3398    
3399  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3400    
3401  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3402  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3403  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3404  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3405  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3406  if (common->utf8)  #ifdef COMPILE_PCRE8
3407    if (common->utf)
3408    {    {
3409    #endif
3410    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3411    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3412    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3413    #ifdef COMPILE_PCRE8
3414    }    }
3415  #endif  #endif
3416    #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3417  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3418    
3419  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
# Line 2195  DEFINE_COMPILER; Line 3428  DEFINE_COMPILER;
3428  struct sljit_jump *jump;  struct sljit_jump *jump;
3429  struct sljit_label *label;  struct sljit_label *label;
3430    
3431  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3432  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3433  OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);  OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3434  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3435  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3436  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3437    
3438  label = LABEL();  label = LABEL();
3439  OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);  OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3440  OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3441  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3442  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3443  JUMPTO(SLJIT_C_NOT_ZERO, label);  JUMPTO(SLJIT_C_NOT_ZERO, label);
3444    
3445  JUMPHERE(jump);  JUMPHERE(jump);
3446  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3447  OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);  OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3448  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3449  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
# Line 2224  DEFINE_COMPILER; Line 3457  DEFINE_COMPILER;
3457  struct sljit_jump *jump;  struct sljit_jump *jump;
3458  struct sljit_label *label;  struct sljit_label *label;
3459    
3460  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3461  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3462    
3463  OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);  OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3464  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3465  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3466  OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);  OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3467  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3468  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3469    
3470  label = LABEL();  label = LABEL();
3471  OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);  OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3472  OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);  OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3473    #ifndef COMPILE_PCRE8
3474    jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3475    #endif
3476  OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);  OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3477    #ifndef COMPILE_PCRE8
3478    JUMPHERE(jump);
3479    jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3480    #endif
3481  OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);  OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3482    #ifndef COMPILE_PCRE8
3483    JUMPHERE(jump);
3484    #endif
3485  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);  jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3486  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3487  JUMPTO(SLJIT_C_NOT_ZERO, label);  JUMPTO(SLJIT_C_NOT_ZERO, label);
3488    
3489  JUMPHERE(jump);  JUMPHERE(jump);
3490  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3491  OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);  OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3492  OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3493  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);  OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
# Line 2255  sljit_emit_fast_return(compiler, RETURN_ Line 3498  sljit_emit_fast_return(compiler, RETURN_
3498  #undef CHAR1  #undef CHAR1
3499  #undef CHAR2  #undef CHAR2
3500    
3501  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined SUPPORT_UCP
 #ifdef SUPPORT_UCP  
3502    
3503  static uschar * SLJIT_CALL do_utf8caselesscmp(uschar *src1, jit_arguments *args, uschar *end1)  static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3504  {  {
3505  /* This function would be ineffective to do in JIT level. */  /* This function would be ineffective to do in JIT level. */
3506  int c1, c2;  int c1, c2;
3507  uschar *src2 = args->ptr;  const pcre_uchar *src2 = args->uchar_ptr;
3508  uschar *end2 = (uschar*)args->end;  const pcre_uchar *end2 = args->end;
3509    
3510  while (src1 < end1)  while (src1 < end1)
3511    {    {
3512    if (src2 >= end2)    if (src2 >= end2)
3513      return 0;      return (pcre_uchar*)1;
3514    GETCHARINC(c1, src1);    GETCHARINC(c1, src1);
3515    GETCHARINC(c2, src2);    GETCHARINC(c2, src2);
3516    if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return 0;    if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
3517    }    }
3518  return src2;  return src2;
3519  }  }
3520    
3521  #endif  #endif /* SUPPORT_UTF && SUPPORT_UCP */
 #endif  
3522    
3523  static uschar *byte_sequence_compare(compiler_common *common, BOOL caseless, uschar *cc,  static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3524      compare_context* context, jump_list **fallbacks)      compare_context* context, jump_list **backtracks)
3525  {  {
3526  DEFINE_COMPILER;  DEFINE_COMPILER;
3527  unsigned int othercasebit = 0;  unsigned int othercasebit = 0;
3528  uschar *othercasebyte = NULL;  pcre_uchar *othercasechar = NULL;
3529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3530  int utf8length;  int utflength;
3531  #endif  #endif
3532    
3533  if (caseless && char_has_othercase(common, cc))  if (caseless && char_has_othercase(common, cc))
# Line 2294  if (caseless && char_has_othercase(commo Line 3535  if (caseless && char_has_othercase(commo
3535    othercasebit = char_get_othercase_bit(common, cc);    othercasebit = char_get_othercase_bit(common, cc);
3536    SLJIT_ASSERT(othercasebit);    SLJIT_ASSERT(othercasebit);
3537    /* Extracting bit difference info. */    /* Extracting bit difference info. */
3538    othercasebyte = cc + (othercasebit >> 8);  #ifdef COMPILE_PCRE8
3539      othercasechar = cc + (othercasebit >> 8);
3540    othercasebit &= 0xff;    othercasebit &= 0xff;
3541    #else
3542    #ifdef COMPILE_PCRE16
3543      othercasechar = cc + (othercasebit >> 9);
3544      if ((othercasebit & 0x100) != 0)
3545        othercasebit = (othercasebit & 0xff) << 8;
3546      else
3547        othercasebit &= 0xff;
3548    #endif
3549    #endif
3550    }    }
3551    
3552  if (context->sourcereg == -1)  if (context->sourcereg == -1)
3553    {    {
3554    #ifdef COMPILE_PCRE8
3555  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3556    if (context->length >= 4)    if (context->length >= 4)
3557      OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3558    else if (context->length >= 2)    else if (context->length >= 2)
3559      OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560    else    else
3561  #endif  #endif
3562      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3563    #else
3564    #ifdef COMPILE_PCRE16
3565    #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3566      if (context->length >= 4)
3567        OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3568      else
3569    #endif
3570        OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3571    #endif
3572    #endif /* COMPILE_PCRE8 */
3573    context->sourcereg = TMP2;    context->sourcereg = TMP2;
3574    }    }
3575    
3576  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3577  utf8length = 1;  utflength = 1;
3578  if (common->utf8 && *cc >= 0xc0)  if (common->utf && HAS_EXTRALEN(*cc))
3579    utf8length += _pcre_utf8_table4[*cc & 0x3f];    utflength += GET_EXTRALEN(*cc);
3580    
3581  do  do
3582    {    {
3583  #endif  #endif
3584    
3585    context->length--;    context->length -= IN_UCHARS(1);
3586  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3587    
3588    /* Unaligned read is supported. */    /* Unaligned read is supported. */
3589    if (othercasebit != 0 && othercasebyte == cc)    if (othercasebit != 0 && othercasechar == cc)
3590      {      {
3591      context->c.asbytes[context->byteptr] = *cc | othercasebit;      context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3592      context->oc.asbytes[context->byteptr] = othercasebit;      context->oc.asuchars[context->ucharptr] = othercasebit;
3593      }      }
3594    else    else
3595      {      {
3596      context->c.asbytes[context->byteptr] = *cc;      context->c.asuchars[context->ucharptr] = *cc;
3597      context->oc.asbytes[context->byteptr] = 0;      context->oc.asuchars[context->ucharptr] = 0;
3598      }      }
3599    context->byteptr++;    context->ucharptr++;
3600    
3601    if (context->byteptr >= 4 || context->length == 0 || (context->byteptr == 2 && context->length == 1))  #ifdef COMPILE_PCRE8
3602      if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3603    #else
3604      if (context->ucharptr >= 2 || context->length == 0)
3605    #endif
3606      {      {
3607      if (context->length >= 4)      if (context->length >= 4)
3608        OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);        OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3609    #ifdef COMPILE_PCRE8
3610      else if (context->length >= 2)      else if (context->length >= 2)
3611        OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);        OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3612      else if (context->length >= 1)      else if (context->length >= 1)
3613        OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);        OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3614    #else
3615        else if (context->length >= 2)
3616          OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3617    #endif
3618      context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;      context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3619    
3620      switch(context->byteptr)      switch(context->ucharptr)
3621        {        {
3622        case 4:        case 4 / sizeof(pcre_uchar):
3623        if (context->oc.asint != 0)        if (context->oc.asint != 0)
3624          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3625        add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3626        break;        break;
3627    
3628        case 2:        case 2 / sizeof(pcre_uchar):
3629        if (context->oc.asshort != 0)        if (context->oc.asushort != 0)
3630          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3631        add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3632        break;        break;
3633    
3634    #ifdef COMPILE_PCRE8
3635        case 1:        case 1:
3636        if (context->oc.asbyte != 0)        if (context->oc.asbyte != 0)
3637          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);          OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3638        add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3639        break;        break;
3640    #endif
3641    
3642        default:        default:
3643        SLJIT_ASSERT_STOP();        SLJIT_ASSERT_STOP();
3644        break;        break;
3645        }        }
3646      context->byteptr = 0;      context->ucharptr = 0;
3647      }      }
3648    
3649  #else  #else
3650    
3651    /* Unaligned read is unsupported. */    /* Unaligned read is unsupported. */
3652    #ifdef COMPILE_PCRE8
3653    if (context->length > 0)    if (context->length > 0)
3654      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3655    #else
3656      if (context->length > 0)
3657        OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3658    #endif
3659    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3660    
3661    if (othercasebit != 0 && othercasebyte == cc)    if (othercasebit != 0 && othercasechar == cc)
3662      {      {
3663      OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);      OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3664      add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3665      }      }
3666    else    else
3667      add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3668    
3669  #endif  #endif
3670    
3671    cc++;    cc++;
3672  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3673    utf8length--;    utflength--;
3674    }    }
3675  while (utf8length > 0);  while (utflength > 0);
3676  #endif  #endif
3677    
3678  return cc;  return cc;
3679  }  }
3680    
3681  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3682    
3683  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
3684    if ((value) != typeoffset) \    if ((value) != typeoffset) \
# Line 2422  return cc; Line 3700  return cc;
3700      } \      } \
3701    charoffset = (value);    charoffset = (value);
3702    
3703  static void compile_xclass_hotpath(compiler_common *common, uschar *cc, jump_list **fallbacks)  static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3704  {  {
3705  DEFINE_COMPILER;  DEFINE_COMPILER;
3706  jump_list *found = NULL;  jump_list *found = NULL;
3707  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3708  unsigned int c;  unsigned int c;
3709  int compares;  int compares;
3710  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
3711  uschar *ccbegin;  pcre_uchar *ccbegin;
3712  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3713  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3714  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
3715  int typereg = TMP1, scriptreg = TMP1, typeoffset;  int typereg = TMP1, scriptreg = TMP1;
3716    unsigned int typeoffset;
3717  #endif  #endif
3718  int charoffset, invertcmp, numberofcmps;  int invertcmp, numberofcmps;
3719    unsigned int charoffset;
3720    
3721  /* Although SUPPORT_UTF8 must be defined, we are not necessary in utf8 mode. */  /* Although SUPPORT_UTF must be defined, we are
3722  check_input_end(common, fallbacks);     not necessary in utf mode even in 8 bit mode. */
3723    detect_partial_match(common, backtracks);
3724  read_char(common);  read_char(common);
3725    
3726  if ((*cc++ & XCL_MAP) != 0)  if ((*cc++ & XCL_MAP) != 0)
3727    {    {
3728    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3729    if (common->utf8)  #ifndef COMPILE_PCRE8
3730      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3731    #elif defined SUPPORT_UTF
3732      if (common->utf)
3733      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3734    #endif
3735    
3736    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3737    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);      {
3738    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3739    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3740    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3741    add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3742        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3743        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3744        }
3745    
3746    if (common->utf8)  #ifndef COMPILE_PCRE8
3747      JUMPHERE(jump);
3748    #elif defined SUPPORT_UTF
3749      if (common->utf)
3750      JUMPHERE(jump);      JUMPHERE(jump);
3751    #endif
3752    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3753  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3754    charsaved = TRUE;    charsaved = TRUE;
3755  #endif  #endif
3756    cc += 32;    cc += 32 / sizeof(pcre_uchar);
3757    }    }
3758    
3759  /* Scanning the necessary info. */  /* Scanning the necessary info. */
# Line 2473  while (*cc != XCL_END) Line 3765  while (*cc != XCL_END)
3765    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
3766      {      {
3767      cc += 2;      cc += 2;
3768  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3769      if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3770  #endif  #endif
3771  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3772      needschar = TRUE;      needschar = TRUE;
# Line 2483  while (*cc != XCL_END) Line 3775  while (*cc != XCL_END)
3775    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
3776      {      {
3777      cc += 2;      cc += 2;
3778  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3779      if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3780  #endif  #endif
3781      cc++;      cc++;
3782  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3783      if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3784  #endif  #endif
3785  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3786      needschar = TRUE;      needschar = TRUE;
# Line 2558  if (needstype || needsscript) Line 3850  if (needstype || needsscript)
3850      {      {
3851      if (scriptreg == TMP1)      if (scriptreg == TMP1)
3852        {        {
3853        OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script));        OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3854        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3855        }        }
3856      else      else
3857        {        {
3858        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3859        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script));        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3860        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);        OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3861        }        }
3862      }      }
# Line 2582  typeoffset = 0; Line 3874  typeoffset = 0;
3874  while (*cc != XCL_END)  while (*cc != XCL_END)
3875    {    {
3876    compares--;    compares--;
3877    invertcmp = (compares == 0 && list != fallbacks);    invertcmp = (compares == 0 && list != backtracks);
3878    jump = NULL;    jump = NULL;
3879    
3880    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
3881      {      {
3882      cc ++;      cc ++;
3883  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3884      if (common->utf8)      if (common->utf)
3885        {        {
3886        GETCHARINC(c, cc);        GETCHARINC(c, cc);
3887        }        }
# Line 2619  while (*cc != XCL_END) Line 3911  while (*cc != XCL_END)
3911    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
3912      {      {
3913      cc ++;      cc ++;
3914  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3915      if (common->utf8)      if (common->utf)
3916        {        {
3917        GETCHARINC(c, cc);        GETCHARINC(c, cc);
3918        }        }
# Line 2628  while (*cc != XCL_END) Line 3920  while (*cc != XCL_END)
3920  #endif  #endif
3921        c = *cc++;        c = *cc++;