/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1533 - (show annotations)
Tue Mar 24 08:22:29 2015 UTC (4 years, 8 months ago) by zherczeg
Original Path: code/trunk/pcre_jit_compile.c
File MIME type: text/plain
File size: 330941 byte(s)
Error occurred while calculating annotation data.
Fix JIT compilation of conditional blocks, which assertion is converted to (*FAIL).
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the generated code: the jit_function typedef in this
   file takes a pointer to this structure as its only parameter. Pointer
   members are grouped first, followed by the integer members. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Per-pattern JIT data. The three arrays hold one slot for each of the
   JIT_NUMBER_OF_COMPILE_MODES compile modes; the remaining members are
   shared by all modes. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list pairing a jump (start) with a label (quit). */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list associating a label with an address slot.
   NOTE(review): presumably *update_addr receives the label's final address
   once code generation finishes - confirm against the users of this list. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values.
   NOTE(review): presumably stored in the framesize members of the backtrack
   structures when no frame / no stack space is needed - confirm. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Tags distinguishing two kinds of control entries.
   NOTE(review): presumably used for entries on the control verb chain
   (see control_head_ptr in compiler_common) - confirm. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Function pointer type using the SLJIT_CALL calling convention: takes a
   pointer to a jit_arguments block and returns an int. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
/* The *_backtrack structures defined below each embed this structure as
   their first member, named `common`. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack data for an assertion. Also reachable through the `assert`
   member of the union in bracket_backtrack. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack data for a bracket. The union `u` holds exactly one of its
   members at a time; the comments on the members say which opcodes use
   which one. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack data for the *POS bracket variants.
   NOTE(review): inferred from the name (OP_BRAPOS, OP_CBRAPOS, ...) - confirm. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack data carrying a single matching-path label.
   NOTE(review): presumably used for OP_BRAMINZERO - confirm. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack data for an iterator: remembers the label at which the next
   iteration starts. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Node of a singly linked list describing one recursion target: its entry
   label, the calls collected before that label exists, and the starting
   opcode it refers to. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack data for a recursion.
   NOTE(review): inlined_pattern presumably records whether the recursed
   pattern was emitted inline instead of being called - confirm. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack data for the internal OP_THEN_TRAP pseudo opcode (defined just
   above this structure as OP_TABLE_LENGTH, i.e. outside the real opcode range). */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by all code generation routines for one compilation. The
   members whose names end in _ptr and have type int are offsets of private
   words (see OVECTOR_START / PRIVATE_DATA), not C pointers. A value of 0 in
   recursive_head_ptr, capture_last_ptr or mark_ptr means "not allocated yet"
   (see check_opcode_types). */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
/* NOTE(review): the comment below looks inverted relative to the member
   name; check_opcode_types also forces this to TRUE for OP_SET_SOM.
   Confirm against the code that initializes this member. */
361 /* TRUE, when minlength is greater than 0. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
434 /* For byte_sequence_compare. */
435
/* Comparison state. The members after sourcereg exist only when
   SLJIT_UNALIGNED is enabled. The unions `c` and `oc` have identical
   layouts: the same storage viewed as an int, a 16-bit value, or an array
   of code units whose element count depends on the code unit width
   (4 x 8-bit, 2 x 16-bit or 1 x 32-bit).
   NOTE(review): `oc` presumably holds the other-case counterpart of `c` -
   confirm against byte_sequence_compare. */
436 typedef struct compare_context {
437 int length;
438 int sourcereg;
439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
440 int ucharptr;
441 union {
442 sljit_si asint;
443 sljit_uh asushort;
444 #if defined COMPILE_PCRE8
445 sljit_ub asbyte;
446 sljit_ub asuchars[4];
447 #elif defined COMPILE_PCRE16
448 sljit_uh asuchars[2];
449 #elif defined COMPILE_PCRE32
450 sljit_ui asuchars[1];
451 #endif
452 } c;
453 union {
454 sljit_si asint;
455 sljit_uh asushort;
456 #if defined COMPILE_PCRE8
457 sljit_ub asbyte;
458 sljit_ub asuchars[4];
459 #elif defined COMPILE_PCRE16
460 sljit_uh asuchars[2];
461 #elif defined COMPILE_PCRE32
462 sljit_ui asuchars[1];
463 #endif
464 } oc;
465 #endif
466 } compare_context;
467
468 /* Undefine sljit macros. */
469 #undef CMP
470
471 /* Used for accessing the elements of the stack. */
472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
473
474 #define TMP1 SLJIT_R0
475 #define TMP2 SLJIT_R2
476 #define TMP3 SLJIT_R3
477 #define STR_PTR SLJIT_S0
478 #define STR_END SLJIT_S1
479 #define STACK_TOP SLJIT_R1
480 #define STACK_LIMIT SLJIT_S2
481 #define COUNT_MATCH SLJIT_S3
482 #define ARGUMENTS SLJIT_S4
483 #define RETURN_ADDR SLJIT_R4
484
485 /* Local space layout. */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0 (0 * sizeof(sljit_sw))
488 #define LOCALS1 (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet been reached. */
498 #define OVECTOR_START (common->ovector_start)
499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
516 /* Shortcuts. */
/* Every macro below except DEFINE_COMPILER expands to an sljit call whose
   first argument is a variable named `compiler`. DEFINE_COMPILER declares
   that variable from `common->compiler`, so it requires a variable named
   `common` in scope. JUMPHERE binds a jump to a label emitted at the
   current position; JUMPTO and CMPTO emit a jump / compare-and-jump that is
   bound to an existing label. */
517 #define DEFINE_COMPILER \
518 struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
523 #define LABEL() \
524 sljit_emit_label(compiler)
525 #define JUMP(type) \
526 sljit_emit_jump(compiler, (type))
527 #define JUMPTO(type, label) \
528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
529 #define JUMPHERE(jump) \
530 sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532 sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540 sljit_get_local_base(compiler, (dst), (dstw), (offset))
541
542 #define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Bit-count lookup table: ones_in_half_byte[v] is the number of set bits
   in the 4-bit value v (0 <= v <= 15). The table is never written, so it
   is declared const to keep it in read-only storage and to reject
   accidental modification at compile time. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
572 /* Functions which might need modification for every newly supported opcode:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
/* Returns the address of the opcode that follows the one at cc, or NULL
   when the opcode cannot be handled (OP_ANYBYTE while common->utf is set,
   or any opcode not listed in the switch).

   The opcodes fall into these groups:
   - length taken directly from the PRIV(OP_lengths) table;
   - single character opcodes: table length, plus the extra code units of
     the character when common->utf is set;
   - OP_TYPE* iterators: table length minus one;
   - OP_ANYBYTE: one code unit (non-UTF only);
   - OP_XCLASS: length read from the opcode itself with GET(cc, 1);
   - OP_MARK and the *_ARG verbs: 1 + 2 + cc[1]. */
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
675 return cc + PRIV(OP_lengths)[*cc];
676
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
/* After the addition, cc[-1] is the last code unit covered by the table
   length; it is tested to see whether extra code units follow. */
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
/* NOTE(review): the "- 1" presumably makes the result point at the
   repeated character type opcode, so that it is visited as an opcode of
   its own - confirm. */
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
/* cc[1] is a length stored right after the opcode. */
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
/* Scans the opcodes in [cc, ccend) once and records in `common` which
   features the pattern uses.

   Returns FALSE when the pattern cannot be handled: a conditional group
   whose first item is OP_CALLOUT, or an opcode for which next_opcode()
   returns NULL. Returns TRUE otherwise.

   Side effects on `common`:
   - clears the optimized_cbracket entry of every group that is referenced
     by number or by name, and of every OP_CBRAPOS / OP_SCBRAPOS group;
   - reserves one private word (by advancing ovector_start) for
     recursive_head_ptr, capture_last_ptr and mark_ptr the first time the
     corresponding opcode is seen;
   - sets has_set_som, might_be_empty, has_then, has_skip_arg,
     needs_start_ptr, and sets control_head_ptr to 1, as the opcodes
     require. */
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
/* The opcode stores an index into the name table and a count; the group
   number at the start of each of those `count` consecutive name table
   entries is marked as not optimized. */
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
/* Only a non-zero marker is stored here.
   NOTE(review): presumably replaced by a real offset later - confirm. */
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
/* Every other opcode only needs to be stepped over. */
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
/* Checks whether the bracket starting at `begin` is followed by identical
   copies of itself, i.e. whether the compiled pattern contains an unrolled
   repeat, and if so records the repeat in common->private_data_ptrs.

   Only brackets closed by a plain OP_KET are considered. Three consecutive
   private_data_ptrs slots, starting at the index of the closing OP_KET's
   link field, receive:
     [0] a code length: the distance from the KET to the end of the copies;
     [1] the repeat kind: OP_EXACT, OP_UPTO or OP_MINUPTO;
     [2] the repeat count.

   Returns TRUE when a repeat was recorded now or on an earlier call (slot
   [0] already non-zero), FALSE otherwise. */
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
/* min counts the original bracket plus every directly following copy that
   has the same length and identical code units. */
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
/* Exactly two copies are never converted. */
954 if (min == 2)
955 return FALSE;
956
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
/* Count the optional copies: each one is the zero-repeat opcode, an OP_BRA
   and then a copy of the bracket, with the next one beginning right after
   that copy. */
961 type = *next;
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
/* The innermost optional copy has no OP_BRA wrapper; it must be followed
   by one OP_KET for each of the `max` wrappers opened above. */
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
/* The last mandatory copy now belongs to the bounded repeat recorded
   above, so exclude it from the exact repeat: one copy fewer, and max_end
   moved back to the start of that copy. */
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of `case` labels meant to be placed directly inside a switch on
   an opcode (set_private_data_ptrs uses them this way). Each macro expands
   to labels only, so the statements that follow it at the use site apply to
   every opcode in the group. The *_TYPE_* groups contain the OP_TYPE*
   iterator opcodes; the others contain the single character iterators,
   including their caseless and negated variants. */
1008 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1009 case OP_MINSTAR: \
1010 case OP_MINPLUS: \
1011 case OP_QUERY: \
1012 case OP_MINQUERY: \
1013 case OP_MINSTARI: \
1014 case OP_MINPLUSI: \
1015 case OP_QUERYI: \
1016 case OP_MINQUERYI: \
1017 case OP_NOTMINSTAR: \
1018 case OP_NOTMINPLUS: \
1019 case OP_NOTQUERY: \
1020 case OP_NOTMINQUERY: \
1021 case OP_NOTMINSTARI: \
1022 case OP_NOTMINPLUSI: \
1023 case OP_NOTQUERYI: \
1024 case OP_NOTMINQUERYI:
1025
1026 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1027 case OP_STAR: \
1028 case OP_PLUS: \
1029 case OP_STARI: \
1030 case OP_PLUSI: \
1031 case OP_NOTSTAR: \
1032 case OP_NOTPLUS: \
1033 case OP_NOTSTARI: \
1034 case OP_NOTPLUSI:
1035
1036 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1037 case OP_UPTO: \
1038 case OP_MINUPTO: \
1039 case OP_UPTOI: \
1040 case OP_MINUPTOI: \
1041 case OP_NOTUPTO: \
1042 case OP_NOTMINUPTO: \
1043 case OP_NOTUPTOI: \
1044 case OP_NOTMINUPTOI:
1045
1046 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1047 case OP_TYPEMINSTAR: \
1048 case OP_TYPEMINPLUS: \
1049 case OP_TYPEQUERY: \
1050 case OP_TYPEMINQUERY:
1051
1052 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1053 case OP_TYPESTAR: \
1054 case OP_TYPEPLUS:
1055
1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1057 case OP_TYPEUPTO: \
1058 case OP_TYPEMINUPTO:
1059
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067
1068 while (cc < ccend)
1069 {
1070 space = 0;
1071 size = 0;
1072 bracketlen = 0;
1073 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1074 return;
1075
1076 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1077 if (detect_repeat(common, cc))
1078 {
1079 /* These brackets are converted to repeats, so no global
1080 based single character repeat is allowed. */
1081 if (cc >= end)
1082 end = bracketend(cc);
1083 }
1084
1085 switch(*cc)
1086 {
1087 case OP_KET:
1088 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 cc += common->private_data_ptrs[cc + 1 - common->start];
1093 }
1094 cc += 1 + LINK_SIZE;
1095 break;
1096
1097 case OP_ASSERT:
1098 case OP_ASSERT_NOT:
1099 case OP_ASSERTBACK:
1100 case OP_ASSERTBACK_NOT:
1101 case OP_ONCE:
1102 case OP_ONCE_NC:
1103 case OP_BRAPOS:
1104 case OP_SBRA:
1105 case OP_SBRAPOS:
1106 case OP_SCOND:
1107 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1108 private_data_ptr += sizeof(sljit_sw);
1109 bracketlen = 1 + LINK_SIZE;
1110 break;
1111
1112 case OP_CBRAPOS:
1113 case OP_SCBRAPOS:
1114 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1115 private_data_ptr += sizeof(sljit_sw);
1116 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1117 break;
1118
1119 case OP_COND:
1120 /* Might be a hidden SCOND. */
1121 alternative = cc + GET(cc, 1);
1122 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1123 {
1124 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1125 private_data_ptr += sizeof(sljit_sw);
1126 }
1127 bracketlen = 1 + LINK_SIZE;
1128 break;
1129
1130 case OP_BRA:
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_CBRA:
1135 case OP_SCBRA:
1136 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1137 break;
1138
1139 CASE_ITERATOR_PRIVATE_DATA_1
1140 space = 1;
1141 size = -2;
1142 break;
1143
1144 CASE_ITERATOR_PRIVATE_DATA_2A
1145 space = 2;
1146 size = -2;
1147 break;
1148
1149 CASE_ITERATOR_PRIVATE_DATA_2B
1150 space = 2;
1151 size = -(2 + IMM2_SIZE);
1152 break;
1153
1154 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1155 space = 1;
1156 size = 1;
1157 break;
1158
1159 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1160 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1161 space = 2;
1162 size = 1;
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1166 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1167 space = 2;
1168 size = 1 + IMM2_SIZE;
1169 break;
1170
1171 case OP_CLASS:
1172 case OP_NCLASS:
1173 size += 1 + 32 / sizeof(pcre_uchar);
1174 space = get_class_iterator_size(cc + size);
1175 break;
1176
1177 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1178 case OP_XCLASS:
1179 size = GET(cc, 1);
1180 space = get_class_iterator_size(cc + size);
1181 break;
1182 #endif
1183
1184 default:
1185 cc = next_opcode(common, cc);
1186 SLJIT_ASSERT(cc != NULL);
1187 break;
1188 }
1189
1190 /* Character iterators, which are not inside a repeated bracket,
1191 gets a private slot instead of allocating it on the stack. */
1192 if (space > 0 && cc >= end)
1193 {
1194 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1195 private_data_ptr += sizeof(sljit_sw) * space;
1196 }
1197
1198 if (size != 0)
1199 {
1200 if (size < 0)
1201 {
1202 cc += -size;
1203 #ifdef SUPPORT_UTF
1204 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1205 #endif
1206 }
1207 else
1208 cc += size;
1209 }
1210
1211 if (bracketlen > 0)
1212 {
1213 if (cc >= end)
1214 {
1215 end = bracketend(cc);
1216 if (end[-1 - LINK_SIZE] == OP_KET)
1217 end = NULL;
1218 }
1219 cc += bracketlen;
1220 }
1221 }
1222 *private_data_start = private_data_ptr;
1223 }
1224
1225 /* Returns with a frame_types (always < 0) if no need for frame. */
1226 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1227 {
1228 int length = 0;
1229 int possessive = 0;
1230 BOOL stack_restore = FALSE;
1231 BOOL setsom_found = recursive;
1232 BOOL setmark_found = recursive;
1233 /* The last capture is a local variable even for recursions. */
1234 BOOL capture_last_found = FALSE;
1235
1236 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1237 SLJIT_ASSERT(common->control_head_ptr != 0);
1238 *needs_control_head = TRUE;
1239 #else
1240 *needs_control_head = FALSE;
1241 #endif
1242
1243 if (ccend == NULL)
1244 {
1245 ccend = bracketend(cc) - (1 + LINK_SIZE);
1246 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1247 {
1248 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1249 /* This is correct regardless of common->capture_last_ptr. */
1250 capture_last_found = TRUE;
1251 }
1252 cc = next_opcode(common, cc);
1253 }
1254
1255 SLJIT_ASSERT(cc != NULL);
1256 while (cc < ccend)
1257 switch(*cc)
1258 {
1259 case OP_SET_SOM:
1260 SLJIT_ASSERT(common->has_set_som);
1261 stack_restore = TRUE;
1262 if (!setsom_found)
1263 {
1264 length += 2;
1265 setsom_found = TRUE;
1266 }
1267 cc += 1;
1268 break;
1269
1270 case OP_MARK:
1271 case OP_PRUNE_ARG:
1272 case OP_THEN_ARG:
1273 SLJIT_ASSERT(common->mark_ptr != 0);
1274 stack_restore = TRUE;
1275 if (!setmark_found)
1276 {
1277 length += 2;
1278 setmark_found = TRUE;
1279 }
1280 if (common->control_head_ptr != 0)
1281 *needs_control_head = TRUE;
1282 cc += 1 + 2 + cc[1];
1283 break;
1284
1285 case OP_RECURSE:
1286 stack_restore = TRUE;
1287 if (common->has_set_som && !setsom_found)
1288 {
1289 length += 2;
1290 setsom_found = TRUE;
1291 }
1292 if (common->mark_ptr != 0 && !setmark_found)
1293 {
1294 length += 2;
1295 setmark_found = TRUE;
1296 }
1297 if (common->capture_last_ptr != 0 && !capture_last_found)
1298 {
1299 length += 2;
1300 capture_last_found = TRUE;
1301 }
1302 cc += 1 + LINK_SIZE;
1303 break;
1304
1305 case OP_CBRA:
1306 case OP_CBRAPOS:
1307 case OP_SCBRA:
1308 case OP_SCBRAPOS:
1309 stack_restore = TRUE;
1310 if (common->capture_last_ptr != 0 && !capture_last_found)
1311 {
1312 length += 2;
1313 capture_last_found = TRUE;
1314 }
1315 length += 3;
1316 cc += 1 + LINK_SIZE + IMM2_SIZE;
1317 break;
1318
1319 default:
1320 stack_restore = TRUE;
1321 /* Fall through. */
1322
1323 case OP_NOT_WORD_BOUNDARY:
1324 case OP_WORD_BOUNDARY:
1325 case OP_NOT_DIGIT:
1326 case OP_DIGIT:
1327 case OP_NOT_WHITESPACE:
1328 case OP_WHITESPACE:
1329 case OP_NOT_WORDCHAR:
1330 case OP_WORDCHAR:
1331 case OP_ANY:
1332 case OP_ALLANY:
1333 case OP_ANYBYTE:
1334 case OP_NOTPROP:
1335 case OP_PROP:
1336 case OP_ANYNL:
1337 case OP_NOT_HSPACE:
1338 case OP_HSPACE:
1339 case OP_NOT_VSPACE:
1340 case OP_VSPACE:
1341 case OP_EXTUNI:
1342 case OP_EODN:
1343 case OP_EOD:
1344 case OP_CIRC:
1345 case OP_CIRCM:
1346 case OP_DOLL:
1347 case OP_DOLLM:
1348 case OP_CHAR:
1349 case OP_CHARI:
1350 case OP_NOT:
1351 case OP_NOTI:
1352
1353 case OP_EXACT:
1354 case OP_POSSTAR:
1355 case OP_POSPLUS:
1356 case OP_POSQUERY:
1357 case OP_POSUPTO:
1358
1359 case OP_EXACTI:
1360 case OP_POSSTARI:
1361 case OP_POSPLUSI:
1362 case OP_POSQUERYI:
1363 case OP_POSUPTOI:
1364
1365 case OP_NOTEXACT:
1366 case OP_NOTPOSSTAR:
1367 case OP_NOTPOSPLUS:
1368 case OP_NOTPOSQUERY:
1369 case OP_NOTPOSUPTO:
1370
1371 case OP_NOTEXACTI:
1372 case OP_NOTPOSSTARI:
1373 case OP_NOTPOSPLUSI:
1374 case OP_NOTPOSQUERYI:
1375 case OP_NOTPOSUPTOI:
1376
1377 case OP_TYPEEXACT:
1378 case OP_TYPEPOSSTAR:
1379 case OP_TYPEPOSPLUS:
1380 case OP_TYPEPOSQUERY:
1381 case OP_TYPEPOSUPTO:
1382
1383 case OP_CLASS:
1384 case OP_NCLASS:
1385 case OP_XCLASS:
1386
1387 cc = next_opcode(common, cc);
1388 SLJIT_ASSERT(cc != NULL);
1389 break;
1390 }
1391
1392 /* Possessive quantifiers can use a special case. */
1393 if (SLJIT_UNLIKELY(possessive == length))
1394 return stack_restore ? no_frame : no_stack;
1395
1396 if (length > 0)
1397 return length + 1;
1398 return stack_restore ? no_frame : no_stack;
1399 }
1400
1401 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1402 {
1403 DEFINE_COMPILER;
1404 BOOL setsom_found = recursive;
1405 BOOL setmark_found = recursive;
1406 /* The last capture is a local variable even for recursions. */
1407 BOOL capture_last_found = FALSE;
1408 int offset;
1409
1410 /* >= 1 + shortest item size (2) */
1411 SLJIT_UNUSED_ARG(stacktop);
1412 SLJIT_ASSERT(stackpos >= stacktop + 2);
1413
1414 stackpos = STACK(stackpos);
1415 if (ccend == NULL)
1416 {
1417 ccend = bracketend(cc) - (1 + LINK_SIZE);
1418 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1419 cc = next_opcode(common, cc);
1420 }
1421
1422 SLJIT_ASSERT(cc != NULL);
1423 while (cc < ccend)
1424 switch(*cc)
1425 {
1426 case OP_SET_SOM:
1427 SLJIT_ASSERT(common->has_set_som);
1428 if (!setsom_found)
1429 {
1430 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1432 stackpos += (int)sizeof(sljit_sw);
1433 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1434 stackpos += (int)sizeof(sljit_sw);
1435 setsom_found = TRUE;
1436 }
1437 cc += 1;
1438 break;
1439
1440 case OP_MARK:
1441 case OP_PRUNE_ARG:
1442 case OP_THEN_ARG:
1443 SLJIT_ASSERT(common->mark_ptr != 0);
1444 if (!setmark_found)
1445 {
1446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1448 stackpos += (int)sizeof(sljit_sw);
1449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1450 stackpos += (int)sizeof(sljit_sw);
1451 setmark_found = TRUE;
1452 }
1453 cc += 1 + 2 + cc[1];
1454 break;
1455
1456 case OP_RECURSE:
1457 if (common->has_set_som && !setsom_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 setsom_found = TRUE;
1465 }
1466 if (common->mark_ptr != 0 && !setmark_found)
1467 {
1468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1470 stackpos += (int)sizeof(sljit_sw);
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472 stackpos += (int)sizeof(sljit_sw);
1473 setmark_found = TRUE;
1474 }
1475 if (common->capture_last_ptr != 0 && !capture_last_found)
1476 {
1477 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1479 stackpos += (int)sizeof(sljit_sw);
1480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1481 stackpos += (int)sizeof(sljit_sw);
1482 capture_last_found = TRUE;
1483 }
1484 cc += 1 + LINK_SIZE;
1485 break;
1486
1487 case OP_CBRA:
1488 case OP_CBRAPOS:
1489 case OP_SCBRA:
1490 case OP_SCBRAPOS:
1491 if (common->capture_last_ptr != 0 && !capture_last_found)
1492 {
1493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1495 stackpos += (int)sizeof(sljit_sw);
1496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1497 stackpos += (int)sizeof(sljit_sw);
1498 capture_last_found = TRUE;
1499 }
1500 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1504 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1505 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1506 stackpos += (int)sizeof(sljit_sw);
1507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1508 stackpos += (int)sizeof(sljit_sw);
1509
1510 cc += 1 + LINK_SIZE + IMM2_SIZE;
1511 break;
1512
1513 default:
1514 cc = next_opcode(common, cc);
1515 SLJIT_ASSERT(cc != NULL);
1516 break;
1517 }
1518
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1520 SLJIT_ASSERT(stackpos == STACK(stacktop));
1521 }
1522
1523 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1524 {
1525 int private_data_length = needs_control_head ? 3 : 2;
1526 int size;
1527 pcre_uchar *alternative;
1528 /* Calculate the sum of the private machine words. */
1529 while (cc < ccend)
1530 {
1531 size = 0;
1532 switch(*cc)
1533 {
1534 case OP_KET:
1535 if (PRIVATE_DATA(cc) != 0)
1536 private_data_length++;
1537 cc += 1 + LINK_SIZE;
1538 break;
1539
1540 case OP_ASSERT:
1541 case OP_ASSERT_NOT:
1542 case OP_ASSERTBACK:
1543 case OP_ASSERTBACK_NOT:
1544 case OP_ONCE:
1545 case OP_ONCE_NC:
1546 case OP_BRAPOS:
1547 case OP_SBRA:
1548 case OP_SBRAPOS:
1549 case OP_SCOND:
1550 private_data_length++;
1551 cc += 1 + LINK_SIZE;
1552 break;
1553
1554 case OP_CBRA:
1555 case OP_SCBRA:
1556 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1557 private_data_length++;
1558 cc += 1 + LINK_SIZE + IMM2_SIZE;
1559 break;
1560
1561 case OP_CBRAPOS:
1562 case OP_SCBRAPOS:
1563 private_data_length += 2;
1564 cc += 1 + LINK_SIZE + IMM2_SIZE;
1565 break;
1566
1567 case OP_COND:
1568 /* Might be a hidden SCOND. */
1569 alternative = cc + GET(cc, 1);
1570 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1571 private_data_length++;
1572 cc += 1 + LINK_SIZE;
1573 break;
1574
1575 CASE_ITERATOR_PRIVATE_DATA_1
1576 if (PRIVATE_DATA(cc))
1577 private_data_length++;
1578 cc += 2;
1579 #ifdef SUPPORT_UTF
1580 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1581 #endif
1582 break;
1583
1584 CASE_ITERATOR_PRIVATE_DATA_2A
1585 if (PRIVATE_DATA(cc))
1586 private_data_length += 2;
1587 cc += 2;
1588 #ifdef SUPPORT_UTF
1589 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1590 #endif
1591 break;
1592
1593 CASE_ITERATOR_PRIVATE_DATA_2B
1594 if (PRIVATE_DATA(cc))
1595 private_data_length += 2;
1596 cc += 2 + IMM2_SIZE;
1597 #ifdef SUPPORT_UTF
1598 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1599 #endif
1600 break;
1601
1602 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1603 if (PRIVATE_DATA(cc))
1604 private_data_length++;
1605 cc += 1;
1606 break;
1607
1608 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1609 if (PRIVATE_DATA(cc))
1610 private_data_length += 2;
1611 cc += 1;
1612 break;
1613
1614 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1615 if (PRIVATE_DATA(cc))
1616 private_data_length += 2;
1617 cc += 1 + IMM2_SIZE;
1618 break;
1619
1620 case OP_CLASS:
1621 case OP_NCLASS:
1622 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1623 case OP_XCLASS:
1624 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1625 #else
1626 size = 1 + 32 / (int)sizeof(pcre_uchar);
1627 #endif
1628 if (PRIVATE_DATA(cc))
1629 private_data_length += get_class_iterator_size(cc + size);
1630 cc += size;
1631 break;
1632
1633 default:
1634 cc = next_opcode(common, cc);
1635 SLJIT_ASSERT(cc != NULL);
1636 break;
1637 }
1638 }
1639 SLJIT_ASSERT(cc == ccend);
1640 return private_data_length;
1641 }
1642
/* Emits the code which copies the private data words of the byte code in
the [cc, ccend) range between the local area (SLJIT_MEM1(SLJIT_SP)) and
the stack (SLJIT_MEM1(STACK_TOP)).

  save != FALSE: locals are loaded and written to the stack; the list
    starts with recursive_head_ptr and (if needs_control_head is set)
    control_head_ptr
  save == FALSE: the values are read back from the stack and stored
    into the locals; the first one or two stack words are skipped

The opcodes are visited in the same order as in
get_private_data_copy_length(). TMP1 and TMP2 are used alternately as a
two entry pipeline, so a value is loaded into one register before the
previous value is written out from the other one. stackptr and stacktop
are stack positions (converted by STACK() below); the final assert
requires that exactly the words between them are consumed. */
1643 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1644 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1645 {
1646 DEFINE_COMPILER;
/* Offsets of the (at most two) locals selected by the current step. */
1647 int srcw[2];
1648 int count, size;
/* tmp1next: TMP1 is the register used by the next transfer.
tmpXempty: the register holds no value which is waiting for transfer. */
1649 BOOL tmp1next = TRUE;
1650 BOOL tmp1empty = TRUE;
1651 BOOL tmp2empty = TRUE;
1652 pcre_uchar *alternative;
/* start: emit the head pointers (save only), loop: scan the byte code,
end: everything is processed. */
1653 enum {
1654 start,
1655 loop,
1656 end
1657 } status;
1658
1659 status = save ? start : loop;
1660 stackptr = STACK(stackptr - 2);
1661 stacktop = STACK(stacktop - 1);
1662
1663 if (!save)
1664 {
/* Skip one word (two with a control head), then preload up to two
values from the stack into TMP1 and TMP2. */
1665 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1666 if (stackptr < stacktop)
1667 {
1668 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1669 stackptr += sizeof(sljit_sw);
1670 tmp1empty = FALSE;
1671 }
1672 if (stackptr < stacktop)
1673 {
1674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1675 stackptr += sizeof(sljit_sw);
1676 tmp2empty = FALSE;
1677 }
1678 /* The tmp1next must be TRUE in either way. */
1679 }
1680
/* Each iteration selects 0, 1 or 2 locals (count, srcw), which are
transferred by the inner while loop at the bottom. */
1681 do
1682 {
1683 count = 0;
1684 switch(status)
1685 {
1686 case start:
1687 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1688 count = 1;
1689 srcw[0] = common->recursive_head_ptr;
1690 if (needs_control_head)
1691 {
1692 SLJIT_ASSERT(common->control_head_ptr != 0);
1693 count = 2;
1694 srcw[1] = common->control_head_ptr;
1695 }
1696 status = loop;
1697 break;
1698
1699 case loop:
1700 if (cc >= ccend)
1701 {
1702 status = end;
1703 break;
1704 }
1705
1706 switch(*cc)
1707 {
1708 case OP_KET:
1709 if (PRIVATE_DATA(cc) != 0)
1710 {
1711 count = 1;
1712 srcw[0] = PRIVATE_DATA(cc);
1713 }
1714 cc += 1 + LINK_SIZE;
1715 break;
1716
1717 case OP_ASSERT:
1718 case OP_ASSERT_NOT:
1719 case OP_ASSERTBACK:
1720 case OP_ASSERTBACK_NOT:
1721 case OP_ONCE:
1722 case OP_ONCE_NC:
1723 case OP_BRAPOS:
1724 case OP_SBRA:
1725 case OP_SBRAPOS:
1726 case OP_SCOND:
1727 count = 1;
1728 srcw[0] = PRIVATE_DATA(cc);
1729 SLJIT_ASSERT(srcw[0] != 0);
1730 cc += 1 + LINK_SIZE;
1731 break;
1732
1733 case OP_CBRA:
1734 case OP_SCBRA:
/* Only brackets which are not marked in optimized_cbracket have an
OVECTOR_PRIV word to copy. */
1735 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1736 {
1737 count = 1;
1738 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1739 }
1740 cc += 1 + LINK_SIZE + IMM2_SIZE;
1741 break;
1742
1743 case OP_CBRAPOS:
1744 case OP_SCBRAPOS:
1745 count = 2;
1746 srcw[0] = PRIVATE_DATA(cc);
1747 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1748 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1749 cc += 1 + LINK_SIZE + IMM2_SIZE;
1750 break;
1751
1752 case OP_COND:
1753 /* Might be a hidden SCOND. */
1754 alternative = cc + GET(cc, 1);
1755 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1756 {
1757 count = 1;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 SLJIT_ASSERT(srcw[0] != 0);
1760 }
1761 cc += 1 + LINK_SIZE;
1762 break;
1763
/* Single character iterators: the character at the end of the opcode
may have extra units in UTF mode, which are skipped as well. */
1764 CASE_ITERATOR_PRIVATE_DATA_1
1765 if (PRIVATE_DATA(cc))
1766 {
1767 count = 1;
1768 srcw[0] = PRIVATE_DATA(cc);
1769 }
1770 cc += 2;
1771 #ifdef SUPPORT_UTF
1772 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1773 #endif
1774 break;
1775
1776 CASE_ITERATOR_PRIVATE_DATA_2A
1777 if (PRIVATE_DATA(cc))
1778 {
1779 count = 2;
1780 srcw[0] = PRIVATE_DATA(cc);
1781 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1782 }
1783 cc += 2;
1784 #ifdef SUPPORT_UTF
1785 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1786 #endif
1787 break;
1788
1789 CASE_ITERATOR_PRIVATE_DATA_2B
1790 if (PRIVATE_DATA(cc))
1791 {
1792 count = 2;
1793 srcw[0] = PRIVATE_DATA(cc);
1794 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1795 }
1796 cc += 2 + IMM2_SIZE;
1797 #ifdef SUPPORT_UTF
1798 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1799 #endif
1800 break;
1801
1802 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1803 if (PRIVATE_DATA(cc))
1804 {
1805 count = 1;
1806 srcw[0] = PRIVATE_DATA(cc);
1807 }
1808 cc += 1;
1809 break;
1810
1811 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1812 if (PRIVATE_DATA(cc))
1813 {
1814 count = 2;
1815 srcw[0] = PRIVATE_DATA(cc);
1816 srcw[1] = srcw[0] + sizeof(sljit_sw);
1817 }
1818 cc += 1;
1819 break;
1820
1821 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1822 if (PRIVATE_DATA(cc))
1823 {
1824 count = 2;
1825 srcw[0] = PRIVATE_DATA(cc);
1826 srcw[1] = srcw[0] + sizeof(sljit_sw);
1827 }
1828 cc += 1 + IMM2_SIZE;
1829 break;
1830
1831 case OP_CLASS:
1832 case OP_NCLASS:
1833 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1834 case OP_XCLASS:
1835 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1836 #else
1837 size = 1 + 32 / (int)sizeof(pcre_uchar);
1838 #endif
/* The number of words depends on the iterator which follows the class. */
1839 if (PRIVATE_DATA(cc))
1840 switch(get_class_iterator_size(cc + size))
1841 {
1842 case 1:
1843 count = 1;
1844 srcw[0] = PRIVATE_DATA(cc);
1845 break;
1846
1847 case 2:
1848 count = 2;
1849 srcw[0] = PRIVATE_DATA(cc);
1850 srcw[1] = srcw[0] + sizeof(sljit_sw);
1851 break;
1852
1853 default:
1854 SLJIT_ASSERT_STOP();
1855 break;
1856 }
1857 cc += size;
1858 break;
1859
1860 default:
1861 cc = next_opcode(common, cc);
1862 SLJIT_ASSERT(cc != NULL);
1863 break;
1864 }
1865 break;
1866
1867 case end:
1868 SLJIT_ASSERT_STOP();
1869 break;
1870 }
1871
/* Transfer the selected locals. Since count is decremented before
indexing, srcw[1] (if present) is processed before srcw[0]. */
1872 while (count > 0)
1873 {
1874 count--;
1875 if (save)
1876 {
/* Save: write out the value waiting in the register (if any), then
load the next local into it. */
1877 if (tmp1next)
1878 {
1879 if (!tmp1empty)
1880 {
1881 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1882 stackptr += sizeof(sljit_sw);
1883 }
1884 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1885 tmp1empty = FALSE;
1886 tmp1next = FALSE;
1887 }
1888 else
1889 {
1890 if (!tmp2empty)
1891 {
1892 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1893 stackptr += sizeof(sljit_sw);
1894 }
1895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1896 tmp2empty = FALSE;
1897 tmp1next = TRUE;
1898 }
1899 }
1900 else
1901 {
/* Restore: store the preloaded register into the local, then refill
the register from the stack while words remain. */
1902 if (tmp1next)
1903 {
1904 SLJIT_ASSERT(!tmp1empty);
1905 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1906 tmp1empty = stackptr >= stacktop;
1907 if (!tmp1empty)
1908 {
1909 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1910 stackptr += sizeof(sljit_sw);
1911 }
1912 tmp1next = FALSE;
1913 }
1914 else
1915 {
1916 SLJIT_ASSERT(!tmp2empty);
1917 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1918 tmp2empty = stackptr >= stacktop;
1919 if (!tmp2empty)
1920 {
1921 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1922 stackptr += sizeof(sljit_sw);
1923 }
1924 tmp1next = TRUE;
1925 }
1926 }
1927 }
1928 }
1929 while (status != end);
1930
/* Save: flush the values still waiting in the registers. The register
which would be used next holds the older value, so it is written first. */
1931 if (save)
1932 {
1933 if (tmp1next)
1934 {
1935 if (!tmp1empty)
1936 {
1937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1938 stackptr += sizeof(sljit_sw);
1939 }
1940 if (!tmp2empty)
1941 {
1942 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1943 stackptr += sizeof(sljit_sw);
1944 }
1945 }
1946 else
1947 {
1948 if (!tmp2empty)
1949 {
1950 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1951 stackptr += sizeof(sljit_sw);
1952 }
1953 if (!tmp1empty)
1954 {
1955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1956 stackptr += sizeof(sljit_sw);
1957 }
1958 }
1959 }
/* All opcodes and all stack words must be consumed; after a restore
both registers must be empty. */
1960 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1961 }
1962
1963 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1964 {
1965 pcre_uchar *end = bracketend(cc);
1966 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1967
1968 /* Assert captures then. */
1969 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1970 current_offset = NULL;
1971 /* Conditional block does not. */
1972 if (*cc == OP_COND || *cc == OP_SCOND)
1973 has_alternatives = FALSE;
1974
1975 cc = next_opcode(common, cc);
1976 if (has_alternatives)
1977 current_offset = common->then_offsets + (cc - common->start);
1978
1979 while (cc < end)
1980 {
1981 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1982 cc = set_then_offsets(common, cc, current_offset);
1983 else
1984 {
1985 if (*cc == OP_ALT && has_alternatives)
1986 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1987 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1988 *current_offset = 1;
1989 cc = next_opcode(common, cc);
1990 }
1991 }
1992
1993 return end;
1994 }
1995
/* The iterator case-label macros are used only by the private data
functions above, so they are removed here. */
1996 #undef CASE_ITERATOR_PRIVATE_DATA_1
1997 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1998 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1999 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2000 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2001 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2002
2003 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2004 {
2005 return (value & (value - 1)) == 0;
2006 }
2007
2008 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2009 {
2010 while (list)
2011 {
2012 /* sljit_set_label is clever enough to do nothing
2013 if either the jump or the label is NULL. */
2014 SET_LABEL(list->jump, label);
2015 list = list->next;
2016 }
2017 }
2018
2019 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2020 {
2021 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2022 if (list_item)
2023 {
2024 list_item->next = *list;
2025 list_item->jump = jump;
2026 *list = list_item;
2027 }
2028 }
2029
2030 static void add_stub(compiler_common *common, struct sljit_jump *start)
2031 {
2032 DEFINE_COMPILER;
2033 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2034
2035 if (list_item)
2036 {
2037 list_item->start = start;
2038 list_item->quit = LABEL();
2039 list_item->next = common->stubs;
2040 common->stubs = list_item;
2041 }
2042 }
2043
2044 static void flush_stubs(compiler_common *common)
2045 {
2046 DEFINE_COMPILER;
2047 stub_list *list_item = common->stubs;
2048
2049 while (list_item)
2050 {
2051 JUMPHERE(list_item->start);
2052 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2053 JUMPTO(SLJIT_JUMP, list_item->quit);
2054 list_item = list_item->next;
2055 }
2056 common->stubs = NULL;
2057 }
2058
2059 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2060 {
2061 DEFINE_COMPILER;
2062 label_addr_list *label_addr;
2063
2064 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2065 if (label_addr == NULL)
2066 return;
2067 label_addr->label = LABEL();
2068 label_addr->update_addr = update_addr;
2069 label_addr->next = common->label_addrs;
2070 common->label_addrs = label_addr;
2071 }
2072
2073 static SLJIT_INLINE void count_match(compiler_common *common)
2074 {
2075 DEFINE_COMPILER;
2076
2077 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2078 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2079 }
2080
2081 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2082 {
2083 /* May destroy all locals and registers except TMP2. */
2084 DEFINE_COMPILER;
2085
2086 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2087 #ifdef DESTROY_REGISTERS
2088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2089 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2090 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2091 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2093 #endif
2094 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2095 }
2096
2097 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2098 {
2099 DEFINE_COMPILER;
2100 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2101 }
2102
2103 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2104 {
2105 DEFINE_COMPILER;
2106 sljit_uw *result;
2107
2108 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2109 return NULL;
2110
2111 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2112 if (SLJIT_UNLIKELY(result == NULL))
2113 {
2114 sljit_set_compiler_memory_error(compiler);
2115 return NULL;
2116 }
2117
2118 *(void**)result = common->read_only_data_head;
2119 common->read_only_data_head = (void *)result;
2120 return result + 1;
2121 }
2122
2123 static void free_read_only_data(void *current, void *allocator_data)
2124 {
2125 void *next;
2126
2127 SLJIT_UNUSED_ARG(allocator_data);
2128
2129 while (current != NULL)
2130 {
2131 next = *(void**)current;
2132 SLJIT_FREE(current, allocator_data);
2133 current = next;
2134 }
2135 }
2136
2137 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2138 {
2139 DEFINE_COMPILER;
2140 struct sljit_label *loop;
2141 int i;
2142
2143 /* At this point we can freely use all temporary registers. */
2144 SLJIT_ASSERT(length > 1);
2145 /* TMP1 returns with begin - 1. */
2146 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2147 if (length < 8)
2148 {
2149 for (i = 1; i < length; i++)
2150 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2151 }
2152 else
2153 {
2154 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2155 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2156 loop = LABEL();
2157 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2159 JUMPTO(SLJIT_NOT_ZERO, loop);
2160 }
2161 }
2162
2163 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2164 {
2165 DEFINE_COMPILER;
2166 struct sljit_label *loop;
2167 int i;
2168
2169 SLJIT_ASSERT(length > 1);
2170 /* OVECTOR(1) contains the "string begin - 1" constant. */
2171 if (length > 2)
2172 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2173 if (length < 8)
2174 {
2175 for (i = 2; i < length; i++)
2176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2177 }
2178 else
2179 {
2180 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2181 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2182 loop = LABEL();
2183 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2184 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2185 JUMPTO(SLJIT_NOT_ZERO, loop);
2186 }
2187
2188 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2189 if (common->mark_ptr != 0)
2190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2191 if (common->control_head_ptr != 0)
2192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2193 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2195 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2196 }
2197
2198 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2199 {
2200 while (current != NULL)
2201 {
2202 switch (current[-2])
2203 {
2204 case type_then_trap:
2205 break;
2206
2207 case type_mark:
2208 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2209 return current[-4];
2210 break;
2211
2212 default:
2213 SLJIT_ASSERT_STOP();
2214 break;
2215 }
2216 current = (sljit_sw*)current[-1];
2217 }
2218 return -1;
2219 }
2220
/* Emits the code which copies the ovector stored in the local area to the
offsets array of the jit_arguments structure, and computes the return
value of a successful match. topbracket is the highest capturing bracket
number which is checked when the return value is computed. */
2221 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2222 {
2223 DEFINE_COMPILER;
2224 struct sljit_label *loop;
2225 struct sljit_jump *early_quit;
2226
2227 /* At this point we can freely use all registers. */
/* The previous content of OVECTOR(1) is kept in SLJIT_S2 (it is compared
to the ovector entries below), then OVECTOR(1) is set to STR_PTR. */
2228 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2230
/* R0: arguments (later begin), R1: offset_count, R2: mark (later the
address right before the offsets array, because of the pre-increment
store in the loop). */
2231 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2232 if (common->mark_ptr != 0)
2233 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2234 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2235 if (common->mark_ptr != 0)
2236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2237 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2238 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2239 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2240 /* Unlikely, but possible */
2241 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop: every ovector entry is converted to a distance from begin,
and stored as an int. The loop runs offset_count times. */
2242 loop = LABEL();
2243 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2244 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2245 /* Copy the integer value to the output buffer */
2246 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2247 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2248 #endif
2249 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2250 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2251 JUMPTO(SLJIT_NOT_ZERO, loop);
2252 JUMPHERE(early_quit);
2253
2254 /* Calculate the return value, which is the maximum ovector value. */
2255 if (topbracket > 1)
2256 {
/* The pairs are scanned backwards from topbracket; the counter in R1
is decreased while the loaded entry is equal to the value saved in
SLJIT_S2. */
2257 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2258 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2259
2260 /* OVECTOR(0) is never equal to SLJIT_S2. */
2261 loop = LABEL();
2262 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2263 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2264 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2265 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2266 }
2267 else
2268 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2269 }
2270
2271 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2272 {
/* Emits the exit sequence of a partial match: SLJIT_RETURN_REG is set to
PCRE_ERROR_PARTIAL, up to three values are written into
jit_arguments->offsets (depending on real_offset_count), and the generated
code jumps to the quit label. All stored values are code unit offsets
relative to jit_arguments->begin. */
2273 DEFINE_COMPILER;
2274 struct sljit_jump *jump;
2275
2276 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2277 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2278 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2279
2280 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2281 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
/* Nothing can be stored when fewer than two output slots are available. */
2282 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2283 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2284
2285 /* Store match begin and end. */
/* From here S0 = begin of the subject, R1 = the offsets array. */
2286 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2287 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2288
/* offsets[2] is written only if at least three slots exist. Its source is
start_ptr in hard partial mode, and the word after hit_start otherwise. */
2289 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2290 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2291 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2292 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2293 #endif
2294 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2295 JUMPHERE(jump);
2296
/* offsets[1] = STR_END - begin. STR_END lives in SLJIT_S1 (see the compile
time assert above), so S1 is overwritten with the result here. */
2297 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2298 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2299 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2300 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2301 #endif
2302 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2303
/* offsets[0] = (start_used_ptr in hard mode, hit_start in soft mode) - begin. */
2304 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2305 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2306 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2307 #endif
2308 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2309
2310 JUMPTO(SLJIT_JUMP, quit);
2311 }
2312
2313 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2314 {
2315 /* May destroy TMP1. */
2316 DEFINE_COMPILER;
2317 struct sljit_jump *jump;
2318
2319 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2320 {
2321 /* The value of -1 must be kept for start_used_ptr! */
2322 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2323 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2324 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2325 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2327 JUMPHERE(jump);
2328 }
2329 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2330 {
2331 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2333 JUMPHERE(jump);
2334 }
2335 }
2336
2337 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2338 {
2339 /* Detects if the character has an othercase. */
2340 unsigned int c;
2341
2342 #ifdef SUPPORT_UTF
2343 if (common->utf)
2344 {
2345 GETCHAR(c, cc);
2346 if (c > 127)
2347 {
2348 #ifdef SUPPORT_UCP
2349 return c != UCD_OTHERCASE(c);
2350 #else
2351 return FALSE;
2352 #endif
2353 }
2354 #ifndef COMPILE_PCRE8
2355 return common->fcc[c] != c;
2356 #endif
2357 }
2358 else
2359 #endif
2360 c = *cc;
2361 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2362 }
2363
2364 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2365 {
2366 /* Returns with the othercase. */
2367 #ifdef SUPPORT_UTF
2368 if (common->utf && c > 127)
2369 {
2370 #ifdef SUPPORT_UCP
2371 return UCD_OTHERCASE(c);
2372 #else
2373 return c;
2374 #endif
2375 }
2376 #endif
2377 return TABLE_GET(c, common->fcc, c);
2378 }
2379
2380 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2381 {
2382 /* Detects if the character and its othercase has only 1 bit difference. */
2383 unsigned int c, oc, bit;
2384 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2385 int n;
2386 #endif
2387
2388 #ifdef SUPPORT_UTF
2389 if (common->utf)
2390 {
2391 GETCHAR(c, cc);
2392 if (c <= 127)
2393 oc = common->fcc[c];
2394 else
2395 {
2396 #ifdef SUPPORT_UCP
2397 oc = UCD_OTHERCASE(c);
2398 #else
2399 oc = c;
2400 #endif
2401 }
2402 }
2403 else
2404 {
2405 c = *cc;
2406 oc = TABLE_GET(c, common->fcc, c);
2407 }
2408 #else
2409 c = *cc;
2410 oc = TABLE_GET(c, common->fcc, c);
2411 #endif
2412
2413 SLJIT_ASSERT(c != oc);
2414
2415 bit = c ^ oc;
2416 /* Optimized for English alphabet. */
2417 if (c <= 127 && bit == 0x20)
2418 return (0 << 8) | 0x20;
2419
2420 /* Since c != oc, they must have at least 1 bit difference. */
2421 if (!is_powerof2(bit))
2422 return 0;
2423
2424 #if defined COMPILE_PCRE8
2425
2426 #ifdef SUPPORT_UTF
2427 if (common->utf && c > 127)
2428 {
2429 n = GET_EXTRALEN(*cc);
2430 while ((bit & 0x3f) == 0)
2431 {
2432 n--;
2433 bit >>= 6;
2434 }
2435 return (n << 8) | bit;
2436 }
2437 #endif /* SUPPORT_UTF */
2438 return (0 << 8) | bit;
2439
2440 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2441
2442 #ifdef SUPPORT_UTF
2443 if (common->utf && c > 65535)
2444 {
2445 if (bit >= (1 << 10))
2446 bit >>= 10;
2447 else
2448 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2449 }
2450 #endif /* SUPPORT_UTF */
2451 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2452
2453 #endif /* COMPILE_PCRE[8|16|32] */
2454 }
2455
2456 static void check_partial(compiler_common *common, BOOL force)
2457 {
2458 /* Checks whether a partial matching is occurred. Does not modify registers. */
2459 DEFINE_COMPILER;
2460 struct sljit_jump *jump = NULL;
2461
2462 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2463
2464 if (common->mode == JIT_COMPILE)
2465 return;
2466
2467 if (!force)
2468 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2469 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2470 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2471
2472 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2474 else
2475 {
2476 if (common->partialmatchlabel != NULL)
2477 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2478 else
2479 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2480 }
2481
2482 if (jump != NULL)
2483 JUMPHERE(jump);
2484 }
2485
2486 static void check_str_end(compiler_common *common, jump_list **end_reached)
2487 {
2488 /* Does not affect registers. Usually used in a tight spot. */
2489 DEFINE_COMPILER;
2490 struct sljit_jump *jump;
2491
2492 if (common->mode == JIT_COMPILE)
2493 {
2494 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2495 return;
2496 }
2497
2498 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2499 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2500 {
2501 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2503 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2504 }
2505 else
2506 {
2507 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2508 if (common->partialmatchlabel != NULL)
2509 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2510 else
2511 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2512 }
2513 JUMPHERE(jump);
2514 }
2515
2516 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2517 {
2518 DEFINE_COMPILER;
2519 struct sljit_jump *jump;
2520
2521 if (common->mode == JIT_COMPILE)
2522 {
2523 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2524 return;
2525 }
2526
2527 /* Partial matching mode. */
2528 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2529 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2530 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2531 {
2532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2533 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2534 }
2535 else
2536 {
2537 if (common->partialmatchlabel != NULL)
2538 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2539 else
2540 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2541 }
2542 JUMPHERE(jump);
2543 }
2544
2545 static void peek_char(compiler_common *common, pcre_uint32 max)
2546 {
2547 /* Reads the character into TMP1, keeps STR_PTR.
2548 Does not check STR_END. TMP2 Destroyed. */
2549 DEFINE_COMPILER;
2550 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2551 struct sljit_jump *jump;
2552 #endif
2553
2554 SLJIT_UNUSED_ARG(max);
2555
2556 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2557 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2558 if (common->utf)
2559 {
2560 if (max < 128) return;
2561
2562 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2563 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2564 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2565 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2566 JUMPHERE(jump);
2567 }
2568 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2569
2570 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2571 if (common->utf)
2572 {
2573 if (max < 0xd800) return;
2574
2575 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2576 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2577 /* TMP2 contains the high surrogate. */
2578 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2579 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2580 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2581 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2582 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2583 JUMPHERE(jump);
2584 }
2585 #endif
2586 }
2587
2588 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2589
2590 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2591 {
2592 /* Tells whether the character codes below 128 are enough
2593 to determine a match. */
2594 const pcre_uint8 value = nclass ? 0xff : 0;
2595 const pcre_uint8 *end = bitset + 32;
2596
2597 bitset += 16;
2598 do
2599 {
2600 if (*bitset++ != value)
2601 return FALSE;
2602 }
2603 while (bitset < end);
2604 return TRUE;
2605 }
2606
2607 static void read_char7_type(compiler_common *common, BOOL full_read)
2608 {
2609 /* Reads the precise character type of a character into TMP1, if the character
2610 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2611 full_read argument tells whether characters above max are accepted or not. */
/* TMP2 is destroyed. The emitted code loads ctypes[first byte] into TMP1
without any range check. NOTE(review): the "returns with zero" statement
above therefore depends on the content of the ctypes table - confirm. */
2612 DEFINE_COMPILER;
2613 struct sljit_jump *jump;
2614
2615 SLJIT_ASSERT(common->utf);
2616
2617 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2618 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2619
2620 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2621
2622 if (full_read)
2623 {
/* For a first byte >= 0xc0 the STR_PTR is also moved forward by the value
stored for that byte in utf8_table4 (indexed by byte - 0xc0). */
2624 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2625 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2626 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2627 JUMPHERE(jump);
2628 }
2629 }
2630
2631 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2632
2633 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2634 {
2635 /* Reads the precise value of a character into TMP1, if the character is
2636 between min and max (c >= min && c <= max). Otherwise it returns with a value
2637 outside the range. Does not check STR_END. */
/* STR_PTR is always moved past the first code unit. In the UTF-8 and UTF-16
modes the range selects the cheapest decoder which is still precise inside
[min, max]. When update_str_ptr is TRUE, the emitted code also moves STR_PTR
past characters whose value is not decoded; otherwise STR_PTR is moved only
by the number of code units the selected decoder has consumed. TMP2 may be
destroyed, and the UTF-8 paths also use RETURN_ADDR as a temporary. */
2638 DEFINE_COMPILER;
2639 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2640 struct sljit_jump *jump;
2641 #endif
2642 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2643 struct sljit_jump *jump2;
2644 #endif
2645
2646 SLJIT_UNUSED_ARG(update_str_ptr);
2647 SLJIT_UNUSED_ARG(min);
2648 SLJIT_UNUSED_ARG(max);
2649 SLJIT_ASSERT(min <= max);
2650
2651 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2652 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2653
2654 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2655 if (common->utf)
2656 {
/* The first byte is enough when only the values below 128 matter and the
caller does not need STR_PTR to pass the whole character. */
2657 if (max < 128 && !update_str_ptr) return;
2658
/* First bytes below 0xc0 skip all of the multi byte handling below. */
2659 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2660 if (min >= 0x10000)
2661 {
/* Only four byte sequences are decoded: TMP2 = first byte - 0xf0, and any
value above 7 leaves through jump2 with TMP1 holding the second byte. With
update_str_ptr the extra length from utf8_table4 is kept in RETURN_ADDR and
added to STR_PTR at the end. */
2662 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2663 if (update_str_ptr)
2664 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2665 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2666 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2667 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2668 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2669 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2670 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2671 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2672 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2675 if (!update_str_ptr)
2676 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 JUMPHERE(jump2);
2681 if (update_str_ptr)
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683 }
2684 else if (min >= 0x800 && max <= 0xffff)
2685 {
/* Only three byte sequences are decoded: TMP2 = first byte - 0xe0, and any
value above 0xf leaves through jump2. */
2686 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2687 if (update_str_ptr)
2688 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2690 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2691 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2692 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2693 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2694 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2695 if (!update_str_ptr)
2696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2697 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2698 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2700 JUMPHERE(jump2);
2701 if (update_str_ptr)
2702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2703 }
/* General case: call one of the shared decoders, the 16 bit limited one is
chosen when max is below 0x10000. */
2704 else if (max >= 0x800)
2705 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2706 else if (max < 128)
2707 {
/* Reached only with update_str_ptr (see the early return above): the value
is not decoded, STR_PTR is just moved forward by the utf8_table4 entry. */
2708 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2710 }
2711 else
2712 {
/* 128 <= max < 0x800: the first two bytes are combined inline, which is
precise for two byte sequences. */
2713 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2714 if (!update_str_ptr)
2715 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2716 else
2717 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2718 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2719 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2720 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2721 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2722 if (update_str_ptr)
2723 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2724 }
2725 JUMPHERE(jump);
2726 }
2727 #endif
2728
2729 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2730 if (common->utf)
2731 {
2732 if (max >= 0x10000)
2733 {
/* Full decoding of surrogate pairs: units outside 0xd800..0xdbff are kept
as they are. */
2734 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2735 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2736 /* TMP2 contains the high surrogate. */
/* STR_PTR was already moved past the high surrogate, so offset 0 is the
low surrogate here. */
2737 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2738 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2739 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2741 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2742 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2743 JUMPHERE(jump);
2744 return;
2745 }
2746
2747 if (max < 0xd800 && !update_str_ptr) return;
2748
2749 /* Skip low surrogate if necessary. */
/* The pair is not decoded here: with update_str_ptr the second unit is
skipped, and when max >= 0xd800 TMP1 is replaced by 0x10000, a value which
is above max on this path. */
2750 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2751 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2752 if (update_str_ptr)
2753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2754 if (max >= 0xd800)
2755 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2756 JUMPHERE(jump);
2757 }
2758 #endif
2759 }
2760
2761 static SLJIT_INLINE void read_char(compiler_common *common)
2762 {
/* Reads the next character with the widest range (0 .. READ_CHAR_MAX) and
with STR_PTR update requested; see read_char_range for the details. */
2763 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2764 }
2765
2766 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2767 {
2768 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the common->ctypes table, and TMP1 is set to zero for
the character values above 255. TMP2 is destroyed. The update_str_ptr
argument selects how multi unit characters are handled in UTF mode, see the
comments at the branches below. */
2769 DEFINE_COMPILER;
2770 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2771 struct sljit_jump *jump;
2772 #endif
2773 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2774 struct sljit_jump *jump2;
2775 #endif
2776
2777 SLJIT_UNUSED_ARG(update_str_ptr);
2778
2779 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2780 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2781
2782 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2783 if (common->utf)
2784 {
2785 /* This can be an extra read in some situations, but hopefully
2786 it is needed in most cases. */
2787 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2788 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2789 if (!update_str_ptr)
2790 {
/* Inline path: the first two bytes are combined as a two byte sequence,
STR_PTR is moved by one more byte only, and the type is zero when the
combined value is above 255. */
2791 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2792 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2793 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2794 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2795 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2796 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2797 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2798 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2799 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2800 JUMPHERE(jump2);
2801 }
2802 else
/* The shared utfreadtype8 helper also skips the whole sequence. */
2803 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2804 JUMPHERE(jump);
2805 return;
2806 }
2807 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2808
2809 #if !defined COMPILE_PCRE8
2810 /* The ctypes array contains only 256 values. */
2811 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2812 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2813 #endif
2814 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2815 #if !defined COMPILE_PCRE8
2816 JUMPHERE(jump);
2817 #endif
2818
2819 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2820 if (common->utf && update_str_ptr)
2821 {
2822 /* Skip low surrogate if necessary. */
/* One more unit is skipped when the unit just read is in 0xd800..0xdbff. */
2823 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2824 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2825 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2826 JUMPHERE(jump);
2827 }
2828 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2829 }
2830
2831 static void skip_char_back(compiler_common *common)
2832 {
2833 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Without UTF (and in the 32 bit library) a character is a single code
unit, so only the final subtraction of this function is emitted. */
2834 DEFINE_COMPILER;
2835 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2836 #if defined COMPILE_PCRE8
2837 struct sljit_label *label;
2838
2839 if (common->utf)
2840 {
/* UTF-8: keep stepping back while the byte just passed has the form
10xxxxxx (its upper two bits are equal to 0x80). */
2841 label = LABEL();
2842 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2843 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2845 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2846 return;
2847 }
2848 #elif defined COMPILE_PCRE16
2849 if (common->utf)
2850 {
2851 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2852 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2853 /* Skip low surrogate if necessary. */
/* Branch free form: TMP1 becomes 1 when the unit is in 0xdc00..0xdfff, and
it is shifted left by one to get the size of a 16 bit unit in bytes. */
2854 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2855 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2856 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2857 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2858 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2859 return;
2860 }
2861 #endif /* COMPILE_PCRE[8|16] */
2862 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2863 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2864 }
2865
2866 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2867 {
2868 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2869 DEFINE_COMPILER;
2870 struct sljit_jump *jump;
2871
2872 if (nltype == NLTYPE_ANY)
2873 {
2874 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2875 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2876 }
2877 else if (nltype == NLTYPE_ANYCRLF)
2878 {
2879 if (jumpifmatch)
2880 {
2881 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2882 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2883 }
2884 else
2885 {
2886 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2887 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2888 JUMPHERE(jump);
2889 }
2890 }
2891 else
2892 {
2893 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2894 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2895 }
2896 }
2897
2898 #ifdef SUPPORT_UTF
2899
2900 #if defined COMPILE_PCRE8
2901 static void do_utfreadchar(compiler_common *common)
2902 {
2903 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2904 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emitted as a shared helper which is entered by a fast call. On entry
STR_PTR points to the second byte of the character, on return it points
after the last byte. */
2905 DEFINE_COMPILER;
2906 struct sljit_jump *jump;
2907
2908 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = (low six bits of the first byte << 6) | low six bits of the
second byte. */
2909 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2910 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2911 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2912 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2913 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2914
2915 /* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 0x20 of the first byte: it is clear for the
two byte sequences only. */
2916 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2917 jump = JUMP(SLJIT_NOT_ZERO);
2918 /* Two byte sequence. */
2919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2920 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2921 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2922
2923 JUMPHERE(jump);
/* Clear the length marker bit, then append the six bits of the third byte. */
2924 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2925 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2926 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2927 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2928 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2929
/* Bit 0x10000 of TMP1 is now bit 0x10 of the first byte: it is clear for
the three byte sequences. */
2930 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2931 jump = JUMP(SLJIT_NOT_ZERO);
2932 /* Three byte sequence. */
2933 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2934 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2935 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2936
2937 /* Four byte sequence. */
2938 JUMPHERE(jump);
2939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2940 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2941 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2942 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2945 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2946 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2947 }
2948
2949 static void do_utfreadchar16(compiler_common *common)
2950 {
2951 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2952 of the character (>= 0xc0). Return value in TMP1. */
/* Variant of do_utfreadchar: it has no four byte decoding path and does not
return the length (TMP2 is only a temporary here). On entry STR_PTR points
to the second byte of the character. */
2953 DEFINE_COMPILER;
2954 struct sljit_jump *jump;
2955
2956 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2957 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2958 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2960 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2961 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2962
2963 /* Searching for the first zero. */
2964 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2965 jump = JUMP(SLJIT_NOT_ZERO);
2966 /* Two byte sequence. */
2967 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2968 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2969
2970 JUMPHERE(jump);
/* Bit 0x400 of TMP1 is bit 0x10 of the first byte. TMP2 becomes 1 when it
is set, and STR_PTR is moved by that extra byte. The bit is not cleared
below, so after the shift it ends up as bit 0x10000 of the result. */
2971 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2972 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
2973 /* This code runs only in 8 bit mode. No need to shift the value. */
2974 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2975 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2976 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2977 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2978 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2979 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2980 /* Three byte sequence. */
2981 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2982 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2983 }
2984
2985 static void do_utfreadtype8(compiler_common *common)
2986 {
2987 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2988 of the character (>= 0xc0). Return value in TMP1. */
/* Emitted as a shared helper which is entered by a fast call. On entry
STR_PTR points to the second byte, on return it is moved past the
character. The returned type is zero for the character values above 255. */
2989 DEFINE_COMPILER;
2990 struct sljit_jump *jump;
2991 struct sljit_jump *compare;
2992
2993 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2994
/* Bit 0x20 of the first byte is set for the sequences which are longer than
two bytes. */
2995 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2996 jump = JUMP(SLJIT_NOT_ZERO);
2997 /* Two byte sequence. */
2998 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2999 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3000 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3001 /* The upper 5 bits are known at this point. */
/* A value above 3 means the character is above 255. */
3002 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3003 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3004 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3005 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3006 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3007 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3008
3009 JUMPHERE(compare);
3010 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3011 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3012
3013 /* We only have types for characters less than 256. */
/* Longer sequences: the type is zero, and STR_PTR is moved forward by the
utf8_table4 entry of the first byte. */
3014 JUMPHERE(jump);
3015 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3017 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3018 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3019 }
3020
3021 #endif /* COMPILE_PCRE8 */
3022
3023 #endif /* SUPPORT_UTF */
3024
3025 #ifdef SUPPORT_UCP
3026
3027 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift which split a character value into a block number (the
index of ucd_stage1) and an offset inside the 128 entry block. */
3028 #define UCD_BLOCK_MASK 127
3029 #define UCD_BLOCK_SHIFT 7
3030
3031 static void do_getucd(compiler_common *common)
3032 {
3033 /* Search the UCD record for the character comes in TMP1.
3034 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emitted as a shared helper which is entered by a fast call. The lookup has
two stages: ucd_stage1 is indexed by the block number, and its entry selects
the block of ucd_stage2 which is indexed by the low seven bits. */
3035 DEFINE_COMPILER;
3036
3037 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3038
3039 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> 7] (byte sized entries). */
3040 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3041 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (stage1 value << 7) + (TMP1 & 127), the index in ucd_stage2. */
3042 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3043 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3044 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1]: the index is scaled by 2 (shift of 1) for the
unsigned 16 bit load. */
3045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3046 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype: the index is scaled by 8 (shift of 3),
the record size checked by the assert above. */
3047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3048 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3049 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3050 }
3051 #endif
3052
3053 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3054 {
/* Emits the entry code of the main matching loop and returns the label
which the caller can jump back to in order to try the next start position.
The code at the returned label moves STR_PTR forward by one character (or
by a two unit newline, see newlinecheck below). On the first entry the
execution jumps over that code.

If firstline is set, the end of the first line is searched first and it is
stored in the first_line_end slot; STR_PTR is preserved in TMP3 during the
search. NOTE(review): hascrorlf presumably tells that the pattern contains
an explicit CR or LF; here it only disables the newlinecheck code. */
3055 DEFINE_COMPILER;
3056 struct sljit_label *mainloop;
3057 struct sljit_label *newlinelabel = NULL;
3058 struct sljit_jump *start;
3059 struct sljit_jump *end = NULL;
3060 struct sljit_jump *nl = NULL;
3061 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3062 struct sljit_jump *singlechar;
3063 #endif
3064 jump_list *newline = NULL;
3065 BOOL newlinecheck = FALSE;
3066 BOOL readuchar = FALSE;
3067
/* newlinecheck: when advancing, a two unit newline is stepped over as a
single position. */
3068 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3069 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3070 newlinecheck = TRUE;
3071
3072 if (firstline)
3073 {
3074 /* Search for the end of the first line. */
3075 SLJIT_ASSERT(common->first_line_end != 0);
3076 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3077
3078 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3079 {
/* Two unit fixed newline: the first unit is in the high byte of
common->newline, the second unit is in the low byte. Here mainloop is only
a local scan label, it is reassigned later. */
3080 mainloop = LABEL();
3081 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3082 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3083 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3084 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3085 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3086 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3087 JUMPHERE(end);
3088 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3089 }
3090 else
3091 {
/* Other newline types: read characters until check_newlinechar jumps out
through the newline list, or until STR_END is reached. */
3092 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3093 mainloop = LABEL();
3094 /* Continual stores does not cause data dependency. */
3095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3096 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3097 check_newlinechar(common, common->nltype, &newline, TRUE);
3098 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3099 JUMPHERE(end);
3100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3101 set_jumps(newline, LABEL());
3102 }
3103
3104 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3105 }
3106
/* The first entry skips the advancing code below (see JUMPHERE(start)). */
3107 start = JUMP(SLJIT_JUMP);
3108
3109 if (newlinecheck)
3110 {
/* Entered from the main loop when the current unit equals the high byte of
common->newline: step over it, and also over the next unit if that one
equals the low byte. The flag result (0 or 1) is scaled to the size of a
code unit in the 16 and 32 bit libraries. */
3111 newlinelabel = LABEL();
3112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3113 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3115 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3116 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3117 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3118 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3119 #endif
3120 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3121 nl = JUMP(SLJIT_JUMP);
3122 }
3123
/* This is the label returned to the caller. */
3124 mainloop = LABEL();
3125
3126 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3127 #ifdef SUPPORT_UTF
3128 if (common->utf) readuchar = TRUE;
3129 #endif
3130 if (newlinecheck) readuchar = TRUE;
3131
/* The current unit is needed in TMP1 by the newline check and by the UTF
length computation below. */
3132 if (readuchar)
3133 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3134
3135 if (newlinecheck)
3136 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3137
3138 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3139 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3140 #if defined COMPILE_PCRE8
3141 if (common->utf)
3142 {
/* UTF-8: for a first byte >= 0xc0 also skip the number of bytes stored in
utf8_table4 for that byte. */
3143 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3144 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3146 JUMPHERE(singlechar);
3147 }
3148 #elif defined COMPILE_PCRE16
3149 if (common->utf)
3150 {
/* UTF-16: skip one more unit (2 bytes) when the unit is in 0xd800..0xdbff. */
3151 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3152 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3154 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3155 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3157 JUMPHERE(singlechar);
3158 }
3159 #endif /* COMPILE_PCRE[8|16] */
3160 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The first entry, and both exits of the newline check, continue here. */
3161 JUMPHERE(start);
3162
3163 if (newlinecheck)
3164 {
3165 JUMPHERE(end);
3166 JUMPHERE(nl);
3167 }
3168
3169 return mainloop;
3170 }
3171
3172 #define MAX_N_CHARS 16
3173 #define MAX_N_BYTES 8
3174
3175 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3176 {
3177 pcre_uint8 len = bytes[0];
3178 int i;
3179
3180 if (len == 255)
3181 return;
3182
3183 if (len == 0)
3184 {
3185 bytes[0] = 1;
3186 bytes[1] = byte;
3187 return;
3188 }
3189
3190 for (i = len; i > 0; i--)
3191 if (bytes[i] == byte)
3192 return;
3193
3194 if (len >= MAX_N_BYTES - 1)
3195 {
3196 bytes[0] = 255;
3197 return;
3198 }
3199
3200 len++;
3201 bytes[len] = byte;
3202 bytes[0] = len;
3203 }
3204
3205 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3206 {
3207 /* Recursive function, which scans prefix literals. */
3208 BOOL last, any, caseless;
3209 int len, repeat, len_save, consumed = 0;
3210 pcre_uint32 chr, mask;
3211 pcre_uchar *alternative, *cc_save, *oc;
3212 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3213 pcre_uchar othercase[8];
3214 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3215 pcre_uchar othercase[2];
3216 #else
3217 pcre_uchar othercase[1];
3218 #endif
3219
3220 repeat = 1;
3221 while (TRUE)
3222 {
3223 last = TRUE;
3224 any = FALSE;
3225 caseless = FALSE;
3226 switch (*cc)
3227 {
3228 case OP_CHARI:
3229 caseless = TRUE;
3230 case OP_CHAR:
3231 last = FALSE;
3232 cc++;
3233 break;
3234
3235 case OP_SOD:
3236 case OP_SOM:
3237 case OP_SET_SOM:
3238 case OP_NOT_WORD_BOUNDARY:
3239 case OP_WORD_BOUNDARY:
3240 case OP_EODN:
3241 case OP_EOD:
3242 case OP_CIRC:
3243 case OP_CIRCM:
3244 case OP_DOLL:
3245 case OP_DOLLM:
3246 /* Zero width assertions. */
3247 cc++;
3248 continue;
3249
3250 case OP_ASSERT:
3251 case OP_ASSERT_NOT:
3252 case OP_ASSERTBACK:
3253 case OP_ASSERTBACK_NOT:
3254 cc = bracketend(cc);
3255 continue;
3256
3257 case OP_PLUSI:
3258 case OP_MINPLUSI:
3259 case OP_POSPLUSI:
3260 caseless = TRUE;
3261 case OP_PLUS:
3262 case OP_MINPLUS:
3263 case OP_POSPLUS:
3264 cc++;
3265 break;
3266
3267 case OP_EXACTI:
3268 caseless = TRUE;
3269 case OP_EXACT:
3270 repeat = GET2(cc, 1);
3271 last = FALSE;
3272 cc += 1 + IMM2_SIZE;
3273 break;
3274
3275 case OP_QUERYI:
3276 case OP_MINQUERYI:
3277 case OP_POSQUERYI:
3278 caseless = TRUE;
3279 case OP_QUERY:
3280 case OP_MINQUERY:
3281 case OP_POSQUERY:
3282 len = 1;
3283 cc++;
3284 #ifdef SUPPORT_UTF
3285 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3286 #endif
3287 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3288 if (max_chars == 0)
3289 return consumed;
3290 last = FALSE;
3291 break;
3292
3293 case OP_KET:
3294 cc += 1 + LINK_SIZE;
3295 continue;
3296
3297 case OP_ALT:
3298 cc += GET(cc, 1);
3299 continue;
3300
3301 case OP_ONCE:
3302 case OP_ONCE_NC:
3303 case OP_BRA:
3304 case OP_BRAPOS:
3305 case OP_CBRA:
3306 case OP_CBRAPOS:
3307 alternative = cc + GET(cc, 1);
3308 while (*alternative == OP_ALT)
3309 {
3310 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3311 if (max_chars == 0)
3312 return consumed;
3313 alternative += GET(alternative, 1);
3314 }
3315
3316 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3317 cc += IMM2_SIZE;
3318 cc += 1 + LINK_SIZE;
3319 continue;
3320
3321 case OP_CLASS:
3322 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3323 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3324 #endif
3325 any = TRUE;
3326 cc += 1 + 32 / sizeof(pcre_uchar);
3327 break;
3328
3329 case OP_NCLASS:
3330 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3331 if (common->utf) return consumed;
3332 #endif
3333 any = TRUE;
3334 cc += 1 + 32 / sizeof(pcre_uchar);
3335 break;
3336
3337 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3338 case OP_XCLASS:
3339 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3340 if (common->utf) return consumed;
3341 #endif
3342 any = TRUE;
3343 cc += GET(cc, 1);
3344 break;
3345 #endif
3346
3347 case OP_DIGIT:
3348 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3349 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3350 return consumed;
3351 #endif
3352 any = TRUE;
3353 cc++;
3354 break;
3355
3356 case OP_WHITESPACE:
3357 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3358 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3359 return consumed;
3360 #endif
3361 any = TRUE;
3362 cc++;
3363 break;
3364
3365 case OP_WORDCHAR:
3366 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3367 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3368 return consumed;
3369 #endif
3370 any = TRUE;
3371 cc++;
3372 break;
3373
3374 case OP_NOT:
3375 case OP_NOTI:
3376 cc++;
3377 /* Fall through. */
3378 case OP_NOT_DIGIT:
3379 case OP_NOT_WHITESPACE:
3380 case OP_NOT_WORDCHAR:
3381 case OP_ANY:
3382 case OP_ALLANY:
3383 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3384 if (common->utf) return consumed;
3385 #endif
3386 any = TRUE;
3387 cc++;
3388 break;
3389
3390 #ifdef SUPPORT_UCP
3391 case OP_NOTPROP:
3392 case OP_PROP:
3393 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3394 if (common->utf) return consumed;
3395 #endif
3396 any = TRUE;
3397 cc += 1 + 2;
3398 break;
3399 #endif
3400
3401 case OP_TYPEEXACT:
3402 repeat = GET2(cc, 1);
3403 cc += 1 + IMM2_SIZE;
3404 continue;
3405
3406 case OP_NOTEXACT:
3407 case OP_NOTEXACTI:
3408 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3409 if (common->utf) return consumed;
3410 #endif
3411 any = TRUE;
3412 repeat = GET2(cc, 1);
3413 cc += 1 + IMM2_SIZE + 1;
3414 break;
3415
3416 default:
3417 return consumed;
3418 }
3419
3420 if (any)
3421 {
3422 #if defined COMPILE_PCRE8
3423 mask = 0xff;
3424 #elif defined COMPILE_PCRE16
3425 mask = 0xffff;
3426 #elif defined COMPILE_PCRE32
3427 mask = 0xffffffff;
3428 #else
3429 SLJIT_ASSERT_STOP();
3430 #endif
3431
3432 do
3433 {
3434 chars[0] = mask;
3435 chars[1] = mask;
3436 bytes[0] = 255;
3437
3438 consumed++;
3439 if (--max_chars == 0)
3440 return consumed;
3441 chars += 2;
3442 bytes += MAX_N_BYTES;
3443 }
3444 while (--repeat > 0);
3445
3446 repeat = 1;
3447 continue;
3448 }
3449
3450 len = 1;
3451 #ifdef SUPPORT_UTF
3452 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3453 #endif
3454
3455 if (caseless && char_has_othercase(common, cc))
3456 {
3457 #ifdef SUPPORT_UTF
3458 if (common->utf)
3459 {
3460 GETCHAR(chr, cc);
3461 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3462 return consumed;
3463 }
3464 else
3465 #endif
3466 {
3467 chr = *cc;
3468 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3469 }
3470 }
3471 else
3472 caseless = FALSE;
3473
3474 len_save = len;
3475 cc_save = cc;
3476 while (TRUE)
3477 {
3478 oc = othercase;
3479 do
3480 {
3481 chr = *cc;
3482 #ifdef COMPILE_PCRE32
3483 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3484 return consumed;
3485 #endif
3486 add_prefix_byte((pcre_uint8)chr, bytes);
3487
3488 mask = 0;
3489 if (caseless)
3490 {
3491 add_prefix_byte((pcre_uint8)*oc, bytes);
3492 mask = *cc ^ *oc;
3493 chr |= mask;
3494 }
3495
3496 #ifdef COMPILE_PCRE32
3497 if (chars[0] == NOTACHAR && chars[1] == 0)
3498 #else
3499 if (chars[0] == NOTACHAR)
3500 #endif
3501 {
3502 chars[0] = chr;
3503 chars[1] = mask;
3504 }
3505 else
3506 {
3507 mask |= chars[0] ^ chr;
3508 chr |= mask;
3509 chars[0] = chr;
3510 chars[1] |= mask;
3511 }
3512
3513 len--;
3514 consumed++;
3515 if (--max_chars == 0)
3516 return consumed;
3517 chars += 2;
3518 bytes += MAX_N_BYTES;
3519 cc++;
3520 oc++;
3521 }
3522 while (len > 0);
3523
3524 if (--repeat == 0)
3525 break;
3526
3527 len = len_save;
3528 cc = cc_save;
3529 }
3530
3531 repeat = 1;
3532 if (last)
3533 return consumed;
3534 }
3535 }
3536
3537 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3538 {
3539 DEFINE_COMPILER;
3540 struct sljit_label *start;
3541 struct sljit_jump *quit;
3542 pcre_uint32 chars[MAX_N_CHARS * 2];
3543 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3544 pcre_uint8 ones[MAX_N_CHARS];
3545 int offsets[3];
3546 pcre_uint32 mask;
3547 pcre_uint8 *byte_set, *byte_set_end;
3548 int i, max, from;
3549 int range_right = -1, range_len = 3 - 1;
3550 sljit_ub *update_table = NULL;
3551 BOOL in_range;
3552
3553 for (i = 0; i < MAX_N_CHARS; i++)
3554 {
3555 chars[i << 1] = NOTACHAR;
3556 chars[(i << 1) + 1] = 0;
3557 bytes[i * MAX_N_BYTES] = 0;
3558 }
3559
3560 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3561
3562 if (max <= 1)
3563 return FALSE;
3564
3565 for (i = 0; i < max; i++)
3566 {
3567 mask = chars[(i << 1) + 1];
3568 ones[i] = ones_in_half_byte[mask & 0xf];
3569 mask >>= 4;
3570 while (mask != 0)
3571 {
3572 ones[i] += ones_in_half_byte[mask & 0xf];
3573 mask >>= 4;
3574 }
3575 }
3576
3577 in_range = FALSE;
3578 from = 0; /* Prevent compiler "uninitialized" warning */
3579 for (i = 0; i <= max; i++)
3580 {
3581 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3582 {
3583 range_len = i - from;
3584 range_right = i - 1;
3585 }
3586
3587 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3588 {
3589 if (!in_range)
3590 {
3591 in_range = TRUE;
3592 from = i;
3593 }
3594 }
3595 else if (in_range)
3596 in_range = FALSE;
3597 }
3598
3599 if (range_right >= 0)
3600 {
3601 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3602 if (update_table == NULL)
3603 return TRUE;
3604 memset(update_table, IN_UCHARS(range_len), 256);
3605
3606 for (i = 0; i < range_len; i++)
3607 {
3608 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3609 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3610 byte_set_end = byte_set + byte_set[0];
3611 byte_set++;
3612 while (byte_set <= byte_set_end)
3613 {
3614 if (update_table[*byte_set] > IN_UCHARS(i))
3615 update_table[*byte_set] = IN_UCHARS(i);
3616 byte_set++;
3617 }
3618 }
3619 }
3620
3621 offsets[0] = -1;
3622 /* Scan forward. */
3623 for (i = 0; i < max; i++)
3624 if (ones[i] <= 2) {
3625 offsets[0] = i;
3626 break;
3627 }
3628
3629 if (offsets[0] < 0 && range_right < 0)
3630 return FALSE;
3631
3632 if (offsets[0] >= 0)
3633 {
3634 /* Scan backward. */
3635 offsets[1] = -1;
3636 for (i = max - 1; i > offsets[0]; i--)
3637 if (ones[i] <= 2 && i != range_right)
3638 {
3639 offsets[1] = i;
3640 break;
3641 }
3642
3643 /* This case is handled better by fast_forward_first_char. */
3644 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3645 return FALSE;
3646
3647 offsets[2] = -1;
3648 /* We only search for a middle character if there is no range check. */
3649 if (offsets[1] >= 0 && range_right == -1)
3650 {
3651 /* Scan from middle. */
3652 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3653 if (ones[i] <= 2)
3654 {
3655 offsets[2] = i;
3656 break;
3657 }
3658
3659 if (offsets[2] == -1)
3660 {
3661 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3662 if (ones[i] <= 2)
3663 {
3664 offsets[2] = i;
3665 break;
3666 }
3667 }
3668 }
3669
3670 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3671 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3672
3673 chars[0] = chars[offsets[0] << 1];
3674 chars[1] = chars[(offsets[0] << 1) + 1];
3675 if (offsets[2] >= 0)
3676 {
3677 chars[2] = chars[offsets[2] << 1];
3678 chars[3] = chars[(offsets[2] << 1) + 1];
3679 }
3680 if (offsets[1] >= 0)
3681 {
3682 chars[4] = chars[offsets[1] << 1];
3683 chars[5] = chars[(offsets[1] << 1) + 1];
3684 }
3685 }
3686
3687 max -= 1;
3688 if (firstline)
3689 {
3690 SLJIT_ASSERT(common->first_line_end != 0);
3691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3692 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3693 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3694 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3695 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3696 JUMPHERE(quit);
3697 }
3698 else
3699 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3700
3701 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3702 if (range_right >= 0)
3703 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3704 #endif
3705
3706 start = LABEL();
3707 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3708
3709 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3710
3711 if (range_right >= 0)
3712 {
3713 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3714 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3715 #else
3716 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3717 #endif
3718
3719 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3720 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3721 #else
3722 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3723 #endif
3724 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3725 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3726 }
3727
3728 if (offsets[0] >= 0)
3729 {
3730 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3731 if (offsets[1] >= 0)
3732 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3734
3735 if (chars[1] != 0)
3736 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3737 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3738 if (offsets[2] >= 0)
3739 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3740
3741 if (offsets[1] >= 0)
3742 {
3743 if (chars[5] != 0)
3744 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3745 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3746 }
3747
3748 if (offsets[2] >= 0)
3749 {
3750 if (chars[3] != 0)
3751 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3752 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3753 }
3754 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3755 }
3756
3757 JUMPHERE(quit);
3758
3759 if (firstline)
3760 {
3761 if (range_right >= 0)
3762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3763 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3764 if (range_right >= 0)
3765 {
3766 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3767 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3768 JUMPHERE(quit);
3769 }
3770 }
3771 else
3772 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3773 return TRUE;
3774 }
3775
3776 #undef MAX_N_CHARS
3777 #undef MAX_N_BYTES
3778
3779 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3780 {
3781 DEFINE_COMPILER;
3782 struct sljit_label *start;
3783 struct sljit_jump *quit;
3784 struct sljit_jump *found;
3785 pcre_uchar oc, bit;
3786
3787 if (firstline)
3788 {
3789 SLJIT_ASSERT(common->first_line_end != 0);
3790 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3791 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3792 }
3793
3794 start = LABEL();
3795 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3796 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3797
3798 oc = first_char;
3799 if (caseless)
3800 {
3801 oc = TABLE_GET(first_char, common->fcc, first_char);
3802 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3803 if (first_char > 127 && common->utf)
3804 oc = UCD_OTHERCASE(first_char);
3805 #endif
3806 }
3807 if (first_char == oc)
3808 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3809 else
3810 {
3811 bit = first_char ^ oc;
3812 if (is_powerof2(bit))
3813 {
3814 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3815 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3816 }
3817 else
3818 {
3819 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3820 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3821 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3822 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3823 found = JUMP(SLJIT_NOT_ZERO);
3824 }
3825 }
3826
3827 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3828 JUMPTO(SLJIT_JUMP, start);
3829 JUMPHERE(found);
3830 JUMPHERE(quit);
3831
3832 if (firstline)
3833 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3834 }
3835
3836 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3837 {
3838 DEFINE_COMPILER;
3839 struct sljit_label *loop;
3840 struct sljit_jump *lastchar;
3841 struct sljit_jump *firstchar;
3842 struct sljit_jump *quit;
3843 struct sljit_jump *foundcr = NULL;
3844 struct sljit_jump *notfoundnl;
3845 jump_list *newline = NULL;
3846
3847 if (firstline)
3848 {
3849 SLJIT_ASSERT(common->first_line_end != 0);
3850 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3851 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3852 }
3853
3854 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3855 {
3856 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3857 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3859 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3860 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3861
3862 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3863 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3864 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3865 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3866 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3867 #endif
3868 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3869
3870 loop = LABEL();
3871 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3872 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3874 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3875 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3876 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3877
3878 JUMPHERE(quit);
3879 JUMPHERE(firstchar);
3880 JUMPHERE(lastchar);
3881
3882 if (firstline)
3883 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3884 return;
3885 }
3886
3887 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3888 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3889 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3890 skip_char_back(common);
3891
3892 loop = LABEL();
3893 common->ff_newline_shortcut = loop;
3894
3895 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3896 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3897 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3898 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3899 check_newlinechar(common, common->nltype, &newline, FALSE);
3900 set_jumps(newline, loop);
3901
3902 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3903 {
3904 quit = JUMP(SLJIT_JUMP);
3905 JUMPHERE(foundcr);
3906 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3907 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3908 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3909 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3910 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3911 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3912 #endif
3913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3914 JUMPHERE(notfoundnl);
3915 JUMPHERE(quit);
3916 }
3917 JUMPHERE(lastchar);
3918 JUMPHERE(firstchar);
3919
3920 if (firstline)
3921 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3922 }
3923
3924 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3925
3926 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3927 {
3928 DEFINE_COMPILER;
3929 struct sljit_label *start;
3930 struct sljit_jump *quit;
3931 struct sljit_jump *found = NULL;
3932 jump_list *matches = NULL;
3933 #ifndef COMPILE_PCRE8
3934 struct sljit_jump *jump;
3935 #endif
3936
3937 if (firstline)
3938 {
3939 SLJIT_ASSERT(common->first_line_end != 0);
3940 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3941 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3942 }
3943
3944 start = LABEL();
3945 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3946 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3947 #ifdef SUPPORT_UTF
3948 if (common->utf)
3949 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3950 #endif
3951
3952 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3953 {
3954 #ifndef COMPILE_PCRE8
3955 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3956 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3957 JUMPHERE(jump);
3958 #endif
3959 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3960 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3961 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3962 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3963 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3964 found = JUMP(SLJIT_NOT_ZERO);
3965 }
3966
3967 #ifdef SUPPORT_UTF
3968 if (common->utf)
3969 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3970 #endif
3971 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3972 #ifdef SUPPORT_UTF
3973 #if defined COMPILE_PCRE8
3974 if (common->utf)
3975 {
3976 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3977 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3979 }
3980 #elif defined COMPILE_PCRE16
3981 if (common->utf)
3982 {
3983 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3984 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3985 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3986 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3987 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3988 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3989 }
3990 #endif /* COMPILE_PCRE[8|16] */
3991 #endif /* SUPPORT_UTF */
3992 JUMPTO(SLJIT_JUMP, start);
3993 if (found != NULL)
3994 JUMPHERE(found);
3995 if (matches != NULL)
3996 set_jumps(matches, LABEL());
3997 JUMPHERE(quit);
3998
3999 if (firstline)
4000 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4001 }
4002
4003 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4004 {
4005 DEFINE_COMPILER;
4006 struct sljit_label *loop;
4007 struct sljit_jump *toolong;
4008 struct sljit_jump *alreadyfound;
4009 struct sljit_jump *found;
4010 struct sljit_jump *foundoc = NULL;
4011 struct sljit_jump *notfound;
4012 pcre_uint32 oc, bit;
4013
4014 SLJIT_ASSERT(common->req_char_ptr != 0);
4015 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4016 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4017 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4018 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4019
4020 if (has_firstchar)
4021 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4022 else
4023 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4024
4025 loop = LABEL();
4026 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4027
4028 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4029 oc = req_char;
4030 if (caseless)
4031 {
4032 oc = TABLE_GET(req_char, common->fcc, req_char);
4033 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4034 if (req_char > 127 && common->utf)
4035 oc = UCD_OTHERCASE(req_char);
4036 #endif
4037 }
4038 if (req_char == oc)
4039 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4040 else
4041 {
4042 bit = req_char ^ oc;
4043 if (is_powerof2(bit))
4044 {
4045 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4046 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4047 }
4048 else
4049 {
4050 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4051 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4052 }
4053 }
4054 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4055 JUMPTO(SLJIT_JUMP, loop);
4056
4057 JUMPHERE(found);
4058 if (foundoc)
4059 JUMPHERE(foundoc);
4060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4061 JUMPHERE(alreadyfound);
4062 JUMPHERE(toolong);
4063 return notfound;
4064 }
4065
4066 static void do_revertframes(compiler_common *common)
4067 {
4068 DEFINE_COMPILER;
4069 struct sljit_jump *jump;
4070 struct sljit_label *mainloop;
4071
4072 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4073 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4074 GET_LOCAL_BASE(TMP3, 0, 0);
4075
4076 /* Drop frames until we reach STACK_TOP. */
4077 mainloop = LABEL();
4078 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4079 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4080 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4081
4082 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4083 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4084 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4085 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4086 JUMPTO(SLJIT_JUMP, mainloop);
4087
4088 JUMPHERE(jump);
4089 jump = JUMP(SLJIT_SIG_LESS);
4090 /* End of dropping frames. */
4091 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4092
4093 JUMPHERE(jump);
4094 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4095 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4096 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4097 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4098 JUMPTO(SLJIT_JUMP, mainloop);
4099 }
4100
4101 static void check_wordboundary(compiler_common *common)
4102 {
4103 DEFINE_COMPILER;
4104 struct sljit_jump *skipread;
4105 jump_list *skipread_list = NULL;
4106 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4107 struct sljit_jump *jump;
4108 #endif
4109
4110 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4111
4112 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4113 /* Get type of the previous char, and put it to LOCALS1. */
4114 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4115 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4117 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4118 skip_char_back(common);
4119 check_start_used_ptr(common);
4120 read_char(common);
4121
4122 /* Testing char type. */
4123 #ifdef SUPPORT_UCP
4124 if (common->use_ucp)
4125 {
4126 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4127 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4128 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4129 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4130 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4131 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4132 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4133 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4134 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4135 JUMPHERE(jump);
4136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4137 }
4138 else
4139 #endif
4140 {
4141 #ifndef COMPILE_PCRE8
4142 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4143 #elif defined SUPPORT_UTF
4144 /* Here LOCALS1 has already been zeroed. */
4145 jump = NULL;
4146 if (common->utf)
4147 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4148 #endif /* COMPILE_PCRE8 */
4149 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4150 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4151 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4153 #ifndef COMPILE_PCRE8
4154 JUMPHERE(jump);
4155 #elif defined SUPPORT_UTF
4156 if (jump != NULL)
4157 JUMPHERE(jump);
4158 #endif /* COMPILE_PCRE8 */
4159 }
4160 JUMPHERE(skipread);
4161
4162 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4163 check_str_end(common, &skipread_list);
4164 peek_char(common, READ_CHAR_MAX);
4165
4166 /* Testing char type. This is a code duplication. */
4167 #ifdef SUPPORT_UCP
4168 if (common->use_ucp)
4169 {
4170 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4171 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4172 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4173 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4174 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4175 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4176 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4177 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4178 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4179 JUMPHERE(jump);
4180 }
4181 else
4182 #endif
4183 {
4184 #ifndef COMPILE_PCRE8
4185 /* TMP2 may be destroyed by peek_char. */
4186 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4187 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4188 #elif defined SUPPORT_UTF
4189 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4190 jump = NULL;
4191 if (common->utf)
4192 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4193 #endif
4194 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4195 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4196 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4197 #ifndef COMPILE_PCRE8
4198 JUMPHERE(jump);
4199 #elif defined SUPPORT_UTF
4200 if (jump != NULL)
4201 JUMPHERE(jump);
4202 #endif /* COMPILE_PCRE8 */
4203 }
4204 set_jumps(skipread_list, LABEL());
4205
4206 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4207 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4208 }
4209
4210 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4211 {
4212 DEFINE_COMPILER;
4213 int ranges[MAX_RANGE_SIZE];
4214 pcre_uint8 bit, cbit, all;
4215 int i, byte, length = 0;
4216
4217 bit = bits[0] & 0x1;
4218 /* All bits will be zero or one (since bit is zero or one). */
4219 all = -bit;
4220
4221 for (i = 0; i < 256; )
4222 {
4223 byte = i >> 3;
4224 if ((i & 0x7) == 0 && bits[byte] == all)
4225 i += 8;
4226 else
4227 {
4228 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4229 if (cbit != bit)
4230 {
4231 if (length >= MAX_RANGE_SIZE)
4232 return FALSE;
4233 ranges[length] = i;
4234 length++;
4235 bit = cbit;
4236 all = -cbit;
4237 }
4238 i++;
4239 }
4240 }
4241
4242 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4243 {
4244 if (length >= MAX_RANGE_SIZE)
4245 return FALSE;
4246 ranges[length] = 256;
4247 length++;
4248 }
4249
4250 if (length < 0 || length > 4)
4251 return FALSE;
4252
4253 bit = bits[0] & 0x1;
4254 if (invert) bit ^= 0x1;
4255
4256 /* No character is accepted. */
4257 if (length == 0 && bit == 0)
4258 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4259
4260 switch(length)
4261 {
4262 case 0:
4263 /* When bit != 0, all characters are accepted. */
4264 return TRUE;
4265
4266 case 1:
4267 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4268 return TRUE;
4269
4270 case 2:
4271 if (ranges[0] + 1 != ranges[1])
4272 {
4273 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4274 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4275 }
4276 else
4277 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4278 return TRUE;
4279
4280 case 3:
4281 if (bit != 0)
4282 {
4283 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4284 if (ranges[0] + 1 != ranges[1])
4285 {
4286 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4287 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4288 }
4289 else
4290 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4291 return TRUE;
4292 }
4293
4294 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4295 if (ranges[1] + 1 != ranges[2])
4296 {
4297 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4298 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4299 }
4300 else
4301 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4302 return TRUE;
4303
4304 case 4:
4305 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4306 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4307 && is_powerof2(ranges[2] - ranges[0]))
4308 {
4309 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4310 if (ranges[2] + 1 != ranges[3])
4311 {
4312 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4313 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4314 }
4315 else
4316 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4317 return TRUE;
4318 }
4319
4320 if (bit != 0)
4321 {
4322 i = 0;
4323 if (ranges[0] + 1 != ranges[1])
4324 {
4325 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4326 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4327 i = ranges[0];
4328 }
4329 else
4330 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4331
4332 if (ranges[2] + 1 != ranges[3])
4333 {
4334 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4335 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4336 }
4337 else
4338 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4339 return TRUE;
4340 }
4341
4342 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4343 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4344 if (ranges[1] + 1 != ranges[2])
4345 {
4346 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4347 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4348 }
4349 else
4350 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4351 return TRUE;
4352
4353 default:
4354 SLJIT_ASSERT_STOP();
4355 return FALSE;
4356 }
4357 }
4358
4359 static void check_anynewline(compiler_common *common)
4360 {
4361 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4362 DEFINE_COMPILER;
4363
4364 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4365
4366 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4367 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4368 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4370 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4371 #ifdef COMPILE_PCRE8
4372 if (common->utf)
4373 {
4374 #endif
4375 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4376 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4378 #ifdef COMPILE_PCRE8
4379 }
4380 #endif
4381 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4382 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4383 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4384 }
4385
4386 static void check_hspace(compiler_common *common)
4387 {
4388 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4389 DEFINE_COMPILER;
4390
4391 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4392
4393 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4394 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4395 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4396 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4398 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4399 #ifdef COMPILE_PCRE8
4400 if (common->utf)
4401 {
4402 #endif
4403 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4405 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4407 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4408 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4409 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4410 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4411 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4412 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4413 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4414 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4415 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4416 #ifdef COMPILE_PCRE8
4417 }
4418 #endif
4419 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4420 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4421
4422 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4423 }
4424
4425 static void check_vspace(compiler_common *common)
4426 {
4427 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4428 DEFINE_COMPILER;
4429
4430 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4431
4432 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4433 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4434 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4436 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4437 #ifdef COMPILE_PCRE8
4438 if (common->utf)
4439 {
4440 #endif
4441 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4442 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4444 #ifdef COMPILE_PCRE8
4445 }
4446 #endif
4447 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4448 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4449
4450 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4451 }
4452
4453 #define CHAR1 STR_END
4454 #define CHAR2 STACK_TOP
4455
4456 static void do_casefulcmp(compiler_common *common)
4457 {
4458 DEFINE_COMPILER;
4459 struct sljit_jump *jump;
4460 struct sljit_label *label;
4461
4462 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4463 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4464 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4466 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4467 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4468
4469 label = LABEL();
4470 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4471 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4472 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4473 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4474 JUMPTO(SLJIT_NOT_ZERO, label);
4475
4476 JUMPHERE(jump);
4477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4478 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4479 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4480 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4481 }
4482
4483 #define LCC_TABLE STACK_LIMIT
4484
4485 static void do_caselesscmp(compiler_common *common)
4486 {
4487 DEFINE_COMPILER;
4488 struct sljit_jump *jump;
4489 struct sljit_label *label;
4490
4491 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4492 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4493
4494 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4497 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4498 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4499 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4500
4501 label = LABEL();
4502 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4503 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4504 #ifndef COMPILE_PCRE8
4505 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4506 #endif
4507 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4508 #ifndef COMPILE_PCRE8
4509 JUMPHERE(jump);
4510 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4511 #endif
4512 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4513 #ifndef COMPILE_PCRE8
4514 JUMPHERE(jump);
4515 #endif
4516 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4517 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4518 JUMPTO(SLJIT_NOT_ZERO, label);
4519
4520 JUMPHERE(jump);
4521 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4522 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4523 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4524 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4525 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4526 }
4527
4528 #undef LCC_TABLE
4529 #undef CHAR1
4530 #undef CHAR2
4531
4532 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4533
4534 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4535 {
4536 /* This function would be ineffective to do in JIT level. */
4537 pcre_uint32 c1, c2;
4538 const pcre_uchar *src2 = args->uchar_ptr;
4539 const pcre_uchar *end2 = args->end;
4540 const ucd_record *ur;
4541 const pcre_uint32 *pp;
4542
4543 while (src1 < end1)
4544 {
4545 if (src2 >= end2)
4546 return (pcre_uchar*)1;
4547 GETCHARINC(c1, src1);
4548 GETCHARINC(c2, src2);
4549 ur = GET_UCD(c2);
4550 if (c1 != c2 && c1 != c2 + ur->other_case)
4551 {
4552 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4553 for (;;)
4554 {
4555 if (c1 < *pp) return NULL;
4556 if (c1 == *pp++) break;
4557 }
4558 }
4559 }
4560 return src2;
4561 }
4562
4563 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4564
4565 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4566 compare_context *context, jump_list **backtracks)
4567 {
4568 DEFINE_COMPILER;
4569 unsigned int othercasebit = 0;
4570 pcre_uchar *othercasechar = NULL;
4571 #ifdef SUPPORT_UTF
4572 int utflength;
4573 #endif
4574
4575 if (caseless && char_has_othercase(common, cc))
4576 {
4577 othercasebit = char_get_othercase_bit(common, cc);
4578 SLJIT_ASSERT(othercasebit);
4579 /* Extracting bit difference info. */
4580 #if defined COMPILE_PCRE8
4581 othercasechar = cc + (othercasebit >> 8);
4582 othercasebit &= 0xff;
4583 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4584 /* Note that this code only handles characters in the BMP. If there
4585 ever are characters outside the BMP whose othercase differs in only one
4586 bit from itself (there currently are none), this code will need to be
4587 revised for COMPILE_PCRE32. */
4588 othercasechar = cc + (othercasebit >> 9);
4589 if ((othercasebit & 0x100) != 0)
4590 othercasebit = (othercasebit & 0xff) << 8;
4591 else
4592 othercasebit &= 0xff;
4593 #endif /* COMPILE_PCRE[8|16|32] */
4594 }
4595
4596 if (context->sourcereg == -1)
4597 {
4598 #if defined COMPILE_PCRE8
4599 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4600 if (context->length >= 4)
4601 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4602 else if (context->length >= 2)
4603 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4604 else
4605 #endif
4606 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4607 #elif defined COMPILE_PCRE16
4608 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4609 if (context->length >= 4)
4610 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4611 else
4612 #endif
4613 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4614 #elif defined COMPILE_PCRE32
4615 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4616 #endif /* COMPILE_PCRE[8|16|32] */
4617 context->sourcereg = TMP2;
4618 }
4619
4620 #ifdef SUPPORT_UTF
4621 utflength = 1;
4622 if (common->utf && HAS_EXTRALEN(*cc))
4623 utflength += GET_EXTRALEN(*cc);
4624
4625 do
4626 {
4627 #endif
4628
4629 context->length -= IN_UCHARS(1);
4630 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4631
4632 /* Unaligned read is supported. */
4633 if (othercasebit != 0 && othercasechar == cc)
4634 {
4635 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4636 context->oc.asuchars[context->ucharptr] = othercasebit;
4637 }
4638 else
4639 {
4640 context->c.asuchars[context->ucharptr] = *cc;
4641 context->oc.asuchars[context->ucharptr] = 0;
4642 }
4643 context->ucharptr++;
4644
4645 #if defined COMPILE_PCRE8
4646 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4647 #else
4648 if (context->ucharptr >= 2 || context->length == 0)
4649 #endif
4650 {
4651 if (context->length >= 4)
4652 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4653 else if (context->length >= 2)
4654 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4655 #if defined COMPILE_PCRE8
4656 else if (context->length >= 1)
4657 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4658 #endif /* COMPILE_PCRE8 */
4659 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4660
4661 switch(context->ucharptr)
4662 {
4663 case 4 / sizeof(pcre_uchar):
4664 if (context->oc.asint != 0)
4665 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4666 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4667 break;
4668
4669 case 2 / sizeof(pcre_uchar):
4670 if (context->oc.asushort != 0)
4671 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4672 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4673 break;
4674
4675 #ifdef COMPILE_PCRE8
4676 case 1:
4677 if (context->oc.asbyte != 0)
4678 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4679 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4680 break;
4681 #endif
4682
4683 default:
4684 SLJIT_ASSERT_STOP();
4685 break;
4686 }
4687 context->ucharptr = 0;
4688 }
4689
4690 #else
4691
4692 /* Unaligned read is unsupported or in 32 bit mode. */
4693 if (context->length >= 1)
4694 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4695
4696 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4697
4698 if (othercasebit != 0 && othercasechar == cc)
4699 {
4700 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4701 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4702 }
4703 else
4704 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4705
4706 #endif
4707
4708 cc++;
4709 #ifdef SUPPORT_UTF
4710 utflength--;
4711 }
4712 while (utflength > 0);
4713 #endif
4714
4715 return cc;
4716 }
4717
4718 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4719
/* Changes the amount subtracted from typereg to "value": emits an ADD or SUB
on typereg when "value" differs from the current typeoffset, then records
"value" as the new typeoffset. Expects typereg and typeoffset (and whatever
OP2 needs) to be in scope at the point of use.

The body is wrapped in do { } while (0), so the macro expands to a single
statement and remains correct under an unbraced if / else. The invocation
must be followed by a semicolon. "value" is evaluated more than once, so it
must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4729
/* Changes the amount subtracted from TMP1 to "value": emits an ADD or SUB on
TMP1 when "value" differs from the current charoffset, then records "value"
as the new charoffset. Expects charoffset (and whatever OP2 needs) to be in
scope at the point of use.

The body is wrapped in do { } while (0), so the macro expands to a single
statement and remains correct under an unbraced if / else. The invocation
must be followed by a semicolon. "value" is evaluated more than once, so it
must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4739
4740 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4741 {
4742 DEFINE_COMPILER;
4743 jump_list *found = NULL;
4744 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4745 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4746 struct sljit_jump *jump = NULL;
4747 pcre_uchar *ccbegin;
4748 int compares, invertcmp, numberofcmps;
4749 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4750 BOOL utf = common->utf;
4751 #endif
4752
4753 #ifdef SUPPORT_UCP
4754 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4755 BOOL charsaved = FALSE;
4756 int typereg = TMP1, scriptreg = TMP1;
4757 const pcre_uint32 *other_cases;
4758 sljit_uw typeoffset;
4759 #endif
4760
4761 /* Scanning the necessary info. */
4762 cc++;
4763 ccbegin = cc;
4764 compares = 0;
4765 if (cc[-1] & XCL_MAP)
4766 {
4767 min = 0;
4768 cc += 32 / sizeof(pcre_uchar);
4769 }
4770
4771 while (*cc != XCL_END)
4772 {
4773 compares++;
4774 if (*cc == XCL_SINGLE)
4775 {
4776 cc ++;
4777 GETCHARINCTEST(c, cc);
4778 if (c > max) max = c;
4779 if (c < min) min = c;
4780 #ifdef SUPPORT_UCP
4781 needschar = TRUE;
4782 #endif
4783 }
4784 else if (*cc == XCL_RANGE)
4785 {
4786 cc ++;
4787 GETCHARINCTEST(c, cc);
4788 if (c < min) min = c;
4789 GETCHARINCTEST(c, cc);
4790 if (c > max) max = c;
4791 #ifdef SUPPORT_UCP
4792 needschar = TRUE;
4793 #endif
4794 }
4795 #ifdef SUPPORT_UCP
4796 else
4797 {
4798 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4799 cc++;
4800 if (*cc == PT_CLIST)
4801 {
4802 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4803 while (*other_cases != NOTACHAR)
4804 {
4805 if (*other_cases > max) max = *other_cases;
4806 if (*other_cases < min) min = *other_cases;
4807 other_cases++;
4808 }
4809 }
4810 else
4811 {
4812 max = READ_CHAR_MAX;
4813 min = 0;
4814 }
4815
4816 switch(*cc)
4817 {
4818 case PT_ANY:
4819 break;
4820
4821 case PT_LAMP:
4822 case PT_GC:
4823 case PT_PC:
4824 case PT_ALNUM:
4825 needstype = TRUE;
4826 break;
4827
4828 case PT_SC:
4829 needsscript = TRUE;
4830 break;
4831
4832 case PT_SPACE:
4833 case PT_PXSPACE:
4834 case PT_WORD:
4835 case PT_PXGRAPH:
4836 case PT_PXPRINT:
4837 case PT_PXPUNCT:
4838 needstype = TRUE;
4839 needschar = TRUE;
4840 break;
4841
4842 case PT_CLIST:
4843 case PT_UCNC:
4844 needschar = TRUE;
4845 break;
4846
4847 default:
4848 SLJIT_ASSERT_STOP();
4849 break;
4850 }
4851 cc += 2;
4852 }
4853 #endif
4854 }
4855
4856 /* We are not necessary in utf mode even in 8 bit mode. */
4857 cc = ccbegin;
4858 detect_partial_match(common, backtracks);
4859 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4860
4861 if ((cc[-1] & XCL_HASPROP) == 0)
4862 {
4863 if ((cc[-1] & XCL_MAP) != 0)
4864 {
4865 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4866 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4867 {
4868 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4869 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4870 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4871 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4872 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4873 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4874 }
4875
4876 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4877 JUMPHERE(jump);
4878
4879 cc += 32 / sizeof(pcre_uchar);
4880 }
4881 else
4882 {
4883 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4884 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4885 }
4886 }
4887 else if ((cc[-1] & XCL_MAP) != 0)
4888 {
4889 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4890 #ifdef SUPPORT_UCP
4891 charsaved = TRUE;
4892 #endif
4893 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4894 {
4895 #ifdef COMPILE_PCRE8
4896 SLJIT_ASSERT(common->utf);
4897 #endif
4898 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4899
4900 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4901 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4902 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4903 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4904 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4905 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4906
4907 JUMPHERE(jump);
4908 }
4909
4910 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4911 cc += 32 / sizeof(pcre_uchar);
4912 }
4913
4914 #ifdef SUPPORT_UCP
4915 /* Simple register allocation. TMP1 is preferred if possible. */
4916 if (needstype || needsscript)
4917 {
4918 if (needschar && !charsaved)
4919 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4920 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4921 if (needschar)
4922 {
4923 if (needstype)
4924 {
4925 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4926 typereg = RETURN_ADDR;
4927 }
4928
4929 if (needsscript)
4930 scriptreg = TMP3;
4931 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4932 }
4933 else if (needstype && needsscript)
4934 scriptreg = TMP3;
4935 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4936
4937 if (needsscript)
4938 {
4939 if (scriptreg == TMP1)
4940 {
4941 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4942 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4943 }
4944 else
4945 {
4946 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4947 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4948 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4949 }
4950 }
4951 }
4952 #endif
4953
4954 /* Generating code. */
4955 charoffset = 0;
4956 numberofcmps = 0;
4957 #ifdef SUPPORT_UCP
4958 typeoffset = 0;
4959 #endif
4960
4961 while (*cc != XCL_END)
4962 {
4963 compares--;
4964 invertcmp = (compares == 0 && list != backtracks);
4965 jump = NULL;
4966
4967 if (*cc == XCL_SINGLE)
4968 {
4969 cc ++;
4970 GETCHARINCTEST(c, cc);
4971
4972 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4973 {
4974 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4975 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
4976 numberofcmps++;
4977 }
4978 else if (numberofcmps > 0)
4979 {
4980 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4981 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4982 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
4983 numberofcmps = 0;
4984 }
4985 else
4986 {
4987 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4988 numberofcmps = 0;
4989 }
4990 }
4991 else if (*cc == XCL_RANGE)
4992 {
4993 cc ++;
4994 GETCHARINCTEST(c, cc);
4995 SET_CHAR_OFFSET(c);
4996 GETCHARINCTEST(c, cc);
4997
4998 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4999 {
5000 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5001 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5002 numberofcmps++;
5003 }
5004 else if (numberofcmps > 0)
5005 {
5006 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5007 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5008 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5009 numberofcmps = 0;
5010 }
5011 else
5012 {
5013 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5014 numberofcmps = 0;
5015 }
5016 }
5017 #ifdef SUPPORT_UCP
5018 else
5019 {
5020 if (*cc == XCL_NOTPROP)
5021 invertcmp ^= 0x1;
5022 cc++;
5023 switch(*cc)
5024 {
5025 case PT_ANY:
5026 if (list != backtracks)
5027 {
5028 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5029 continue;
5030 }
5031 else if (cc[-1] == XCL_NOTPROP)
5032 continue;
5033 jump = JUMP(SLJIT_JUMP);
5034 break;
5035
5036 case PT_LAMP:
5037 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5038 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5040 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5041 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5042 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5043 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5044 break;
5045
5046 case PT_GC:
5047 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5048 SET_TYPE_OFFSET(c);
5049 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5050 break;
5051
5052 case PT_PC:
5053 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5054 break;
5055
5056 case PT_SC:
5057 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5058 break;
5059
5060 case PT_SPACE:
5061 case PT_PXSPACE:
5062 SET_CHAR_OFFSET(9);
5063 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5064 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5065
5066 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5067 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5068
5069 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5070 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5071
5072 SET_TYPE_OFFSET(ucp_Zl);
5073 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5074 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5075 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5076 break;
5077
5078 case PT_WORD:
5079 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5080 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5081 /* Fall through. */
5082
5083 case PT_ALNUM:
5084 SET_TYPE_OFFSET(ucp_Ll);
5085 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5086 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5087 SET_TYPE_OFFSET(ucp_Nd);
5088 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5089 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5090 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5091 break;
5092
5093 case PT_CLIST:
5094 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5095
5096 /* At least three characters are required.
5097 Otherwise this case would be handled by the normal code path. */
5098 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5099 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5100
5101 /* Optimizing character pairs, if their difference is power of 2. */
5102 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5103 {
5104 if (charoffset == 0)
5105 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5106 else
5107 {
5108 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5109 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5110 }
5111 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5112 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5113 other_cases += 2;
5114 }
5115 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5116 {
5117 if (charoffset == 0)
5118 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5119 else
5120 {
5121 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5122 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5123 }
5124 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5125 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5126
5127 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5128 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5129
5130 other_cases += 3;
5131 }
5132 else
5133 {
5134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5135 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5136 }
5137
5138 while (*other_cases != NOTACHAR)
5139 {
5140 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5141 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5142 }
5143 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5144 break;
5145
5146 case PT_UCNC:
5147 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5148 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5149 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5150 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5152 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5153
5154 SET_CHAR_OFFSET(0xa0);
5155 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5156 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5157 SET_CHAR_OFFSET(0);
5158 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5159 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5160 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5161 break;
5162
5163 case PT_PXGRAPH:
5164 /* C and Z groups are the farthest two groups. */
5165 SET_TYPE_OFFSET(ucp_Ll);
5166 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5168
5169 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5170
5171 /* In case of ucp_Cf, we overwrite the result. */
5172 SET_CHAR_OFFSET(0x2066);
5173 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5174 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5175
5176 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5177 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5178
5179 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5180 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5181
5182 JUMPHERE(jump);
5183 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5184 break;
5185
5186 case PT_PXPRINT:
5187 /* C and Z groups are the farthest two groups. */
5188 SET_TYPE_OFFSET(ucp_Ll);
5189 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5190 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5191
5192 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5193 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5194
5195 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5196
5197 /* In case of ucp_Cf, we overwrite the result. */
5198 SET_CHAR_OFFSET(0x2066);
5199 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5200 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5201
5202 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5203 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5204
5205 JUMPHERE(jump);
5206 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5207 break;
5208
5209 case PT_PXPUNCT:
5210 SET_TYPE_OFFSET(ucp_Sc);
5211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5212 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5213
5214 SET_CHAR_OFFSET(0);
5215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5216 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5217
5218 SET_TYPE_OFFSET(ucp_Pc);
5219 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5220 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5221 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5222 break;
5223 }
5224 cc += 2;
5225 }
5226 #endif
5227
5228 if (jump != NULL)
5229 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5230 }
5231
5232 if (found != NULL)
5233 set_jumps(found, LABEL());
5234 }
5235
5236 #undef SET_TYPE_OFFSET
5237 #undef SET_CHAR_OFFSET
5238
5239 #endif
5240
/* Emits the matching-path code for one single-item opcode.
   common     - shared compiler state (newline settings, utf flag, mode, shared routine jump lists)
   type       - the opcode to compile
   cc         - points at the opcode's operand (the byte after the opcode itself)
   backtracks - jump list that receives every "no match" jump emitted here
   Returns the code pointer after the operand consumed by this opcode; opcodes
   without an operand return cc unchanged. An opcode not handled by the switch
   reaches SLJIT_ASSERT_STOP() and returns cc. */
5241 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5242 {
5243 DEFINE_COMPILER;
5244 int length;
5245 unsigned int c, oc, bit;
5246 compare_context context;
5247 struct sljit_jump *jump[4];
5248 jump_list *end_list;
5249 #ifdef SUPPORT_UTF
5250 struct sljit_label *label;
5251 #ifdef SUPPORT_UCP
5252 pcre_uchar propdata[5];
5253 #endif
5254 #endif /* SUPPORT_UTF */
5255
5256 switch(type)
5257 {
/* Backtrack unless STR_PTR equals the `begin` field of jit_arguments. */
5258 case OP_SOD:
5259 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5260 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5261 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5262 return cc;
5263
/* Backtrack unless STR_PTR equals the `str` field of jit_arguments. */
5264 case OP_SOM:
5265 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5266 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5267 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5268 return cc;
5269
/* Fast-calls the routine collected on common->wordboundary, then backtracks
   on its flag result: NOT_ZERO for the negated opcode, ZERO otherwise. */
5270 case OP_NOT_WORD_BOUNDARY:
5271 case OP_WORD_BOUNDARY:
5272 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5273 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5274 return cc;
5275
5276 case OP_NOT_DIGIT:
5277 case OP_DIGIT:
5278 /* Digits are usually 0-9, so it is worth to optimize them. */
5279 detect_partial_match(common, backtracks);
5280 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5281 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5282 read_char7_type(common, type == OP_NOT_DIGIT);
5283 else
5284 #endif
5285 read_char8_type(common, type == OP_NOT_DIGIT);
5286 /* Flip the starting bit in the negative case. */
5287 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5288 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5289 return cc;
5290
/* Same scheme as the digit case, testing the ctype_space bit. */
5291 case OP_NOT_WHITESPACE:
5292 case OP_WHITESPACE:
5293 detect_partial_match(common, backtracks);
5294 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5295 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5296 read_char7_type(common, type == OP_NOT_WHITESPACE);
5297 else
5298 #endif
5299 read_char8_type(common, type == OP_NOT_WHITESPACE);
5300 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5301 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5302 return cc;
5303
/* Same scheme as the digit case, testing the ctype_word bit. */
5304 case OP_NOT_WORDCHAR:
5305 case OP_WORDCHAR:
5306 detect_partial_match(common, backtracks);
5307 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5308 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5309 read_char7_type(common, type == OP_NOT_WORDCHAR);
5310 else
5311 #endif
5312 read_char8_type(common, type == OP_NOT_WORDCHAR);
5313 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5314 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5315 return cc;
5316
/* Reads one character and backtracks if it starts a newline. For a fixed
   newline with common->newline > 255 the two units are compared against
   (newline >> 8) & 0xff and newline & 0xff; every other newline type is
   delegated to check_newlinechar. */
5317 case OP_ANY:
5318 detect_partial_match(common, backtracks);
5319 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5320 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5321 {
5322 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5323 end_list = NULL;
5324 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5325 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5326 else
5327 check_str_end(common, &end_list);
5328
5329 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5330 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5331 set_jumps(end_list, LABEL());
5332 JUMPHERE(jump[0]);
5333 }
5334 else
5335 check_newlinechar(common, common->nltype, backtracks, TRUE);
5336 return cc;
5337
/* Advances STR_PTR over one character without inspecting it further. In UTF
   mode the first unit is loaded so the remaining units can be skipped:
   8-bit adds the utf8_table4 entry for units >= 0xc0; 16-bit adds one more
   unit when (unit & 0xfc00) == 0xd800. */
5338 case OP_ALLANY:
5339 detect_partial_match(common, backtracks);
5340 #ifdef SUPPORT_UTF
5341 if (common->utf)
5342 {
5343 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5344 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5345 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5346 #if defined COMPILE_PCRE8
5347 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5348 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5349 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5350 #elif defined COMPILE_PCRE16
5351 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5352 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5353 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5354 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5355 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5357 #endif
5358 JUMPHERE(jump[0]);
5359 #endif /* COMPILE_PCRE[8|16] */
5360 return cc;
5361 }
5362 #endif
5363 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5364 return cc;
5365
/* Advances STR_PTR by exactly one code unit. */
5366 case OP_ANYBYTE:
5367 detect_partial_match(common, backtracks);
5368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5369 return cc;
5370
5371 #ifdef SUPPORT_UTF
5372 #ifdef SUPPORT_UCP
/* Wraps the two operand units cc[0], cc[1] in a five-element XCL_* sequence
   and compiles it through compile_xclass_matchingpath. */
5373 case OP_NOTPROP:
5374 case OP_PROP:
5375 propdata[0] = XCL_HASPROP;
5376 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5377 propdata[2] = cc[0];
5378 propdata[3] = cc[1];
5379 propdata[4] = XCL_END;
5380 compile_xclass_matchingpath(common, propdata, backtracks);
5381 return cc + 2;
5382 #endif
5383 #endif
5384
/* After a CHAR_CR, a following CHAR_NL (if the subject has not ended) is
   consumed as well; any other first character is checked with
   check_newlinechar using the bsr_nltype setting. */
5385 case OP_ANYNL:
5386 detect_partial_match(common, backtracks);
5387 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5388 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5389 /* We don't need to handle soft partial matching case. */
5390 end_list = NULL;
5391 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5392 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5393 else
5394 check_str_end(common, &end_list);
5395 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5396 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5397 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5398 jump[2] = JUMP(SLJIT_JUMP);
5399 JUMPHERE(jump[0]);
5400 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5401 set_jumps(end_list, LABEL());
5402 JUMPHERE(jump[1]);
5403 JUMPHERE(jump[2]);
5404 return cc;
5405
/* Reads a character, fast-calls the routine collected on common->hspace and
   backtracks on its flag result (inverted for the negated opcode). */
5406 case OP_NOT_HSPACE:
5407 case OP_HSPACE:
5408 detect_partial_match(common, backtracks);
5409 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5410 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5411 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5412 return cc;
5413
/* Same scheme as the horizontal-space case, using common->vspace. */
5414 case OP_NOT_VSPACE:
5415 case OP_VSPACE:
5416 detect_partial_match(common, backtracks);
5417 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5418 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5419 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5420 return cc;
5421
5422 #ifdef SUPPORT_UCP
/* Reads one character, then keeps reading while the ucp_gbtable entry of the
   previous character's gbprop has the bit for the next character's gbprop
   set. STACK_TOP is saved in LOCALS0 and used to hold the previous gbprop
   inside the loop; TMP3 remembers STR_PTR before each extra read so the
   last, rejected character can be un-read. */
5423 case OP_EXTUNI:
5424 detect_partial_match(common, backtracks);
5425 read_char(common);
5426 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5428 /* Optimize register allocation: use a real register. */
5429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5430 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5431
5432 label = LABEL();
5433 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5434 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5435 read_char(common);
5436 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5437 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5438 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5439
5440 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5441 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5442 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5443 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5444 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5445 JUMPTO(SLJIT_NOT_ZERO, label);
5446
5447 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5448 JUMPHERE(jump[0]);
5449 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5450
5451 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5452 {
5453 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5454 /* Since we successfully read a char above, partial matching must occur. */
5455 check_partial(common, TRUE);
5456 JUMPHERE(jump[0]);
5457 }
5458 return cc;
5459 #endif
5460
/* Succeeds at STR_END, or when only a final newline remains before STR_END.
   The three branches handle a two-unit fixed newline, a one-unit fixed
   newline, and the remaining newline types (CR LF is tested first there). */
5461 case OP_EODN:
5462 /* Requires rather complex checks. */
5463 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5464 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5465 {
5466 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5467 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5468 if (common->mode == JIT_COMPILE)
5469 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5470 else
5471 {
5472 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5473 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5474 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5476 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5477 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5478 check_partial(common, TRUE);
5479 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5480 JUMPHERE(jump[1]);
5481 }
5482 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5483 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5484 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5485 }
5486 else if (common->nltype == NLTYPE_FIXED)
5487 {
5488 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5489 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5490 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5491 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5492 }
5493 else
5494 {
5495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5496 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5497 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5498 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5499 jump[2] = JUMP(SLJIT_GREATER);
5500 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5501 /* Equal. */
5502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5503 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5504 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5505
5506 JUMPHERE(jump[1]);
5507 if (common->nltype == NLTYPE_ANYCRLF)
5508 {
5509 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5510 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5511 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5512 }
5513 else
5514 {
/* STR_PTR is saved in LOCALS1 around the read and restored afterwards. */
5515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5516 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5517 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5518 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5519 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5520 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5521 }
5522 JUMPHERE(jump[2]);
5523 JUMPHERE(jump[3]);
5524 }
5525 JUMPHERE(jump[0]);
5526 check_partial(common, FALSE);
5527 return cc;
5528
/* Backtrack while STR_PTR is before STR_END. */
5529 case OP_EOD:
5530 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5531 check_partial(common, FALSE);
5532 return cc;
5533
/* Backtrack if STR_PTR is past the `begin` field of jit_arguments or the
   `notbol` field is non-zero. */
5534 case OP_CIRC:
5535 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5537 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5538 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5539 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5540 return cc;
5541
/* At `begin` only the notbol flag is tested. Past `begin` the subject must
   not have ended and the preceding character(s) must form a newline. */
5542 case OP_CIRCM:
5543 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5544 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5545 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5546 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5547 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5548 jump[0] = JUMP(SLJIT_JUMP);
5549 JUMPHERE(jump[1]);
5550
5551 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5552 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5553 {
5554 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5555 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5556 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5557 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5558 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5559 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5560 }
5561 else
5562 {
5563 skip_char_back(common);
5564 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5565 check_newlinechar(common, common->nltype, backtracks, FALSE);
5566 }
5567 JUMPHERE(jump[0]);
5568 return cc;
5569
/* Backtrack when the `noteol` field is non-zero; otherwise behaves as
   OP_EODN, or as a plain end-of-subject test when common->endonly is set. */
5570 case OP_DOLL:
5571 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5572 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5573 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5574
5575 if (!common->endonly)
5576 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5577 else
5578 {
5579 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5580 check_partial(common, FALSE);
5581 }
5582 return cc;
5583
/* At STR_END only the noteol flag is tested. Before STR_END the following
   character(s) must form a newline; nothing is consumed. */
5584 case OP_DOLLM:
5585 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5586 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5587 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5588 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5589 check_partial(common, FALSE);
5590 jump[0] = JUMP(SLJIT_JUMP);
5591 JUMPHERE(jump[1]);
5592
5593 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5594 {
5595 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5596 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5597 if (common->mode == JIT_COMPILE)
5598 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5599 else
5600 {
5601 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5602 /* STR_PTR = STR_END - IN_UCHARS(1) */
5603 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5604 check_partial(common, TRUE);
5605 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5606 JUMPHERE(jump[1]);
5607 }
5608
5609 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5610 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5611 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5612 }
5613 else
5614 {
5615 peek_char(common, common->nlmax);
5616 check_newlinechar(common, common->nltype, backtracks, FALSE);
5617 }
5618 JUMPHERE(jump[0]);
5619 return cc;
5620
/* Single literal character. `length` is its size in code units (1 plus the
   UTF extra units). Returns cc + length on the paths handled here. */
5621 case OP_CHAR:
5622 case OP_CHARI:
5623 length = 1;
5624 #ifdef SUPPORT_UTF
5625 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5626 #endif
/* Non-partial mode, and either caseful, no other case, or a non-zero
   char_get_othercase_bit: advance STR_PTR up front and compare through
   byte_sequence_compare. */
5627 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5628 {
5629 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5630 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5631
5632 context.length = IN_UCHARS(length);
5633 context.sourcereg = -1;
5634 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5635 context.ucharptr = 0;
5636 #endif
5637 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5638 }
5639
5640 detect_partial_match(common, backtracks);
5641 #ifdef SUPPORT_UTF
5642 if (common->utf)
5643 {
5644 GETCHAR(c, cc);
5645 }
5646 else
5647 #endif
5648 c = *cc;
5649
5650 if (type == OP_CHAR || !char_has_othercase(common, cc))
5651 {
5652 read_char_range(common, c, c, FALSE);
5653 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5654 return cc + length;
5655 }
5656 oc = char_othercase(common, c);
5657 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5658 bit = c ^ oc;
/* When the two cases differ in exactly one bit, OR that bit in and compare
   once; otherwise compare against c and oc separately. */
5659 if (is_powerof2(bit))
5660 {
5661 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5662 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5663 return cc + length;
5664 }
5665 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5666 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5667 JUMPHERE(jump[0]);
5668 return cc + length;
5669
/* Negated single character: backtrack when the subject character equals c
   (or its other case for OP_NOTI). */
5670 case OP_NOT:
5671 case OP_NOTI:
5672 detect_partial_match(common, backtracks);
5673 length = 1;
5674 #ifdef SUPPORT_UTF
5675 if (common->utf)
5676 {
5677 #ifdef COMPILE_PCRE8
5678 c = *cc;
5679 if (c < 128)
5680 {
5681 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5682 if (type == OP_NOT || !char_has_othercase(common, cc))
5683 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5684 else
5685 {
5686 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5687 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5688 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5689 }
5690 /* Skip the variable-length character. */
5691 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5692 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5693 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5694 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5695 JUMPHERE(jump[0]);
5696 return cc + 1;
5697 }
5698 else
5699 #endif /* COMPILE_PCRE8 */
5700 {
5701 GETCHARLEN(c, cc, length);
5702 }
5703 }
5704 else
5705 #endif /* SUPPORT_UTF */
5706 c = *cc;
5707
5708 if (type == OP_NOT || !char_has_othercase(common, cc))
5709 {
5710 read_char_range(common, c, c, TRUE);
5711 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5712 }
5713 else
5714 {
5715 oc = char_othercase(common, c);
5716 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5717 bit = c ^ oc;
5718 if (is_powerof2(bit))
5719 {
5720 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5721 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5722 }
5723 else
5724 {
5725 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5726 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5727 }
5728 }
5729 return cc + length;
5730
/* Bitmap character class: the operand is a 32-byte bit table at cc. If
   check_class_ranges reports that it handled the class, nothing more is
   emitted; otherwise the bit for the character is tested directly. */
5731 case OP_CLASS:
5732 case OP_NCLASS:
5733 detect_partial_match(common, backtracks);
5734
5735 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5736 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5737 read_char_range(common, 0, bit, type == OP_NCLASS);
5738 #else
5739 read_char_range(common, 0, 255, type == OP_NCLASS);
5740 #endif
5741
5742 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5743 return cc + 32 / sizeof(pcre_uchar);
5744
/* Characters above the bitmap range: OP_CLASS backtracks on them, while for
   OP_NCLASS jump[0] skips the bitmap test. */
5745 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5746 jump[0] = NULL;
5747 if (common->utf)
5748 {
5749 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5750 if (type == OP_CLASS)
5751 {
5752 add_jump(compiler, backtracks, jump[0]);
5753 jump[0] = NULL;
5754 }
5755 }
5756 #elif !defined COMPILE_PCRE8
5757 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5758 if (type == OP_CLASS)
5759 {
5760 add_jump(compiler, backtracks, jump[0]);
5761 jump[0] = NULL;
5762 }
5763 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5764
/* Test bit (TMP1 & 7) of the byte at cc + (TMP1 >> 3); backtrack if clear. */
5765 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5766 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5767 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5768 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5769 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5770 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5771
5772 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5773 if (jump[0] != NULL)
5774 JUMPHERE(jump[0]);
5775 #endif
5776
5777 return cc + 32 / sizeof(pcre_uchar);
5778
5779 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the data after the LINK_SIZE field is compiled by
   compile_xclass_matchingpath; GET(cc, 0) supplies the length to skip. */
5780 case OP_XCLASS:
5781 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5782 return cc + GET(cc, 0) - 1;
5783 #endif
5784
/* Moves STR_PTR back by `length` characters: one skip_char_back per
   character in UTF mode, a single subtraction otherwise. Backtracks if the
   move would pass the `begin` field of jit_arguments. */
5785 case OP_REVERSE:
5786 length = GET(cc, 0);
5787 if (length == 0)
5788 return cc + LINK_SIZE;
5789 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5790 #ifdef SUPPORT_UTF
5791 if (common->utf)
5792 {
5793 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5794 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5795 label = LABEL();
5796 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5797 skip_char_back(common);
5798 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5799 JUMPTO(SLJIT_NOT_ZERO, label);
5800 }
5801 else
5802 #endif
5803 {
5804 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5805 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5806 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5807 }
5808 check_start_used_ptr(common);
5809 return cc + LINK_SIZE;
5810 }
/* No case of the switch matched `type`. */
5811 SLJIT_ASSERT_STOP();
5812 return cc;
5813 }
5814
5815 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5816 {
5817 /* This function consumes at least one input character. */
5818 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5819 DEFINE_COMPILER;
5820 pcre_uchar *ccbegin = cc;
5821 compare_context context;
5822 int size;
5823
5824 context.length = 0;
5825 do
5826 {
5827 if (cc >= ccend)
5828 break;
5829
5830 if (*cc == OP_CHAR)
5831 {
5832 size = 1;
5833 #ifdef SUPPORT_UTF
5834 if (common->utf && HAS_EXTRALEN(cc[1]))
5835 size += GET_EXTRALEN(cc[1]);
5836 #endif
5837 }
5838 else if (*cc == OP_CHARI)
5839 {
5840 size = 1;
5841 #ifdef SUPPORT_UTF
5842 if (common->utf)
5843 {
5844 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5845 size = 0;
5846 else if (HAS_EXTRALEN(cc[1]))
5847 size += GET_EXTRALEN(cc[1]);
5848 }
5849 else
5850 #endif
5851 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5852 size = 0;
5853 }
5854 else
5855 size = 0;
5856
5857 cc += 1 + size;
5858 context.length += IN_UCHARS(size);
5859 }
5860 while (size > 0 && context.length <= 128);
5861
5862 cc = ccbegin;
5863 if (context.length > 0)
5864 {
5865 /* We have a fixed-length byte sequence. */
5866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5867 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5868
5869 context.sourcereg = -1;
5870 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5871 context.ucharptr = 0;
5872 #endif
5873 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5874 return cc;
5875 }
5876
5877 /* A non-fixed length character will be checked if length == 0. */
5878 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5879 }
5880
5881 /* Forward declarations. */
5882 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5883 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5884
/* Allocates `size` bytes with sljit_alloc_memory, zero-fills them, stores
`ccstart` as the record's code position and links the record on top of
parent's backtrack list. The variables `compiler`, `parent` and `backtrack`
must be in scope at the point of use. If the compiler reports an error after
the allocation, the enclosing function returns `error`. Every use of a macro
argument is parenthesized so that expression arguments expand safely. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5897
/* Variant of the backtrack-record push for functions returning void:
allocates `size` bytes with sljit_alloc_memory, zero-fills them, stores
`ccstart` as the record's code position and links the record on top of
parent's backtrack list. The variables `compiler`, `parent` and `backtrack`
must be in scope at the point of use. If the compiler reports an error after
the allocation, the enclosing function returns without a value. Every use of
a macro argument is parenthesized so that expression arguments expand
safely. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5910
/* Casts the variable `backtrack` (which must be in scope) to a pointer to `type`. */
5911 #define BACKTRACK_AS(type) ((type *)backtrack)
5912
5913 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5914 {
5915 /* The OVECTOR offset goes to TMP2. */
5916 DEFINE_COMPILER;
5917 int count = GET2(cc, 1 + IMM2_SIZE);
5918 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5919 unsigned int offset;
5920 jump_list *found = NULL;
5921
5922 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5923
5924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5925
5926 count--;
5927 while (count-- > 0)
5928 {
5929 offset = GET2(slot, 0) << 1;
5930 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5931 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5932 slot += common->name_entry_size;
5933 }
5934
5935 offset = GET2(slot, 0) << 1;
5936 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5937 if (backtracks != NULL && !common->jscript_compat)
5938 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5939
5940 set_jumps(found, LABEL());
5941 }
5942
5943 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5944 {
5945 DEFINE_COMPILER;
5946 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5947 int offset = 0;
5948 struct sljit_jump *jump = NULL;
5949 struct sljit_jump *partial;
5950 struct sljit_jump *nopartial;
5951
5952 if (ref)
5953 {
5954 offset = GET2(cc, 1) << 1;
5955 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5956 /* OVECTOR(1) contains the "string begin - 1" constant. */
5957 if (withchecks && !common->jscript_compat)
5958 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
5959 }
5960 else
5961 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5962
5963 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5964 if (common->utf && *cc == OP_REFI)
5965 {
5966 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
5967 if (ref)
5968 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
5969 else
5970 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5971
5972 if (withchecks)
5973 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
5974
5975 /* Needed to save important temporary registers. */
5976 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5977 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
5978 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5979 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5980 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5981 if (common->mode == JIT_COMPILE)
5982 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5983 else
5984 {
5985 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5986 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5987 check_partial(common, FALSE);
5988 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5989 JUMPHERE(nopartial);
5990 }
5991 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5992 }
5993 else
5994 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5995 {
5996 if (ref)
5997 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
5998 else
5999 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6000
6001 if (withchecks)
6002 jump = JUMP(SLJIT_ZERO);
6003
6004 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6005 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6006 if (common->mode == JIT_COMPILE)
6007 add_jump(compiler, backtracks, partial);
6008
6009 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6010 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6011
6012 if (common->mode != JIT_COMPILE)
6013 {
6014 nopartial = JUMP(SLJIT_JUMP);
6015 JUMPHERE(partial);
6016 /* TMP2 -= STR_END - STR_PTR */
6017 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6018 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6019 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6020 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6021 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6022 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6023 JUMPHERE(partial);
6024 check_partial(common, FALSE);
6025 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6026 JUMPHERE(nopartial);
6027 }
6028 }
6029
6030 if (jump != NULL)
6031 {
6032 if (emptyfail)
6033 add_jump(compiler, backtracks, jump);
6034 else
6035 JUMPHERE(jump);
6036 }
6037 }
6038
/* Emits the matching path for a back reference that is followed by a repeat
operator (one of the OP_CRSTAR ... OP_CRMINRANGE cases handled below).

  common  compiler state used by all emitter helpers
  cc      points to the reference opcode; the repeat opcode is read from
          behind its operand(s)
  parent  not referenced directly in this function. NOTE(review): presumably
          consumed by the PUSH_BACKTRACK macro - confirm against its
          definition.

Returns the pattern pointer advanced past the reference and its repeat
operator. PUSH_BACKTRACK is given NULL as its third argument, which is
presumably the value returned when the backtrack record cannot be
allocated. */
6039 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6040 {
6041 DEFINE_COMPILER;
/* TRUE for OP_REF / OP_REFI. For any other opcode the code below calls
compile_dnref_search() and then reads the start/end pair through TMP2. */
6042 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6043 backtrack_common *backtrack;
6044 pcre_uchar type;
/* For ref opcodes: index of the pair in the offset vector. OVECTOR(offset)
and OVECTOR(offset + 1) are compared below to detect a zero length group. */
6045 int offset = 0;
6046 struct sljit_label *label;
6047 struct sljit_jump *zerolength;
6048 struct sljit_jump *jump = NULL;
/* The unmodified opcode position is what compile_ref_matchingpath() and
compile_dnref_search() are given; cc itself is advanced below. */
6049 pcre_uchar *ccbegin = cc;
6050 int min = 0, max = 0;
6051 BOOL minimize;
6052
6053 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6054
6055 if (ref)
6056 offset = GET2(cc, 1) << 1;
6057 else
/* Skip one extra IMM2 operand so that the repeat opcode is found at the
same relative position (cc[1 + IMM2_SIZE]) for both kinds of opcode. */
6058 cc += IMM2_SIZE;
6059 type = cc[1 + IMM2_SIZE];
6060
/* The low bit of the repeat opcode selects the minimizing variant; the
compile-time assert checks that OP_CRSTAR itself is even. */
6061 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6062 minimize = (type & 0x1) != 0;
/* Decode the repeat bounds and step cc past the repeat operator. max == 0
stands for "no upper limit" (see the max == 0 and max > 0 tests below). */
6063 switch(type)
6064 {
6065 case OP_CRSTAR:
6066 case OP_CRMINSTAR:
6067 min = 0;
6068 max = 0;
6069 cc += 1 + IMM2_SIZE + 1;
6070 break;
6071 case OP_CRPLUS:
6072 case OP_CRMINPLUS:
6073 min = 1;
6074 max = 0;
6075 cc += 1 + IMM2_SIZE + 1;
6076 break;
6077 case OP_CRQUERY:
6078 case OP_CRMINQUERY:
6079 min = 0;
6080 max = 1;
6081 cc += 1 + IMM2_SIZE + 1;
6082 break;
6083 case OP_CRRANGE:
6084 case OP_CRMINRANGE:
/* The range form carries two further IMM2 operands: min, then max. */
6085 min = GET2(cc, 1 + IMM2_SIZE + 1);
6086 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6087 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6088 break;
6089 default:
6090 SLJIT_ASSERT_STOP();
6091 break;
6092 }
6093
/* Greedy repeat: this whole branch ends with its own return. */
6094 if (!minimize)
6095 {
6096 if (min == 0)
6097 {
/* Two stack slots: STACK(0) receives the current STR_PTR, STACK(1) is
cleared. */
6098 allocate_stack(common, 2);
6099 if (ref)
6100 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6101 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6103 /* Temporary release of STR_PTR. */
6104 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6105 /* Handles both invalid and empty cases. Since the minimum repeat
6106 count is zero, the invalid case is basically the same as the empty case. */
6107 if (ref)
6108 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6109 else
6110 {
/* NULL is passed where the min > 0 branch passes the backtrack list. TMP2
is then used as the address of the pair and is kept in the POSSESSIVE1
local so it can be reloaded at the top of the loop. */
6111 compile_dnref_search(common, ccbegin, NULL);
6112 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6114 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6115 }
6116 /* Restore if not zero length. */
6117 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6118 }
6119 else
6120 {
/* min > 0: a single slot, cleared. A pair whose first value equals
OVECTOR(1) jumps to the backtrack list (topbacktracks); a zero length pair
skips the loop. */
6121 allocate_stack(common, 1);
6122 if (ref)
6123 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6125 if (ref)
6126 {
6127 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6128 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6129 }
6130 else
6131 {
6132 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6133 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6135 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6136 }
6137 }
6138
/* An iteration counter is only emitted for bounds above one; it is kept in
the POSSESSIVE0 local and starts at 0. */
6139 if (min > 1 || max > 1)
6140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6141
/* Top of the emitted loop; each pass matches the reference once. */
6142 label = LABEL();
6143 if (!ref)
6144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6145 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6146
6147 if (min > 1 || max > 1)
6148 {
6149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6150 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing STR_PTR. */
6152 if (min > 1)
6153 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6154 if (max > 1)
6155 {
/* Leave the loop once max is reached; otherwise push STR_PTR and iterate. */
6156 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6157 allocate_stack(common, 1);
6158 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6159 JUMPTO(SLJIT_JUMP, label);
6160 JUMPHERE(jump);
6161 }
6162 }
6163
6164 if (max == 0)
6165 {
6166 /* Includes min > 1 case as well. */
6167 allocate_stack(common, 1);
6168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6169 JUMPTO(SLJIT_JUMP, label);
6170 }
6171
/* The zero length case lands here, behind the loop. */
6172 JUMPHERE(zerolength);
6173 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6174
6175 count_match(common);
6176 return cc;
6177 }
6178
/* Minimizing repeat. STACK(0) is cleared here and later written with
STR_PTR, STACK(1) is the iteration counter, and for non-ref opcodes a third
slot, STACK(2), keeps the pair address from TMP2. */
6179 allocate_stack(common, ref ? 2 : 3);
6180 if (ref)
6181 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* The counter slot is initialised for every repeat type except
OP_CRMINSTAR. */
6183 if (type != OP_CRMINSTAR)
6184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6185
6186 if (min == 0)
6187 {
6188 /* Handles both invalid and empty cases. Since the minimum repeat
6189 count is zero, the invalid case is basically the same as the empty case. */
6190 if (ref)
6191 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6192 else
6193 {
6194 compile_dnref_search(common, ccbegin, NULL);
6195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6197 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6198 }
6199 /* Length is non-zero, we can match real repeats. */
6200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* This jump is resolved at the end of the function, so the match code
emitted below is bypassed on this path. */
6201 jump = JUMP(SLJIT_JUMP);
6202 }
6203 else
6204 {
6205 if (ref)
6206 {
6207 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6208 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6209 }
6210 else
6211 {
6212 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6215 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6216 }
6217 }
6218
/* Label stored in the backtrack record; it is also the target of the
"counter below min" loop further down. */
6219 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* With an upper limit, jump to the backtrack list once the counter in
STACK(1) has reached max. */
6220 if (max > 0)
6221 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6222
/* Reload the pair address saved in STACK(2) above. */
6223 if (!ref)
6224 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6225 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6227
6228 if (min > 1)
6229 {
/* Count the iteration and repeat immediately while below the minimum. */
6230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6231 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6233 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6234 }
6235 else if (max > 0)
/* Only an upper limit to enforce: increment the counter in place. */
6236 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6237
6238 if (jump != NULL)
6239 JUMPHERE(jump);
6240 JUMPHERE(zerolength);
6241
6242 count_match(common);
6243 return cc;
6244 }
6245
6246 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6247 {
6248 DEFINE_COMPILER;
6249 backtrack_common *backtrack;
6250 recurse_entry *entry = common->entries;
6251 recurse_entry *prev = NULL;
6252 sljit_sw start = GET(cc, 1);
6253 pcre_uchar *start_cc;
6254 BOOL needs_control_head;
6255
6256 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6257
6258 /* Inlining simple patterns. */
6259 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6260 {
6261 start_cc = common->start + start;
6262 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6263 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6264 return cc + 1 + LINK_SIZE;
6265 }
6266
6267 while (entry != NULL)
6268 {
6269 if (entry->start == start)
6270 break;
6271 prev = entry;
6272 entry = entry->next;
6273 }
6274
6275 if (entry == NULL)
6276 {
6277 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6278 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6279 return NULL;
6280 entry->next = NULL;
6281 entry->entry = NULL;
6282 entry->calls = NULL;
6283 entry->start = start;
6284
6285 if (prev != NULL)
6286 prev->next = entry;
6287 else
6288 common->entries = entry;
6289 }
6290
6291 if (common->has_set_som && common->mark_ptr != 0)
6292 {
6293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6294 allocate_stack(common, 2);
6295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6298 }
6299 else if (common->has_set_som || common->mark_ptr != 0)
6300 {
6301 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6302 allocate_stack(common, 1);
6303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6304 }
6305
6306 if (entry->entry == NULL)
6307 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6308 else
6309 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6310 /* Leave if the match is failed. */
6311 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6312 return cc + 1 + LINK_SIZE;
6313 }
6314
6315 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6316 {
6317 const pcre_uchar *begin = arguments->begin;
6318 int *offset_vector = arguments->offsets;
6319 int offset_count = arguments->offset_count;
6320 int i;
6321
6322 if (PUBL(callout) == NULL)
6323 return 0;
6324
6325 callout_block->version = 2;
6326 callout_block->callout_data = arguments->callout_data;
6327
6328 /* Offsets in subject. */
6329 callout_block->subject_length = arguments->end - arguments->begin;
6330 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6331 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6332 #if defined COMPILE_PCRE8
6333 callout_block->subject = (PCRE_SPTR)begin;
6334 #elif defined COMPILE_PCRE16
6335 callout_block->subject = (PCRE_SPTR16)begin;
6336 #elif defined COMPILE_PCRE32
6337 callout_block->subject = (PCRE_SPTR32)begin;
6338 #endif
6339
6340 /* Convert and copy the JIT offset vector to the offset_vector array. */
6341 callout_block->capture_top = 0;
6342 callout_block->offset_vector = offset_vector;
6343 for (i = 2; i < offset_count; i += 2)
6344 {
6345 offset_vector[i] = jit_ovector[i] - begin;
6346 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6347 if (jit_ovector[i] >= begin)
6348 callout_block->capture_top = i;
6349 }
6350
6351 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6352 if (offset_count > 0)
6353 offset_vector[0] = -1;
6354 if (offset_count > 1)
6355 offset_vector[1] = -1;
6356 return (*PUBL(callout))(callout_block);
6357 }
6358
/* Size of a callout_block in bytes, rounded up to a multiple of 8. */
#define CALLOUT_ARG_SIZE ((((int)sizeof(PUBL(callout_block))) + 7) & ~7)

/* Offset of the callout_block field `arg`, relative to the address directly
behind a block of CALLOUT_ARG_SIZE bytes (so every offset is negative). */
#define CALLOUT_ARG_OFFSET(arg) (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6365
6366 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6367 {
6368 DEFINE_COMPILER;
6369 backtrack_common *backtrack;
6370
6371 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6372
6373 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6374
6375 SLJIT_ASSERT(common->capture_last_ptr != 0);
6376 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6377 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6378 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6379 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6380
6381 /* These pointer sized fields temporarly stores internal variables. */
6382 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6385
6386 if (common->mark_ptr != 0)
6387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6388 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6389 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6391
6392 /* Needed to save important temporary registers. */
6393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6394 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6395 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6396 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6397 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6398 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6399 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6400
6401 /* Check return value. */
6402 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6403 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6404 if (common->forced_quit_label == NULL)
6405 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6406 else
6407 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6408 return cc + 2 + 2 * LINK_SIZE;
6409 }
6410
6411 #undef CALLOUT_ARG_SIZE
6412 #undef CALLOUT_ARG_OFFSET
6413
6414 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6415 {
6416 DEFINE_COMPILER;
6417 int framesize;
6418 int extrasize;
6419 BOOL needs_control_head;
6420 int private_data_ptr;
6421 backtrack_common altbacktrack;
6422 pcre_uchar *ccbegin;
6423 pcre_uchar opcode;
6424 pcre_uchar bra = OP_BRA;
6425 jump_list *tmp = NULL;
6426 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6427 jump_list **found;
6428 /* Saving previous accept variables. */
6429 BOOL save_local_exit = common->local_exit;
6430 BOOL save_positive_assert = common->positive_assert;
6431 then_trap_backtrack *save_then_trap = common->then_trap;
6432 struct sljit_label *save_quit_label = common->quit_label;
6433 struct sljit_label *save_accept_label = common->accept_label;
6434 jump_list *save_quit = common->quit;
6435 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6436 jump_list *save_accept = common->accept;
6437 struct sljit_jump *jump;
6438 struct sljit_jump *brajump = NULL;
6439
6440 /* Assert captures then. */
6441 common->then_trap = NULL;
6442
6443 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6444 {
6445 SLJIT_ASSERT(!conditional);
6446 bra = *cc;
6447 cc++;
6448 }
6449 private_data_ptr = PRIVATE_DATA(cc);
6450 SLJIT_ASSERT(private_data_ptr != 0);
6451 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6452 backtrack->framesize = framesize;
6453 backtrack->private_data_ptr = private_data_ptr;
6454 opcode = *cc;
6455 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6456 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6457 ccbegin = cc;
6458 cc += GET(cc, 1);
6459
6460 if (bra == OP_BRAMINZERO)
6461 {
6462 /* This is a braminzero backtrack path. */
6463 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6464 free_stack(common, 1);
6465 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6466 }
6467
6468 if (framesize < 0)
6469 {
6470 extrasize = needs_control_head ? 2 : 1;
6471 if (framesize == no_frame)
6472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6473 allocate_stack(common, extrasize);
6474 if (needs_control_head)
6475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6477 if (needs_control_head)
6478 {
6479 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6481 }
6482 }
6483 else
6484 {
6485 extrasize = needs_control_head ? 3 : 2;
6486 allocate_stack(common, framesize + extrasize);
6487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6488 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6490 if (needs_control_head)
6491 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6493 if (needs_control_head)
6494 {
6495 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6498 }
6499 else
6500 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6501 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6502 }
6503
6504 memset(&altbacktrack, 0, sizeof(backtrack_common));
6505 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6506 {
6507 /* Negative assert is stronger than positive assert. */
6508 common->local_exit = TRUE;
6509 common->quit_label = NULL;
6510 common->quit = NULL;
6511 common->positive_assert = FALSE;
6512 }
6513 else
6514 common->positive_assert = TRUE;
6515 common->positive_assert_quit = NULL;
6516
6517 while (1)
6518 {
6519 common->accept_label = NULL;
6520 common->accept = NULL;
6521 altbacktrack.top = NULL;
6522 altbacktrack.topbacktracks = NULL;
6523
6524 if (*ccbegin == OP_ALT)
6525 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6526
6527 altbacktrack.cc = ccbegin;
6528 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6529 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6530 {
6531 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6532 {
6533 common->local_exit = save_local_exit;
6534 common->quit_label = save_quit_label;
6535 common->quit = save_quit;
6536 }
6537 common->positive_assert = save_positive_assert;
6538 common->then_trap = save_then_trap;
6539 common->accept_label = save_accept_label;
6540 common->positive_assert_quit = save_positive_assert_quit;
6541 common->accept = save_accept;
6542 return NULL;
6543 }
6544 common->accept_label = LABEL();
6545 if (common->accept != NULL)
6546 set_jumps(common->accept, common->accept_label);
6547
6548 /* Reset stack. */
6549 if (framesize < 0)
6550 {
6551 if (framesize == no_frame)
6552 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6553 else
6554 free_stack(common, extrasize);
6555 if (needs_control_head)
6556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6557 }
6558 else
6559 {
6560 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6561 {
6562 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6563 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6564 if (needs_control_head)
6565 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6566 }
6567 else
6568 {
6569 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6570 if (needs_control_head)
6571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6572 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6573 }
6574 }
6575
6576 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6577 {
6578 /* We know that STR_PTR was stored on the top of the stack. */
6579 if (conditional)
6580 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6581 else if (bra == OP_BRAZERO)
6582 {
6583 if (framesize < 0)
6584 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6585 else
6586 {
6587 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6588 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6589 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6590 }
6591 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6593 }
6594 else if (framesize >= 0)
6595 {
6596 /* For OP_BRA and OP_BRAMINZERO. */
6597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6598 }
6599 }
6600 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6601
6602 compile_backtrackingpath(common, altbacktrack.top);
6603 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6604 {
6605 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6606 {
6607 common->local_exit = save_local_exit;
6608 common->quit_label = save_quit_label;
6609 common->quit = save_quit;
6610 }
6611 common->positive_assert = save_positive_assert;
6612 common->then_trap = save_then_trap;
6613 common->accept_label = save_accept_label;
6614 common->positive_assert_quit = save_positive_assert_quit;
6615 common->accept = save_accept;
6616 return NULL;
6617 }
6618 set_jumps(altbacktrack.topbacktracks, LABEL());
6619
6620 if (*cc != OP_ALT)
6621 break;
6622
6623 ccbegin = cc;
6624 cc += GET(cc, 1);
6625 }
6626
6627 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6628 {
6629 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6630 /* Makes the check less complicated below. */
6631 common->positive_assert_quit = common->quit;
6632 }
6633
6634 /* None of them matched. */
6635 if (common->positive_assert_quit != NULL)
6636 {
6637 jump = JUMP(SLJIT_JUMP);
6638 set_jumps(common->positive_assert_quit, LABEL());
6639 SLJIT_ASSERT(framesize != no_stack);
6640 if (framesize < 0)
6641 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6642 else
6643 {
6644 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6645 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6646 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6647 }
6648 JUMPHERE(jump);
6649 }
6650
6651 if (needs_control_head)
6652 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6653
6654 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6655 {
6656 /* Assert is failed. */
6657 if (conditional || bra == OP_BRAZERO)
6658 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6659
6660 if (framesize < 0)
6661 {
6662 /* The topmost item should be 0. */
6663 if (bra == OP_BRAZERO)
6664 {
6665 if (extrasize == 2)
6666 free_stack(common, 1);
6667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6668 }
6669 else
6670 free_stack(common, extrasize);
6671 }
6672 else
6673 {
6674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6675 /* The topmost item should be 0. */
6676 if (bra == OP_BRAZERO)
6677 {
6678 free_stack(common, framesize + extrasize - 1);
6679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6680 }
6681 else
6682 free_stack(common, framesize + extrasize);
6683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6684 }
6685 jump = JUMP(SLJIT_JUMP);
6686 if (bra != OP_BRAZERO)
6687 add_jump(compiler, target, jump);
6688
6689 /* Assert is successful. */
6690 set_jumps(tmp, LABEL());
6691 if (framesize < 0)
6692 {
6693 /* We know that STR_PTR was stored on the top of the stack. */
6694 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6695 /* Keep the STR_PTR on the top of the stack. */
6696 if (bra == OP_BRAZERO)
6697 {
6698 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6699 if (extrasize == 2)
6700 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6701 }
6702 else if (bra == OP_BRAMINZERO)
6703 {
6704 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6706 }
6707 }
6708 else
6709 {
6710 if (bra == OP_BRA)
6711 {
6712 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6713 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6714 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6715 }
6716 else
6717 {
6718 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6719 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6720 if (extrasize == 2)
6721 {
6722 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6723 if (bra == OP_BRAMINZERO)
6724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6725 }
6726 else
6727 {
6728 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6730 }
6731 }
6732 }
6733
6734 if (bra == OP_BRAZERO)
6735 {
6736 backtrack->matchingpath = LABEL();
6737 SET_LABEL(jump, backtrack->matchingpath);
6738 }
6739 else if (bra == OP_BRAMINZERO)
6740 {
6741 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6742 JUMPHERE(brajump);
6743 if (framesize >= 0)
6744 {
6745 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6746 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6747 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6748 }
6749 set_jumps(backtrack->common.topbacktracks, LABEL());
6750 }
6751 }
6752 else
6753 {
6754 /* AssertNot is successful. */
6755 if (framesize < 0)
6756 {
6757 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6758 if (bra != OP_BRA)
6759 {
6760 if (extrasize == 2)
6761 free_stack(common, 1);
6762 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6763 }
6764 else
6765 free_stack(common, extrasize);
6766 }
6767 else
6768 {
6769 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6771 /* The topmost item should be 0. */
6772 if (bra != OP_BRA)
6773 {
6774 free_stack(common, framesize + extrasize - 1);
6775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), S