/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1554 - (show annotations)
Tue Apr 28 11:37:07 2015 UTC (4 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 331023 byte(s)
Error occurred while calculating annotation data.
Tagged copy of 8.37.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
117 consider the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument record passed by pointer to a jit_function (see the typedef later
in this file). All pointer members are declared ahead of the scalar members.
NOTE(review): the generated code presumably reads these members by offset, so
the member order should be treated as fixed - confirm before reordering. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
/* NOTE(review): str / begin / end presumably delimit the subject string and
the current start position - confirm against the code that fills this in. */
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* One-byte flags. NOTE(review): presumably mirror the PCRE_NOTBOL,
PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART options - confirm. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Record describing compiled code. The three arrays are parallel and have one
entry per compile mode (JIT_NUMBER_OF_COMPILE_MODES entries each): a code
pointer, a read-only data head, and a size. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
/* User supplied callback and its opaque argument. */
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. Many members of
compiler_common and of the backtrack structures are heads of such lists. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list that pairs a jump (start) with a label (quit).
NOTE(review): presumably "start" enters an out-of-line stub and "quit" is
where the stub resumes - confirm against the code that builds this list. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list that pairs a label with a pointer to an
sljit_uw. NOTE(review): presumably the final address of the label is written
through update_addr after code generation - confirm. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably these are the values
meant by the "less than 0 if a frame is not needed" framesize members of the
backtrack structures in this file - confirm. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Two tag values. NOTE(review): presumably they mark the kind of an entry in
the control verb chain (see control_head_ptr in compiler_common) - confirm. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer to a function that takes a jit_arguments pointer and returns int,
using the SLJIT_CALL calling convention. NOTE(review): presumably the type of
the generated code entry points - confirm. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. It must be the first member
222 of every structure derived from it (all the *_backtrack structures below
start with a backtrack_common member named common). */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack record for assertions (bracket_backtrack refers to one through
its u.assert member). */
234 typedef struct assert_backtrack {
235 backtrack_common common;
/* Jumps collected for the failed condition case. */
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack record for brackets. Only one member of the union u is
meaningful at a time; the in-line comments say which opcodes use which. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack record. NOTE(review): by its name presumably used for the
possessive bracket opcodes (OP_BRAPOS and friends) - confirm. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack record holding only a matching path label. NOTE(review): by its
name presumably used for OP_BRAMINZERO - confirm. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack record for iterators: remembers the label at which the next
iteration starts. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Node of a singly linked list describing one recursion target
(compiler_common keeps the list in its entries / currententry members). */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls made before the function has been created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack record for recursion. NOTE(review): inlined_pattern presumably
tells whether the recursed pattern was emitted in place rather than called -
confirm. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* Extra opcode value used only by this file; it is defined as
OP_TABLE_LENGTH. NOTE(review): presumably that is one past the last real
opcode, so it cannot clash with byte code produced by the compiler - confirm. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack record for a "then trap" (compiler_common.then_trap points to the
current one). */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
/* NOTE(review): not used in the visible part of the file; presumably the
maximum number of entries of a character range list - confirm at the users. */
316 #define MAX_RANGE_SIZE 4
317
/* State shared by all code generator helpers for one compilation. The
shortcut macros of this file (DEFINE_COMPILER, OVECTOR, OVECTOR_PRIV,
PRIVATE_DATA) read it through a pointer that must be named "common".
Several of the int "..._ptr" members hold a byte offset taken from
ovector_start by check_opcode_types; there 0 means "not allocated yet". */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
361 /* TRUE, when minlength is greater than 0.
NOTE(review): this wording contradicts the member name, and
check_opcode_types sets the member to TRUE when it sees OP_SET_SOM. The
intended meaning is presumably "TRUE when an empty match is possible" -
confirm and correct this comment. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_UTF /
SUPPORT_UCP / COMPILE_PCRE8 configuration macros. */
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
434 /* For byte_sequence_compare. Only length and sourcereg always exist; the
remaining members are compiled in when SLJIT_UNALIGNED is defined non-zero.
The unions c and oc have the same layout: an int view, an unsigned short view
and an array of code units whose element type and count depend on the
COMPILE_PCRE8 / COMPILE_PCRE16 / COMPILE_PCRE32 build (4, 2 or 1 elements).
NOTE(review): presumably c holds the characters to compare and oc their other
case - confirm in byte_sequence_compare. */
435
436 typedef struct compare_context {
437 int length;
438 int sourcereg;
439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
440 int ucharptr;
441 union {
442 sljit_si asint;
443 sljit_uh asushort;
444 #if defined COMPILE_PCRE8
445 sljit_ub asbyte;
446 sljit_ub asuchars[4];
447 #elif defined COMPILE_PCRE16
448 sljit_uh asuchars[2];
449 #elif defined COMPILE_PCRE32
450 sljit_ui asuchars[1];
451 #endif
452 } c;
453 union {
454 sljit_si asint;
455 sljit_uh asushort;
456 #if defined COMPILE_PCRE8
457 sljit_ub asbyte;
458 sljit_ub asuchars[4];
459 #elif defined COMPILE_PCRE16
460 sljit_uh asuchars[2];
461 #elif defined COMPILE_PCRE32
462 sljit_ui asuchars[1];
463 #endif
464 } oc;
465 #endif
466 } compare_context;
467
468 /* Undefine sljit macros. CMP is redefined further down as a shortcut for
sljit_emit_cmp. */
469 #undef CMP
470
471 /* Used for accessing the elements of the stack. Slot i maps to the negative
byte offset -(i + 1) * sizeof(sljit_sw), so STACK(0) is one word below the
base it is applied to. */
472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
473
/* Role names for the sljit registers used by the generated code. Note that
the numbering is not sequential: TMP1 is R0, STACK_TOP is R1 and TMP2 is R2. */
474 #define TMP1 SLJIT_R0
475 #define TMP2 SLJIT_R2
476 #define TMP3 SLJIT_R3
477 #define STR_PTR SLJIT_S0
478 #define STR_END SLJIT_S1
479 #define STACK_TOP SLJIT_R1
480 #define STACK_LIMIT SLJIT_S2
481 #define COUNT_MATCH SLJIT_S3
482 #define ARGUMENTS SLJIT_S4
483 #define RETURN_ADDR SLJIT_R4
484
485 /* Local space layout. The first five macros are byte offsets of machine
words 0 to 4. The OVECTOR*, and PRIVATE_DATA macros need a compiler_common
pointer named "common" in scope. */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0 (0 * sizeof(sljit_sw))
488 #define LOCALS1 (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet been
reached. OVECTOR(i) and OVECTOR_PRIV(i) are the byte offsets of word i
counted from common->ovector_start and common->cbra_ptr respectively.
PRIVATE_DATA(cc) looks up private_data_ptrs by the distance of the opcode
pointer cc from common->start. */
498 #define OVECTOR_START (common->ovector_start)
499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
/* sljit move opcodes for one code unit, selected by the library width being
built: the _UB variants for COMPILE_PCRE8, _UH for COMPILE_PCRE16 and _UI for
COMPILE_PCRE32. Any other configuration stops the build. */
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
516 /* Shortcuts. Every macro except DEFINE_COMPILER and SET_LABEL expands to
an sljit call on a local variable named "compiler"; DEFINE_COMPILER declares
that variable from common->compiler, so it must appear first in any function
that uses the others. */
517 #define DEFINE_COMPILER \
518 struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
523 #define LABEL() \
524 sljit_emit_label(compiler)
525 #define JUMP(type) \
526 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
527 #define JUMPTO(type, label) \
528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
529 #define JUMPHERE(jump) \
530 sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532 sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540 sljit_get_local_base(compiler, (dst), (dstw), (offset))
541
/* Largest positive 32-bit signed value. NOTE(review): presumably passed as
the "no upper limit" argument of the character reading helpers - confirm. */
542 #define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Bit count lookup table: entry n is the number of set bits in the 4-bit
value n (0..15). The table is never meant to be written, so it is declared
const to keep it in read-only storage and to reject accidental stores. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
572 /* Functions which might need modification for every newly supported opcode:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
/* Returns a pointer to the opcode that follows the one at cc. Returns NULL
when the opcode cannot be stepped over: OP_ANYBYTE while common->utf is set,
or any opcode that is not listed in the switch below (the latter also trips
SLJIT_ASSERT_STOP). The common argument is only read in SUPPORT_UTF builds. */
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
/* Opcodes whose full length is the PRIV(OP_lengths) table entry. */
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
675 return cc + PRIV(OP_lengths)[*cc];
676
/* Opcodes that end with a character: the table length covers one code unit,
and in UTF mode the extra units reported for that last unit are skipped too. */
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
/* Type repeats: the result is one code unit short of the table length.
NOTE(review): presumably so that the following type opcode is visited as an
opcode of its own - confirm against the callers. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
/* OP_ANYBYTE is rejected (NULL) in UTF mode, otherwise it is one unit long. */
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The length of an extended class is stored in its own link field. */
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
/* Verbs with an argument: the length depends on the value stored in cc[1].
NOTE(review): cc[1] is presumably the length of the name - confirm. */
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
/* Checks whether the bracket starting at begin is followed by byte-identical
copies of itself, and if so records the repeat in common->private_data_ptrs.

Three consecutive entries are written, starting at the index of the code unit
that follows a closing OP_KET: the distance to skip, a repeat kind (OP_EXACT,
OP_UPTO or OP_MINUPTO) and a count. set_private_data_ptrs later tests the
first of these entries when it reaches that OP_KET.

Returns TRUE when a repeat has been recorded (now or by an earlier call) and
FALSE otherwise. */
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
/* Only brackets closed by a plain OP_KET are considered. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
/* Count the bracket itself plus every identical copy that directly follows. */
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
/* Exactly two copies are not converted. */
954 if (min == 2)
955 return FALSE;
956
/* Look for an optional part: nested "BRAZERO BRA copy" (or BRAMINZERO) levels,
closed by an innermost "BRAZERO copy" and one OP_KET per nesting level. */
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
961 type = *next;
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
/* Every opened OP_BRA level must be closed by its own OP_KET. */
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
/* The last mandatory copy has been folded into the optional repeat, so one
copy fewer remains; move max_end back to the start of that copy. */
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
/* Three or more mandatory copies are recorded as an OP_EXACT repeat. */
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of case labels shared by the switch statements that handle single
character and character type iterators. In set_private_data_ptrs the groups
select the following values:
  ..._PRIVATE_DATA_1       space = 1, size = -2
  ..._PRIVATE_DATA_2A      space = 2, size = -2
  ..._PRIVATE_DATA_2B      space = 2, size = -(2 + IMM2_SIZE)
  ..._TYPE_PRIVATE_DATA_1  space = 1, size = 1
  ..._TYPE_PRIVATE_DATA_2A space = 2 (unless the type is OP_ANYNL or
                           OP_EXTUNI), size = 1
  ..._TYPE_PRIVATE_DATA_2B space = 2 (same exception), size = 1 + IMM2_SIZE
Each macro expands to labels only and ends with a colon, so the statements to
run must follow the macro directly. */
1008 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1009 case OP_MINSTAR: \
1010 case OP_MINPLUS: \
1011 case OP_QUERY: \
1012 case OP_MINQUERY: \
1013 case OP_MINSTARI: \
1014 case OP_MINPLUSI: \
1015 case OP_QUERYI: \
1016 case OP_MINQUERYI: \
1017 case OP_NOTMINSTAR: \
1018 case OP_NOTMINPLUS: \
1019 case OP_NOTQUERY: \
1020 case OP_NOTMINQUERY: \
1021 case OP_NOTMINSTARI: \
1022 case OP_NOTMINPLUSI: \
1023 case OP_NOTQUERYI: \
1024 case OP_NOTMINQUERYI:
1025
1026 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1027 case OP_STAR: \
1028 case OP_PLUS: \
1029 case OP_STARI: \
1030 case OP_PLUSI: \
1031 case OP_NOTSTAR: \
1032 case OP_NOTPLUS: \
1033 case OP_NOTSTARI: \
1034 case OP_NOTPLUSI:
1035
1036 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1037 case OP_UPTO: \
1038 case OP_MINUPTO: \
1039 case OP_UPTOI: \
1040 case OP_MINUPTOI: \
1041 case OP_NOTUPTO: \
1042 case OP_NOTMINUPTO: \
1043 case OP_NOTUPTOI: \
1044 case OP_NOTMINUPTOI:
1045
1046 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1047 case OP_TYPEMINSTAR: \
1048 case OP_TYPEMINPLUS: \
1049 case OP_TYPEQUERY: \
1050 case OP_TYPEMINQUERY:
1051
1052 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1053 case OP_TYPESTAR: \
1054 case OP_TYPEPLUS:
1055
1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1057 case OP_TYPEUPTO: \
1058 case OP_TYPEMINUPTO:
1059
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067
1068 while (cc < ccend)
1069 {
1070 space = 0;
1071 size = 0;
1072 bracketlen = 0;
1073 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1074 return;
1075
1076 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1077 if (detect_repeat(common, cc))
1078 {
1079 /* These brackets are converted to repeats, so no global
1080 based single character repeat is allowed. */
1081 if (cc >= end)
1082 end = bracketend(cc);
1083 }
1084
1085 switch(*cc)
1086 {
1087 case OP_KET:
1088 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 cc += common->private_data_ptrs[cc + 1 - common->start];
1093 }
1094 cc += 1 + LINK_SIZE;
1095 break;
1096
1097 case OP_ASSERT:
1098 case OP_ASSERT_NOT:
1099 case OP_ASSERTBACK:
1100 case OP_ASSERTBACK_NOT:
1101 case OP_ONCE:
1102 case OP_ONCE_NC:
1103 case OP_BRAPOS:
1104 case OP_SBRA:
1105 case OP_SBRAPOS:
1106 case OP_SCOND:
1107 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1108 private_data_ptr += sizeof(sljit_sw);
1109 bracketlen = 1 + LINK_SIZE;
1110 break;
1111
1112 case OP_CBRAPOS:
1113 case OP_SCBRAPOS:
1114 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1115 private_data_ptr += sizeof(sljit_sw);
1116 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1117 break;
1118
1119 case OP_COND:
1120 /* Might be a hidden SCOND. */
1121 alternative = cc + GET(cc, 1);
1122 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1123 {
1124 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1125 private_data_ptr += sizeof(sljit_sw);
1126 }
1127 bracketlen = 1 + LINK_SIZE;
1128 break;
1129
1130 case OP_BRA:
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_CBRA:
1135 case OP_SCBRA:
1136 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1137 break;
1138
1139 CASE_ITERATOR_PRIVATE_DATA_1
1140 space = 1;
1141 size = -2;
1142 break;
1143
1144 CASE_ITERATOR_PRIVATE_DATA_2A
1145 space = 2;
1146 size = -2;
1147 break;
1148
1149 CASE_ITERATOR_PRIVATE_DATA_2B
1150 space = 2;
1151 size = -(2 + IMM2_SIZE);
1152 break;
1153
1154 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1155 space = 1;
1156 size = 1;
1157 break;
1158
1159 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1160 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1161 space = 2;
1162 size = 1;
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1166 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1167 space = 2;
1168 size = 1 + IMM2_SIZE;
1169 break;
1170
1171 case OP_CLASS:
1172 case OP_NCLASS:
1173 size += 1 + 32 / sizeof(pcre_uchar);
1174 space = get_class_iterator_size(cc + size);
1175 break;
1176
1177 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1178 case OP_XCLASS:
1179 size = GET(cc, 1);
1180 space = get_class_iterator_size(cc + size);
1181 break;
1182 #endif
1183
1184 default:
1185 cc = next_opcode(common, cc);
1186 SLJIT_ASSERT(cc != NULL);
1187 break;
1188 }
1189
1190 /* Character iterators, which are not inside a repeated bracket,
1191 gets a private slot instead of allocating it on the stack. */
1192 if (space > 0 && cc >= end)
1193 {
1194 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1195 private_data_ptr += sizeof(sljit_sw) * space;
1196 }
1197
1198 if (size != 0)
1199 {
1200 if (size < 0)
1201 {
1202 cc += -size;
1203 #ifdef SUPPORT_UTF
1204 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1205 #endif
1206 }
1207 else
1208 cc += size;
1209 }
1210
1211 if (bracketlen > 0)
1212 {
1213 if (cc >= end)
1214 {
1215 end = bracketend(cc);
1216 if (end[-1 - LINK_SIZE] == OP_KET)
1217 end = NULL;
1218 }
1219 cc += bracketlen;
1220 }
1221 }
1222 *private_data_start = private_data_ptr;
1223 }
1224
1225 /* Returns with a frame_types (always < 0) if no need for frame. */
1226 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1227 {
1228 int length = 0;
1229 int possessive = 0;
1230 BOOL stack_restore = FALSE;
1231 BOOL setsom_found = recursive;
1232 BOOL setmark_found = recursive;
1233 /* The last capture is a local variable even for recursions. */
1234 BOOL capture_last_found = FALSE;
1235
1236 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1237 SLJIT_ASSERT(common->control_head_ptr != 0);
1238 *needs_control_head = TRUE;
1239 #else
1240 *needs_control_head = FALSE;
1241 #endif
1242
1243 if (ccend == NULL)
1244 {
1245 ccend = bracketend(cc) - (1 + LINK_SIZE);
1246 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1247 {
1248 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1249 /* This is correct regardless of common->capture_last_ptr. */
1250 capture_last_found = TRUE;
1251 }
1252 cc = next_opcode(common, cc);
1253 }
1254
1255 SLJIT_ASSERT(cc != NULL);
1256 while (cc < ccend)
1257 switch(*cc)
1258 {
1259 case OP_SET_SOM:
1260 SLJIT_ASSERT(common->has_set_som);
1261 stack_restore = TRUE;
1262 if (!setsom_found)
1263 {
1264 length += 2;
1265 setsom_found = TRUE;
1266 }
1267 cc += 1;
1268 break;
1269
1270 case OP_MARK:
1271 case OP_PRUNE_ARG:
1272 case OP_THEN_ARG:
1273 SLJIT_ASSERT(common->mark_ptr != 0);
1274 stack_restore = TRUE;
1275 if (!setmark_found)
1276 {
1277 length += 2;
1278 setmark_found = TRUE;
1279 }
1280 if (common->control_head_ptr != 0)
1281 *needs_control_head = TRUE;
1282 cc += 1 + 2 + cc[1];
1283 break;
1284
1285 case OP_RECURSE:
1286 stack_restore = TRUE;
1287 if (common->has_set_som && !setsom_found)
1288 {
1289 length += 2;
1290 setsom_found = TRUE;
1291 }
1292 if (common->mark_ptr != 0 && !setmark_found)
1293 {
1294 length += 2;
1295 setmark_found = TRUE;
1296 }
1297 if (common->capture_last_ptr != 0 && !capture_last_found)
1298 {
1299 length += 2;
1300 capture_last_found = TRUE;
1301 }
1302 cc += 1 + LINK_SIZE;
1303 break;
1304
1305 case OP_CBRA:
1306 case OP_CBRAPOS:
1307 case OP_SCBRA:
1308 case OP_SCBRAPOS:
1309 stack_restore = TRUE;
1310 if (common->capture_last_ptr != 0 && !capture_last_found)
1311 {
1312 length += 2;
1313 capture_last_found = TRUE;
1314 }
1315 length += 3;
1316 cc += 1 + LINK_SIZE + IMM2_SIZE;
1317 break;
1318
1319 default:
1320 stack_restore = TRUE;
1321 /* Fall through. */
1322
1323 case OP_NOT_WORD_BOUNDARY:
1324 case OP_WORD_BOUNDARY:
1325 case OP_NOT_DIGIT:
1326 case OP_DIGIT:
1327 case OP_NOT_WHITESPACE:
1328 case OP_WHITESPACE:
1329 case OP_NOT_WORDCHAR:
1330 case OP_WORDCHAR:
1331 case OP_ANY:
1332 case OP_ALLANY:
1333 case OP_ANYBYTE:
1334 case OP_NOTPROP:
1335 case OP_PROP:
1336 case OP_ANYNL:
1337 case OP_NOT_HSPACE:
1338 case OP_HSPACE:
1339 case OP_NOT_VSPACE:
1340 case OP_VSPACE:
1341 case OP_EXTUNI:
1342 case OP_EODN:
1343 case OP_EOD:
1344 case OP_CIRC:
1345 case OP_CIRCM:
1346 case OP_DOLL:
1347 case OP_DOLLM:
1348 case OP_CHAR:
1349 case OP_CHARI:
1350 case OP_NOT:
1351 case OP_NOTI:
1352
1353 case OP_EXACT:
1354 case OP_POSSTAR:
1355 case OP_POSPLUS:
1356 case OP_POSQUERY:
1357 case OP_POSUPTO:
1358
1359 case OP_EXACTI:
1360 case OP_POSSTARI:
1361 case OP_POSPLUSI:
1362 case OP_POSQUERYI:
1363 case OP_POSUPTOI:
1364
1365 case OP_NOTEXACT:
1366 case OP_NOTPOSSTAR:
1367 case OP_NOTPOSPLUS:
1368 case OP_NOTPOSQUERY:
1369 case OP_NOTPOSUPTO:
1370
1371 case OP_NOTEXACTI:
1372 case OP_NOTPOSSTARI:
1373 case OP_NOTPOSPLUSI:
1374 case OP_NOTPOSQUERYI:
1375 case OP_NOTPOSUPTOI:
1376
1377 case OP_TYPEEXACT:
1378 case OP_TYPEPOSSTAR:
1379 case OP_TYPEPOSPLUS:
1380 case OP_TYPEPOSQUERY:
1381 case OP_TYPEPOSUPTO:
1382
1383 case OP_CLASS:
1384 case OP_NCLASS:
1385 case OP_XCLASS:
1386
1387 cc = next_opcode(common, cc);
1388 SLJIT_ASSERT(cc != NULL);
1389 break;
1390 }
1391
1392 /* Possessive quantifiers can use a special case. */
1393 if (SLJIT_UNLIKELY(possessive == length))
1394 return stack_restore ? no_frame : no_stack;
1395
1396 if (length > 0)
1397 return length + 1;
1398 return stack_restore ? no_frame : no_stack;
1399 }
1400
1401 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1402 {
1403 DEFINE_COMPILER;
1404 BOOL setsom_found = recursive;
1405 BOOL setmark_found = recursive;
1406 /* The last capture is a local variable even for recursions. */
1407 BOOL capture_last_found = FALSE;
1408 int offset;
1409
1410 /* >= 1 + shortest item size (2) */
1411 SLJIT_UNUSED_ARG(stacktop);
1412 SLJIT_ASSERT(stackpos >= stacktop + 2);
1413
1414 stackpos = STACK(stackpos);
1415 if (ccend == NULL)
1416 {
1417 ccend = bracketend(cc) - (1 + LINK_SIZE);
1418 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1419 cc = next_opcode(common, cc);
1420 }
1421
1422 SLJIT_ASSERT(cc != NULL);
1423 while (cc < ccend)
1424 switch(*cc)
1425 {
1426 case OP_SET_SOM:
1427 SLJIT_ASSERT(common->has_set_som);
1428 if (!setsom_found)
1429 {
1430 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1432 stackpos += (int)sizeof(sljit_sw);
1433 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1434 stackpos += (int)sizeof(sljit_sw);
1435 setsom_found = TRUE;
1436 }
1437 cc += 1;
1438 break;
1439
1440 case OP_MARK:
1441 case OP_PRUNE_ARG:
1442 case OP_THEN_ARG:
1443 SLJIT_ASSERT(common->mark_ptr != 0);
1444 if (!setmark_found)
1445 {
1446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1448 stackpos += (int)sizeof(sljit_sw);
1449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1450 stackpos += (int)sizeof(sljit_sw);
1451 setmark_found = TRUE;
1452 }
1453 cc += 1 + 2 + cc[1];
1454 break;
1455
1456 case OP_RECURSE:
1457 if (common->has_set_som && !setsom_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 setsom_found = TRUE;
1465 }
1466 if (common->mark_ptr != 0 && !setmark_found)
1467 {
1468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1470 stackpos += (int)sizeof(sljit_sw);
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472 stackpos += (int)sizeof(sljit_sw);
1473 setmark_found = TRUE;
1474 }
1475 if (common->capture_last_ptr != 0 && !capture_last_found)
1476 {
1477 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1479 stackpos += (int)sizeof(sljit_sw);
1480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1481 stackpos += (int)sizeof(sljit_sw);
1482 capture_last_found = TRUE;
1483 }
1484 cc += 1 + LINK_SIZE;
1485 break;
1486
1487 case OP_CBRA:
1488 case OP_CBRAPOS:
1489 case OP_SCBRA:
1490 case OP_SCBRAPOS:
1491 if (common->capture_last_ptr != 0 && !capture_last_found)
1492 {
1493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1495 stackpos += (int)sizeof(sljit_sw);
1496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1497 stackpos += (int)sizeof(sljit_sw);
1498 capture_last_found = TRUE;
1499 }
1500 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1504 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1505 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1506 stackpos += (int)sizeof(sljit_sw);
1507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1508 stackpos += (int)sizeof(sljit_sw);
1509
1510 cc += 1 + LINK_SIZE + IMM2_SIZE;
1511 break;
1512
1513 default:
1514 cc = next_opcode(common, cc);
1515 SLJIT_ASSERT(cc != NULL);
1516 break;
1517 }
1518
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1520 SLJIT_ASSERT(stackpos == STACK(stacktop));
1521 }
1522
1523 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1524 {
1525 int private_data_length = needs_control_head ? 3 : 2;
1526 int size;
1527 pcre_uchar *alternative;
1528 /* Calculate the sum of the private machine words. */
1529 while (cc < ccend)
1530 {
1531 size = 0;
1532 switch(*cc)
1533 {
1534 case OP_KET:
1535 if (PRIVATE_DATA(cc) != 0)
1536 {
1537 private_data_length++;
1538 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1539 cc += PRIVATE_DATA(cc + 1);
1540 }
1541 cc += 1 + LINK_SIZE;
1542 break;
1543
1544 case OP_ASSERT:
1545 case OP_ASSERT_NOT:
1546 case OP_ASSERTBACK:
1547 case OP_ASSERTBACK_NOT:
1548 case OP_ONCE:
1549 case OP_ONCE_NC:
1550 case OP_BRAPOS:
1551 case OP_SBRA:
1552 case OP_SBRAPOS:
1553 case OP_SCOND:
1554 private_data_length++;
1555 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1556 cc += 1 + LINK_SIZE;
1557 break;
1558
1559 case OP_CBRA:
1560 case OP_SCBRA:
1561 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1562 private_data_length++;
1563 cc += 1 + LINK_SIZE + IMM2_SIZE;
1564 break;
1565
1566 case OP_CBRAPOS:
1567 case OP_SCBRAPOS:
1568 private_data_length += 2;
1569 cc += 1 + LINK_SIZE + IMM2_SIZE;
1570 break;
1571
1572 case OP_COND:
1573 /* Might be a hidden SCOND. */
1574 alternative = cc + GET(cc, 1);
1575 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1576 private_data_length++;
1577 cc += 1 + LINK_SIZE;
1578 break;
1579
1580 CASE_ITERATOR_PRIVATE_DATA_1
1581 if (PRIVATE_DATA(cc))
1582 private_data_length++;
1583 cc += 2;
1584 #ifdef SUPPORT_UTF
1585 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1586 #endif
1587 break;
1588
1589 CASE_ITERATOR_PRIVATE_DATA_2A
1590 if (PRIVATE_DATA(cc))
1591 private_data_length += 2;
1592 cc += 2;
1593 #ifdef SUPPORT_UTF
1594 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596 break;
1597
1598 CASE_ITERATOR_PRIVATE_DATA_2B
1599 if (PRIVATE_DATA(cc))
1600 private_data_length += 2;
1601 cc += 2 + IMM2_SIZE;
1602 #ifdef SUPPORT_UTF
1603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605 break;
1606
1607 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1608 if (PRIVATE_DATA(cc))
1609 private_data_length++;
1610 cc += 1;
1611 break;
1612
1613 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1614 if (PRIVATE_DATA(cc))
1615 private_data_length += 2;
1616 cc += 1;
1617 break;
1618
1619 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1620 if (PRIVATE_DATA(cc))
1621 private_data_length += 2;
1622 cc += 1 + IMM2_SIZE;
1623 break;
1624
1625 case OP_CLASS:
1626 case OP_NCLASS:
1627 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1628 case OP_XCLASS:
1629 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1630 #else
1631 size = 1 + 32 / (int)sizeof(pcre_uchar);
1632 #endif
1633 if (PRIVATE_DATA(cc))
1634 private_data_length += get_class_iterator_size(cc + size);
1635 cc += size;
1636 break;
1637
1638 default:
1639 cc = next_opcode(common, cc);
1640 SLJIT_ASSERT(cc != NULL);
1641 break;
1642 }
1643 }
1644 SLJIT_ASSERT(cc == ccend);
1645 return private_data_length;
1646 }
1647
/* Emits the code which copies the private data words of the byte code range
[cc, ccend) between the SLJIT_SP relative locals and the STACK_TOP relative
area delimited by stackptr and stacktop.

When save is TRUE the locals are written to the stack area, preceded by
common->recursive_head_ptr and, if needs_control_head is set,
common->control_head_ptr. When save is FALSE the values are read back from
the stack area into the locals; the leading head word(s) are stepped over and
not restored.

The values travel through TMP1 and TMP2, which are used alternately: a value
loaded into one register is written out only when that register is needed
again, or at the very end. */
1648 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1649 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1650 {
1651 DEFINE_COMPILER;
/* Offsets of the (at most two) locals produced by one step of the scan. */
1652 int srcw[2];
1653 int count, size;
/* tmp1next selects the register used for the next word; the "empty" flags
tell whether a register currently holds a value that is still to be stored. */
1654 BOOL tmp1next = TRUE;
1655 BOOL tmp1empty = TRUE;
1656 BOOL tmp2empty = TRUE;
1657 pcre_uchar *alternative;
1658 enum {
1659 start,
1660 loop,
1661 end
1662 } status;
1663
/* The "start" state (head pointers) is only visited when saving. */
1664 status = save ? start : loop;
1665 stackptr = STACK(stackptr - 2);
1666 stacktop = STACK(stacktop - 1);
1667
1668 if (!save)
1669 {
/* Step over the head word(s), then preload up to two words from the stack
area so that both registers are ready for the restore loop. */
1670 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1671 if (stackptr < stacktop)
1672 {
1673 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1674 stackptr += sizeof(sljit_sw);
1675 tmp1empty = FALSE;
1676 }
1677 if (stackptr < stacktop)
1678 {
1679 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1680 stackptr += sizeof(sljit_sw);
1681 tmp2empty = FALSE;
1682 }
1683 /* The tmp1next must be TRUE in either way. */
1684 }
1685
/* Each iteration collects the locals of one opcode in srcw[0..count-1] and
then transfers them. */
1686 do
1687 {
1688 count = 0;
1689 switch(status)
1690 {
1691 case start:
1692 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1693 count = 1;
1694 srcw[0] = common->recursive_head_ptr;
1695 if (needs_control_head)
1696 {
1697 SLJIT_ASSERT(common->control_head_ptr != 0);
1698 count = 2;
1699 srcw[1] = common->control_head_ptr;
1700 }
1701 status = loop;
1702 break;
1703
1704 case loop:
1705 if (cc >= ccend)
1706 {
1707 status = end;
1708 break;
1709 }
1710
1711 switch(*cc)
1712 {
1713 case OP_KET:
1714 if (PRIVATE_DATA(cc) != 0)
1715 {
1716 count = 1;
1717 srcw[0] = PRIVATE_DATA(cc);
1718 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1719 cc += PRIVATE_DATA(cc + 1);
1720 }
1721 cc += 1 + LINK_SIZE;
1722 break;
1723
1724 case OP_ASSERT:
1725 case OP_ASSERT_NOT:
1726 case OP_ASSERTBACK:
1727 case OP_ASSERTBACK_NOT:
1728 case OP_ONCE:
1729 case OP_ONCE_NC:
1730 case OP_BRAPOS:
1731 case OP_SBRA:
1732 case OP_SBRAPOS:
1733 case OP_SCOND:
1734 count = 1;
1735 srcw[0] = PRIVATE_DATA(cc);
1736 SLJIT_ASSERT(srcw[0] != 0);
1737 cc += 1 + LINK_SIZE;
1738 break;
1739
1740 case OP_CBRA:
1741 case OP_SCBRA:
1742 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1743 {
1744 count = 1;
1745 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1746 }
1747 cc += 1 + LINK_SIZE + IMM2_SIZE;
1748 break;
1749
1750 case OP_CBRAPOS:
1751 case OP_SCBRAPOS:
1752 count = 2;
1753 srcw[0] = PRIVATE_DATA(cc);
1754 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1755 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1756 cc += 1 + LINK_SIZE + IMM2_SIZE;
1757 break;
1758
1759 case OP_COND:
1760 /* Might be a hidden SCOND. */
1761 alternative = cc + GET(cc, 1);
1762 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1763 {
1764 count = 1;
1765 srcw[0] = PRIVATE_DATA(cc);
1766 SLJIT_ASSERT(srcw[0] != 0);
1767 }
1768 cc += 1 + LINK_SIZE;
1769 break;
1770
1771 CASE_ITERATOR_PRIVATE_DATA_1
1772 if (PRIVATE_DATA(cc))
1773 {
1774 count = 1;
1775 srcw[0] = PRIVATE_DATA(cc);
1776 }
1777 cc += 2;
1778 #ifdef SUPPORT_UTF
1779 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1780 #endif
1781 break;
1782
1783 CASE_ITERATOR_PRIVATE_DATA_2A
1784 if (PRIVATE_DATA(cc))
1785 {
1786 count = 2;
1787 srcw[0] = PRIVATE_DATA(cc);
1788 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1789 }
1790 cc += 2;
1791 #ifdef SUPPORT_UTF
1792 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1793 #endif
1794 break;
1795
1796 CASE_ITERATOR_PRIVATE_DATA_2B
1797 if (PRIVATE_DATA(cc))
1798 {
1799 count = 2;
1800 srcw[0] = PRIVATE_DATA(cc);
1801 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1802 }
1803 cc += 2 + IMM2_SIZE;
1804 #ifdef SUPPORT_UTF
1805 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1806 #endif
1807 break;
1808
1809 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1810 if (PRIVATE_DATA(cc))
1811 {
1812 count = 1;
1813 srcw[0] = PRIVATE_DATA(cc);
1814 }
1815 cc += 1;
1816 break;
1817
1818 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1819 if (PRIVATE_DATA(cc))
1820 {
1821 count = 2;
1822 srcw[0] = PRIVATE_DATA(cc);
1823 srcw[1] = srcw[0] + sizeof(sljit_sw);
1824 }
1825 cc += 1;
1826 break;
1827
1828 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1829 if (PRIVATE_DATA(cc))
1830 {
1831 count = 2;
1832 srcw[0] = PRIVATE_DATA(cc);
1833 srcw[1] = srcw[0] + sizeof(sljit_sw);
1834 }
1835 cc += 1 + IMM2_SIZE;
1836 break;
1837
1838 case OP_CLASS:
1839 case OP_NCLASS:
1840 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1841 case OP_XCLASS:
1842 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1843 #else
1844 size = 1 + 32 / (int)sizeof(pcre_uchar);
1845 #endif
1846 if (PRIVATE_DATA(cc))
1847 switch(get_class_iterator_size(cc + size))
1848 {
1849 case 1:
1850 count = 1;
1851 srcw[0] = PRIVATE_DATA(cc);
1852 break;
1853
1854 case 2:
1855 count = 2;
1856 srcw[0] = PRIVATE_DATA(cc);
1857 srcw[1] = srcw[0] + sizeof(sljit_sw);
1858 break;
1859
1860 default:
1861 SLJIT_ASSERT_STOP();
1862 break;
1863 }
1864 cc += size;
1865 break;
1866
1867 default:
1868 cc = next_opcode(common, cc);
1869 SLJIT_ASSERT(cc != NULL);
1870 break;
1871 }
1872 break;
1873
1874 case end:
1875 SLJIT_ASSERT_STOP();
1876 break;
1877 }
1878
/* Transfer the collected locals. count is decremented before it is used as
the index, so srcw[1] (when present) is handled before srcw[0]. */
1879 while (count > 0)
1880 {
1881 count--;
1882 if (save)
1883 {
/* Saving: write out the previous content of the selected register (if
any) to the stack area, then load the next local into it. */
1884 if (tmp1next)
1885 {
1886 if (!tmp1empty)
1887 {
1888 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1889 stackptr += sizeof(sljit_sw);
1890 }
1891 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1892 tmp1empty = FALSE;
1893 tmp1next = FALSE;
1894 }
1895 else
1896 {
1897 if (!tmp2empty)
1898 {
1899 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1900 stackptr += sizeof(sljit_sw);
1901 }
1902 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1903 tmp2empty = FALSE;
1904 tmp1next = TRUE;
1905 }
1906 }
1907 else
1908 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack area unless the area is exhausted. */
1909 if (tmp1next)
1910 {
1911 SLJIT_ASSERT(!tmp1empty);
1912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1913 tmp1empty = stackptr >= stacktop;
1914 if (!tmp1empty)
1915 {
1916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1917 stackptr += sizeof(sljit_sw);
1918 }
1919 tmp1next = FALSE;
1920 }
1921 else
1922 {
1923 SLJIT_ASSERT(!tmp2empty);
1924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1925 tmp2empty = stackptr >= stacktop;
1926 if (!tmp2empty)
1927 {
1928 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1929 stackptr += sizeof(sljit_sw);
1930 }
1931 tmp1next = TRUE;
1932 }
1933 }
1934 }
1935 }
1936 while (status != end);
1937
1938 if (save)
1939 {
/* Write out whatever is still held in the registers; the register that
would be reused next holds the older value and is written first. */
1940 if (tmp1next)
1941 {
1942 if (!tmp1empty)
1943 {
1944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1945 stackptr += sizeof(sljit_sw);
1946 }
1947 if (!tmp2empty)
1948 {
1949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1950 stackptr += sizeof(sljit_sw);
1951 }
1952 }
1953 else
1954 {
1955 if (!tmp2empty)
1956 {
1957 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1958 stackptr += sizeof(sljit_sw);
1959 }
1960 if (!tmp1empty)
1961 {
1962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1963 stackptr += sizeof(sljit_sw);
1964 }
1965 }
1966 }
1967 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1968 }
1969
1970 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1971 {
1972 pcre_uchar *end = bracketend(cc);
1973 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1974
1975 /* Assert captures then. */
1976 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1977 current_offset = NULL;
1978 /* Conditional block does not. */
1979 if (*cc == OP_COND || *cc == OP_SCOND)
1980 has_alternatives = FALSE;
1981
1982 cc = next_opcode(common, cc);
1983 if (has_alternatives)
1984 current_offset = common->then_offsets + (cc - common->start);
1985
1986 while (cc < end)
1987 {
1988 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1989 cc = set_then_offsets(common, cc, current_offset);
1990 else
1991 {
1992 if (*cc == OP_ALT && has_alternatives)
1993 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1994 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1995 *current_offset = 1;
1996 cc = next_opcode(common, cc);
1997 }
1998 }
1999
2000 return end;
2001 }
2002
/* The iterator case label macros are not used below this point. */
2003 #undef CASE_ITERATOR_PRIVATE_DATA_1
2004 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2005 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2006 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2007 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2008 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2009
2010 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2011 {
2012 return (value & (value - 1)) == 0;
2013 }
2014
2015 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2016 {
2017 while (list)
2018 {
2019 /* sljit_set_label is clever enough to do nothing
2020 if either the jump or the label is NULL. */
2021 SET_LABEL(list->jump, label);
2022 list = list->next;
2023 }
2024 }
2025
2026 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2027 {
2028 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2029 if (list_item)
2030 {
2031 list_item->next = *list;
2032 list_item->jump = jump;
2033 *list = list_item;
2034 }
2035 }
2036
2037 static void add_stub(compiler_common *common, struct sljit_jump *start)
2038 {
2039 DEFINE_COMPILER;
2040 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2041
2042 if (list_item)
2043 {
2044 list_item->start = start;
2045 list_item->quit = LABEL();
2046 list_item->next = common->stubs;
2047 common->stubs = list_item;
2048 }
2049 }
2050
2051 static void flush_stubs(compiler_common *common)
2052 {
2053 DEFINE_COMPILER;
2054 stub_list *list_item = common->stubs;
2055
2056 while (list_item)
2057 {
2058 JUMPHERE(list_item->start);
2059 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2060 JUMPTO(SLJIT_JUMP, list_item->quit);
2061 list_item = list_item->next;
2062 }
2063 common->stubs = NULL;
2064 }
2065
2066 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2067 {
2068 DEFINE_COMPILER;
2069 label_addr_list *label_addr;
2070
2071 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2072 if (label_addr == NULL)
2073 return;
2074 label_addr->label = LABEL();
2075 label_addr->update_addr = update_addr;
2076 label_addr->next = common->label_addrs;
2077 common->label_addrs = label_addr;
2078 }
2079
2080 static SLJIT_INLINE void count_match(compiler_common *common)
2081 {
2082 DEFINE_COMPILER;
2083
2084 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2085 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2086 }
2087
2088 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2089 {
2090 /* May destroy all locals and registers except TMP2. */
2091 DEFINE_COMPILER;
2092
2093 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2094 #ifdef DESTROY_REGISTERS
2095 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2096 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2097 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2100 #endif
2101 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2102 }
2103
2104 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2105 {
2106 DEFINE_COMPILER;
2107 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2108 }
2109
2110 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2111 {
2112 DEFINE_COMPILER;
2113 sljit_uw *result;
2114
2115 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2116 return NULL;
2117
2118 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2119 if (SLJIT_UNLIKELY(result == NULL))
2120 {
2121 sljit_set_compiler_memory_error(compiler);
2122 return NULL;
2123 }
2124
2125 *(void**)result = common->read_only_data_head;
2126 common->read_only_data_head = (void *)result;
2127 return result + 1;
2128 }
2129
2130 static void free_read_only_data(void *current, void *allocator_data)
2131 {
2132 void *next;
2133
2134 SLJIT_UNUSED_ARG(allocator_data);
2135
2136 while (current != NULL)
2137 {
2138 next = *(void**)current;
2139 SLJIT_FREE(current, allocator_data);
2140 current = next;
2141 }
2142 }
2143
2144 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2145 {
2146 DEFINE_COMPILER;
2147 struct sljit_label *loop;
2148 int i;
2149
2150 /* At this point we can freely use all temporary registers. */
2151 SLJIT_ASSERT(length > 1);
2152 /* TMP1 returns with begin - 1. */
2153 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2154 if (length < 8)
2155 {
2156 for (i = 1; i < length; i++)
2157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2158 }
2159 else
2160 {
2161 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2162 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2163 loop = LABEL();
2164 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2165 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2166 JUMPTO(SLJIT_NOT_ZERO, loop);
2167 }
2168 }
2169
2170 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2171 {
2172 DEFINE_COMPILER;
2173 struct sljit_label *loop;
2174 int i;
2175
2176 SLJIT_ASSERT(length > 1);
2177 /* OVECTOR(1) contains the "string begin - 1" constant. */
2178 if (length > 2)
2179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2180 if (length < 8)
2181 {
2182 for (i = 2; i < length; i++)
2183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2184 }
2185 else
2186 {
2187 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2188 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2189 loop = LABEL();
2190 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2191 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2192 JUMPTO(SLJIT_NOT_ZERO, loop);
2193 }
2194
2195 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2196 if (common->mark_ptr != 0)
2197 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2198 if (common->control_head_ptr != 0)
2199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2200 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2202 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2203 }
2204
2205 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2206 {
2207 while (current != NULL)
2208 {
2209 switch (current[-2])
2210 {
2211 case type_then_trap:
2212 break;
2213
2214 case type_mark:
2215 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2216 return current[-4];
2217 break;
2218
2219 default:
2220 SLJIT_ASSERT_STOP();
2221 break;
2222 }
2223 current = (sljit_sw*)current[-1];
2224 }
2225 return -1;
2226 }
2227
2228 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2229 {
2230 DEFINE_COMPILER;
2231 struct sljit_label *loop;
2232 struct sljit_jump *early_quit;
2233
2234 /* At this point we can freely use all registers. */
2235 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2237
2238 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2239 if (common->mark_ptr != 0)
2240 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2241 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2242 if (common->mark_ptr != 0)
2243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2244 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2245 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2246 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2247 /* Unlikely, but possible */
2248 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2249 loop = LABEL();
2250 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2251 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2252 /* Copy the integer value to the output buffer */
2253 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2254 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2255 #endif
2256 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2257 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2258 JUMPTO(SLJIT_NOT_ZERO, loop);
2259 JUMPHERE(early_quit);
2260
2261 /* Calculate the return value, which is the maximum ovector value. */
2262 if (topbracket > 1)
2263 {
2264 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2265 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2266
2267 /* OVECTOR(0) is never equal to SLJIT_S2. */
2268 loop = LABEL();
2269 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2270 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2271 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2272 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2273 }
2274 else
2275 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2276 }
2277
2278 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2279 {
2280 DEFINE_COMPILER;
2281 struct sljit_jump *jump;
2282
2283 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2284 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2285 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2286
2287 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2288 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2289 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2290 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2291
2292 /* Store match begin and end. */
2293 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2294 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2295
2296 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2297 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2298 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2299 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2300 #endif
2301 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2302 JUMPHERE(jump);
2303
2304 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2305 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2306 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2307 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2308 #endif
2309 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2310
2311 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2312 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2313 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2314 #endif
2315 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2316
2317 JUMPTO(SLJIT_JUMP, quit);
2318 }
2319
2320 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2321 {
2322 /* May destroy TMP1. */
2323 DEFINE_COMPILER;
2324 struct sljit_jump *jump;
2325
2326 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2327 {
2328 /* The value of -1 must be kept for start_used_ptr! */
2329 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2330 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2331 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2332 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2334 JUMPHERE(jump);
2335 }
2336 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2337 {
2338 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2340 JUMPHERE(jump);
2341 }
2342 }
2343
2344 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2345 {
2346 /* Detects if the character has an othercase. */
2347 unsigned int c;
2348
2349 #ifdef SUPPORT_UTF
2350 if (common->utf)
2351 {
2352 GETCHAR(c, cc);
2353 if (c > 127)
2354 {
2355 #ifdef SUPPORT_UCP
2356 return c != UCD_OTHERCASE(c);
2357 #else
2358 return FALSE;
2359 #endif
2360 }
2361 #ifndef COMPILE_PCRE8
2362 return common->fcc[c] != c;
2363 #endif
2364 }
2365 else
2366 #endif
2367 c = *cc;
2368 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2369 }
2370
2371 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2372 {
2373 /* Returns with the othercase. */
2374 #ifdef SUPPORT_UTF
2375 if (common->utf && c > 127)
2376 {
2377 #ifdef SUPPORT_UCP
2378 return UCD_OTHERCASE(c);
2379 #else
2380 return c;
2381 #endif
2382 }
2383 #endif
2384 return TABLE_GET(c, common->fcc, c);
2385 }
2386
2387 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2388 {
2389 /* Detects if the character and its othercase has only 1 bit difference. */
2390 unsigned int c, oc, bit;
2391 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2392 int n;
2393 #endif
2394
2395 #ifdef SUPPORT_UTF
2396 if (common->utf)
2397 {
2398 GETCHAR(c, cc);
2399 if (c <= 127)
2400 oc = common->fcc[c];
2401 else
2402 {
2403 #ifdef SUPPORT_UCP
2404 oc = UCD_OTHERCASE(c);
2405 #else
2406 oc = c;
2407 #endif
2408 }
2409 }
2410 else
2411 {
2412 c = *cc;
2413 oc = TABLE_GET(c, common->fcc, c);
2414 }
2415 #else
2416 c = *cc;
2417 oc = TABLE_GET(c, common->fcc, c);
2418 #endif
2419
2420 SLJIT_ASSERT(c != oc);
2421
2422 bit = c ^ oc;
2423 /* Optimized for English alphabet. */
2424 if (c <= 127 && bit == 0x20)
2425 return (0 << 8) | 0x20;
2426
2427 /* Since c != oc, they must have at least 1 bit difference. */
2428 if (!is_powerof2(bit))
2429 return 0;
2430
2431 #if defined COMPILE_PCRE8
2432
2433 #ifdef SUPPORT_UTF
2434 if (common->utf && c > 127)
2435 {
2436 n = GET_EXTRALEN(*cc);
2437 while ((bit & 0x3f) == 0)
2438 {
2439 n--;
2440 bit >>= 6;
2441 }
2442 return (n << 8) | bit;
2443 }
2444 #endif /* SUPPORT_UTF */
2445 return (0 << 8) | bit;
2446
2447 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2448
2449 #ifdef SUPPORT_UTF
2450 if (common->utf && c > 65535)
2451 {
2452 if (bit >= (1 << 10))
2453 bit >>= 10;
2454 else
2455 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2456 }
2457 #endif /* SUPPORT_UTF */
2458 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2459
2460 #endif /* COMPILE_PCRE[8|16|32] */
2461 }
2462
2463 static void check_partial(compiler_common *common, BOOL force)
2464 {
2465 /* Checks whether a partial matching is occurred. Does not modify registers. */
2466 DEFINE_COMPILER;
2467 struct sljit_jump *jump = NULL;
2468
2469 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2470
2471 if (common->mode == JIT_COMPILE)
2472 return;
2473
2474 if (!force)
2475 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2476 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2477 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2478
2479 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2481 else
2482 {
2483 if (common->partialmatchlabel != NULL)
2484 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2485 else
2486 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2487 }
2488
2489 if (jump != NULL)
2490 JUMPHERE(jump);
2491 }
2492
2493 static void check_str_end(compiler_common *common, jump_list **end_reached)
2494 {
2495 /* Does not affect registers. Usually used in a tight spot. */
2496 DEFINE_COMPILER;
2497 struct sljit_jump *jump;
2498
2499 if (common->mode == JIT_COMPILE)
2500 {
2501 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2502 return;
2503 }
2504
2505 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2506 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2507 {
2508 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2510 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2511 }
2512 else
2513 {
2514 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2515 if (common->partialmatchlabel != NULL)
2516 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2517 else
2518 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2519 }
2520 JUMPHERE(jump);
2521 }
2522
2523 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2524 {
2525 DEFINE_COMPILER;
2526 struct sljit_jump *jump;
2527
2528 if (common->mode == JIT_COMPILE)
2529 {
2530 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2531 return;
2532 }
2533
2534 /* Partial matching mode. */
2535 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2536 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2537 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2538 {
2539 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2540 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2541 }
2542 else
2543 {
2544 if (common->partialmatchlabel != NULL)
2545 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2546 else
2547 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2548 }
2549 JUMPHERE(jump);
2550 }
2551
2552 static void peek_char(compiler_common *common, pcre_uint32 max)
2553 {
2554 /* Reads the character into TMP1, keeps STR_PTR.
2555 Does not check STR_END. TMP2 Destroyed. */
2556 DEFINE_COMPILER;
2557 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2558 struct sljit_jump *jump;
2559 #endif
2560
2561 SLJIT_UNUSED_ARG(max);
2562
2563 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2564 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2565 if (common->utf)
2566 {
2567 if (max < 128) return;
2568
2569 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2570 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2571 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2572 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2573 JUMPHERE(jump);
2574 }
2575 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2576
2577 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2578 if (common->utf)
2579 {
2580 if (max < 0xd800) return;
2581
2582 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2583 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2584 /* TMP2 contains the high surrogate. */
2585 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2586 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2587 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2588 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2589 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2590 JUMPHERE(jump);
2591 }
2592 #endif
2593 }
2594
2595 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2596
2597 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2598 {
2599 /* Tells whether the character codes below 128 are enough
2600 to determine a match. */
2601 const pcre_uint8 value = nclass ? 0xff : 0;
2602 const pcre_uint8 *end = bitset + 32;
2603
2604 bitset += 16;
2605 do
2606 {
2607 if (*bitset++ != value)
2608 return FALSE;
2609 }
2610 while (bitset < end);
2611 return TRUE;
2612 }
2613
2614 static void read_char7_type(compiler_common *common, BOOL full_read)
2615 {
2616 /* Reads the precise character type of a character into TMP1, if the character
2617 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2618 full_read argument tells whether characters above max are accepted or not. */
2619 DEFINE_COMPILER;
2620 struct sljit_jump *jump;
2621
2622 SLJIT_ASSERT(common->utf);
2623
2624 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2625 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2626
2627 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2628
2629 if (full_read)
2630 {
2631 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2632 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2634 JUMPHERE(jump);
2635 }
2636 }
2637
2638 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2639
2640 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2641 {
2642 /* Reads the precise value of a character into TMP1, if the character is
2643 between min and max (c >= min && c <= max). Otherwise it returns with a value
2644 outside the range. Does not check STR_END. */
2645 DEFINE_COMPILER;
2646 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2647 struct sljit_jump *jump;
2648 #endif
2649 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2650 struct sljit_jump *jump2;
2651 #endif
2652
2653 SLJIT_UNUSED_ARG(update_str_ptr);
2654 SLJIT_UNUSED_ARG(min);
2655 SLJIT_UNUSED_ARG(max);
2656 SLJIT_ASSERT(min <= max);
2657
2658 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2660
2661 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2662 if (common->utf)
2663 {
2664 if (max < 128 && !update_str_ptr) return;
2665
2666 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2667 if (min >= 0x10000)
2668 {
2669 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2670 if (update_str_ptr)
2671 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2672 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2673 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2674 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2675 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2676 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2677 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2678 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2679 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2682 if (!update_str_ptr)
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2684 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2685 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2686 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2687 JUMPHERE(jump2);
2688 if (update_str_ptr)
2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2690 }
2691 else if (min >= 0x800 && max <= 0xffff)
2692 {
2693 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2694 if (update_str_ptr)
2695 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2696 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2697 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2702 if (!update_str_ptr)
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2704 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2705 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2706 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2707 JUMPHERE(jump2);
2708 if (update_str_ptr)
2709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2710 }
2711 else if (max >= 0x800)
2712 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2713 else if (max < 128)
2714 {
2715 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2717 }
2718 else
2719 {
2720 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2721 if (!update_str_ptr)
2722 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2723 else
2724 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2725 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2726 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2727 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2728 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2729 if (update_str_ptr)
2730 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2731 }
2732 JUMPHERE(jump);
2733 }
2734 #endif
2735
2736 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2737 if (common->utf)
2738 {
2739 if (max >= 0x10000)
2740 {
2741 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2742 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2743 /* TMP2 contains the high surrogate. */
2744 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2745 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2746 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2747 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2748 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2749 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2750 JUMPHERE(jump);
2751 return;
2752 }
2753
2754 if (max < 0xd800 && !update_str_ptr) return;
2755
2756 /* Skip low surrogate if necessary. */
2757 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2758 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2759 if (update_str_ptr)
2760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761 if (max >= 0xd800)
2762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2763 JUMPHERE(jump);
2764 }
2765 #endif
2766 }
2767
2768 static SLJIT_INLINE void read_char(compiler_common *common)
2769 {
2770 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2771 }
2772
2773 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2774 {
2775 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2776 DEFINE_COMPILER;
2777 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2778 struct sljit_jump *jump;
2779 #endif
2780 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2781 struct sljit_jump *jump2;
2782 #endif
2783
2784 SLJIT_UNUSED_ARG(update_str_ptr);
2785
2786 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2787 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2788
2789 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2790 if (common->utf)
2791 {
2792 /* This can be an extra read in some situations, but hopefully
2793 it is needed in most cases. */
2794 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2796 if (!update_str_ptr)
2797 {
2798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2799 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2800 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2801 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2802 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2803 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2804 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2805 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2806 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2807 JUMPHERE(jump2);
2808 }
2809 else
2810 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2811 JUMPHERE(jump);
2812 return;
2813 }
2814 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2815
2816 #if !defined COMPILE_PCRE8
2817 /* The ctypes array contains only 256 values. */
2818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2819 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2820 #endif
2821 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2822 #if !defined COMPILE_PCRE8
2823 JUMPHERE(jump);
2824 #endif
2825
2826 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2827 if (common->utf && update_str_ptr)
2828 {
2829 /* Skip low surrogate if necessary. */
2830 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2831 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2832 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2833 JUMPHERE(jump);
2834 }
2835 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2836 }
2837
2838 static void skip_char_back(compiler_common *common)
2839 {
2840 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2841 DEFINE_COMPILER;
2842 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2843 #if defined COMPILE_PCRE8
2844 struct sljit_label *label;
2845
2846 if (common->utf)
2847 {
2848 label = LABEL();
2849 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2850 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2851 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2852 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2853 return;
2854 }
2855 #elif defined COMPILE_PCRE16
2856 if (common->utf)
2857 {
2858 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2859 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2860 /* Skip low surrogate if necessary. */
2861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2863 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2864 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2865 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2866 return;
2867 }
2868 #endif /* COMPILE_PCRE[8|16] */
2869 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2870 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2871 }
2872
2873 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2874 {
2875 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2876 DEFINE_COMPILER;
2877 struct sljit_jump *jump;
2878
2879 if (nltype == NLTYPE_ANY)
2880 {
2881 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2882 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2883 }
2884 else if (nltype == NLTYPE_ANYCRLF)
2885 {
2886 if (jumpifmatch)
2887 {
2888 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2889 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2890 }
2891 else
2892 {
2893 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2894 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2895 JUMPHERE(jump);
2896 }
2897 }
2898 else
2899 {
2900 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2901 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2902 }
2903 }
2904
2905 #ifdef SUPPORT_UTF
2906
2907 #if defined COMPILE_PCRE8
2908 static void do_utfreadchar(compiler_common *common)
2909 {
2910 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2911 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2912 DEFINE_COMPILER;
2913 struct sljit_jump *jump;
2914
2915 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2916 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2917 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2918 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2919 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2920 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2921
2922 /* Searching for the first zero. */
2923 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2924 jump = JUMP(SLJIT_NOT_ZERO);
2925 /* Two byte sequence. */
2926 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2927 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2928 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2929
2930 JUMPHERE(jump);
2931 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2932 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2933 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2934 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2935 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2936
2937 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2938 jump = JUMP(SLJIT_NOT_ZERO);
2939 /* Three byte sequence. */
2940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2941 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2942 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2943
2944 /* Four byte sequence. */
2945 JUMPHERE(jump);
2946 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2947 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2948 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2950 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2951 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2953 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2954 }
2955
2956 static void do_utfreadchar16(compiler_common *common)
2957 {
2958 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2959 of the character (>= 0xc0). Return value in TMP1. */
2960 DEFINE_COMPILER;
2961 struct sljit_jump *jump;
2962
2963 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2964 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2965 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2966 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2967 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2968 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2969
2970 /* Searching for the first zero. */
2971 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2972 jump = JUMP(SLJIT_NOT_ZERO);
2973 /* Two byte sequence. */
2974 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2975 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2976
2977 JUMPHERE(jump);
2978 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2979 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
2980 /* This code runs only in 8 bit mode. No need to shift the value. */
2981 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2982 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2983 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2984 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2985 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2986 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2987 /* Three byte sequence. */
2988 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2989 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2990 }
2991
2992 static void do_utfreadtype8(compiler_common *common)
2993 {
2994 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2995 of the character (>= 0xc0). Return value in TMP1. */
2996 DEFINE_COMPILER;
2997 struct sljit_jump *jump;
2998 struct sljit_jump *compare;
2999
3000 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3001
3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3003 jump = JUMP(SLJIT_NOT_ZERO);
3004 /* Two byte sequence. */
3005 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3007 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3008 /* The upper 5 bits are known at this point. */
3009 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3010 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3011 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3012 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3013 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3014 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3015
3016 JUMPHERE(compare);
3017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3018 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3019
3020 /* We only have types for characters less than 256. */
3021 JUMPHERE(jump);
3022 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3024 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3025 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3026 }
3027
3028 #endif /* COMPILE_PCRE8 */
3029
3030 #endif /* SUPPORT_UTF */
3031
3032 #ifdef SUPPORT_UCP
3033
3034 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3035 #define UCD_BLOCK_MASK 127
3036 #define UCD_BLOCK_SHIFT 7
3037
3038 static void do_getucd(compiler_common *common)
3039 {
3040 /* Search the UCD record for the character comes in TMP1.
3041 Returns chartype in TMP1 and UCD offset in TMP2. */
3042 DEFINE_COMPILER;
3043
3044 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3045
3046 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3047 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3048 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3049 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3050 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3051 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3052 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3053 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3054 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3055 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3056 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3057 }
3058 #endif
3059
3060 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3061 {
3062 DEFINE_COMPILER;
3063 struct sljit_label *mainloop;
3064 struct sljit_label *newlinelabel = NULL;
3065 struct sljit_jump *start;
3066 struct sljit_jump *end = NULL;
3067 struct sljit_jump *nl = NULL;
3068 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3069 struct sljit_jump *singlechar;
3070 #endif
3071 jump_list *newline = NULL;
3072 BOOL newlinecheck = FALSE;
3073 BOOL readuchar = FALSE;
3074
3075 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3076 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3077 newlinecheck = TRUE;
3078
3079 if (firstline)
3080 {
3081 /* Search for the end of the first line. */
3082 SLJIT_ASSERT(common->first_line_end != 0);
3083 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3084
3085 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3086 {
3087 mainloop = LABEL();
3088 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3089 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3090 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3091 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3092 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3093 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3094 JUMPHERE(end);
3095 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3096 }
3097 else
3098 {
3099 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3100 mainloop = LABEL();
3101 /* Continual stores does not cause data dependency. */
3102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3103 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3104 check_newlinechar(common, common->nltype, &newline, TRUE);
3105 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3106 JUMPHERE(end);
3107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3108 set_jumps(newline, LABEL());
3109 }
3110
3111 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3112 }
3113
3114 start = JUMP(SLJIT_JUMP);
3115
3116 if (newlinecheck)
3117 {
3118 newlinelabel = LABEL();
3119 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3121 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3122 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3123 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3124 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3125 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3126 #endif
3127 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3128 nl = JUMP(SLJIT_JUMP);
3129 }
3130
3131 mainloop = LABEL();
3132
3133 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3134 #ifdef SUPPORT_UTF
3135 if (common->utf) readuchar = TRUE;
3136 #endif
3137 if (newlinecheck) readuchar = TRUE;
3138
3139 if (readuchar)
3140 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3141
3142 if (newlinecheck)
3143 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3144
3145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3146 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3147 #if defined COMPILE_PCRE8
3148 if (common->utf)
3149 {
3150 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3151 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3152 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3153 JUMPHERE(singlechar);
3154 }
3155 #elif defined COMPILE_PCRE16
3156 if (common->utf)
3157 {
3158 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3159 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3161 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3162 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3164 JUMPHERE(singlechar);
3165 }
3166 #endif /* COMPILE_PCRE[8|16] */
3167 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3168 JUMPHERE(start);
3169
3170 if (newlinecheck)
3171 {
3172 JUMPHERE(end);
3173 JUMPHERE(nl);
3174 }
3175
3176 return mainloop;
3177 }
3178
3179 #define MAX_N_CHARS 16
3180 #define MAX_N_BYTES 8
3181
3182 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3183 {
3184 pcre_uint8 len = bytes[0];
3185 int i;
3186
3187 if (len == 255)
3188 return;
3189
3190 if (len == 0)
3191 {
3192 bytes[0] = 1;
3193 bytes[1] = byte;
3194 return;
3195 }
3196
3197 for (i = len; i > 0; i--)
3198 if (bytes[i] == byte)
3199 return;
3200
3201 if (len >= MAX_N_BYTES - 1)
3202 {
3203 bytes[0] = 255;
3204 return;
3205 }
3206
3207 len++;
3208 bytes[len] = byte;
3209 bytes[0] = len;
3210 }
3211
/* Walks the compiled pattern starting at cc and records, for each of the
leading character positions, which code units may appear there.

  common     compiler state; the utf flag and the fcc / ctypes tables are read
  cc         current position in the compiled pattern
  chars      two entries per position: chars[0] is the character value with
             every "may differ" bit set, chars[1] is the mask of those bits
             (both are set to an all-ones mask when anything can match)
  bytes      MAX_N_BYTES entries per position, filled by add_prefix_byte();
             bytes[0] is set to 255 when anything can match
  max_chars  maximum number of positions that may still be recorded

Returns the number of positions recorded by this invocation. The function
calls itself for optional characters and for bracket alternatives. */
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, caseless;
int len, repeat, len_save, consumed = 0;
pcre_uint32 chr, mask;
pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other-case form of one character, in code units. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar othercase[2];
#else
pcre_uchar othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  /* Per-item state:
       last      stop scanning after this item has been recorded
       any       the item matches a character type, not a literal
       caseless  the literal is matched case-insensitively */
  last = TRUE;
  any = FALSE;
  caseless = FALSE;
  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through. */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    /* The whole assertion group is skipped without recording anything. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only one occurrence is recorded; "last" stays TRUE, so the scan ends
    after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through. */
    case OP_EXACT:
    /* The literal is recorded "repeat" times. */
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* First scan the path on which the optional character is absent. The
    number of positions that scan covered becomes the limit for the path on
    which the character is present (handled below as a plain literal). */
    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Advance by the link value stored after the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative after the first is scanned recursively into the same
    chars / bytes slots; each result becomes the new limit. The first
    alternative is then scanned by continuing this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    /* Capturing brackets carry an extra IMM2_SIZE operand. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
#endif
    any = TRUE;
    /* Skip the opcode and the 32-byte class bitmap. */
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 32 / sizeof(pcre_uchar);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Step over the opcode here; the shared code below steps over one more
    unit. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UCP
    case OP_NOTPROP:
    case OP_PROP:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the repeat count is taken here; the item that follows is handled
    by the next loop iteration. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    /* Any other opcode ends the scan. */
    return consumed;
    }

  if (any)
    {
    /* Mark "repeat" positions as matching any code unit. */
#if defined COMPILE_PCRE8
    mask = 0xff;
#elif defined COMPILE_PCRE16
    mask = 0xffff;
#elif defined COMPILE_PCRE32
    mask = 0xffffffff;
#else
    SLJIT_ASSERT_STOP();
#endif

    do
      {
      chars[0] = mask;
      chars[1] = mask;
      bytes[0] = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  /* From here on the item is a literal character starting at cc; len is its
  length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* Give up when the other case encodes to a different number of units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
      othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    caseless = FALSE;

  /* Record the literal "repeat" times, one code unit per position. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      chr = *cc;
#ifdef COMPILE_PCRE32
      if (SLJIT_UNLIKELY(chr == NOTACHAR))
        return consumed;
#endif
      add_prefix_byte((pcre_uint8)chr, bytes);

      mask = 0;
      if (caseless)
        {
        add_prefix_byte((pcre_uint8)*oc, bytes);
        /* Bits in which the two case forms differ. */
        mask = *cc ^ *oc;
        chr |= mask;
        }

      /* A slot that still holds its initial value is simply filled in;
      otherwise the new value is merged with what an earlier scan stored. */
#ifdef COMPILE_PCRE32
      if (chars[0] == NOTACHAR && chars[1] == 0)
#else
      if (chars[0] == NOTACHAR)
#endif
        {
        chars[0] = chr;
        chars[1] = mask;
        }
      else
        {
        mask |= chars[0] ^ chr;
        chr |= mask;
        chars[0] = chr;
        chars[1] |= mask;
        }

      len--;
      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars += 2;
      bytes += MAX_N_BYTES;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Rewind to the start of the literal for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
3543
/* Emits a start-of-match search based on the leading characters that
scan_prefix() collects from the pattern.

  common     compiler state
  firstline  when TRUE, the search limit is taken from the value stored at
             common->first_line_end

Returns FALSE when no code was emitted (too little prefix information, or a
case left to fast_forward_first_char), TRUE otherwise. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
pcre_uint32 chars[MAX_N_CHARS * 2];
pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
pcre_uint8 ones[MAX_N_CHARS];
int offsets[3];
pcre_uint32 mask;
pcre_uint8 *byte_set, *byte_set_end;
int i, max, from;
int range_right = -1, range_len = 3 - 1;
sljit_ub *update_table = NULL;
BOOL in_range;

/* Initial state of every position: no character seen, empty value set. */
for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i << 1] = NOTACHAR;
  chars[(i << 1) + 1] = 0;
  bytes[i * MAX_N_BYTES] = 0;
  }

max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);

if (max <= 1)
  return FALSE;

/* ones[i] is the sum of ones_in_half_byte[] over every 4-bit group of the
mask at position i (presumably the number of set bits; the table is defined
elsewhere). */
for (i = 0; i < max; i++)
  {
  mask = chars[(i << 1) + 1];
  ones[i] = ones_in_half_byte[mask & 0xf];
  mask >>= 4;
  while (mask != 0)
    {
    ones[i] += ones_in_half_byte[mask & 0xf];
    mask >>= 4;
    }
  }

/* Look for the longest run of consecutive positions whose value set has not
overflowed (bytes[...] < 255). A run is accepted only if it is longer than
range_len (initially 2) and the set at its last position has at most 4
entries. range_right is the index of the last position of the accepted run. */
in_range = FALSE;
from = 0; /* Prevent compiler "uninitialized" warning */
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && bytes[i * MAX_N_BYTES] < 255)
    {
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else if (in_range)
    in_range = FALSE;
  }

if (range_right >= 0)
  {
  /* Build a 256-entry step table indexed by the byte read at range_right.
  Every entry starts at the full run length; a byte that can occur i
  positions before range_right lowers its entry to at most i (in
  IN_UCHARS units). */
  update_table = (sljit_ub *)allocate_read_only_data(common, 256);
  /* NOTE(review): TRUE is returned here without emitting any code;
  presumably the allocator records the failure elsewhere - confirm. */
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
    SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
    byte_set_end = byte_set + byte_set[0];
    byte_set++;
    while (byte_set <= byte_set_end)
      {
      if (update_table[*byte_set] > IN_UCHARS(i))
        update_table[*byte_set] = IN_UCHARS(i);
      byte_set++;
      }
    }
  }

/* offsets[0]: the first position whose mask has at most two bits counted. */
offsets[0] = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  if (ones[i] <= 2) {
    offsets[0] = i;
    break;
  }

if (offsets[0] < 0 && range_right < 0)
  return FALSE;

if (offsets[0] >= 0)
  {
  /* offsets[1]: the last such position after offsets[0], excluding
  range_right. */
  /* Scan backward. */
  offsets[1] = -1;
  for (i = max - 1; i > offsets[0]; i--)
    if (ones[i] <= 2 && i != range_right)
      {
      offsets[1] = i;
      break;
      }

  /* This case is handled better by fast_forward_first_char. */
  if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
    return FALSE;

  offsets[2] = -1;
  /* We only search for a middle character if there is no range check. */
  if (offsets[1] >= 0 && range_right == -1)
    {
    /* Scan from middle. */
    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
      if (ones[i] <= 2)
        {
        offsets[2] = i;
        break;
        }

    if (offsets[2] == -1)
      {
      for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
        if (ones[i] <= 2)
          {
          offsets[2] = i;
          break;
          }
      }
    }

  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));

  /* The first six chars[] entries are reused for the selected positions:
  [0],[1] for offsets[0], [2],[3] for offsets[2], [4],[5] for offsets[1]. */
  chars[0] = chars[offsets[0] << 1];
  chars[1] = chars[(offsets[0] << 1) + 1];
  if (offsets[2] >= 0)
    {
    chars[2] = chars[offsets[2] << 1];
    chars[3] = chars[(offsets[2] << 1) + 1];
    }
  if (offsets[1] >= 0)
    {
    chars[4] = chars[offsets[1] << 1];
    chars[5] = chars[(offsets[1] << 1) + 1];
    }
  }

/* From here on code is emitted. STR_END is lowered by max - 1 characters for
the duration of the search; with firstline it is additionally limited to the
stored first_line_end value, and the original STR_END is kept in TMP3. */
max -= 1;
if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(quit);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));

/* Except on 32-bit x86, the table address is kept in RETURN_ADDR. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (range_right >= 0)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);

if (range_right >= 0)
  {
  /* Read one byte of the character at range_right; the alternative offset
  selects the last byte of that character when the first form is not used. */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

  /* Look up the step for that byte, advance by it, and retry while the step
  is non-zero. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
  }

if (offsets[0] >= 0)
  {
  /* Load the characters at offsets[0] and offsets[1], then advance STR_PTR by
  one character so that every failed comparison can jump straight back to
  start. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
  if (offsets[1] >= 0)
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  /* OR-ing in the mask makes the comparison ignore the "may differ" bits. */
  if (chars[1] != 0)
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
  /* STR_PTR has already moved forward, hence the - 1. */
  if (offsets[2] >= 0)
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));

  if (offsets[1] >= 0)
    {
    if (chars[5] != 0)
      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
    }

  if (offsets[2] >= 0)
    {
    if (chars[3] != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
    }
  /* All comparisons passed: undo the one-character advance. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

JUMPHERE(quit);

/* Restore STR_END. With firstline and a range check, STR_PTR is also limited
to the stored first_line_end value. */
if (firstline)
  {
  if (range_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (range_right >= 0)
    {
    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(quit);
    }
  }
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
3782
3783 #undef MAX_N_CHARS
3784 #undef MAX_N_BYTES
3785
3786 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3787 {
3788 DEFINE_COMPILER;
3789 struct sljit_label *start;
3790 struct sljit_jump *quit;
3791 struct sljit_jump *found;
3792 pcre_uchar oc, bit;
3793
3794 if (firstline)
3795 {
3796 SLJIT_ASSERT(common->first_line_end != 0);
3797 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3798 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3799 }
3800
3801 start = LABEL();
3802 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3803 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3804
3805 oc = first_char;
3806 if (caseless)
3807 {
3808 oc = TABLE_GET(first_char, common->fcc, first_char);
3809 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3810 if (first_char > 127 && common->utf)
3811 oc = UCD_OTHERCASE(first_char);
3812 #endif
3813 }
3814 if (first_char == oc)
3815 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3816 else
3817 {
3818 bit = first_char ^ oc;
3819 if (is_powerof2(bit))
3820 {
3821 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3822 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3823 }
3824 else
3825 {
3826 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3827 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3828 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3829 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3830 found = JUMP(SLJIT_NOT_ZERO);
3831 }
3832 }
3833
3834 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3835 JUMPTO(SLJIT_JUMP, start);
3836 JUMPHERE(found);
3837 JUMPHERE(quit);
3838
3839 if (firstline)
3840 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3841 }
3842
/* Emits code that moves STR_PTR forward until it follows a newline sequence,
or until STR_END is reached.

  common     compiler state; nltype, newline, nlmin and nlmax are read
  firstline  when TRUE, STR_END is temporarily replaced by the value stored
             at common->first_line_end (the original is kept in TMP3)

Two code shapes are produced: a dedicated loop for a fixed two-character
newline (common->newline > 255), and a generic loop for everything else. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar;
struct sljit_jump *firstchar;
struct sljit_jump *quit;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  /* Nothing to do at the end of the subject, or when STR_PTR is not past the
  "str" field of the arguments. */
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step STR_PTR back by one character if it is at least two characters past
  "begin": TMP2 becomes 0 or 1, is scaled to the character size, and is
  subtracted. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance one character at a time until the two characters just before
  STR_PTR are the two newline characters. */
  loop = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

  JUMPHERE(quit);
  JUMPHERE(firstchar);
  JUMPHERE(lastchar);

  if (firstline)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* Generic case. Nothing is done when STR_PTR is not past the "str" field of
the arguments; otherwise the scan starts one character back. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
skip_char_back(common);

loop = LABEL();
/* The loop label is also published through common->ff_newline_shortcut. */
common->ff_newline_shortcut = loop;

read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in "newline" are bound to the loop start. */
check_newlinechar(common, common->nltype, &newline, FALSE);
set_jumps(newline, loop);

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  /* After a CR: if the next character is NL, step over it as well. TMP1
  becomes 0 or 1, is scaled to the character size, and is added. */
  quit = JUMP(SLJIT_JUMP);
  JUMPHERE(foundcr);
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }
JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
3930
3931 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3932
/* Emits a loop that advances STR_PTR until the character it points at is
accepted by the start_bits bitmap, or until STR_END.

  common      compiler state
  start_bits  bitmap of acceptable starting values, one bit per value
              (byte index = value >> 3, bit index = value & 7)
  firstline   when TRUE, STR_END is temporarily replaced by the value stored
              at common->first_line_end (the original is kept in RETURN_ADDR) */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode the code unit is kept in TMP3 because TMP1 is modified by the
test below and is needed again when skipping the character. */
#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

/* Try the range-based test first; when it reports FALSE, emit a generic
bitmap lookup instead. */
if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  {
#ifndef COMPILE_PCRE8
  /* Values of 255 and above are all tested as 255. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(jump);
#endif
  /* TMP2 = 1 << (value & 7); TMP1 = start_bits[value >> 3]. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  found = JUMP(SLJIT_NOT_ZERO);
  }

/* Not a possible start: step over the current character and retry. */
#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* For values of 0xc0 and above, an extra length is read from utf8_table4
  and added to STR_PTR. */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* For units of 0xd800 and above: if (unit & 0xfc00) == 0xd800, step over
  one more 16-bit unit (the 0/1 flag is shifted left by 1 to give 2 bytes). */
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
JUMPTO(SLJIT_JUMP, start);
/* Both kinds of "match" exit land here. */
if (found != NULL)
  JUMPHERE(found);
if (matches != NULL)
  set_jumps(matches, LABEL());
JUMPHERE(quit);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
4009
4010 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4011 {
4012 DEFINE_COMPILER;
4013 struct sljit_label *loop;
4014 struct sljit_jump *toolong;
4015 struct sljit_jump *alreadyfound;
4016 struct sljit_jump *found;
4017 struct sljit_jump *foundoc = NULL;
4018 struct sljit_jump *notfound;
4019 pcre_uint32 oc, bit;
4020
4021 SLJIT_ASSERT(common->req_char_ptr != 0);
4022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4023 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4024 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4025 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4026
4027 if (has_firstchar)
4028 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4029 else
4030 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4031
4032 loop = LABEL();
4033 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4034
4035 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4036 oc = req_char;
4037 if (caseless)
4038 {
4039 oc = TABLE_GET(req_char, common->fcc, req_char);
4040 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4041 if (req_char > 127 && common->utf)
4042 oc = UCD_OTHERCASE(req_char);
4043 #endif
4044 }
4045 if (req_char == oc)
4046 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4047 else
4048 {
4049 bit = req_char ^ oc;
4050 if (is_powerof2(bit))
4051 {
4052 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4053 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4054 }
4055 else
4056 {
4057 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4058 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4059 }
4060 }
4061 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4062 JUMPTO(SLJIT_JUMP, loop);
4063
4064 JUMPHERE(found);
4065 if (foundoc)
4066 JUMPHERE(foundoc);
4067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4068 JUMPHERE(alreadyfound);
4069 JUMPHERE(toolong);
4070 return notfound;
4071 }
4072
/* Emits a fast-call subroutine that walks a list of records starting at
STACK_TOP and copies saved values back into local slots.

Each record starts with a signed word:
  > 0   an offset: the two following words are stored at (local base +
        offset) and at the word after it; the record is three words long
  < 0   a negated offset: the one following word is stored at (local base -
        value); the record is two words long
  == 0  end of the list; the subroutine returns

TMP1 is the read pointer, TMP2 holds the current word / target address, and
TMP3 holds the local base obtained with GET_LOCAL_BASE. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the word with zero; the flags are tested twice. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive word: restore two values. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* Still using the flags of the comparison above: negative or zero? */
jump = JUMP(SLJIT_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Negative word: restore one value. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
4107
/* Emits a fast-call subroutine that tests whether STR_PTR is at a word
boundary.

The "is a word character" flag (0 or 1) of the previous character is stored
in LOCALS1, the flag of the next character is computed in TMP2, and the
final XOR of the two sets the flags that the caller tests. A missing
previous or next character counts as 0. LOCALS0 is the operand given to
sljit_emit_fast_enter / sljit_emit_fast_return. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table lookups below shift by 4 to extract the ctype_word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character when STR_PTR is not past "begin". */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* TMP2 = 1 for an underscore. Otherwise the value produced by the getucd
  routine in TMP1 (presumably the character type - confirm) is tested
  against the ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Characters above 255 skip the table lookup and keep the 0 already
  stored in LOCALS1. */
#ifndef COMPILE_PCRE8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  /* LOCALS1 = (ctypes[ch] >> 4) & 1. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the next character: TMP2 defaults to 0; the jumps collected by
check_str_end go to the final XOR with that value. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  /* TMP2 = (ctypes[ch] >> 4) & 1. */
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* The result is left in the flags: the XOR is non-zero when exactly one of
the two neighbouring characters is a word character. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4216
/* Tries to emit a short compare-and-jump sequence for a 256 bit character
class bitmap, instead of a bitmap lookup.

bits       bitmap; bit (i & 0x7) of bits[i >> 3] belongs to character i,
           for i in 0..255.
nclass     controls whether an extra boundary at 256 is appended after the
           scan (see below).
invert     when set, the starting bit used to choose the comparison
           direction is flipped.
backtracks jump list which receives every jump emitted here.

The bitmap is scanned and every position where the bit value changes is
recorded in ranges[]. Only 0..4 boundaries are supported. Returns TRUE when
the check was emitted (TMP1 may be modified by the emitted code), and FALSE
when too many boundaries are needed; every reachable FALSE return happens
before any code is emitted. */
4217 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4218 {
4219 DEFINE_COMPILER;
4220 int ranges[MAX_RANGE_SIZE];
4221 pcre_uint8 bit, cbit, all;
4222 int i, byte, length = 0;
4223
4224 bit = bits[0] & 0x1;
4225 /* All bits will be zero or one (since bit is zero or one). */
4226 all = -bit;
4227
/* Collect the positions where the bit value flips. A byte whose eight bits
all equal the current bit value is skipped in one step. */
4228 for (i = 0; i < 256; )
4229 {
4230 byte = i >> 3;
4231 if ((i & 0x7) == 0 && bits[byte] == all)
4232 i += 8;
4233 else
4234 {
4235 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4236 if (cbit != bit)
4237 {
4238 if (length >= MAX_RANGE_SIZE)
4239 return FALSE;
4240 ranges[length] = i;
4241 length++;
4242 bit = cbit;
4243 all = -cbit;
4244 }
4245 i++;
4246 }
4247 }
4248
/* A trailing boundary at 256 is appended when the map ends with a 0 bit and
nclass is set, or ends with a 1 bit and nclass is clear. */
4249 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4250 {
4251 if (length >= MAX_RANGE_SIZE)
4252 return FALSE;
4253 ranges[length] = 256;
4254 length++;
4255 }
4256
/* The switch below only handles up to four boundaries. */
4257 if (length < 0 || length > 4)
4258 return FALSE;
4259
/* From here on, bit is the state of character 0 (optionally inverted). It
selects which side of each boundary jumps to the backtracks list. */
4260 bit = bits[0] & 0x1;
4261 if (invert) bit ^= 0x1;
4262
4263 /* No character is accepted. */
4264 if (length == 0 && bit == 0)
4265 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4266
4267 switch(length)
4268 {
4269 case 0:
4270 /* When bit != 0, all characters are accepted. */
4271 return TRUE;
4272
4273 case 1:
/* Single boundary: one comparison against ranges[0]. */
4274 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4275 return TRUE;
4276
4277 case 2:
/* One range [ranges[0], ranges[1]). A range of exactly one character is
tested with an equality compare, otherwise TMP1 is rebased to ranges[0]
and compared with the range size. */
4278 if (ranges[0] + 1 != ranges[1])
4279 {
4280 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4281 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4282 }
4283 else
4284 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4285 return TRUE;
4286
4287 case 3:
/* Three boundaries: an open-ended part tested against a single boundary,
plus the closed range between the other two boundaries. */
4288 if (bit != 0)
4289 {
4290 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4291 if (ranges[0] + 1 != ranges[1])
4292 {
4293 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4294 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4295 }
4296 else
4297 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4298 return TRUE;
4299 }
4300
4301 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4302 if (ranges[1] + 1 != ranges[2])
4303 {
4304 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4305 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4306 }
4307 else
4308 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4309 return TRUE;
4310
4311 case 4:
/* Special case: both ranges have the same size and their start values
differ in exactly one bit which is clear in ranges[0]. OR-ing that bit into
TMP1 maps the lower range onto the upper one, so one range test is enough. */
4312 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4313 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4314 && is_powerof2(ranges[2] - ranges[0]))
4315 {
4316 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4317 if (ranges[2] + 1 != ranges[3])
4318 {
4319 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4320 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4321 }
4322 else
4323 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4324 return TRUE;
4325 }
4326
4327 if (bit != 0)
4328 {
/* i records how much has already been subtracted from TMP1, so that the
second range test can compensate for it. */
4329 i = 0;
4330 if (ranges[0] + 1 != ranges[1])
4331 {
4332 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4333 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4334 i = ranges[0];
4335 }
4336 else
4337 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4338
4339 if (ranges[2] + 1 != ranges[3])
4340 {
4341 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4342 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4343 }
4344 else
4345 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4346 return TRUE;
4347 }
4348
/* bit == 0: first test the whole span [ranges[0], ranges[3]), then the gap
[ranges[1], ranges[2]) inside it. TMP1 is already rebased to ranges[0]. */
4349 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4350 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4351 if (ranges[1] + 1 != ranges[2])
4352 {
4353 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4354 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4355 }
4356 else
4357 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4358 return TRUE;
4359
4360 default:
/* Not reachable: length was limited to 0..4 above. */
4361 SLJIT_ASSERT_STOP();
4362 return FALSE;
4363 }
4364 }
4365
4366 static void check_anynewline(compiler_common *common)
4367 {
4368 /* Emits a fast-call routine which checks whether TMP1 contains a newline
character: 0x0a-0x0d, 0x85 and, in the wide character path below (always
compiled for 16/32 bit builds, guarded by common->utf in 8 bit builds),
0x2028-0x2029. The result is accumulated in TMP2, and the last OP_FLAGS is
issued with SLJIT_SET_E. TMP2 is destroyed. TMP1 is modified as well: 0x0a
is subtracted from it, and bit 0 is set in the wide character path. */
4369 DEFINE_COMPILER;
4370
4371 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4372
/* Rebase TMP1 so that the 0x0a-0x0d range becomes 0..3. */
4373 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4374 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4375 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4376 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4377 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4378 #ifdef COMPILE_PCRE8
4379 if (common->utf)
4380 {
4381 #endif
4382 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting bit 0 maps 0x2028 onto 0x2029 (both already reduced by 0x0a), so
a single compare covers both characters. */
4383 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4384 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4385 #ifdef COMPILE_PCRE8
4386 }
4387 #endif
4388 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last pending compare into TMP2. */
4389 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4390 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4391 }
4392
4393 static void check_hspace(compiler_common *common)
4394 {
4395 /* Emits a fast-call routine which checks whether TMP1 contains a horizontal
whitespace character: 0x09, 0x20, 0xa0 and, in the wide character path below
(always compiled for 16/32 bit builds, guarded by common->utf in 8 bit
builds), 0x1680, 0x180e, 0x2000-0x200A, 0x202f, 0x205f and 0x3000. The result
is accumulated in TMP2, and the last OP_FLAGS is issued with SLJIT_SET_E.
TMP2 is destroyed. In the wide character path TMP1 is modified as well
(0x2000 is subtracted from it). */
4396 DEFINE_COMPILER;
4397
4398 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4399
4400 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4401 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4402 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4403 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4405 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4406 #ifdef COMPILE_PCRE8
4407 if (common->utf)
4408 {
4409 #endif
4410 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4411 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4412 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4413 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4414 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Rebase TMP1 to 0x2000; the remaining compares use offsets from 0x2000. */
4415 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4416 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4417 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4418 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4419 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4420 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4421 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4422 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4423 #ifdef COMPILE_PCRE8
4424 }
4425 #endif
4426 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last pending compare into TMP2. */
4427 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4428
4429 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4430 }
4431
4432 static void check_vspace(compiler_common *common)
4433 {
4434 /* Emits a fast-call routine which checks whether TMP1 contains a vertical
whitespace character: 0x0a-0x0d, 0x85 and, in the wide character path below
(always compiled for 16/32 bit builds, guarded by common->utf in 8 bit
builds), 0x2028-0x2029. The result is accumulated in TMP2, and the last
OP_FLAGS is issued with SLJIT_SET_E. TMP2 is destroyed. TMP1 is modified as
well: 0x0a is subtracted from it, and bit 0 is set in the wide character
path. */
4435 DEFINE_COMPILER;
4436
4437 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4438
/* Rebase TMP1 so that the 0x0a-0x0d range becomes 0..3. */
4439 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4440 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4441 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4443 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4444 #ifdef COMPILE_PCRE8
4445 if (common->utf)
4446 {
4447 #endif
/* NOTE(review): this intermediate OP_FLAGS carries SLJIT_SET_E although a
flag setting SUB follows before the flags are read again; the extra flag
request looks redundant - confirm. */
4448 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting bit 0 maps 0x2028 onto 0x2029 (both already reduced by 0x0a), so
a single compare covers both characters. */
4449 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4451 #ifdef COMPILE_PCRE8
4452 }
4453 #endif
4454 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last pending compare into TMP2. */
4455 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4456
4457 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4458 }
4459
/* The compare routines below borrow the STR_END and STACK_TOP registers as
character temporaries; each routine saves them on entry and restores them
before returning. */
4460 #define CHAR1 STR_END
4461 #define CHAR2 STACK_TOP
4462
/* Emits a fast-call routine which compares two character sequences exactly.
On entry TMP1 points to the first sequence and TMP2 holds the length (it is
decreased by IN_UCHARS(1) per compared character). The second sequence
starts at STR_PTR - TMP2. The loop stops at the first differing character or
when TMP2 reaches zero, so TMP2 is zero after a full match and non-zero
otherwise. TMP3 and LOCALS0 are used as save slots. */
4463 static void do_casefulcmp(compiler_common *common)
4464 {
4465 DEFINE_COMPILER;
4466 struct sljit_jump *jump;
4467 struct sljit_label *label;
4468
4469 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4470 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers which are reused as CHAR1 / CHAR2. */
4471 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character before the loop; presumably
MOVU_UCHAR updates the base register by the given offset before loading -
confirm against the MOVU_UCHAR definition. */
4473 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4474 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4475
4476 label = LABEL();
4477 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4478 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4479 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4480 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4481 JUMPTO(SLJIT_NOT_ZERO, label);
4482
/* Common exit for both mismatch and full match: undo the initial STR_PTR
adjustment and restore the borrowed registers. */
4483 JUMPHERE(jump);
4484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4485 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4486 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4487 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4488 }
4489
/* The caseless compare routine borrows the STACK_LIMIT register to hold the
address of the lower case table. */
4490 #define LCC_TABLE STACK_LIMIT
4491
/* Emits a fast-call routine which compares two character sequences after
mapping each character through the common->lcc table. In builds other than
COMPILE_PCRE8 only characters not greater than 255 are mapped; larger ones
are compared as they are. On entry TMP1 points to the first sequence and
TMP2 holds the length (it is decreased by IN_UCHARS(1) per compared
character). The second sequence starts at STR_PTR - TMP2. The loop stops at
the first differing character or when TMP2 reaches zero. TMP3, LOCALS0 and
LOCALS1 are used as save slots. */
4492 static void do_caselesscmp(compiler_common *common)
4493 {
4494 DEFINE_COMPILER;
4495 struct sljit_jump *jump;
4496 struct sljit_label *label;
4497
4498 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4499 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4500
/* Save the three borrowed registers, then load the table address. */
4501 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4504 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character before the loop; presumably
MOVU_UCHAR updates the base register by the given offset before loading -
confirm against the MOVU_UCHAR definition. */
4505 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4506 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507
4508 label = LABEL();
4509 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4510 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for characters above 255 in wide builds. */
4511 #ifndef COMPILE_PCRE8
4512 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4513 #endif
4514 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4515 #ifndef COMPILE_PCRE8
4516 JUMPHERE(jump);
4517 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4518 #endif
4519 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4520 #ifndef COMPILE_PCRE8
4521 JUMPHERE(jump);
4522 #endif
4523 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4524 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4525 JUMPTO(SLJIT_NOT_ZERO, label);
4526
/* Common exit for both mismatch and full match: undo the initial STR_PTR
adjustment and restore the borrowed registers. */
4527 JUMPHERE(jump);
4528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4529 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4530 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4531 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4532 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4533 }
4534
/* The register aliases are only valid for the compare routines above. */
4535 #undef LCC_TABLE
4536 #undef CHAR1
4537 #undef CHAR2
4538
4539 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4540
/* Caseless comparison of two character sequences using the Unicode tables.

src1, end1   first sequence, [src1, end1).
args         the second sequence starts at args->uchar_ptr and must not
             run past args->end.

Returns the position in the second sequence after the last compared
character when every character of the first sequence matched, NULL on the
first mismatch, and the sentinel (pcre_uchar*)1 when the second sequence
ends before the first one (presumably used by the caller to detect a
partial match - confirm at the call site). */
4541 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4542 {
4543 /* This is a plain C helper, because doing it at JIT level would be inefficient. */
4544 pcre_uint32 c1, c2;
4545 const pcre_uchar *src2 = args->uchar_ptr;
4546 const pcre_uchar *end2 = args->end;
4547 const ucd_record *ur;
4548 const pcre_uint32 *pp;
4549
4550 while (src1 < end1)
4551 {
4552 if (src2 >= end2)
4553 return (pcre_uchar*)1;
4554 GETCHARINC(c1, src1);
4555 GETCHARINC(c2, src2);
4556 ur = GET_UCD(c2);
/* Accept the same character or the other case of c2. Otherwise fall back
to the caseless set of c2. */
4557 if (c1 != c2 && c1 != c2 + ur->other_case)
4558 {
4559 pp = PRIV(ucd_caseless_sets) + ur->caseset;
/* The early exit on c1 < *pp relies on the set being stored in ascending
order and ending with a value no character can exceed - TODO confirm
against the table definition. */
4560 for (;;)
4561 {
4562 if (c1 < *pp) return NULL;
4563 if (c1 == *pp++) break;
4564 }
4565 }
4566 }
4567 return src2;
4568 }
4569
4570 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4571
/* Emits the code which compares one pattern character (all of its code
units in UTF mode) with the subject, and returns cc advanced past that
character.

caseless    when set and the character has an other case, the differing
            bit is OR-ed into the loaded value, so both cases compare equal.
cc          points to the pattern character.
context     state shared between consecutive calls:
              length    - remaining length; the subject is read at
                          STR_PTR - length, and length is decreased by
                          IN_UCHARS(1) for every code unit handled here.
              sourcereg - register which receives the next load; -1 on the
                          first call. TMP1 and TMP2 are used alternately, so
                          the next load is issued before the compare of the
                          previously loaded value.
              ucharptr, c, oc - (unaligned read path only) number of
                          collected code units, their expected values and
                          their case masks.
backtracks  jump list which receives the mismatch jumps. */
4572 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4573 compare_context *context, jump_list **backtracks)
4574 {
4575 DEFINE_COMPILER;
4576 unsigned int othercasebit = 0;
4577 pcre_uchar *othercasechar = NULL;
4578 #ifdef SUPPORT_UTF
4579 int utflength;
4580 #endif
4581
4582 if (caseless && char_has_othercase(common, cc))
4583 {
4584 othercasebit = char_get_othercase_bit(common, cc);
4585 SLJIT_ASSERT(othercasebit);
4586 /* Extracting bit difference info. */
/* 8 bit: the bits above the low byte give the offset of the code unit which
differs, the low byte is the bit mask. */
4587 #if defined COMPILE_PCRE8
4588 othercasechar = cc + (othercasebit >> 8);
4589 othercasebit &= 0xff;
4590 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4591 /* Note that this code only handles characters in the BMP. If there
4592 ever are characters outside the BMP whose othercase differs in only one
4593 bit from itself (there currently are none), this code will need to be
4594 revised for COMPILE_PCRE32. */
/* 16/32 bit: the bits above bit 8 give the offset of the code unit, bit
0x100 tells that the mask belongs to the upper byte of the code unit. */
4595 othercasechar = cc + (othercasebit >> 9);
4596 if ((othercasebit & 0x100) != 0)
4597 othercasebit = (othercasebit & 0xff) << 8;
4598 else
4599 othercasebit &= 0xff;
4600 #endif /* COMPILE_PCRE[8|16|32] */
4601 }
4602
/* First call for this sequence: load the first chunk into TMP1, the next
load will go to TMP2. */
4603 if (context->sourcereg == -1)
4604 {
4605 #if defined COMPILE_PCRE8
4606 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4607 if (context->length >= 4)
4608 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4609 else if (context->length >= 2)
4610 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4611 else
4612 #endif
4613 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4614 #elif defined COMPILE_PCRE16
4615 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4616 if (context->length >= 4)
4617 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4618 else
4619 #endif
4620 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4621 #elif defined COMPILE_PCRE32
4622 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4623 #endif /* COMPILE_PCRE[8|16|32] */
4624 context->sourcereg = TMP2;
4625 }
4626
/* In UTF mode every code unit of the character is processed by the loop
below; without SUPPORT_UTF the body runs exactly once. */
4627 #ifdef SUPPORT_UTF
4628 utflength = 1;
4629 if (common->utf && HAS_EXTRALEN(*cc))
4630 utflength += GET_EXTRALEN(*cc);
4631
4632 do
4633 {
4634 #endif
4635
4636 context->length -= IN_UCHARS(1);
4637 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4638
4639 /* Unaligned read is supported. */
/* Collect the expected code unit and its case mask; the compare is emitted
later for the whole collected chunk. */
4640 if (othercasebit != 0 && othercasechar == cc)
4641 {
4642 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4643 context->oc.asuchars[context->ucharptr] = othercasebit;
4644 }
4645 else
4646 {
4647 context->c.asuchars[context->ucharptr] = *cc;
4648 context->oc.asuchars[context->ucharptr] = 0;
4649 }
4650 context->ucharptr++;
4651
/* Emit the compare when a full 4 byte chunk was collected, when the
sequence ends, or (8 bit only) when two units are collected and only one
remains. */
4652 #if defined COMPILE_PCRE8
4653 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4654 #else
4655 if (context->ucharptr >= 2 || context->length == 0)
4656 #endif
4657 {
/* Load the next chunk first, then switch to the register which holds the
chunk loaded earlier. */
4658 if (context->length >= 4)
4659 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4660 else if (context->length >= 2)
4661 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4662 #if defined COMPILE_PCRE8
4663 else if (context->length >= 1)
4664 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4665 #endif /* COMPILE_PCRE8 */
4666 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4667
/* The case labels are the number of collected code units which make up a
4, 2 or 1 byte chunk. */
4668 switch(context->ucharptr)
4669 {
4670 case 4 / sizeof(pcre_uchar):
4671 if (context->oc.asint != 0)
4672 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4673 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4674 break;
4675
4676 case 2 / sizeof(pcre_uchar):
4677 if (context->oc.asushort != 0)
4678 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4679 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4680 break;
4681
4682 #ifdef COMPILE_PCRE8
4683 case 1:
4684 if (context->oc.asbyte != 0)
4685 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4686 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4687 break;
4688 #endif
4689
4690 default:
4691 SLJIT_ASSERT_STOP();
4692 break;
4693 }
4694 context->ucharptr = 0;
4695 }
4696
4697 #else
4698
4699 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per step: load the next unit (if any), then compare the
unit loaded earlier. */
4700 if (context->length >= 1)
4701 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4702
4703 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4704
4705 if (othercasebit != 0 && othercasechar == cc)
4706 {
4707 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4708 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4709 }
4710 else
4711 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4712
4713 #endif
4714
4715 cc++;
4716 #ifdef SUPPORT_UTF
4717 utflength--;
4718 }
4719 while (utflength > 0);
4720 #endif
4721
4722 return cc;
4723 }
4724
4725 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4726
/* Emits an ADD or SUB on typereg so that the offset already subtracted from
it changes from typeoffset to value, then records value in typeoffset.
Nothing is emitted when the offset is unchanged. The macro uses the
typereg and typeoffset variables of the expanding function. It expands to an
if statement followed by an assignment (it is not wrapped in do / while), so
it must only be used where two statements are valid. */
4727 #define SET_TYPE_OFFSET(value) \
4728 if ((value) != typeoffset) \
4729 { \
4730 if ((value) < typeoffset) \
4731 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4732 else \
4733 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4734 } \
4735 typeoffset = (value);
4736
/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the
charoffset variable of the expanding function. It also expands to an if
statement followed by an assignment. */
4737 #define SET_CHAR_OFFSET(value) \
4738 if ((value) != charoffset) \
4739 { \
4740 if ((value) < charoffset) \
4741 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4742 else \
4743 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4744 } \
4745 charoffset = (value);
4746
4747 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4748 {
4749 DEFINE_COMPILER;
4750 jump_list *found = NULL;
4751 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4752 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4753 struct sljit_jump *jump = NULL;
4754 pcre_uchar *ccbegin;
4755 int compares, invertcmp, numberofcmps;
4756 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4757 BOOL utf = common->utf;
4758 #endif
4759
4760 #ifdef SUPPORT_UCP
4761 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4762 BOOL charsaved = FALSE;
4763 int typereg = TMP1, scriptreg = TMP1;
4764 const pcre_uint32 *other_cases;
4765 sljit_uw typeoffset;
4766 #endif
4767
4768 /* Scanning the necessary info. */
4769 cc++;
4770 ccbegin = cc;
4771 compares = 0;
4772 if (cc[-1] & XCL_MAP)
4773 {
4774 min = 0;
4775 cc += 32 / sizeof(pcre_uchar);
4776 }
4777
4778 while (*cc != XCL_END)
4779 {
4780 compares++;
4781 if (*cc == XCL_SINGLE)
4782 {
4783 cc ++;
4784 GETCHARINCTEST(c, cc);
4785 if (c > max) max = c;
4786 if (c < min) min = c;
4787 #ifdef SUPPORT_UCP
4788 needschar = TRUE;
4789 #endif
4790 }
4791 else if (*cc == XCL_RANGE)
4792 {
4793 cc ++;
4794 GETCHARINCTEST(c, cc);
4795 if (c < min) min = c;
4796 GETCHARINCTEST(c, cc);
4797 if (c > max) max = c;
4798 #ifdef SUPPORT_UCP
4799 needschar = TRUE;
4800 #endif
4801 }
4802 #ifdef SUPPORT_UCP
4803 else
4804 {
4805 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4806 cc++;
4807 if (*cc == PT_CLIST)
4808 {
4809 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4810 while (*other_cases != NOTACHAR)
4811 {
4812 if (*other_cases > max) max = *other_cases;
4813 if (*other_cases < min) min = *other_cases;
4814 other_cases++;
4815 }
4816 }
4817 else
4818 {
4819 max = READ_CHAR_MAX;
4820 min = 0;
4821 }
4822
4823 switch(*cc)
4824 {
4825 case PT_ANY:
4826 break;
4827
4828 case PT_LAMP:
4829 case PT_GC:
4830 case PT_PC:
4831 case PT_ALNUM:
4832 needstype = TRUE;
4833 break;
4834
4835 case PT_SC:
4836 needsscript = TRUE;
4837 break;
4838
4839 case PT_SPACE:
4840 case PT_PXSPACE:
4841 case PT_WORD:
4842 case PT_PXGRAPH:
4843 case PT_PXPRINT:
4844 case PT_PXPUNCT:
4845 needstype = TRUE;
4846 needschar = TRUE;
4847 break;
4848
4849 case PT_CLIST:
4850 case PT_UCNC:
4851 needschar = TRUE;
4852 break;
4853
4854 default:
4855 SLJIT_ASSERT_STOP();
4856 break;
4857 }
4858 cc += 2;
4859 }
4860 #endif
4861 }
4862
4863 /* We are not necessary in utf mode even in 8 bit mode. */
4864 cc = ccbegin;
4865 detect_partial_match(common, backtracks);
4866 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4867
4868 if ((cc[-1] & XCL_HASPROP) == 0)
4869 {
4870 if ((cc[-1] & XCL_MAP) != 0)
4871 {
4872 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4873 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4874 {
4875 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4876 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4878 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4879 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4880 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4881 }
4882
4883 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4884 JUMPHERE(jump);
4885
4886 cc += 32 / sizeof(pcre_uchar);
4887 }
4888 else
4889 {
4890 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4891 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4892 }
4893 }
4894 else if ((cc[-1] & XCL_MAP) != 0)
4895 {
4896 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4897 #ifdef SUPPORT_UCP
4898 charsaved = TRUE;
4899 #endif
4900 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4901 {
4902 #ifdef COMPILE_PCRE8
4903 SLJIT_ASSERT(common->utf);
4904 #endif
4905 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4906
4907 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4908 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4909 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4910 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4912 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4913
4914 JUMPHERE(jump);
4915 }
4916
4917 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4918 cc += 32 / sizeof(pcre_uchar);
4919 }
4920
4921 #ifdef SUPPORT_UCP
4922 /* Simple register allocation. TMP1 is preferred if possible. */
4923 if (needstype || needsscript)
4924 {
4925 if (needschar && !charsaved)
4926 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4927 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4928 if (needschar)
4929 {
4930 if (needstype)
4931 {
4932 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4933 typereg = RETURN_ADDR;
4934 }
4935
4936 if (needsscript)
4937 scriptreg = TMP3;
4938 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4939 }
4940 else if (needstype && needsscript)
4941 scriptreg = TMP3;
4942 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4943
4944 if (needsscript)
4945 {
4946 if (scriptreg == TMP1)
4947 {
4948 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4949 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4950 }
4951 else
4952 {
4953 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4954 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4955 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4956 }
4957 }
4958 }
4959 #endif
4960
4961 /* Generating code. */
4962 charoffset = 0;
4963 numberofcmps = 0;
4964 #ifdef SUPPORT_UCP
4965 typeoffset = 0;
4966 #endif
4967
4968 while (*cc != XCL_END)
4969 {
4970 compares--;
4971 invertcmp = (compares == 0 && list != backtracks);
4972 jump = NULL;
4973
4974 if (*cc == XCL_SINGLE)
4975 {
4976 cc ++;
4977 GETCHARINCTEST(c, cc);
4978
4979 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4980 {
4981 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4982 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
4983 numberofcmps++;
4984 }
4985 else if (numberofcmps > 0)
4986 {
4987 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4988 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4989 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
4990 numberofcmps = 0;
4991 }
4992 else
4993 {
4994 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4995 numberofcmps = 0;
4996 }
4997 }
4998 else if (*cc == XCL_RANGE)
4999 {
5000 cc ++;
5001 GETCHARINCTEST(c, cc);
5002 SET_CHAR_OFFSET(c);
5003 GETCHARINCTEST(c, cc);
5004
5005 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5006 {
5007 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5008 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5009 numberofcmps++;
5010 }
5011 else if (numberofcmps > 0)
5012 {
5013 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5014 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5015 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5016 numberofcmps = 0;
5017 }
5018 else
5019 {
5020 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5021 numberofcmps = 0;
5022 }
5023 }
5024 #ifdef SUPPORT_UCP
5025 else
5026 {
5027 if (*cc == XCL_NOTPROP)
5028 invertcmp ^= 0x1;
5029 cc++;
5030 switch(*cc)
5031 {
5032 case PT_ANY:
5033 if (list != backtracks)
5034 {
5035 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5036 continue;
5037 }
5038 else if (cc[-1] == XCL_NOTPROP)
5039 continue;
5040 jump = JUMP(SLJIT_JUMP);
5041 break;
5042
5043 case PT_LAMP:
5044 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5045 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5047 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5049 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5050 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5051 break;
5052
5053 case PT_GC:
5054 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5055 SET_TYPE_OFFSET(c);
5056 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5057 break;
5058
5059 case PT_PC:
5060 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5061 break;
5062
5063 case PT_SC:
5064 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5065 break;
5066
5067 case PT_SPACE:
5068 case PT_PXSPACE:
5069 SET_CHAR_OFFSET(9);
5070 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5071 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5072
5073 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5074 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5075
5076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5077 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5078
5079 SET_TYPE_OFFSET(ucp_Zl);
5080 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5082 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5083 break;
5084
5085 case PT_WORD:
5086 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5087 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5088 /* Fall through. */
5089
5090 case PT_ALNUM:
5091 SET_TYPE_OFFSET(ucp_Ll);
5092 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5093 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5094 SET_TYPE_OFFSET(ucp_Nd);
5095 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5096 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5097 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5098 break;
5099
5100 case PT_CLIST:
5101 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5102
5103 /* At least three characters are required.
5104 Otherwise this case would be handled by the normal code path. */
5105 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5106 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5107
5108 /* Optimizing character pairs, if their difference is power of 2. */
5109 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5110 {
5111 if (charoffset == 0)
5112 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5113 else
5114 {
5115 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5116 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5117 }
5118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5119 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5120 other_cases += 2;
5121 }
5122 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5123 {
5124 if (charoffset == 0)
5125 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5126 else
5127 {
5128 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5129 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5130 }
5131 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5132 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5133
5134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5135 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5136
5137 other_cases += 3;
5138 }
5139 else
5140 {
5141 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5142 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5143 }
5144
5145 while (*other_cases != NOTACHAR)
5146 {
5147 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5148 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5149 }
5150 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5151 break;
5152
5153 case PT_UCNC:
5154 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5155 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5156 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5157 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5159 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5160
5161 SET_CHAR_OFFSET(0xa0);
5162 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5163 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5164 SET_CHAR_OFFSET(0);
5165 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5166 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5167 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5168 break;
5169
5170 case PT_PXGRAPH:
5171 /* C and Z groups are the farthest two groups. */
5172 SET_TYPE_OFFSET(ucp_Ll);
5173 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5174 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5175
5176 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5177
5178 /* In case of ucp_Cf, we overwrite the result. */
5179 SET_CHAR_OFFSET(0x2066);
5180 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5181 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5182
5183 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5184 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5185
5186 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5187 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5188
5189 JUMPHERE(jump);
5190 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5191 break;
5192
5193 case PT_PXPRINT:
5194 /* C and Z groups are the farthest two groups. */
5195 SET_TYPE_OFFSET(ucp_Ll);
5196 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5197 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5198
5199 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5200 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5201
5202 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5203
5204 /* In case of ucp_Cf, we overwrite the result. */
5205 SET_CHAR_OFFSET(0x2066);
5206 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5207 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5208
5209 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5210 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5211
5212 JUMPHERE(jump);
5213 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5214 break;
5215
5216 case PT_PXPUNCT:
5217 SET_TYPE_OFFSET(ucp_Sc);
5218 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5219 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5220
5221 SET_CHAR_OFFSET(0);
5222 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5223 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5224
5225 SET_TYPE_OFFSET(ucp_Pc);
5226 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5227 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5228 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5229 break;
5230 }
5231 cc += 2;
5232 }
5233 #endif
5234
5235 if (jump != NULL)
5236 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5237 }
5238
5239 if (found != NULL)
5240 set_jumps(found, LABEL());
5241 }
5242
5243 #undef SET_TYPE_OFFSET
5244 #undef SET_CHAR_OFFSET
5245
5246 #endif
5247
/* Generates the matching path for a single opcode. "type" is the opcode and
"cc" points at its operand data. Every jump that has to be taken when the item
does not match is appended to "backtracks". The return value is the position
just after the operands that were consumed (plain "cc" for the opcodes that are
handled here without operands). Falling out of the switch is treated as an
internal error (SLJIT_ASSERT_STOP). */
5248 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5249 {
5250 DEFINE_COMPILER;
5251 int length;
5252 unsigned int c, oc, bit;
5253 compare_context context;
5254 struct sljit_jump *jump[4];
5255 jump_list *end_list;
5256 #ifdef SUPPORT_UTF
5257 struct sljit_label *label;
5258 #ifdef SUPPORT_UCP
5259 pcre_uchar propdata[5];
5260 #endif
5261 #endif /* SUPPORT_UTF */
5262
5263 switch(type)
5264 {
5265 case OP_SOD: /* Backtrack unless STR_PTR equals jit_arguments.begin. */
5266 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5268 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5269 return cc;
5270
5271 case OP_SOM: /* Backtrack unless STR_PTR equals jit_arguments.str. */
5272 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5274 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5275 return cc;
5276
5277 case OP_NOT_WORD_BOUNDARY:
5278 case OP_WORD_BOUNDARY:
/* Fast-call the shared wordboundary routine, then branch on the zero /
not-zero condition it leaves behind. */
5279 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5280 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5281 return cc;
5282
5283 case OP_NOT_DIGIT:
5284 case OP_DIGIT:
5285 /* Digits are usually 0-9, so it is worth optimizing them. */
5286 detect_partial_match(common, backtracks);
5287 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5288 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5289 read_char7_type(common, type == OP_NOT_DIGIT);
5290 else
5291 #endif
5292 read_char8_type(common, type == OP_NOT_DIGIT);
5293 /* Flip the starting bit in the negative case. */
5294 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5295 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5296 return cc;
5297
5298 case OP_NOT_WHITESPACE:
5299 case OP_WHITESPACE:
/* Same scheme as the digit case, testing the ctype_space bit. */
5300 detect_partial_match(common, backtracks);
5301 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5302 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5303 read_char7_type(common, type == OP_NOT_WHITESPACE);
5304 else
5305 #endif
5306 read_char8_type(common, type == OP_NOT_WHITESPACE);
5307 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5308 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5309 return cc;
5310
5311 case OP_NOT_WORDCHAR:
5312 case OP_WORDCHAR:
/* Same scheme as the digit case, testing the ctype_word bit. */
5313 detect_partial_match(common, backtracks);
5314 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5315 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5316 read_char7_type(common, type == OP_NOT_WORDCHAR);
5317 else
5318 #endif
5319 read_char8_type(common, type == OP_NOT_WORDCHAR);
5320 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5321 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5322 return cc;
5323
5324 case OP_ANY:
5325 detect_partial_match(common, backtracks);
5326 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5327 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5328 {
/* The newline is a fixed two-character sequence: fail only when the
character just read is its first character and the next one is its second.
Reaching the end of the subject after the first character is not a failure. */
5329 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5330 end_list = NULL;
5331 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5332 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5333 else
5334 check_str_end(common, &end_list);
5335
5336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5337 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5338 set_jumps(end_list, LABEL());
5339 JUMPHERE(jump[0]);
5340 }
5341 else
5342 check_newlinechar(common, common->nltype, backtracks, TRUE);
5343 return cc;
5344
5345 case OP_ALLANY:
5346 detect_partial_match(common, backtracks);
5347 #ifdef SUPPORT_UTF
5348 if (common->utf)
5349 {
/* Step over the first code unit, then over any further code units that
belong to the same character. */
5350 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5351 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5352 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5353 #if defined COMPILE_PCRE8
/* Units of 0xc0 and above: add the value found in utf8_table4. */
5354 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5355 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5357 #elif defined COMPILE_PCRE16
/* Units with (unit & 0xfc00) == 0xd800: add two more bytes (flag << 1). */
5358 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5359 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5360 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5361 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5362 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5363 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5364 #endif
5365 JUMPHERE(jump[0]);
5366 #endif /* COMPILE_PCRE[8|16] */
5367 return cc;
5368 }
5369 #endif
5370 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5371 return cc;
5372
5373 case OP_ANYBYTE: /* Advances STR_PTR by exactly one code unit. */
5374 detect_partial_match(common, backtracks);
5375 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5376 return cc;
5377
5378 #ifdef SUPPORT_UTF
5379 #ifdef SUPPORT_UCP
5380 case OP_NOTPROP:
5381 case OP_PROP:
/* Wrap the two property operands in a minimal xclass-style descriptor and
reuse the xclass compiler. */
5382 propdata[0] = XCL_HASPROP;
5383 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5384 propdata[2] = cc[0];
5385 propdata[3] = cc[1];
5386 propdata[4] = XCL_END;
5387 compile_xclass_matchingpath(common, propdata, backtracks);
5388 return cc + 2;
5389 #endif
5390 #endif
5391
5392 case OP_ANYNL:
/* A CR that is directly followed by NL is consumed as a single newline; a
CR on its own (or at the end of the subject) is accepted as well. Any other
character goes through check_newlinechar(). */
5393 detect_partial_match(common, backtracks);
5394 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5395 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5396 /* We don't need to handle soft partial matching case. */
5397 end_list = NULL;
5398 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5399 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5400 else
5401 check_str_end(common, &end_list);
5402 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5403 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5404 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5405 jump[2] = JUMP(SLJIT_JUMP);
5406 JUMPHERE(jump[0]);
5407 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5408 set_jumps(end_list, LABEL());
5409 JUMPHERE(jump[1]);
5410 JUMPHERE(jump[2]);
5411 return cc;
5412
5413 case OP_NOT_HSPACE:
5414 case OP_HSPACE:
/* Fast-call the shared hspace routine and branch on its resulting condition. */
5415 detect_partial_match(common, backtracks);
5416 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5417 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5418 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5419 return cc;
5420
5421 case OP_NOT_VSPACE:
5422 case OP_VSPACE:
/* Fast-call the shared vspace routine and branch on its resulting condition. */
5423 detect_partial_match(common, backtracks);
5424 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5425 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5426 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5427 return cc;
5428
5429 #ifdef SUPPORT_UCP
5430 case OP_EXTUNI:
5431 detect_partial_match(common, backtracks);
5432 read_char(common);
5433 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5435 /* Optimize register allocation: use a real register. */
5436 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5437 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5438
/* Loop: STACK_TOP holds the gbprop byte loaded for the previous character.
TMP3 remembers STR_PTR before the next character is read. The loop goes on
while the ucp_gbtable entry selected by the previous value has the bit
numbered by the new value set. */
5439 label = LABEL();
5440 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5441 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5442 read_char(common);
5443 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5445 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5446
5447 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5448 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5449 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5450 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5451 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5452 JUMPTO(SLJIT_NOT_ZERO, label);
5453
/* The character read last does not continue the sequence: go back to where
it started, then restore the saved STACK_TOP. */
5454 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5455 JUMPHERE(jump[0]);
5456 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5457
5458 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5459 {
5460 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5461 /* Since we successfully read a char above, partial matching must occur. */
5462 check_partial(common, TRUE);
5463 JUMPHERE(jump[0]);
5464 }
5465 return cc;
5466 #endif
5467
5468 case OP_EODN:
5469 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; the other
paths accept only a newline that ends exactly at STR_END. */
5470 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5471 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5472 {
5473 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5474 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5475 if (common->mode == JIT_COMPILE)
5476 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5477 else
5478 {
5479 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5480 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5481 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5482 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5483 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5484 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5485 check_partial(common, TRUE);
5486 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5487 JUMPHERE(jump[1]);
5488 }
5489 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5490 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5491 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5492 }
5493 else if (common->nltype == NLTYPE_FIXED)
5494 {
5495 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5496 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5497 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5498 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5499 }
5500 else
5501 {
5502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5503 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5504 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5505 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5506 jump[2] = JUMP(SLJIT_GREATER);
5507 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5508 /* Equal. */
5509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5510 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5511 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5512
5513 JUMPHERE(jump[1]);
5514 if (common->nltype == NLTYPE_ANYCRLF)
5515 {
5516 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5517 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5518 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5519 }
5520 else
5521 {
/* STR_PTR is saved in LOCALS1 while one character is read, and restored
once that character has been accepted as a newline ending at STR_END. */
5522 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5523 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5524 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5525 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5526 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5527 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5528 }
5529 JUMPHERE(jump[2]);
5530 JUMPHERE(jump[3]);
5531 }
5532 JUMPHERE(jump[0]);
5533 check_partial(common, FALSE);
5534 return cc;
5535
5536 case OP_EOD: /* Backtrack while STR_PTR is still below STR_END. */
5537 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5538 check_partial(common, FALSE);
5539 return cc;
5540
5541 case OP_CIRC:
/* Backtrack if STR_PTR is beyond jit_arguments.begin or if the notbol
field is non-zero. */
5542 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5543 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5544 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5545 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5546 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5547 return cc;
5548
5549 case OP_CIRCM:
/* At jit_arguments.begin only the notbol field is tested. Anywhere else
STR_PTR must be below STR_END and be preceded by a newline. */
5550 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5551 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5552 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5553 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5554 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5555 jump[0] = JUMP(SLJIT_JUMP);
5556 JUMPHERE(jump[1]);
5557
5558 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5559 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5560 {
5561 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5562 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5563 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5564 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5565 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5566 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5567 }
5568 else
5569 {
5570 skip_char_back(common);
5571 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5572 check_newlinechar(common, common->nltype, backtracks, FALSE);
5573 }
5574 JUMPHERE(jump[0]);
5575 return cc;
5576
5577 case OP_DOLL:
/* Backtrack when the noteol field is non-zero. Otherwise apply the OP_EODN
test, or the plain end-of-subject test when common->endonly is set. */
5578 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5579 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5580 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5581
5582 if (!common->endonly)
5583 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5584 else
5585 {
5586 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5587 check_partial(common, FALSE);
5588 }
5589 return cc;
5590
5591 case OP_DOLLM:
/* At the end of the subject only the noteol field is tested. Before the
end, the character(s) at STR_PTR must form a newline. */
5592 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5593 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5594 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5595 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5596 check_partial(common, FALSE);
5597 jump[0] = JUMP(SLJIT_JUMP);
5598 JUMPHERE(jump[1]);
5599
5600 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5601 {
5602 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5603 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5604 if (common->mode == JIT_COMPILE)
5605 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5606 else
5607 {
5608 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5609 /* STR_PTR = STR_END - IN_UCHARS(1) */
5610 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5611 check_partial(common, TRUE);
5612 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5613 JUMPHERE(jump[1]);
5614 }
5615
5616 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5617 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5618 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5619 }
5620 else
5621 {
5622 peek_char(common, common->nlmax);
5623 check_newlinechar(common, common->nltype, backtracks, FALSE);
5624 }
5625 JUMPHERE(jump[0]);
5626 return cc;
5627
5628 case OP_CHAR:
5629 case OP_CHARI:
5630 length = 1;
5631 #ifdef SUPPORT_UTF
5632 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5633 #endif
/* In JIT_COMPILE mode, a caseful character, a character without another
case, or one with a non-zero othercase bit is compared unit by unit through
byte_sequence_compare() after a single length check. */
5634 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5635 {
5636 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5637 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5638
5639 context.length = IN_UCHARS(length);
5640 context.sourcereg = -1;
5641 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5642 context.ucharptr = 0;
5643 #endif
5644 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5645 }
5646
/* Otherwise the pattern character is decoded here and compared with the
character read from the subject. */
5647 detect_partial_match(common, backtracks);
5648 #ifdef SUPPORT_UTF
5649 if (common->utf)
5650 {
5651 GETCHAR(c, cc);
5652 }
5653 else
5654 #endif
5655 c = *cc;
5656
5657 if (type == OP_CHAR || !char_has_othercase(common, cc))
5658 {
5659 read_char_range(common, c, c, FALSE);
5660 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5661 return cc + length;
5662 }
5663 oc = char_othercase(common, c);
5664 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5665 bit = c ^ oc;
/* When both cases differ in a single bit, force that bit on and compare once. */
5666 if (is_powerof2(bit))
5667 {
5668 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5669 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5670 return cc + length;
5671 }
5672 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5673 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5674 JUMPHERE(jump[0]);
5675 return cc + length;
5676
5677 case OP_NOT:
5678 case OP_NOTI:
5679 detect_partial_match(common, backtracks);
5680 length = 1;
5681 #ifdef SUPPORT_UTF
5682 if (common->utf)
5683 {
5684 #ifdef COMPILE_PCRE8
5685 c = *cc;
5686 if (c < 128)
5687 {
/* Pattern character below 128: only the first subject unit has to be
compared; the rest of the subject character is then skipped. */
5688 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5689 if (type == OP_NOT || !char_has_othercase(common, cc))
5690 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5691 else
5692 {
5693 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5694 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5695 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5696 }
5697 /* Skip the variable-length character. */
5698 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5699 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5700 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5701 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5702 JUMPHERE(jump[0]);
5703 return cc + 1;
5704 }
5705 else
5706 #endif /* COMPILE_PCRE8 */
5707 {
5708 GETCHARLEN(c, cc, length);
5709 }
5710 }
5711 else
5712 #endif /* SUPPORT_UTF */
5713 c = *cc;
5714
5715 if (type == OP_NOT || !char_has_othercase(common, cc))
5716 {
5717 read_char_range(common, c, c, TRUE);
5718 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5719 }
5720 else
5721 {
5722 oc = char_othercase(common, c);
5723 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5724 bit = c ^ oc;
5725 if (is_powerof2(bit))
5726 {
5727 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5728 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5729 }
5730 else
5731 {
5732 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5733 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5734 }
5735 }
5736 return cc + length;
5737
5738 case OP_CLASS:
5739 case OP_NCLASS:
5740 detect_partial_match(common, backtracks);
5741
5742 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5743 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5744 read_char_range(common, 0, bit, type == OP_NCLASS);
5745 #else
5746 read_char_range(common, 0, 255, type == OP_NCLASS);
5747 #endif
5748
/* Done if check_class_ranges() reports that it handled the class itself. */
5749 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5750 return cc + 32 / sizeof(pcre_uchar);
5751
/* Characters above the bitmap limit fail for OP_CLASS; for OP_NCLASS they
skip the bitmap test through jump[0]. */
5752 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5753 jump[0] = NULL;
5754 if (common->utf)
5755 {
5756 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5757 if (type == OP_CLASS)
5758 {
5759 add_jump(compiler, backtracks, jump[0]);
5760 jump[0] = NULL;
5761 }
5762 }
5763 #elif !defined COMPILE_PCRE8
5764 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5765 if (type == OP_CLASS)
5766 {
5767 add_jump(compiler, backtracks, jump[0]);
5768 jump[0] = NULL;
5769 }
5770 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5771
/* Bitmap test: byte (c >> 3) of the map at cc, bit (c & 0x7). */
5772 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5773 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5774 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5775 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5776 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5777 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5778
5779 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5780 if (jump[0] != NULL)
5781 JUMPHERE(jump[0]);
5782 #endif
5783
5784 return cc + 32 / sizeof(pcre_uchar);
5785
5786 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5787 case OP_XCLASS:
/* The class data follows the LINK_SIZE length field, whose value gives the
total size of the item. */
5788 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5789 return cc + GET(cc, 0) - 1;
5790 #endif
5791
5792 case OP_REVERSE:
/* Moves STR_PTR back by "length" characters and backtracks if that would
pass jit_arguments.begin. A zero length emits no code. */
5793 length = GET(cc, 0);
5794 if (length == 0)
5795 return cc + LINK_SIZE;
5796 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5797 #ifdef SUPPORT_UTF
5798 if (common->utf)
5799 {
/* Characters have variable width: step back one character per iteration,
with TMP2 as the countdown. */
5800 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5801 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5802 label = LABEL();
5803 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5804 skip_char_back(common);
5805 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5806 JUMPTO(SLJIT_NOT_ZERO, label);
5807 }
5808 else
5809 #endif
5810 {
5811 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5812 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5813 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5814 }
5815 check_start_used_ptr(common);
5816 return cc + LINK_SIZE;
5817 }
/* Reached only for an opcode that none of the cases above handles. */
5818 SLJIT_ASSERT_STOP();
5819 return cc;
5820 }
5821
5822 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5823 {
5824 /* This function consumes at least one input character. */
5825 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5826 DEFINE_COMPILER;
5827 pcre_uchar *ccbegin = cc;
5828 compare_context context;
5829 int size;
5830
5831 context.length = 0;
5832 do
5833 {
5834 if (cc >= ccend)
5835 break;
5836
5837 if (*cc == OP_CHAR)
5838 {
5839 size = 1;
5840 #ifdef SUPPORT_UTF
5841 if (common->utf && HAS_EXTRALEN(cc[1]))
5842 size += GET_EXTRALEN(cc[1]);
5843 #endif
5844 }
5845 else if (*cc == OP_CHARI)
5846 {
5847 size = 1;
5848 #ifdef SUPPORT_UTF
5849 if (common->utf)
5850 {
5851 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5852 size = 0;
5853 else if (HAS_EXTRALEN(cc[1]))
5854 size += GET_EXTRALEN(cc[1]);
5855 }
5856 else
5857 #endif
5858 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5859 size = 0;
5860 }
5861 else
5862 size = 0;
5863
5864 cc += 1 + size;
5865 context.length += IN_UCHARS(size);
5866 }
5867 while (size > 0 && context.length <= 128);
5868
5869 cc = ccbegin;
5870 if (context.length > 0)
5871 {
5872 /* We have a fixed-length byte sequence. */
5873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5874 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5875
5876 context.sourcereg = -1;
5877 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5878 context.ucharptr = 0;
5879 #endif
5880 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5881 return cc;
5882 }
5883
5884 /* A non-fixed length character will be checked if length == 0. */
5885 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5886 }
5887
5888 /* Forward definitions. */
5889 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5890 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5891
/* Allocates a backtrack record of "size" bytes with sljit_alloc_memory(),
zero-fills it, links it in front of parent->top and stores "ccstart" in its cc
field. If the compiler reports an error after the allocation, the enclosing
function returns "error". The names "compiler", "backtrack" and "parent" must
be in scope where the macro is expanded. "size" is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5904
/* Variant of the backtrack-record allocation for functions returning void:
allocates "size" bytes with sljit_alloc_memory(), zero-fills them, links the
record in front of parent->top and stores "ccstart" in its cc field. If the
compiler reports an error after the allocation, the enclosing function simply
returns. The names "compiler", "backtrack" and "parent" must be in scope where
the macro is expanded. "size" is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5917
/* Casts the in-scope "backtrack" pointer to a pointer to the given type. */
5918 #define BACKTRACK_AS(type) ((type *)backtrack)
5919
5920 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5921 {
5922 /* The OVECTOR offset goes to TMP2. */
5923 DEFINE_COMPILER;
5924 int count = GET2(cc, 1 + IMM2_SIZE);
5925 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5926 unsigned int offset;
5927 jump_list *found = NULL;
5928
5929 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5930
5931 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5932
5933 count--;
5934 while (count-- > 0)
5935 {
5936 offset = GET2(slot, 0) << 1;
5937 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5938 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5939 slot += common->name_entry_size;
5940 }
5941
5942 offset = GET2(slot, 0) << 1;
5943 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5944 if (backtracks != NULL && !common->jscript_compat)
5945 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5946
5947 set_jumps(found, LABEL());
5948 }
5949
/* Emits the code that matches a back reference at the current subject
position; failures are added to "backtracks".

For OP_REF / OP_REFI the group number is read from the opcode. For any other
opcode the start and end of the referenced string are loaded through TMP2, so
TMP2 must already address the selected OVECTOR pair when the emitted code runs
(presumably prepared by compile_dnref_search - confirm against the callers).

withchecks: also emit the comparison against OVECTOR(1) (skipped when
  jscript_compat is set) and the test for an empty referenced string.
emptyfail: when the referenced string is empty, backtrack instead of simply
  continuing. Only meaningful together with withchecks. */
5950 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5951 {
5952 DEFINE_COMPILER;
5953 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5954 int offset = 0;
5955 struct sljit_jump *jump = NULL;
5956 struct sljit_jump *partial;
5957 struct sljit_jump *nopartial;
5958
/* TMP1 receives the start of the referenced string. */
5959 if (ref)
5960 {
5961 offset = GET2(cc, 1) << 1;
5962 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5963 /* OVECTOR(1) contains the "string begin - 1" constant. */
5964 if (withchecks && !common->jscript_compat)
5965 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
5966 }
5967 else
5968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5969
5970 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5971 if (common->utf && *cc == OP_REFI)
5972 {
/* Caseless comparison in UTF mode is delegated to do_utf_caselesscmp(),
called with three arguments; the assertion below pins the register
assignment this call sequence relies on. */
5973 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
5974 if (ref)
5975 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
5976 else
5977 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5978
/* Start equal to end: the referenced string is empty. */
5979 if (withchecks)
5980 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
5981
5982 /* Needed to save important temporary registers. */
5983 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5984 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
5985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5986 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5987 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return value: 0 always backtracks; 1 backtracks in JIT_COMPILE mode and
goes through check_partial() in the partial modes; any other value becomes
the new STR_PTR. */
5988 if (common->mode == JIT_COMPILE)
5989 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5990 else
5991 {
5992 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5993 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5994 check_partial(common, FALSE);
5995 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5996 JUMPHERE(nopartial);
5997 }
5998 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5999 }
6000 else
6001 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6002 {
/* TMP2 = end - start of the referenced string; the zero result marks an
empty string. */
6003 if (ref)
6004 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6005 else
6006 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6007
6008 if (withchecks)
6009 jump = JUMP(SLJIT_ZERO);
6010
/* Advance STR_PTR by that length first; running past STR_END is a failure
in JIT_COMPILE mode and is handled separately below in the partial modes. */
6011 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6012 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6013 if (common->mode == JIT_COMPILE)
6014 add_jump(compiler, backtracks, partial);
6015
/* The shared compare routine is expected to leave zero in TMP2 on a match. */
6016 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6017 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6018
6019 if (common->mode != JIT_COMPILE)
6020 {
6021 nopartial = JUMP(SLJIT_JUMP);
6022 JUMPHERE(partial);
6023 /* TMP2 -= STR_END - STR_PTR */
6024 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6025 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6026 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6027 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6028 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6029 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6030 JUMPHERE(partial);
6031 check_partial(common, FALSE);
6032 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6033 JUMPHERE(nopartial);
6034 }
6035 }
6036
/* Empty referenced string: either a failure or a trivial match. */
6037 if (jump != NULL)
6038 {
6039 if (emptyfail)
6040 add_jump(compiler, backtracks, jump);
6041 else
6042 JUMPHERE(jump);
6043 }
6044 }
6045
/* Emits the matching path for a repeated back reference: a reference opcode
   immediately followed by one of the OP_CR* repeat opcodes.

   common  shared compiler state
   cc      points at the reference opcode
   parent  parent backtrack record (only used through PUSH_BACKTRACK)

   Returns the address of the first opcode after the repeat operand(s).

   In this function max == 0 means "no upper limit".

   NOTE(review): PUSH_BACKTRACK is defined outside this view. Nothing else in
   this function assigns "backtrack", so the macro evidently does; confirm
   whether it can also return early when allocation fails. */
6046 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6047 {
6048 DEFINE_COMPILER;
/* TRUE for OP_REF / OP_REFI. Any other opcode is resolved at run time through
   compile_dnref_search() (presumably the duplicate-name reference opcodes). */
6049 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6050 backtrack_common *backtrack;
6051 pcre_uchar type;
6052 int offset = 0;
6053 struct sljit_label *label;
6054 struct sljit_jump *zerolength;
6055 struct sljit_jump *jump = NULL;
/* Keeps the address of the reference opcode; cc itself is advanced below. */
6056 pcre_uchar *ccbegin = cc;
6057 int min = 0, max = 0;
6058 BOOL minimize;
6059
6060 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6061
/* For a plain reference the group number is doubled to obtain the index of
   the group's first OVECTOR slot. In the other case cc is advanced by one
   IMM2 so that the repeat opcode is found at cc[1 + IMM2_SIZE] either way. */
6062 if (ref)
6063 offset = GET2(cc, 1) << 1;
6064 else
6065 cc += IMM2_SIZE;
6066 type = cc[1 + IMM2_SIZE];
6067
/* OP_CRSTAR is asserted to be even, so an odd repeat opcode is treated as the
   minimizing (lazy) variant. */
6068 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6069 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc past the repeat opcode and operands. */
6070 switch(type)
6071 {
6072 case OP_CRSTAR:
6073 case OP_CRMINSTAR:
6074 min = 0;
6075 max = 0;
6076 cc += 1 + IMM2_SIZE + 1;
6077 break;
6078 case OP_CRPLUS:
6079 case OP_CRMINPLUS:
6080 min = 1;
6081 max = 0;
6082 cc += 1 + IMM2_SIZE + 1;
6083 break;
6084 case OP_CRQUERY:
6085 case OP_CRMINQUERY:
6086 min = 0;
6087 max = 1;
6088 cc += 1 + IMM2_SIZE + 1;
6089 break;
6090 case OP_CRRANGE:
6091 case OP_CRMINRANGE:
/* The range form carries two extra IMM2 operands: min, then max. */
6092 min = GET2(cc, 1 + IMM2_SIZE + 1);
6093 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6094 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6095 break;
6096 default:
6097 SLJIT_ASSERT_STOP();
6098 break;
6099 }
6100
/* Greedy repeat: match as many times as allowed, saving STR_PTR on the
   backtrack stack for the iterations that may be given back. */
6101 if (!minimize)
6102 {
6103 if (min == 0)
6104 {
6105 allocate_stack(common, 2);
6106 if (ref)
6107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6108 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
/* NOTE(review): lowering STACK_TOP by one word presumably drops the slot that
   holds STR_PTR, so that only the zero entry stays allocated when the
   zerolength jump below is taken - confirm against STACK()/allocate_stack(). */
6110 /* Temporary release of STR_PTR. */
6111 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6112 /* Handles both invalid and empty cases. Since the minimum repeat
6113 is zero, the invalid case is basically the same as an empty case. */
6114 if (ref)
6115 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6116 else
6117 {
/* The code below reads the group's two pointers through TMP2 after the
   search; TMP2 is saved in POSSESSIVE1 so each loop iteration can reload it. */
6118 compile_dnref_search(common, ccbegin, NULL);
6119 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6121 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6122 }
6123 /* Restore if not zero length. */
6124 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6125 }
6126 else
6127 {
6128 allocate_stack(common, 1);
6129 if (ref)
6130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6132 if (ref)
6133 {
/* At least one match is required here, so a group whose start equals
   OVECTOR(1) (presumably the "unset" marker - confirm) fails outright, while
   an empty group (start == end) skips the loop. */
6134 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6135 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6136 }
6137 else
6138 {
6139 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6140 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6141 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6142 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6143 }
6144 }
6145
/* An iteration counter is only needed when a limit larger than one exists;
   it lives in the POSSESSIVE0 local and starts at zero. */
6146 if (min > 1 || max > 1)
6147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6148
/* Head of the greedy loop: one copy of the referenced text is matched per
   iteration. The two trailing FALSE arguments are presumably the
   withchecks/emptyfail flags of compile_ref_matchingpath - confirm. */
6149 label = LABEL();
6150 if (!ref)
6151 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6152 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6153
6154 if (min > 1 || max > 1)
6155 {
/* counter++ */
6156 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6157 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing a backtrack position. */
6159 if (min > 1)
6160 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6161 if (max > 1)
6162 {
/* Leave the loop once max is reached; otherwise push STR_PTR and repeat. */
6163 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6164 allocate_stack(common, 1);
6165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6166 JUMPTO(SLJIT_JUMP, label);
6167 JUMPHERE(jump);
6168 }
6169 }
6170
/* Unbounded repeat: always push STR_PTR and loop. The loop is left through
   the backtrack list passed to compile_ref_matchingpath above. */
6171 if (max == 0)
6172 {
6173 /* Includes min > 1 case as well. */
6174 allocate_stack(common, 1);
6175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6176 JUMPTO(SLJIT_JUMP, label);
6177 }
6178
/* The zero-length shortcut lands here, which is also recorded as the
   matching-path label of this iterator's backtrack record. */
6179 JUMPHERE(zerolength);
6180 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6181
6182 count_match(common);
6183 return cc;
6184 }
6185
/* Minimizing (lazy) repeat. Stack layout used below: STACK(0) holds a
   position (initially 0), STACK(1) the iteration counter, and - for the
   non-plain reference only - STACK(2) the value of TMP2 after the search. */
6186 allocate_stack(common, ref ? 2 : 3);
6187 if (ref)
6188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* The counter slot is not initialized for OP_CRMINSTAR (min and max are both
   0 there, so none of the counter code below is emitted). */
6190 if (type != OP_CRMINSTAR)
6191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6192
6193 if (min == 0)
6194 {
6195 /* Handles both invalid and empty cases. Since the minimum repeat
6196 is zero, the invalid case is basically the same as an empty case. */
6197 if (ref)
6198 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6199 else
6200 {
6201 compile_dnref_search(common, ccbegin, NULL);
6202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6204 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6205 }
6206 /* Length is non-zero, we can match real repeats. */
/* Record the current position and jump over the first match attempt: with
   min == 0 the lazy repeat initially matches nothing. */
6207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6208 jump = JUMP(SLJIT_JUMP);
6209 }
6210 else
6211 {
6212 if (ref)
6213 {
/* Same checks as in the greedy min > 0 case: fail when the group start equals
   OVECTOR(1), skip the matching code when the group is empty. */
6214 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6215 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6216 }
6217 else
6218 {
6219 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6220 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6222 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6223 }
6224 }
6225
/* Label stored in the backtrack record; the min > 1 loop below also jumps
   back to it. Each pass through here matches one more copy. */
6226 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Refuse another iteration once the counter has reached max. */
6227 if (max > 0)
6228 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6229
6230 if (!ref)
6231 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6232 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6234
6235 if (min > 1)
6236 {
/* counter++ and keep matching until the minimum has been reached. */
6237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6238 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6240 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6241 }
6242 else if (max > 0)
/* Only an upper bound applies: increment the counter in place. */
6243 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6244
/* Both the "skip the first match" jump (min == 0) and the zero-length
   shortcut continue here. */
6245 if (jump != NULL)
6246 JUMPHERE(jump);
6247 JUMPHERE(zerolength);
6248
6249 count_match(common);
6250 return cc;
6251 }
6252
/* Emits the matching path for a recursion / subroutine call opcode.

   common  shared compiler state; common->entries is the list of recursion
           targets collected so far
   cc      points at the recursion opcode; GET(cc, 1) is the offset of the
           target group relative to common->start
   parent  parent backtrack record (only used through PUSH_BACKTRACK)

   Returns the address of the next opcode (cc + 1 + LINK_SIZE), or NULL when
   the compiler reports an error after allocating a new list entry.

   NOTE(review): PUSH_BACKTRACK is defined outside this view. Nothing else in
   this function assigns "backtrack", so the macro evidently does; confirm
   whether it can also return early when allocation fails. */
6253 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6254 {
6255 DEFINE_COMPILER;
6256 backtrack_common *backtrack;
6257 recurse_entry *entry = common->entries;
6258 recurse_entry *prev = NULL;
6259 sljit_sw start = GET(cc, 1);
6260 pcre_uchar *start_cc;
6261 BOOL needs_control_head;
6262
6263 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6264
/* When get_framesize() reports no_stack for the target group, its body is
   compiled in place instead of being called, and the backtrack record is
   flagged as inlined. */
6265 /* Inlining simple patterns. */
6266 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6267 {
6268 start_cc = common->start + start;
6269 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6270 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6271 return cc + 1 + LINK_SIZE;
6272 }
6273
/* Look for an existing entry for this target; prev ends up pointing at the
   list tail when none is found. */
6274 while (entry != NULL)
6275 {
6276 if (entry->start == start)
6277 break;
6278 prev = entry;
6279 entry = entry->next;
6280 }
6281
/* First call to this target: create an entry and append it to the list. */
6282 if (entry == NULL)
6283 {
6284 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6285 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6286 return NULL;
6287 entry->next = NULL;
6288 entry->entry = NULL;
6289 entry->calls = NULL;
6290 entry->start = start;
6291
6292 if (prev != NULL)
6293 prev->next = entry;
6294 else
6295 common->entries = entry;
6296 }
6297
/* Save OVECTOR(0) and/or the mark local on the backtrack stack before the
   call: two slots when both are in use, one slot when only one of them is,
   none otherwise. */
6298 if (common->has_set_som && common->mark_ptr != 0)
6299 {
6300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6301 allocate_stack(common, 2);
6302 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6305 }
6306 else if (common->has_set_som || common->mark_ptr != 0)
6307 {
6308 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6309 allocate_stack(common, 1);
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6311 }
6312
/* Call the target. If its label does not exist yet, the call is queued on
   entry->calls (presumably bound to the label once the target is emitted -
   confirm at the place where entry->entry is set). */
6313 if (entry->entry == NULL)
6314 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6315 else
6316 JUMPTO(SLJIT_FAST_CALL, entry->entry);
/* A zero in TMP1 after the call is treated as failure and sends execution to
   this record's backtrack list. */
6317 /* Leave if the match is failed. */
6318 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6319 return cc + 1 + LINK_SIZE;
6320 }
6321
6322 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6323 {
6324 const pcre_uchar *begin = arguments->begin;
6325 int *offset_vector = arguments->offsets;
6326 int offset_count = arguments->offset_count;
6327 int i;
6328
6329 if (PUBL(callout) == NULL)
6330 return 0;
6331
6332 callout_block->version = 2;
6333 callout_block->callout_data = arguments->callout_data;
6334
6335 /* Offsets in subject. */
6336 callout_block->subject_length = arguments->end - arguments->begin;
6337 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6338 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6339 #if defined COMPILE_PCRE8
6340 callout_block->subject = (PCRE_SPTR)begin;
6341 #elif defined COMPILE_PCRE16
6342 callout_block->subject = (PCRE_SPTR16)begin;
6343 #elif defined COMPILE_PCRE32
6344 callout_block->subject = (PCRE_SPTR32)begin;
6345 #endif
6346
6347 /* Convert and copy the JIT offset vector to the offset_vector array. */
6348 callout_block->capture_top = 0;
6349 callout_block->offset_vector = offset_vector;
6350 for (i = 2; i < offset_count; i += 2)
6351 {
6352 offset_vector[i] = jit_ovector[i] - begin;
6353 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6354 if (jit_ovector[i] >= begin)
6355 callout_block->capture_top = i;
6356 }
6357
6358 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6359 if (offset_count > 0)
6360 offset_vector[0] = -1;
6361 if (offset_count > 1)
6362 offset_vector[1] = -1;
6363 return (*PUBL(callout))(callout_block);
6364 }
6365
6366 /* Size of the callout block in bytes, rounded up to a multiple of 8. */
6367 #define CALLOUT_ARG_SIZE \
6368 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6369
/* Offset of the callout block member "arg", expressed as a (negative)
   displacement from the end of a CALLOUT_ARG_SIZE byte area. The emitter below
   uses it relative to STACK_TOP. */
6370 #define CALLOUT_ARG_OFFSET(arg) \
6371 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6372
/* Emits the code for a callout opcode: builds a callout block on the
   backtrack stack, calls do_callout() and acts on its return value.

   common  shared compiler state
   cc      points at the callout opcode; cc[1] is the callout number,
           GET(cc, 2) the pattern position and GET(cc, 2 + LINK_SIZE) the
           length of the next item
   parent  parent backtrack record (only used through PUSH_BACKTRACK)

   Returns the address of the next opcode (cc + 2 + 2 * LINK_SIZE).

   NOTE(review): PUSH_BACKTRACK is defined outside this view. Nothing else in
   this function assigns "backtrack", so the macro evidently does; confirm
   whether it can also return early when allocation fails. */
6373 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6374 {
6375 DEFINE_COMPILER;
6376 backtrack_common *backtrack;
6377
6378 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6379
/* Reserve room for the callout block, counted in machine words. */
6380 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6381
6382 SLJIT_ASSERT(common->capture_last_ptr != 0);
6383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
/* TMP1 is loaded with ARGUMENTS here and is not written again before the
   call; presumably it is the register that carries the first call argument -
   confirm against the TMP1 definition. */
6384 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6385 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6386 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6387
6388 /* These pointer sized fields temporarily store internal variables:
   "offset_vector" carries STR_PTR and "subject" carries OVECTOR(0).
   do_callout() converts them to current_position and start_match before it
   overwrites the two fields with their real values. */
6389 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6392
/* The mark field is read from the mark_ptr member of the argument block when
   the pattern uses a mark slot, and is set to 0 otherwise. */
6393 if (common->mark_ptr != 0)
6394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6395 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6396 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6398
/* STACK_TOP is parked in LOCALS0 across the call. R1 receives the address of
   the callout block (STACK_TOP - CALLOUT_ARG_SIZE) and R2 the address of the
   local area at OVECTOR_START. */
6399 /* Needed to save important temporary registers. */
6400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6401 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6402 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6403 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns int; SLJIT_MOV_SI presumably widens it to a full
   register before the signed comparison below - confirm in the sljit docs. */
6404 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6405 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6406 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6407
/* Positive result: backtrack. Negative result: leave through the forced quit
   path. Zero: fall through and continue matching. */
6408 /* Check return value. */
6409 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6410 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6411 if (common->forced_quit_label == NULL)
6412 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6413 else
6414 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6415 return cc + 2 + 2 * LINK_SIZE;
6416 }
6417
/* The callout block helpers are only needed by compile_callout_matchingpath. */
6418 #undef CALLOUT_ARG_SIZE
6419 #undef CALLOUT_ARG_OFFSET
6420
6421 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6422 {
6423 DEFINE_COMPILER;
6424 int framesize;
6425 int extrasize;
6426 BOOL needs_control_head;
6427 int private_data_ptr;
6428 backtrack_common altbacktrack;
6429 pcre_uchar *ccbegin;
6430 pcre_uchar opcode;
6431 pcre_uchar bra = OP_BRA;
6432 jump_list *tmp = NULL;
6433 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6434 jump_list **found;
6435 /* Saving previous accept variables. */
6436 BOOL save_local_exit = common->local_exit;
6437 BOOL save_positive_assert = common->positive_assert;
6438 then_trap_backtrack *save_then_trap = common->then_trap;
6439 struct sljit_label *save_quit_label = common->quit_label;
6440 struct sljit_label *save_accept_label = common->accept_label;
6441 jump_list *save_quit = common->quit;
6442 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6443 jump_list *save_accept = common->accept;
6444 struct sljit_jump *jump;
6445 struct sljit_jump *brajump = NULL;
6446
6447 /* Assert captures then. */
6448 common->then_trap = NULL;
6449
6450 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6451 {
6452 SLJIT_ASSERT(!conditional);
6453 bra = *cc;
6454 cc++;
6455 }
6456 private_data_ptr = PRIVATE_DATA(cc);
6457 SLJIT_ASSERT(private_data_ptr != 0);
6458 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6459 backtrack->framesize = framesize;
6460 backtrack->private_data_ptr = private_data_ptr;
6461 opcode = *cc;
6462 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6463 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6464 ccbegin = cc;
6465 cc += GET(cc, 1);
6466
6467 if (bra == OP_BRAMINZERO)
6468 {
6469 /* This is a braminzero backtrack path. */
6470 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6471 free_stack(common, 1);
6472 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6473 }
6474
6475 if (framesize < 0)
6476 {
6477 extrasize = needs_control_head ? 2 : 1;
6478 if (framesize == no_frame)
6479 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6480 allocate_stack(common, extrasize);
6481 if (needs_control_head)
6482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6484 if (needs_control_head)
6485 {
6486 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6488 }
6489 }
6490 else
6491 {
6492 extrasize = needs_control_head ? 3 : 2;
6493 allocate_stack(common, framesize + extrasize);
6494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6495 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6497 if (needs_control_head)
6498 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6500 if (needs_control_head)
6501 {
6502 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6505 }
6506 else
6507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6508 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6509 }
6510
6511 memset(&altbacktrack, 0, sizeof(backtrack_common));
6512 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6513 {
6514 /* Negative assert is stronger than positive assert. */
6515 common->local_exit = TRUE;
6516 common->quit_label = NULL;
6517 common->quit = NULL;
6518 common->positive_assert = FALSE;
6519 }
6520 else
6521 common->positive_assert = TRUE;
6522 common->positive_assert_quit = NULL;
6523
6524 while (1)
6525 {
6526 common->accept_label = NULL;
6527 common->accept = NULL;
6528 altbacktrack.top = NULL;
6529 altbacktrack.topbacktracks = NULL;
6530
6531 if (*ccbegin == OP_ALT)
6532 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6533
6534 altbacktrack.cc = ccbegin;
6535 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6536 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6537 {
6538 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6539 {
6540 common->local_exit = save_local_exit;
6541 common->quit_label = save_quit_label;
6542 common->quit = save_quit;
6543 }
6544 common->positive_assert = save_positive_assert;
6545 common->then_trap = save_then_trap;
6546 common->accept_label = save_accept_label;
6547 common->positive_assert_quit = save_positive_assert_quit;
6548 common->accept = save_accept;
6549 return NULL;
6550 }
6551 common->accept_label = LABEL();
6552 if (common->accept != NULL)
6553 set_jumps(common->accept, common->accept_label);
6554
6555 /* Reset stack. */
6556 if (framesize < 0)
6557 {
6558 if (framesize == no_frame)
6559 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6560 else
6561 free_stack(common, extrasize);
6562 if (needs_control_head)
6563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6564 }
6565 else
6566 {
6567 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6568 {
6569 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6570 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6571 if (needs_control_head)
6572 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6573 }
6574 else
6575 {
6576 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6577 if (needs_control_head)
6578 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6579 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6580 }
6581 }
6582
6583 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6584 {
6585 /* We know that STR_PTR was stored on the top of the stack. */
6586 if (conditional)
6587 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6588 else if (bra == OP_BRAZERO)
6589 {
6590 if (framesize < 0)
6591 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6592 else
6593 {
6594 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6595 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6596 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6597 }
6598 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6599 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6600 }
6601 else if (framesize >= 0)
6602 {
6603 /* For OP_BRA and OP_BRAMINZERO. */
6604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6605 }
6606 }
6607 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6608
6609 compile_backtrackingpath(common, altbacktrack.top);
6610 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6611 {
6612 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6613 {
6614 common->local_exit = save_local_exit;
6615 common->quit_label = save_quit_label;
6616 common->quit = save_quit;
6617 }
6618 common->positive_assert = save_positive_assert;
6619 common->then_trap = save_then_trap;
6620 common->accept_label = save_accept_label;
6621 common->positive_assert_quit = save_positive_assert_quit;
6622 common->accept = save_accept;
6623 return NULL;
6624 }
6625 set_jumps(altbacktrack.topbacktracks, LABEL());
6626
6627 if (*cc != OP_ALT)
6628 break;
6629
6630 ccbegin = cc;
6631 cc += GET(cc, 1);
6632 }
6633
6634 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6635 {
6636 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6637 /* Makes the check less complicated below. */
6638 common->positive_assert_quit = common->quit;
6639 }
6640
6641 /* None of them matched. */
6642 if (common->positive_assert_quit != NULL)
6643 {
6644 jump = JUMP(SLJIT_JUMP);
6645 set_jumps(common->positive_assert_quit, LABEL());
6646 SLJIT_ASSERT(framesize != no_stack);
6647 if (framesize < 0)
6648 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6649 else
6650 {
6651 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6652 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6653 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6654 }
6655 JUMPHERE(jump);
6656 }
6657
6658 if (needs_control_head)
6659 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6660
6661 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6662 {
6663 /* Assert is failed. */
6664 if (conditional || bra == OP_BRAZERO)
6665 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6666
6667 if (framesize < 0)
6668 {
6669 /* The topmost item should be 0. */
6670 if (bra == OP_BRAZERO)
6671 {
6672 if (extrasize == 2)
6673 free_stack(common, 1);
6674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6675 }
6676 else
6677 free_stack(common, extrasize);
6678 }
6679 else
6680 {
6681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6682 /* The topmost item should be 0. */
6683 if (bra == OP_BRAZERO)
6684 {
6685 free_stack(common, framesize + extrasize - 1);
6686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6687 }
6688 else
6689 free_stack(common, framesize + extrasize);
6690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6691 }
6692 jump = JUMP(SLJIT_JUMP);
6693 if (bra != OP_BRAZERO)
6694 add_jump(compiler, target, jump);
6695
6696 /* Assert is successful. */
6697 set_jumps(tmp, LABEL());
6698 if (framesize < 0)
6699 {
6700 /* We know that STR_PTR was stored on the top of the stack. */
6701 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6702 /* Keep the STR_PTR on the top of the stack. */
6703 if (bra == OP_BRAZERO)
6704 {
6705 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6706 if (extrasize == 2)
6707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6708 }
6709 else if (bra == OP_BRAMINZERO)
6710 {
6711 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6713 }
6714 }
6715 else
6716 {
6717 if (bra == OP_BRA)
6718 {
6719 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6720 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6721 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6722 }
6723 else
6724 {
6725 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6726 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6727 if (extrasize == 2)
6728 {
6729 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6730 if (bra == OP_BRAMINZERO)
6731 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6732 }
6733 else
6734 {
6735 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6737 }
6738 }
6739 }
6740
6741 if (bra == OP_BRAZERO)
6742 {
6743 backtrack->matchingpath = LABEL();
6744 SET_LABEL(jump, backtrack->matchingpath);
6745 }
6746 else if (bra == OP_BRAMINZERO)
6747 {
6748 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6749 JUMPHERE(brajump);
6750 if (framesize >= 0)
6751 {
6752 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6753 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6755 }
6756 set_jumps(backtrack->common.topbacktracks, LABEL());
6757 }
6758 }
6759 else
6760 {
6761 /* AssertNot is successful. */
6762 if (framesize < 0)
6763 {
6764 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6765 if (bra != OP_BRA)
6766 {
6767 if (extrasize == 2)
6768 free_stack(common, 1);
6769 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6770 }
6771 else
6772 free_stack(common, extrasize);
6773 }
6774 else
6775 {
6776 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6777