/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1415 - (show annotations)
Sun Dec 22 20:47:08 2013 UTC (5 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 314167 byte(s)
The auto-possessification of character sets were improved. The JIT compiler also optimizes more character set checks.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list jump_list;

struct jump_list {
  struct sljit_jump *jump;
  jump_list *next;
};
193
/* Singly linked list node describing a stub: the jump that starts it and
the label it quits to. */
typedef struct stub_list stub_list;

struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  stub_list *next;
};
199
/* Negative sentinel values.
NOTE(review): presumably used in place of a frame size when no frame
(or no stack) is required - confirm against get_framesize. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
204
/* Entry kinds.
NOTE(review): presumably tags for the control verb chain referenced by
compiler_common.control_head_ptr - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
209
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to coninue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
309 #define MAX_RANGE_SIZE 4
310
311 typedef struct compiler_common {
312 /* The sljit ceneric compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 int digits[2 + MAX_RANGE_SIZE];
373 /* Named capturing brackets. */
374 pcre_uchar *name_table;
375 sljit_sw name_count;
376 sljit_sw name_entry_size;
377
378 /* Labels and jump lists. */
379 struct sljit_label *partialmatchlabel;
380 struct sljit_label *quit_label;
381 struct sljit_label *forced_quit_label;
382 struct sljit_label *accept_label;
383 stub_list *stubs;
384 recurse_entry *entries;
385 recurse_entry *currententry;
386 jump_list *partialmatch;
387 jump_list *quit;
388 jump_list *positive_assert_quit;
389 jump_list *forced_quit;
390 jump_list *accept;
391 jump_list *calllimit;
392 jump_list *stackalloc;
393 jump_list *revertframes;
394 jump_list *wordboundary;
395 jump_list *anynewline;
396 jump_list *hspace;
397 jump_list *vspace;
398 jump_list *casefulcmp;
399 jump_list *caselesscmp;
400 jump_list *reset_match;
401 BOOL jscript_compat;
402 #ifdef SUPPORT_UTF
403 BOOL utf;
404 #ifdef SUPPORT_UCP
405 BOOL use_ucp;
406 #endif
407 #ifndef COMPILE_PCRE32
408 jump_list *utfreadchar;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
/* Context structure used by byte_sequence_compare. The two unions (c and
oc) are only present when SLJIT_UNALIGNED is enabled; each one overlays an
int, a 16 bit value and an array of code units of the compiled width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
452
/* Drop the sljit definition of CMP; this file defines its own below. */
#undef CMP

/* Byte offset of the i-th stack element: element 0 is at
-sizeof(sljit_sw), element 1 at -2 * sizeof(sljit_sw), and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register aliases used by the code generator. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define COUNT_MATCH   SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
469
/* Layout of the local space, in sljit_sw sized words. */

/* Words 0 and 1: free for use by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Words 2 and 3: locals of possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Word 4: max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and holds character pointers. Its
data is split into two groups: the first one holds the start / end
character pointers, the second one holds the start pointers of capturing
groups whose end has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
/* Shorthand wrappers around the sljit emitter API. All of them except
DEFINE_COMPILER and SET_LABEL expect a local variable named "compiler",
which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an existing jump to a label emitted right here. */
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare and bind the resulting jump to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
536 /* Functions which might need modification for every newly supported opcode:
537 next_opcode
538 check_opcode_types
539 set_private_data_ptrs
540 get_framesize
541 init_frame
542 get_private_data_copy_length
543 copy_private_data
544 compile_matchingpath
545 compile_backtrackingpath
546 */
547
548 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
549 {
550 SLJIT_UNUSED_ARG(common);
551 switch(*cc)
552 {
553 case OP_SOD:
554 case OP_SOM:
555 case OP_SET_SOM:
556 case OP_NOT_WORD_BOUNDARY:
557 case OP_WORD_BOUNDARY:
558 case OP_NOT_DIGIT:
559 case OP_DIGIT:
560 case OP_NOT_WHITESPACE:
561 case OP_WHITESPACE:
562 case OP_NOT_WORDCHAR:
563 case OP_WORDCHAR:
564 case OP_ANY:
565 case OP_ALLANY:
566 case OP_NOTPROP:
567 case OP_PROP:
568 case OP_ANYNL:
569 case OP_NOT_HSPACE:
570 case OP_HSPACE:
571 case OP_NOT_VSPACE:
572 case OP_VSPACE:
573 case OP_EXTUNI:
574 case OP_EODN:
575 case OP_EOD:
576 case OP_CIRC:
577 case OP_CIRCM:
578 case OP_DOLL:
579 case OP_DOLLM:
580 case OP_CRSTAR:
581 case OP_CRMINSTAR:
582 case OP_CRPLUS:
583 case OP_CRMINPLUS:
584 case OP_CRQUERY:
585 case OP_CRMINQUERY:
586 case OP_CRRANGE:
587 case OP_CRMINRANGE:
588 case OP_CRPOSSTAR:
589 case OP_CRPOSPLUS:
590 case OP_CRPOSQUERY:
591 case OP_CRPOSRANGE:
592 case OP_CLASS:
593 case OP_NCLASS:
594 case OP_REF:
595 case OP_REFI:
596 case OP_DNREF:
597 case OP_DNREFI:
598 case OP_RECURSE:
599 case OP_CALLOUT:
600 case OP_ALT:
601 case OP_KET:
602 case OP_KETRMAX:
603 case OP_KETRMIN:
604 case OP_KETRPOS:
605 case OP_REVERSE:
606 case OP_ASSERT:
607 case OP_ASSERT_NOT:
608 case OP_ASSERTBACK:
609 case OP_ASSERTBACK_NOT:
610 case OP_ONCE:
611 case OP_ONCE_NC:
612 case OP_BRA:
613 case OP_BRAPOS:
614 case OP_CBRA:
615 case OP_CBRAPOS:
616 case OP_COND:
617 case OP_SBRA:
618 case OP_SBRAPOS:
619 case OP_SCBRA:
620 case OP_SCBRAPOS:
621 case OP_SCOND:
622 case OP_CREF:
623 case OP_DNCREF:
624 case OP_RREF:
625 case OP_DNRREF:
626 case OP_DEF:
627 case OP_BRAZERO:
628 case OP_BRAMINZERO:
629 case OP_BRAPOSZERO:
630 case OP_PRUNE:
631 case OP_SKIP:
632 case OP_THEN:
633 case OP_COMMIT:
634 case OP_FAIL:
635 case OP_ACCEPT:
636 case OP_ASSERT_ACCEPT:
637 case OP_CLOSE:
638 case OP_SKIPZERO:
639 return cc + PRIV(OP_lengths)[*cc];
640
641 case OP_CHAR:
642 case OP_CHARI:
643 case OP_NOT:
644 case OP_NOTI:
645 case OP_STAR:
646 case OP_MINSTAR:
647 case OP_PLUS:
648 case OP_MINPLUS:
649 case OP_QUERY:
650 case OP_MINQUERY:
651 case OP_UPTO:
652 case OP_MINUPTO:
653 case OP_EXACT:
654 case OP_POSSTAR:
655 case OP_POSPLUS:
656 case OP_POSQUERY:
657 case OP_POSUPTO:
658 case OP_STARI:
659 case OP_MINSTARI:
660 case OP_PLUSI:
661 case OP_MINPLUSI:
662 case OP_QUERYI:
663 case OP_MINQUERYI:
664 case OP_UPTOI:
665 case OP_MINUPTOI:
666 case OP_EXACTI:
667 case OP_POSSTARI:
668 case OP_POSPLUSI:
669 case OP_POSQUERYI:
670 case OP_POSUPTOI:
671 case OP_NOTSTAR:
672 case OP_NOTMINSTAR:
673 case OP_NOTPLUS:
674 case OP_NOTMINPLUS:
675 case OP_NOTQUERY:
676 case OP_NOTMINQUERY:
677 case OP_NOTUPTO:
678 case OP_NOTMINUPTO:
679 case OP_NOTEXACT:
680 case OP_NOTPOSSTAR:
681 case OP_NOTPOSPLUS:
682 case OP_NOTPOSQUERY:
683 case OP_NOTPOSUPTO:
684 case OP_NOTSTARI:
685 case OP_NOTMINSTARI:
686 case OP_NOTPLUSI:
687 case OP_NOTMINPLUSI:
688 case OP_NOTQUERYI:
689 case OP_NOTMINQUERYI:
690 case OP_NOTUPTOI:
691 case OP_NOTMINUPTOI:
692 case OP_NOTEXACTI:
693 case OP_NOTPOSSTARI:
694 case OP_NOTPOSPLUSI:
695 case OP_NOTPOSQUERYI:
696 case OP_NOTPOSUPTOI:
697 cc += PRIV(OP_lengths)[*cc];
698 #ifdef SUPPORT_UTF
699 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
700 #endif
701 return cc;
702
703 /* Special cases. */
704 case OP_TYPESTAR:
705 case OP_TYPEMINSTAR:
706 case OP_TYPEPLUS:
707 case OP_TYPEMINPLUS:
708 case OP_TYPEQUERY:
709 case OP_TYPEMINQUERY:
710 case OP_TYPEUPTO:
711 case OP_TYPEMINUPTO:
712 case OP_TYPEEXACT:
713 case OP_TYPEPOSSTAR:
714 case OP_TYPEPOSPLUS:
715 case OP_TYPEPOSQUERY:
716 case OP_TYPEPOSUPTO:
717 return cc + PRIV(OP_lengths)[*cc] - 1;
718
719 case OP_ANYBYTE:
720 #ifdef SUPPORT_UTF
721 if (common->utf) return NULL;
722 #endif
723 return cc + 1;
724
725 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
726 case OP_XCLASS:
727 return cc + GET(cc, 1);
728 #endif
729
730 case OP_MARK:
731 case OP_PRUNE_ARG:
732 case OP_SKIP_ARG:
733 case OP_THEN_ARG:
734 return cc + 1 + 2 + cc[1];
735
736 default:
737 /* All opcodes are supported now! */
738 SLJIT_ASSERT_STOP();
739 return NULL;
740 }
741 }
742
743 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
744 {
745 int count;
746 pcre_uchar *slot;
747
748 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
749 while (cc < ccend)
750 {
751 switch(*cc)
752 {
753 case OP_SET_SOM:
754 common->has_set_som = TRUE;
755 cc += 1;
756 break;
757
758 case OP_REF:
759 case OP_REFI:
760 common->optimized_cbracket[GET2(cc, 1)] = 0;
761 cc += 1 + IMM2_SIZE;
762 break;
763
764 case OP_CBRAPOS:
765 case OP_SCBRAPOS:
766 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
767 cc += 1 + LINK_SIZE + IMM2_SIZE;
768 break;
769
770 case OP_COND:
771 case OP_SCOND:
772 /* Only AUTO_CALLOUT can insert this opcode. We do
773 not intend to support this case. */
774 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
775 return FALSE;
776 cc += 1 + LINK_SIZE;
777 break;
778
779 case OP_CREF:
780 common->optimized_cbracket[GET2(cc, 1)] = 0;
781 cc += 1 + IMM2_SIZE;
782 break;
783
784 case OP_DNREF:
785 case OP_DNREFI:
786 case OP_DNCREF:
787 count = GET2(cc, 1 + IMM2_SIZE);
788 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
789 while (count-- > 0)
790 {
791 common->optimized_cbracket[GET2(slot, 0)] = 0;
792 slot += common->name_entry_size;
793 }
794 cc += 1 + 2 * IMM2_SIZE;
795 break;
796
797 case OP_RECURSE:
798 /* Set its value only once. */
799 if (common->recursive_head_ptr == 0)
800 {
801 common->recursive_head_ptr = common->ovector_start;
802 common->ovector_start += sizeof(sljit_sw);
803 }
804 cc += 1 + LINK_SIZE;
805 break;
806
807 case OP_CALLOUT:
808 if (common->capture_last_ptr == 0)
809 {
810 common->capture_last_ptr = common->ovector_start;
811 common->ovector_start += sizeof(sljit_sw);
812 }
813 cc += 2 + 2 * LINK_SIZE;
814 break;
815
816 case OP_THEN_ARG:
817 common->has_then = TRUE;
818 common->control_head_ptr = 1;
819 /* Fall through. */
820
821 case OP_PRUNE_ARG:
822 common->needs_start_ptr = TRUE;
823 /* Fall through. */
824
825 case OP_MARK:
826 if (common->mark_ptr == 0)
827 {
828 common->mark_ptr = common->ovector_start;
829 common->ovector_start += sizeof(sljit_sw);
830 }
831 cc += 1 + 2 + cc[1];
832 break;
833
834 case OP_THEN:
835 common->has_then = TRUE;
836 common->control_head_ptr = 1;
837 /* Fall through. */
838
839 case OP_PRUNE:
840 case OP_SKIP:
841 common->needs_start_ptr = TRUE;
842 cc += 1;
843 break;
844
845 case OP_SKIP_ARG:
846 common->control_head_ptr = 1;
847 common->has_skip_arg = TRUE;
848 cc += 1 + 2 + cc[1];
849 break;
850
851 default:
852 cc = next_opcode(common, cc);
853 if (cc == NULL)
854 return FALSE;
855 break;
856 }
857 }
858 return TRUE;
859 }
860
861 static int get_class_iterator_size(pcre_uchar *cc)
862 {
863 switch(*cc)
864 {
865 case OP_CRSTAR:
866 case OP_CRPLUS:
867 return 2;
868
869 case OP_CRMINSTAR:
870 case OP_CRMINPLUS:
871 case OP_CRQUERY:
872 case OP_CRMINQUERY:
873 return 1;
874
875 case OP_CRRANGE:
876 case OP_CRMINRANGE:
877 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
878 return 0;
879 return 2;
880
881 default:
882 return 0;
883 }
884 }
885
886 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
887 {
888 pcre_uchar *end = bracketend(begin);
889 pcre_uchar *next;
890 pcre_uchar *next_end;
891 pcre_uchar *max_end;
892 pcre_uchar type;
893 sljit_sw length = end - begin;
894 int min, max, i;
895
896 /* Detect fixed iterations first. */
897 if (end[-(1 + LINK_SIZE)] != OP_KET)
898 return FALSE;
899
900 /* Already detected repeat. */
901 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
902 return TRUE;
903
904 next = end;
905 min = 1;
906 while (1)
907 {
908 if (*next != *begin)
909 break;
910 next_end = bracketend(next);
911 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
912 break;
913 next = next_end;
914 min++;
915 }
916
917 if (min == 2)
918 return FALSE;
919
920 max = 0;
921 max_end = next;
922 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
923 {
924 type = *next;
925 while (1)
926 {
927 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
928 break;
929 next_end = bracketend(next + 2 + LINK_SIZE);
930 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
931 break;
932 next = next_end;
933 max++;
934 }
935
936 if (next[0] == type && next[1] == *begin && max >= 1)
937 {
938 next_end = bracketend(next + 1);
939 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
940 {
941 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
942 if (*next_end != OP_KET)
943 break;
944
945 if (i == max)
946 {
947 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
948 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
949 /* +2 the original and the last. */
950 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
951 if (min == 1)
952 return TRUE;
953 min--;
954 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
955 }
956 }
957 }
958 }
959
960 if (min >= 3)
961 {
962 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
963 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
964 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
965 return TRUE;
966 }
967
968 return FALSE;
969 }
970
/* Groups of case labels for the single character and character type
iterators. The digit in each name is the number of private data words
(the "space" value) that set_private_data_ptrs assigns to the group. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
  case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
  case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: case OP_PLUS: \
  case OP_STARI: case OP_PLUSI: \
  case OP_NOTSTAR: case OP_NOTPLUS: \
  case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: case OP_MINUPTO: \
  case OP_UPTOI: case OP_MINUPTOI: \
  case OP_NOTUPTO: case OP_NOTMINUPTO: \
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1022
1023 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1024 {
1025 pcre_uchar *cc = common->start;
1026 pcre_uchar *alternative;
1027 pcre_uchar *end = NULL;
1028 int private_data_ptr = *private_data_start;
1029 int space, size, bracketlen;
1030
1031 while (cc < ccend)
1032 {
1033 space = 0;
1034 size = 0;
1035 bracketlen = 0;
1036 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1037 return;
1038
1039 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1040 if (detect_repeat(common, cc))
1041 {
1042 /* These brackets are converted to repeats, so no global
1043 based single character repeat is allowed. */
1044 if (cc >= end)
1045 end = bracketend(cc);
1046 }
1047
1048 switch(*cc)
1049 {
1050 case OP_KET:
1051 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1052 {
1053 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1054 private_data_ptr += sizeof(sljit_sw);
1055 cc += common->private_data_ptrs[cc + 1 - common->start];
1056 }
1057 cc += 1 + LINK_SIZE;
1058 break;
1059
1060 case OP_ASSERT:
1061 case OP_ASSERT_NOT:
1062 case OP_ASSERTBACK:
1063 case OP_ASSERTBACK_NOT:
1064 case OP_ONCE:
1065 case OP_ONCE_NC:
1066 case OP_BRAPOS:
1067 case OP_SBRA:
1068 case OP_SBRAPOS:
1069 case OP_SCOND:
1070 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1071 private_data_ptr += sizeof(sljit_sw);
1072 bracketlen = 1 + LINK_SIZE;
1073 break;
1074
1075 case OP_CBRAPOS:
1076 case OP_SCBRAPOS:
1077 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1078 private_data_ptr += sizeof(sljit_sw);
1079 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1080 break;
1081
1082 case OP_COND:
1083 /* Might be a hidden SCOND. */
1084 alternative = cc + GET(cc, 1);
1085 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1086 {
1087 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1088 private_data_ptr += sizeof(sljit_sw);
1089 }
1090 bracketlen = 1 + LINK_SIZE;
1091 break;
1092
1093 case OP_BRA:
1094 bracketlen = 1 + LINK_SIZE;
1095 break;
1096
1097 case OP_CBRA:
1098 case OP_SCBRA:
1099 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1100 break;
1101
1102 CASE_ITERATOR_PRIVATE_DATA_1
1103 space = 1;
1104 size = -2;
1105 break;
1106
1107 CASE_ITERATOR_PRIVATE_DATA_2A
1108 space = 2;
1109 size = -2;
1110 break;
1111
1112 CASE_ITERATOR_PRIVATE_DATA_2B
1113 space = 2;
1114 size = -(2 + IMM2_SIZE);
1115 break;
1116
1117 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1118 space = 1;
1119 size = 1;
1120 break;
1121
1122 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1123 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1124 space = 2;
1125 size = 1;
1126 break;
1127
1128 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1129 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1130 space = 2;
1131 size = 1 + IMM2_SIZE;
1132 break;
1133
1134 case OP_CLASS:
1135 case OP_NCLASS:
1136 size += 1 + 32 / sizeof(pcre_uchar);
1137 space = get_class_iterator_size(cc + size);
1138 break;
1139
1140 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1141 case OP_XCLASS:
1142 size = GET(cc, 1);
1143 space = get_class_iterator_size(cc + size);
1144 break;
1145 #endif
1146
1147 default:
1148 cc = next_opcode(common, cc);
1149 SLJIT_ASSERT(cc != NULL);
1150 break;
1151 }
1152
1153 /* Character iterators, which are not inside a repeated bracket,
1154 gets a private slot instead of allocating it on the stack. */
1155 if (space > 0 && cc >= end)
1156 {
1157 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1158 private_data_ptr += sizeof(sljit_sw) * space;
1159 }
1160
1161 if (size != 0)
1162 {
1163 if (size < 0)
1164 {
1165 cc += -size;
1166 #ifdef SUPPORT_UTF
1167 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1168 #endif
1169 }
1170 else
1171 cc += size;
1172 }
1173
1174 if (bracketlen > 0)
1175 {
1176 if (cc >= end)
1177 {
1178 end = bracketend(cc);
1179 if (end[-1 - LINK_SIZE] == OP_KET)
1180 end = NULL;
1181 }
1182 cc += bracketlen;
1183 }
1184 }
1185 *private_data_start = private_data_ptr;
1186 }
1187
1188 /* Returns with a frame_types (always < 0) if no need for frame. */
1189 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1190 {
1191 int length = 0;
1192 int possessive = 0;
1193 BOOL stack_restore = FALSE;
1194 BOOL setsom_found = recursive;
1195 BOOL setmark_found = recursive;
1196 /* The last capture is a local variable even for recursions. */
1197 BOOL capture_last_found = FALSE;
1198
1199 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1200 SLJIT_ASSERT(common->control_head_ptr != 0);
1201 *needs_control_head = TRUE;
1202 #else
1203 *needs_control_head = FALSE;
1204 #endif
1205
1206 if (ccend == NULL)
1207 {
1208 ccend = bracketend(cc) - (1 + LINK_SIZE);
1209 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1210 {
1211 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1212 /* This is correct regardless of common->capture_last_ptr. */
1213 capture_last_found = TRUE;
1214 }
1215 cc = next_opcode(common, cc);
1216 }
1217
1218 SLJIT_ASSERT(cc != NULL);
1219 while (cc < ccend)
1220 switch(*cc)
1221 {
1222 case OP_SET_SOM:
1223 SLJIT_ASSERT(common->has_set_som);
1224 stack_restore = TRUE;
1225 if (!setsom_found)
1226 {
1227 length += 2;
1228 setsom_found = TRUE;
1229 }
1230 cc += 1;
1231 break;
1232
1233 case OP_MARK:
1234 case OP_PRUNE_ARG:
1235 case OP_THEN_ARG:
1236 SLJIT_ASSERT(common->mark_ptr != 0);
1237 stack_restore = TRUE;
1238 if (!setmark_found)
1239 {
1240 length += 2;
1241 setmark_found = TRUE;
1242 }
1243 if (common->control_head_ptr != 0)
1244 *needs_control_head = TRUE;
1245 cc += 1 + 2 + cc[1];
1246 break;
1247
1248 case OP_RECURSE:
1249 stack_restore = TRUE;
1250 if (common->has_set_som && !setsom_found)
1251 {
1252 length += 2;
1253 setsom_found = TRUE;
1254 }
1255 if (common->mark_ptr != 0 && !setmark_found)
1256 {
1257 length += 2;
1258 setmark_found = TRUE;
1259 }
1260 if (common->capture_last_ptr != 0 && !capture_last_found)
1261 {
1262 length += 2;
1263 capture_last_found = TRUE;
1264 }
1265 cc += 1 + LINK_SIZE;
1266 break;
1267
1268 case OP_CBRA:
1269 case OP_CBRAPOS:
1270 case OP_SCBRA:
1271 case OP_SCBRAPOS:
1272 stack_restore = TRUE;
1273 if (common->capture_last_ptr != 0 && !capture_last_found)
1274 {
1275 length += 2;
1276 capture_last_found = TRUE;
1277 }
1278 length += 3;
1279 cc += 1 + LINK_SIZE + IMM2_SIZE;
1280 break;
1281
1282 default:
1283 stack_restore = TRUE;
1284 /* Fall through. */
1285
1286 case OP_NOT_WORD_BOUNDARY:
1287 case OP_WORD_BOUNDARY:
1288 case OP_NOT_DIGIT:
1289 case OP_DIGIT:
1290 case OP_NOT_WHITESPACE:
1291 case OP_WHITESPACE:
1292 case OP_NOT_WORDCHAR:
1293 case OP_WORDCHAR:
1294 case OP_ANY:
1295 case OP_ALLANY:
1296 case OP_ANYBYTE:
1297 case OP_NOTPROP:
1298 case OP_PROP:
1299 case OP_ANYNL:
1300 case OP_NOT_HSPACE:
1301 case OP_HSPACE:
1302 case OP_NOT_VSPACE:
1303 case OP_VSPACE:
1304 case OP_EXTUNI:
1305 case OP_EODN:
1306 case OP_EOD:
1307 case OP_CIRC:
1308 case OP_CIRCM:
1309 case OP_DOLL:
1310 case OP_DOLLM:
1311 case OP_CHAR:
1312 case OP_CHARI:
1313 case OP_NOT:
1314 case OP_NOTI:
1315
1316 case OP_EXACT:
1317 case OP_POSSTAR:
1318 case OP_POSPLUS:
1319 case OP_POSQUERY:
1320 case OP_POSUPTO:
1321
1322 case OP_EXACTI:
1323 case OP_POSSTARI:
1324 case OP_POSPLUSI:
1325 case OP_POSQUERYI:
1326 case OP_POSUPTOI:
1327
1328 case OP_NOTEXACT:
1329 case OP_NOTPOSSTAR:
1330 case OP_NOTPOSPLUS:
1331 case OP_NOTPOSQUERY:
1332 case OP_NOTPOSUPTO:
1333
1334 case OP_NOTEXACTI:
1335 case OP_NOTPOSSTARI:
1336 case OP_NOTPOSPLUSI:
1337 case OP_NOTPOSQUERYI:
1338 case OP_NOTPOSUPTOI:
1339
1340 case OP_TYPEEXACT:
1341 case OP_TYPEPOSSTAR:
1342 case OP_TYPEPOSPLUS:
1343 case OP_TYPEPOSQUERY:
1344 case OP_TYPEPOSUPTO:
1345
1346 case OP_CLASS:
1347 case OP_NCLASS:
1348 case OP_XCLASS:
1349
1350 cc = next_opcode(common, cc);
1351 SLJIT_ASSERT(cc != NULL);
1352 break;
1353 }
1354
1355 /* Possessive quantifiers can use a special case. */
1356 if (SLJIT_UNLIKELY(possessive == length))
1357 return stack_restore ? no_frame : no_stack;
1358
1359 if (length > 0)
1360 return length + 1;
1361 return stack_restore ? no_frame : no_stack;
1362 }
1363
/* Emits the code which fills a frame on the machine stack for the byte code
between cc and ccend (the body of the bracket at cc when ccend is NULL).
stackpos and stacktop are stack word indexes; stackpos is converted to a
byte offset with STACK() and advanced after every stored word.

Frame layout produced below:
  - a saved local (OVECTOR(0) for OP_SET_SOM, mark_ptr, capture_last_ptr)
    takes two words: the negated offset of the local, then its value;
  - a capturing bracket takes three words: OVECTOR(offset), followed by the
    values of OVECTOR(offset) and OVECTOR(offset + 1);
  - a single 0 word closes the frame.
These are the same per-item word counts which get_framesize adds up. Each
local is saved at most once; with recursive == TRUE the start of match and
mark entries are never emitted here. */
1364 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1365 {
1366 DEFINE_COMPILER;
1367 BOOL setsom_found = recursive;
1368 BOOL setmark_found = recursive;
1369 /* The last capture is a local variable even for recursions. */
1370 BOOL capture_last_found = FALSE;
1371 int offset;
1372
1373 /* >= 1 + shortest item size (2) */
1374 SLJIT_UNUSED_ARG(stacktop);
1375 SLJIT_ASSERT(stackpos >= stacktop + 2);
1376
/* From here on stackpos is a byte offset relative to STACK_TOP. */
1377 stackpos = STACK(stackpos);
1378 if (ccend == NULL)
1379 {
1380 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive (S)CBRAPOS is not skipped, so the loop below also emits
the entry of the bracket itself. */
1381 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1382 cc = next_opcode(common, cc);
1383 }
1384
1385 SLJIT_ASSERT(cc != NULL);
1386 while (cc < ccend)
1387 switch(*cc)
1388 {
1389 case OP_SET_SOM:
1390 SLJIT_ASSERT(common->has_set_som);
1391 if (!setsom_found)
1392 {
/* Pair: -OVECTOR(0) marker, then the current OVECTOR(0) value. */
1393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1395 stackpos += (int)sizeof(sljit_sw);
1396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1397 stackpos += (int)sizeof(sljit_sw);
1398 setsom_found = TRUE;
1399 }
1400 cc += 1;
1401 break;
1402
1403 case OP_MARK:
1404 case OP_PRUNE_ARG:
1405 case OP_THEN_ARG:
1406 SLJIT_ASSERT(common->mark_ptr != 0);
1407 if (!setmark_found)
1408 {
/* Pair: -mark_ptr marker, then the current mark value. */
1409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1411 stackpos += (int)sizeof(sljit_sw);
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413 stackpos += (int)sizeof(sljit_sw);
1414 setmark_found = TRUE;
1415 }
/* Opcode, length code unit, the name (cc[1] code units) and one more unit. */
1416 cc += 1 + 2 + cc[1];
1417 break;
1418
/* A recursion saves every local which is in use and not saved yet. */
1419 case OP_RECURSE:
1420 if (common->has_set_som && !setsom_found)
1421 {
1422 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1424 stackpos += (int)sizeof(sljit_sw);
1425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1426 stackpos += (int)sizeof(sljit_sw);
1427 setsom_found = TRUE;
1428 }
1429 if (common->mark_ptr != 0 && !setmark_found)
1430 {
1431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1433 stackpos += (int)sizeof(sljit_sw);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1435 stackpos += (int)sizeof(sljit_sw);
1436 setmark_found = TRUE;
1437 }
1438 if (common->capture_last_ptr != 0 && !capture_last_found)
1439 {
1440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1442 stackpos += (int)sizeof(sljit_sw);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444 stackpos += (int)sizeof(sljit_sw);
1445 capture_last_found = TRUE;
1446 }
1447 cc += 1 + LINK_SIZE;
1448 break;
1449
1450 case OP_CBRA:
1451 case OP_CBRAPOS:
1452 case OP_SCBRA:
1453 case OP_SCBRAPOS:
1454 if (common->capture_last_ptr != 0 && !capture_last_found)
1455 {
1456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1458 stackpos += (int)sizeof(sljit_sw);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1460 stackpos += (int)sizeof(sljit_sw);
1461 capture_last_found = TRUE;
1462 }
/* The bracket number is doubled to index the first word of its ovector
pair. Unlike the locals above, the stored offset is not negated. */
1463 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1465 stackpos += (int)sizeof(sljit_sw);
1466 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469 stackpos += (int)sizeof(sljit_sw);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1471 stackpos += (int)sizeof(sljit_sw);
1472
1473 cc += 1 + LINK_SIZE + IMM2_SIZE;
1474 break;
1475
1476 default:
1477 cc = next_opcode(common, cc);
1478 SLJIT_ASSERT(cc != NULL);
1479 break;
1480 }
1481
/* Closing 0 word; it must land exactly on the slot addressed by stacktop. */
1482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1483 SLJIT_ASSERT(stackpos == STACK(stacktop));
1484 }
1485
1486 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1487 {
1488 int private_data_length = needs_control_head ? 3 : 2;
1489 int size;
1490 pcre_uchar *alternative;
1491 /* Calculate the sum of the private machine words. */
1492 while (cc < ccend)
1493 {
1494 size = 0;
1495 switch(*cc)
1496 {
1497 case OP_KET:
1498 if (PRIVATE_DATA(cc) != 0)
1499 private_data_length++;
1500 cc += 1 + LINK_SIZE;
1501 break;
1502
1503 case OP_ASSERT:
1504 case OP_ASSERT_NOT:
1505 case OP_ASSERTBACK:
1506 case OP_ASSERTBACK_NOT:
1507 case OP_ONCE:
1508 case OP_ONCE_NC:
1509 case OP_BRAPOS:
1510 case OP_SBRA:
1511 case OP_SBRAPOS:
1512 case OP_SCOND:
1513 private_data_length++;
1514 cc += 1 + LINK_SIZE;
1515 break;
1516
1517 case OP_CBRA:
1518 case OP_SCBRA:
1519 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1520 private_data_length++;
1521 cc += 1 + LINK_SIZE + IMM2_SIZE;
1522 break;
1523
1524 case OP_CBRAPOS:
1525 case OP_SCBRAPOS:
1526 private_data_length += 2;
1527 cc += 1 + LINK_SIZE + IMM2_SIZE;
1528 break;
1529
1530 case OP_COND:
1531 /* Might be a hidden SCOND. */
1532 alternative = cc + GET(cc, 1);
1533 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1534 private_data_length++;
1535 cc += 1 + LINK_SIZE;
1536 break;
1537
1538 CASE_ITERATOR_PRIVATE_DATA_1
1539 if (PRIVATE_DATA(cc))
1540 private_data_length++;
1541 cc += 2;
1542 #ifdef SUPPORT_UTF
1543 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1544 #endif
1545 break;
1546
1547 CASE_ITERATOR_PRIVATE_DATA_2A
1548 if (PRIVATE_DATA(cc))
1549 private_data_length += 2;
1550 cc += 2;
1551 #ifdef SUPPORT_UTF
1552 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1553 #endif
1554 break;
1555
1556 CASE_ITERATOR_PRIVATE_DATA_2B
1557 if (PRIVATE_DATA(cc))
1558 private_data_length += 2;
1559 cc += 2 + IMM2_SIZE;
1560 #ifdef SUPPORT_UTF
1561 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1562 #endif
1563 break;
1564
1565 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1566 if (PRIVATE_DATA(cc))
1567 private_data_length++;
1568 cc += 1;
1569 break;
1570
1571 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1572 if (PRIVATE_DATA(cc))
1573 private_data_length += 2;
1574 cc += 1;
1575 break;
1576
1577 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1578 if (PRIVATE_DATA(cc))
1579 private_data_length += 2;
1580 cc += 1 + IMM2_SIZE;
1581 break;
1582
1583 case OP_CLASS:
1584 case OP_NCLASS:
1585 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1586 case OP_XCLASS:
1587 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1588 #else
1589 size = 1 + 32 / (int)sizeof(pcre_uchar);
1590 #endif
1591 if (PRIVATE_DATA(cc))
1592 private_data_length += get_class_iterator_size(cc + size);
1593 cc += size;
1594 break;
1595
1596 default:
1597 cc = next_opcode(common, cc);
1598 SLJIT_ASSERT(cc != NULL);
1599 break;
1600 }
1601 }
1602 SLJIT_ASSERT(cc == ccend);
1603 return private_data_length;
1604 }
1605
/* Emits the code which copies the private data words of the byte code
between cc and ccend between the local area (SLJIT_LOCALS_REG based) and the
machine stack (STACK_TOP based).

  save == TRUE   locals are stored to the stack. The first words are
                 recursive_head_ptr and, when needs_control_head is set,
                 control_head_ptr, followed by the private words of the
                 opcodes.
  save == FALSE  the stack words are loaded back into the locals. The
                 leading word(s) are stepped over instead of being written
                 back, and the "start" state is never entered.

stackptr and stacktop are stack word indexes which are converted to byte
offsets with STACK() below. The opcode walk mirrors the one in
get_private_data_copy_length.

TMP1 and TMP2 are used alternately as staging registers (tmp1next selects
the next one, tmp1empty / tmp2empty tell whether a register holds a pending
value), so every value is moved with one load and one store which are not
adjacent in the emitted code. */
1606 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1607 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1608 {
1609 DEFINE_COMPILER;
/* Local offsets of the (at most two) words selected by the current step. */
1610 int srcw[2];
1611 int count, size;
1612 BOOL tmp1next = TRUE;
1613 BOOL tmp1empty = TRUE;
1614 BOOL tmp2empty = TRUE;
1615 pcre_uchar *alternative;
1616 enum {
1617 start,
1618 loop,
1619 end
1620 } status;
1621
1622 status = save ? start : loop;
1623 stackptr = STACK(stackptr - 2);
1624 stacktop = STACK(stacktop - 1);
1625
1626 if (!save)
1627 {
/* Step over the head word(s); then preload up to two stack words so the
registers are filled before the first local is written. */
1628 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1629 if (stackptr < stacktop)
1630 {
1631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1632 stackptr += sizeof(sljit_sw);
1633 tmp1empty = FALSE;
1634 }
1635 if (stackptr < stacktop)
1636 {
1637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1638 stackptr += sizeof(sljit_sw);
1639 tmp2empty = FALSE;
1640 }
1641 /* The tmp1next must be TRUE in either way. */
1642 }
1643
/* Each iteration selects 0, 1 or 2 local offsets (count, srcw[]) and the
inner while loop below emits the moves for them. */
1644 do
1645 {
1646 count = 0;
1647 switch(status)
1648 {
1649 case start:
1650 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1651 count = 1;
1652 srcw[0] = common->recursive_head_ptr;
1653 if (needs_control_head)
1654 {
1655 SLJIT_ASSERT(common->control_head_ptr != 0);
1656 count = 2;
1657 srcw[1] = common->control_head_ptr;
1658 }
1659 status = loop;
1660 break;
1661
1662 case loop:
1663 if (cc >= ccend)
1664 {
1665 status = end;
1666 break;
1667 }
1668
1669 switch(*cc)
1670 {
1671 case OP_KET:
1672 if (PRIVATE_DATA(cc) != 0)
1673 {
1674 count = 1;
1675 srcw[0] = PRIVATE_DATA(cc);
1676 }
1677 cc += 1 + LINK_SIZE;
1678 break;
1679
1680 case OP_ASSERT:
1681 case OP_ASSERT_NOT:
1682 case OP_ASSERTBACK:
1683 case OP_ASSERTBACK_NOT:
1684 case OP_ONCE:
1685 case OP_ONCE_NC:
1686 case OP_BRAPOS:
1687 case OP_SBRA:
1688 case OP_SBRAPOS:
1689 case OP_SCOND:
1690 count = 1;
1691 srcw[0] = PRIVATE_DATA(cc);
1692 SLJIT_ASSERT(srcw[0] != 0);
1693 cc += 1 + LINK_SIZE;
1694 break;
1695
1696 case OP_CBRA:
1697 case OP_SCBRA:
/* Copied only when the optimized_cbracket entry of the bracket is 0. */
1698 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1699 {
1700 count = 1;
1701 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1702 }
1703 cc += 1 + LINK_SIZE + IMM2_SIZE;
1704 break;
1705
1706 case OP_CBRAPOS:
1707 case OP_SCBRAPOS:
1708 count = 2;
1709 srcw[0] = PRIVATE_DATA(cc);
1710 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1711 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1712 cc += 1 + LINK_SIZE + IMM2_SIZE;
1713 break;
1714
1715 case OP_COND:
1716 /* Might be a hidden SCOND. */
1717 alternative = cc + GET(cc, 1);
1718 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1719 {
1720 count = 1;
1721 srcw[0] = PRIVATE_DATA(cc);
1722 SLJIT_ASSERT(srcw[0] != 0);
1723 }
1724 cc += 1 + LINK_SIZE;
1725 break;
1726
1727 CASE_ITERATOR_PRIVATE_DATA_1
1728 if (PRIVATE_DATA(cc))
1729 {
1730 count = 1;
1731 srcw[0] = PRIVATE_DATA(cc);
1732 }
1733 cc += 2;
1734 #ifdef SUPPORT_UTF
1735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1736 #endif
1737 break;
1738
/* Two-word iterators use two consecutive words of the local area. */
1739 CASE_ITERATOR_PRIVATE_DATA_2A
1740 if (PRIVATE_DATA(cc))
1741 {
1742 count = 2;
1743 srcw[0] = PRIVATE_DATA(cc);
1744 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1745 }
1746 cc += 2;
1747 #ifdef SUPPORT_UTF
1748 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1749 #endif
1750 break;
1751
1752 CASE_ITERATOR_PRIVATE_DATA_2B
1753 if (PRIVATE_DATA(cc))
1754 {
1755 count = 2;
1756 srcw[0] = PRIVATE_DATA(cc);
1757 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1758 }
1759 cc += 2 + IMM2_SIZE;
1760 #ifdef SUPPORT_UTF
1761 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1762 #endif
1763 break;
1764
1765 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1766 if (PRIVATE_DATA(cc))
1767 {
1768 count = 1;
1769 srcw[0] = PRIVATE_DATA(cc);
1770 }
1771 cc += 1;
1772 break;
1773
1774 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1775 if (PRIVATE_DATA(cc))
1776 {
1777 count = 2;
1778 srcw[0] = PRIVATE_DATA(cc);
1779 srcw[1] = srcw[0] + sizeof(sljit_sw);
1780 }
1781 cc += 1;
1782 break;
1783
1784 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1785 if (PRIVATE_DATA(cc))
1786 {
1787 count = 2;
1788 srcw[0] = PRIVATE_DATA(cc);
1789 srcw[1] = srcw[0] + sizeof(sljit_sw);
1790 }
1791 cc += 1 + IMM2_SIZE;
1792 break;
1793
1794 case OP_CLASS:
1795 case OP_NCLASS:
1796 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1797 case OP_XCLASS:
1798 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1799 #else
1800 size = 1 + 32 / (int)sizeof(pcre_uchar);
1801 #endif
/* The iterator following the class decides whether one or two words are
copied; any other size is treated as an internal error. */
1802 if (PRIVATE_DATA(cc))
1803 switch(get_class_iterator_size(cc + size))
1804 {
1805 case 1:
1806 count = 1;
1807 srcw[0] = PRIVATE_DATA(cc);
1808 break;
1809
1810 case 2:
1811 count = 2;
1812 srcw[0] = PRIVATE_DATA(cc);
1813 srcw[1] = srcw[0] + sizeof(sljit_sw);
1814 break;
1815
1816 default:
1817 SLJIT_ASSERT_STOP();
1818 break;
1819 }
1820 cc += size;
1821 break;
1822
1823 default:
1824 cc = next_opcode(common, cc);
1825 SLJIT_ASSERT(cc != NULL);
1826 break;
1827 }
1828 break;
1829
1830 case end:
1831 SLJIT_ASSERT_STOP();
1832 break;
1833 }
1834
/* The selected words are processed from the highest index down, so with
count == 2 srcw[1] is handled before srcw[0]. */
1835 while (count > 0)
1836 {
1837 count--;
1838 if (save)
1839 {
/* Save: flush the pending value of the chosen register to the stack (if
any), then load the next local into it. */
1840 if (tmp1next)
1841 {
1842 if (!tmp1empty)
1843 {
1844 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1845 stackptr += sizeof(sljit_sw);
1846 }
1847 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1848 tmp1empty = FALSE;
1849 tmp1next = FALSE;
1850 }
1851 else
1852 {
1853 if (!tmp2empty)
1854 {
1855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1856 stackptr += sizeof(sljit_sw);
1857 }
1858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1859 tmp2empty = FALSE;
1860 tmp1next = TRUE;
1861 }
1862 }
1863 else
1864 {
/* Restore: write the preloaded register to the local, then refill the
register from the stack while words remain. */
1865 if (tmp1next)
1866 {
1867 SLJIT_ASSERT(!tmp1empty);
1868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1869 tmp1empty = stackptr >= stacktop;
1870 if (!tmp1empty)
1871 {
1872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1873 stackptr += sizeof(sljit_sw);
1874 }
1875 tmp1next = FALSE;
1876 }
1877 else
1878 {
1879 SLJIT_ASSERT(!tmp2empty);
1880 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1881 tmp2empty = stackptr >= stacktop;
1882 if (!tmp2empty)
1883 {
1884 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1885 stackptr += sizeof(sljit_sw);
1886 }
1887 tmp1next = TRUE;
1888 }
1889 }
1890 }
1891 }
1892 while (status != end);
1893
1894 if (save)
1895 {
/* Flush the values still held in the registers. The register which would
be used next holds the older value, so it is stored first. */
1896 if (tmp1next)
1897 {
1898 if (!tmp1empty)
1899 {
1900 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1901 stackptr += sizeof(sljit_sw);
1902 }
1903 if (!tmp2empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 }
1909 else
1910 {
1911 if (!tmp2empty)
1912 {
1913 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1914 stackptr += sizeof(sljit_sw);
1915 }
1916 if (!tmp1empty)
1917 {
1918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1919 stackptr += sizeof(sljit_sw);
1920 }
1921 }
1922 }
/* All opcodes and all stack words must have been consumed; after a restore
no value may be left in the registers. */
1923 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1924 }
1925
1926 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1927 {
1928 pcre_uchar *end = bracketend(cc);
1929 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1930
1931 /* Assert captures then. */
1932 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1933 current_offset = NULL;
1934 /* Conditional block does not. */
1935 if (*cc == OP_COND || *cc == OP_SCOND)
1936 has_alternatives = FALSE;
1937
1938 cc = next_opcode(common, cc);
1939 if (has_alternatives)
1940 current_offset = common->then_offsets + (cc - common->start);
1941
1942 while (cc < end)
1943 {
1944 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1945 cc = set_then_offsets(common, cc, current_offset);
1946 else
1947 {
1948 if (*cc == OP_ALT && has_alternatives)
1949 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1950 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1951 *current_offset = 1;
1952 cc = next_opcode(common, cc);
1953 }
1954 }
1955
1956 return end;
1957 }
1958
1959 #undef CASE_ITERATOR_PRIVATE_DATA_1
1960 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1961 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1962 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1963 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1964 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1965
1966 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1967 {
1968 return (value & (value - 1)) == 0;
1969 }
1970
1971 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1972 {
1973 while (list)
1974 {
1975 /* sljit_set_label is clever enough to do nothing
1976 if either the jump or the label is NULL. */
1977 SET_LABEL(list->jump, label);
1978 list = list->next;
1979 }
1980 }
1981
1982 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1983 {
1984 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1985 if (list_item)
1986 {
1987 list_item->next = *list;
1988 list_item->jump = jump;
1989 *list = list_item;
1990 }
1991 }
1992
1993 static void add_stub(compiler_common *common, struct sljit_jump *start)
1994 {
1995 DEFINE_COMPILER;
1996 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1997
1998 if (list_item)
1999 {
2000 list_item->start = start;
2001 list_item->quit = LABEL();
2002 list_item->next = common->stubs;
2003 common->stubs = list_item;
2004 }
2005 }
2006
2007 static void flush_stubs(compiler_common *common)
2008 {
2009 DEFINE_COMPILER;
2010 stub_list* list_item = common->stubs;
2011
2012 while (list_item)
2013 {
2014 JUMPHERE(list_item->start);
2015 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2016 JUMPTO(SLJIT_JUMP, list_item->quit);
2017 list_item = list_item->next;
2018 }
2019 common->stubs = NULL;
2020 }
2021
2022 static SLJIT_INLINE void count_match(compiler_common *common)
2023 {
2024 DEFINE_COMPILER;
2025
2026 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2027 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2028 }
2029
2030 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2031 {
2032 /* May destroy all locals and registers except TMP2. */
2033 DEFINE_COMPILER;
2034
2035 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2036 #ifdef DESTROY_REGISTERS
2037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2038 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2039 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2042 #endif
2043 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2044 }
2045
2046 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2047 {
2048 DEFINE_COMPILER;
2049 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2050 }
2051
2052 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2053 {
2054 DEFINE_COMPILER;
2055 struct sljit_label *loop;
2056 int i;
2057
2058 /* At this point we can freely use all temporary registers. */
2059 SLJIT_ASSERT(length > 1);
2060 /* TMP1 returns with begin - 1. */
2061 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2062 if (length < 8)
2063 {
2064 for (i = 1; i < length; i++)
2065 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2066 }
2067 else
2068 {
2069 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2070 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2071 loop = LABEL();
2072 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2073 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2074 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2075 }
2076 }
2077
2078 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2079 {
2080 DEFINE_COMPILER;
2081 struct sljit_label *loop;
2082 int i;
2083
2084 SLJIT_ASSERT(length > 1);
2085 /* OVECTOR(1) contains the "string begin - 1" constant. */
2086 if (length > 2)
2087 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2088 if (length < 8)
2089 {
2090 for (i = 2; i < length; i++)
2091 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2092 }
2093 else
2094 {
2095 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2096 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2097 loop = LABEL();
2098 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2099 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2100 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2101 }
2102
2103 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2104 if (common->mark_ptr != 0)
2105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2106 if (common->control_head_ptr != 0)
2107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2108 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2110 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2111 }
2112
2113 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2114 {
2115 while (current != NULL)
2116 {
2117 switch (current[-2])
2118 {
2119 case type_then_trap:
2120 break;
2121
2122 case type_mark:
2123 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2124 return current[-4];
2125 break;
2126
2127 default:
2128 SLJIT_ASSERT_STOP();
2129 break;
2130 }
2131 current = (sljit_sw*)current[-1];
2132 }
2133 return -1;
2134 }
2135
/* Emits the code which copies the local ovector to the offsets array of the
jit_arguments after a successful match and computes the return value.

Steps of the emitted code:
  - the previous OVECTOR(1) value is kept in SLJIT_SAVED_REG3 and OVECTOR(1)
    is replaced by STR_PTR;
  - when marks are in use, the mark local is copied to arguments->mark_ptr;
  - offset_count entries are converted from pointers to offsets relative to
    arguments->begin (shifted by UCHAR_SHIFT in the 16 / 32 bit libraries)
    and stored as ints;
  - the return value is 1 when topbracket <= 1, otherwise it is found by
    scanning the ovector downwards (see below). */
2136 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2137 {
2138 DEFINE_COMPILER;
2139 struct sljit_label *loop;
2140 struct sljit_jump *early_quit;
2141
2142 /* At this point we can freely use all registers. */
2143 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2145
/* SCRATCH_REG1 = arguments, SCRATCH_REG2 = offset_count (loop counter),
SCRATCH_REG3 = offsets - sizeof(int), pre-biased for the SLJIT_MOVU_SI store
in the loop, which uses a sizeof(int) displacement. */
2146 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2147 if (common->mark_ptr != 0)
2148 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2149 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2150 if (common->mark_ptr != 0)
2151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2152 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* SCRATCH_REG1 is reused for the subject start; SAVED_REG1 walks the
local ovector. */
2153 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2154 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2155 /* Unlikely, but possible */
2156 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2157 loop = LABEL();
2158 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2159 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2160 /* Copy the integer value to the output buffer */
2161 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2162 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2163 #endif
2164 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2165 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2166 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2167 JUMPHERE(early_quit);
2168
2169 /* Calculate the return value, which is the maximum ovector value. */
2170 if (topbracket > 1)
2171 {
/* Starting at the pair of topbracket, the scan steps down one pair at a
time while the first word of the pair equals the value saved in
SLJIT_SAVED_REG3 (presumably the "unset" marker - NOTE(review): confirm).
SCRATCH_REG2 counts down from topbracket + 1 alongside and becomes the
return value. */
2172 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2173 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2174
2175 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2176 loop = LABEL();
2177 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2178 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2179 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2180 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2181 }
2182 else
2183 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2184 }
2185
/* Emits the code which returns PCRE_ERROR_PARTIAL and then jumps to quit.
When real_offset_count is at least 2, the offsets array is filled first:
  offsets[0]  value of the start_used_ptr local (hard partial mode) or of
              the hit_start local (soft partial mode), relative to begin
  offsets[1]  STR_END relative to begin
  offsets[2]  only when real_offset_count >= 3: value of the start_ptr local
              (hard mode) or of the word following hit_start (soft mode),
              relative to begin
All offsets are shifted by UCHAR_SHIFT in the 16 / 32 bit libraries. */
2186 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2187 {
2188 DEFINE_COMPILER;
2189 struct sljit_jump *jump;
2190
/* STR_END lives in SLJIT_SAVED_REG2, which is overwritten below. */
2191 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2192 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2193 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2194
2195 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2196 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2197 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* With fewer than two output slots nothing can be stored. */
2198 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2199
2200 /* Store match begin and end. */
2201 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2202 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2203
/* offsets[2] is written only when a third slot exists. */
2204 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2205 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2206 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2207 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2208 #endif
2209 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2210 JUMPHERE(jump);
2211
/* Load the start value, then compute offsets[1] from STR_END; this
overwrites SLJIT_SAVED_REG2. */
2212 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2213 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2214 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2215 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2216 #endif
2217 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2218
/* offsets[0]: the start value loaded above, made relative to begin. */
2219 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2220 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2221 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2222 #endif
2223 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2224
2225 JUMPTO(SLJIT_JUMP, quit);
2226 }
2227
2228 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2229 {
2230 /* May destroy TMP1. */
     /* Emits code that overwrites the start_used_ptr local with STR_PTR unless
     the comparison below shows that the stored value is already low enough.
     No code is emitted when common->mode is neither of the two partial modes. */
2231 DEFINE_COMPILER;
2232 struct sljit_jump *jump;
2233
2234 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2235 {
2236 /* The value of -1 must be kept for start_used_ptr! */
2237 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2238 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2239 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2240 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2242 JUMPHERE(jump);
2243 }
2244 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2245 {
     /* Hard mode compares the local directly, so TMP1 is not touched here. */
2246 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2247 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2248 JUMPHERE(jump);
2249 }
2250 }
2251
2252 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2253 {
2254 /* Detects if the character has an othercase. */
     /* Runs at JIT compile time and emits no code. cc points to the character
     inside the compiled pattern. In UTF mode the character is decoded with
     GETCHAR; values above 127 are looked up with UCD_OTHERCASE (or have no
     other case when SUPPORT_UCP is not defined), everything else uses the
     common->fcc table. */
2255 unsigned int c;
2256
2257 #ifdef SUPPORT_UTF
2258 if (common->utf)
2259 {
2260 GETCHAR(c, cc);
2261 if (c > 127)
2262 {
2263 #ifdef SUPPORT_UCP
2264 return c != UCD_OTHERCASE(c);
2265 #else
2266 return FALSE;
2267 #endif
2268 }
2269 #ifndef COMPILE_PCRE8
     /* Here c <= 127, so indexing fcc needs no range check. In the 8 bit
     library the same lookup is done by the final return below. */
2270 return common->fcc[c] != c;
2271 #endif
2272 }
2273 else
2274 #endif
2275 c = *cc;
     /* MAX_255 presumably tests c <= 255; code units outside the table are
     reported as having no other case. */
2276 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2277 }
2278
2279 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2280 {
2281 /* Returns with the othercase. */
     /* Runs at JIT compile time and emits no code. c is an already decoded
     character. In UTF mode values above 127 are mapped with UCD_OTHERCASE, or
     returned unchanged when SUPPORT_UCP is not defined. All other values go
     through TABLE_GET on common->fcc with c itself as the third argument
     (presumably the fallback for values outside the table). */
2282 #ifdef SUPPORT_UTF
2283 if (common->utf && c > 127)
2284 {
2285 #ifdef SUPPORT_UCP
2286 return UCD_OTHERCASE(c);
2287 #else
2288 return c;
2289 #endif
2290 }
2291 #endif
2292 return TABLE_GET(c, common->fcc, c);
2293 }
2294
2295 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2296 {
2297 /* Detects if the character and its othercase has only 1 bit difference. */
     /* Runs at JIT compile time and emits no code. cc points to the character
     inside the compiled pattern; the caller must make sure that it has an
     other case (see the assert below).
     Returns 0 when the two cases differ in more than one bit. Otherwise the
     low 8 bits of the result hold the differing bit, moved into the range of
     a single byte, and the bits above hold a position value that tells where
     that byte is located inside the encoded character. */
2298 unsigned int c, oc, bit;
2299 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2300 int n;
2301 #endif
2302
2303 #ifdef SUPPORT_UTF
2304 if (common->utf)
2305 {
2306 GETCHAR(c, cc);
2307 if (c <= 127)
2308 oc = common->fcc[c];
2309 else
2310 {
2311 #ifdef SUPPORT_UCP
2312 oc = UCD_OTHERCASE(c);
2313 #else
2314 oc = c;
2315 #endif
2316 }
2317 }
2318 else
2319 {
2320 c = *cc;
2321 oc = TABLE_GET(c, common->fcc, c);
2322 }
2323 #else
2324 c = *cc;
2325 oc = TABLE_GET(c, common->fcc, c);
2326 #endif
2327
2328 SLJIT_ASSERT(c != oc);
2329
2330 bit = c ^ oc;
2331 /* Optimized for English alphabet. */
2332 if (c <= 127 && bit == 0x20)
2333 return (0 << 8) | 0x20;
2334
2335 /* Since c != oc, they must have at least 1 bit difference. */
2336 if (!is_powerof2(bit))
2337 return 0;
2338
2339 #if defined COMPILE_PCRE8
2340
2341 #ifdef SUPPORT_UTF
2342 if (common->utf && c > 127)
2343 {
     /* n starts as the number of extra bytes, i.e. the index of the last byte.
     Each encoded byte consumed from the end carries 6 payload bits, so the
     loop steps back one byte per 6 bits until the differing bit is reached. */
2344 n = GET_EXTRALEN(*cc);
2345 while ((bit & 0x3f) == 0)
2346 {
2347 n--;
2348 bit >>= 6;
2349 }
2350 return (n << 8) | bit;
2351 }
2352 #endif /* SUPPORT_UTF */
2353 return (0 << 8) | bit;
2354
2355 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2356
2357 #ifdef SUPPORT_UTF
     /* Characters above 65535 are treated as two units with 10 payload bits
     each: a bit at position 10 or above is shifted down and reported with
     position 0/1, a lower bit is reported with position 2/3.
     NOTE(review): this branch is also compiled for COMPILE_PCRE32, where such
     a character is presumably a single code unit - confirm that the two unit
     split is intended there. */
2358 if (common->utf && c > 65535)
2359 {
2360 if (bit >= (1 << 10))
2361 bit >>= 10;
2362 else
2363 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2364 }
2365 #endif /* SUPPORT_UTF */
     /* A bit of 256 or more is moved down by 8 and flagged with position 1. */
2366 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2367
2368 #endif /* COMPILE_PCRE[8|16|32] */
2369 }
2370
2371 static void check_partial(compiler_common *common, BOOL force)
2372 {
2373 /* Checks whether a partial match has occurred. Does not modify registers. */
     /* Emits nothing in JIT_COMPILE mode. Otherwise emits the "partial match"
     action, guarded as follows:
     - force == FALSE: skipped when start_used_ptr >= STR_PTR;
     - force == TRUE, soft mode: skipped when start_used_ptr == -1;
     - force == TRUE, hard mode: not guarded at all.
     The action stores 0 into the hit_start local in soft mode; in hard mode it
     jumps to common->partialmatchlabel, or queues the jump on
     common->partialmatch when the label does not exist yet. */
2374 DEFINE_COMPILER;
2375 struct sljit_jump *jump = NULL;
2376
2377 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2378
2379 if (common->mode == JIT_COMPILE)
2380 return;
2381
2382 if (!force)
2383 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2384 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2385 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2386
2387 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2388 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2389 else
2390 {
2391 if (common->partialmatchlabel != NULL)
2392 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2393 else
2394 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2395 }
2396
     /* jump stays NULL only for the forced hard mode case. */
2397 if (jump != NULL)
2398 JUMPHERE(jump);
2399 }
2400
2401 static void check_str_end(compiler_common *common, jump_list **end_reached)
2402 {
2403 /* Does not affect registers. Usually used in a tight spot. */
     /* Emits an end of subject test. Execution continues after the emitted
     code only when STR_PTR < STR_END.
     - JIT_COMPILE mode: a single jump to end_reached when STR_PTR >= STR_END.
     - Partial modes, when the end is reached: a jump to end_reached if
       start_used_ptr >= STR_PTR; otherwise soft mode stores 0 into hit_start
       and then jumps to end_reached, while hard mode jumps to the partial
       match exit (label or pending jump list). */
2404 DEFINE_COMPILER;
2405 struct sljit_jump *jump;
2406
2407 if (common->mode == JIT_COMPILE)
2408 {
2409 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2410 return;
2411 }
2412
     /* Fast path: not at the end, skip everything below. */
2413 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2414 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2415 {
2416 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2417 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2418 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2419 }
2420 else
2421 {
2422 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2423 if (common->partialmatchlabel != NULL)
2424 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2425 else
2426 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2427 }
2428 JUMPHERE(jump);
2429 }
2430
2431 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2432 {
     /* Emits an end of subject test whose failure path is a backtrack.
     Execution continues after the emitted code only when STR_PTR < STR_END.
     - JIT_COMPILE mode: a single jump to backtracks when STR_PTR >= STR_END.
     - Partial modes, when the end is reached: a jump to backtracks if
       start_used_ptr >= STR_PTR; otherwise soft mode stores 0 into hit_start
       and then jumps to backtracks, while hard mode jumps to the partial
       match exit (label or pending jump list). */
2433 DEFINE_COMPILER;
2434 struct sljit_jump *jump;
2435
2436 if (common->mode == JIT_COMPILE)
2437 {
2438 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2439 return;
2440 }
2441
2442 /* Partial matching mode. */
2443 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2444 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2445 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2446 {
2447 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2448 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2449 }
2450 else
2451 {
2452 if (common->partialmatchlabel != NULL)
2453 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2454 else
2455 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2456 }
2457 JUMPHERE(jump);
2458 }
2459
2460 static void read_char(compiler_common *common)
2461 {
2462 /* Reads the character into TMP1, updates STR_PTR.
2463 Does not check STR_END. TMP2 Destroyed. */
     /* The first code unit is always loaded inline. In UTF-8 / UTF-16 mode a
     unit of at least 0xc0 / 0xd800 is passed to the shared utfreadchar helper
     through a fast call; smaller units are complete characters and skip the
     call. The 32 bit library never needs the helper. */
2464 DEFINE_COMPILER;
2465 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2466 struct sljit_jump *jump;
2467 #endif
2468
2469 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2471 if (common->utf)
2472 {
2473 #if defined COMPILE_PCRE8
2474 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2475 #elif defined COMPILE_PCRE16
2476 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2477 #endif /* COMPILE_PCRE[8|16] */
2478 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2479 JUMPHERE(jump);
2480 }
2481 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
     /* Steps over the first code unit; any further units were already skipped
     by the helper. */
2482 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2483 }
2484
2485 static void peek_char(compiler_common *common)
2486 {
2487 /* Reads the character into TMP1, keeps STR_PTR.
2488 Does not check STR_END. TMP2 Destroyed. */
     /* Same decoding as read_char, but STR_PTR is left where it was. The
     shared utfreadchar helper moves STR_PTR forward, so the call is followed
     by a subtraction of TMP2; this relies on the helper leaving in TMP2
     exactly the amount it added to STR_PTR. */
2489 DEFINE_COMPILER;
2490 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2491 struct sljit_jump *jump;
2492 #endif
2493
2494 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2495 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2496 if (common->utf)
2497 {
2498 #if defined COMPILE_PCRE8
2499 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2500 #elif defined COMPILE_PCRE16
2501 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2502 #endif /* COMPILE_PCRE[8|16] */
2503 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
     /* Undo the advance made by the helper. */
2504 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2505 JUMPHERE(jump);
2506 }
2507 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2508 }
2509
2510 static void read_char8_type(compiler_common *common)
2511 {
2512 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
     /* The type is the byte found at common->ctypes + character. Only
     characters up to 255 have an entry; for all others TMP1 is set to 0.
     TMP2 is used as a work register in every variant. */
2513 DEFINE_COMPILER;
2514 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2515 struct sljit_jump *jump;
2516 #endif
2517
2518 #ifdef SUPPORT_UTF
2519 if (common->utf)
2520 {
2521 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2522 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2523 #if defined COMPILE_PCRE8
2524 /* This can be an extra read in some situations, but hopefully
2525 it is needed in most cases. */
2526 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
     /* Lead bytes of 0xc0 and above are handled by the utfreadtype8 helper,
     which is entered with STR_PTR already past the first byte. */
2527 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2528 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2529 JUMPHERE(jump);
2530 #elif defined COMPILE_PCRE16
     /* TMP1 defaults to 0 and is replaced by the table entry for units <= 255. */
2531 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2532 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2533 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2534 JUMPHERE(jump);
2535 /* Skip low surrogate if necessary. */
     /* (unit & 0xfc00) == 0xd800 identifies a high surrogate; the comparison
     result is shifted left by one and added to STR_PTR. */
2536 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2537 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2538 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2539 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2540 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2541 #elif defined COMPILE_PCRE32
2542 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2543 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2544 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545 JUMPHERE(jump);
2546 #endif /* COMPILE_PCRE[8|16|32] */
2547 return;
2548 }
2549 #endif /* SUPPORT_UTF */
     /* Non-UTF mode: every code unit is a complete character. */
2550 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2551 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2552 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2553 /* The ctypes array contains only 256 values. */
2554 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2555 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2556 #endif
2557 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2558 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2559 JUMPHERE(jump);
2560 #endif
2561 }
2562
2563 static void skip_char_back(compiler_common *common)
2564 {
2565 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
     /* Outside UTF-8 / UTF-16 mode only the final single unit subtraction is
     emitted and TMP1 is not used. */
2566 DEFINE_COMPILER;
2567 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2568 #if defined COMPILE_PCRE8
2569 struct sljit_label *label;
2570
2571 if (common->utf)
2572 {
     /* Emitted loop: step back one byte at a time while the byte just stepped
     over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
2573 label = LABEL();
2574 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2575 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2576 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2577 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2578 return;
2579 }
2580 #elif defined COMPILE_PCRE16
2581 if (common->utf)
2582 {
2583 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2584 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585 /* Skip low surrogate if necessary. */
     /* (unit & 0xfc00) == 0xdc00 identifies a low surrogate; the comparison
     result is shifted left by one and subtracted from STR_PTR. */
2586 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2587 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2588 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2589 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2590 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2591 return;
2592 }
2593 #endif /* COMPILE_PCRE[8|16] */
2594 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2595 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2596 }
2597
2598 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2599 {
2600 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
     /* A jump is appended to backtracks. With jumpiftrue set it is taken when
     the character is a newline of the given nltype, otherwise it is taken
     when the character is not a newline. */
2601 DEFINE_COMPILER;
2602
2603 if (nltype == NLTYPE_ANY)
2604 {
     /* The shared anynewline helper is expected to leave its result in the
     flags (non-zero meaning newline) - NOTE(review): confirm in the helper. */
2605 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2606 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2607 }
2608 else if (nltype == NLTYPE_ANYCRLF)
2609 {
     /* TMP2 = (TMP1 == CR) | (TMP1 == NL), with the flags set by the OR. */
2610 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2611 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2612 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2613 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2614 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2615 }
2616 else
2617 {
     /* Fixed single unit newline: a direct comparison, TMP2 is untouched. */
2618 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2619 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2620 }
2621 }
2622
2623 #ifdef SUPPORT_UTF
2624
2625 #if defined COMPILE_PCRE8
2626 static void do_utfreadchar(compiler_common *common)
2627 {
2628 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2629 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
     /* Emits the body of a fast-call helper (fast enter / fast return through
     RETURN_ADDR). On entry STR_PTR still points to the first byte; on return
     it has been moved forward by the same amount that is returned in TMP2.
     Only two, three and four byte sequences are decoded, and the continuation
     bytes are not validated. */
2630 DEFINE_COMPILER;
2631 struct sljit_jump *jump;
2632
2633 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2634 /* Searching for the first zero. */
2635 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2636 jump = JUMP(SLJIT_C_NOT_ZERO);
2637 /* Two byte sequence. */
     /* char = ((b0 & 0x1f) << 6) | (b1 & 0x3f) */
2638 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2639 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2640 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2641 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2642 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2643 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2644 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2645 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2646 JUMPHERE(jump);
2647
2648 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2649 jump = JUMP(SLJIT_C_NOT_ZERO);
2650 /* Three byte sequence. */
     /* char = ((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f) */
2651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2652 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2653 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2654 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2655 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2656 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2657 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2658 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2659 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2662 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2663 JUMPHERE(jump);
2664
2665 /* Four byte sequence. */
     /* char = ((b0 & 0x07) << 18) | ((b1 & 0x3f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f).
     Every lead byte with both the 0x20 and 0x10 bits set ends up here. */
2666 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2667 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2668 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2669 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2670 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2671 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2672 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2673 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2674 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2675 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2676 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2677 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2681 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2682 }
2683
2684 static void do_utfreadtype8(compiler_common *common)
2685 {
2686 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2687 of the character (>= 0xc0). Return value in TMP1. */
     /* Emits the body of a fast-call helper. The code reads the second byte at
     offset 0, so STR_PTR must already point past the lead byte on entry. On
     return STR_PTR is past the whole character. TMP2 is destroyed. */
2688 DEFINE_COMPILER;
2689 struct sljit_jump *jump;
2690 struct sljit_jump *compare;
2691
2692 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2693
2694 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2695 jump = JUMP(SLJIT_C_NOT_ZERO);
2696 /* Two byte sequence. */
     /* Only two byte sequences can encode a value up to 255, so this is the
     only case where the character is decoded and looked up in ctypes. */
2697 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2698 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2699 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2700 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2701 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2702 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2703 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2704 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2705 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2706
     /* Two byte character above 255: no type information, return 0. */
2707 JUMPHERE(compare);
2708 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2709 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2710 JUMPHERE(jump);
2711
2712 /* We only have types for characters less than 256. */
     /* Longer sequences are skipped without decoding: the byte read from
     utf8_table4[lead - 0xc0] is added to STR_PTR and 0 is returned. */
2713 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2714 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2715 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2716 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2717 }
2718
2719 #elif defined COMPILE_PCRE16
2720
2721 static void do_utfreadchar(compiler_common *common)
2722 {
2723 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2724 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
     /* Emits the body of a fast-call helper (fast enter / fast return through
     RETURN_ADDR). On entry STR_PTR points to the first unit; on return it has
     been moved forward by the amount that is returned in TMP2, which callers
     such as peek_char subtract again. TMP2 is therefore written on every
     return path. */
2725 DEFINE_COMPILER;
2726 struct sljit_jump *jump;
2727
2728 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2729 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2730 /* Units from 0xdc00 upwards are complete characters: TMP1 is returned as is.
     STR_PTR is not moved, so the returned length - 1 must be 0; leaving TMP2
     unset here would let a caller subtract a stale value from STR_PTR. */
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2731 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2732
2733 JUMPHERE(jump);
2734 /* Combine two 16 bit characters. */
     /* char = 0x10000 + (((u0 & 0x3ff) << 10) | (u1 & 0x3ff)) */
2735 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2737 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2738 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2739 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2740 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2741 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2742 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2743 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2744 }
2745
2746 #endif /* COMPILE_PCRE[8|16] */
2747
2748 #endif /* SUPPORT_UTF */
2749
2750 #ifdef SUPPORT_UCP
2751
2752 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2753 #define UCD_BLOCK_MASK 127
2754 #define UCD_BLOCK_SHIFT 7
2755
2756 static void do_getucd(compiler_common *common)
2757 {
2758 /* Searches the UCD record for the character that comes in TMP1.
2759 Returns chartype in TMP1 and UCD offset in TMP2. */
     /* Emits the body of a fast-call helper performing a two stage table
     lookup:
       block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]            (byte load)
       record = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
                                                            (16 bit load)
       type   = byte at ucd_records + offsetof(chartype), indexed by record
     The last argument of SLJIT_MEM2 is presumably a left shift applied to the
     index register: 1 fits the 16 bit stage2 entries and 3 fits the 8 byte
     ucd_record size asserted below. */
2760 DEFINE_COMPILER;
2761
2762 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2763
2764 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2765 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2766 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2767 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2768 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2769 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2770 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2771 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2772 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2773 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2774 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2775 }
2776 #endif
2777
2778 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2779 {
     /* Emits the code that moves STR_PTR to the next starting position and
     returns the label of that code. The code emitted here is first reached
     through the 'start' jump, which lands after the advance, so the very
     first attempt begins at the unmodified STR_PTR.
     When firstline is set, code that finds the end of the first line is
     emitted in front; it stores the result in the first_line_end local.
     newlinecheck enables the extra code that steps over both units of a two
     unit newline; it is turned off when hascrorlf or firstline is set.
     Emitted code uses TMP1, TMP2 and (for firstline) TMP3. */
2780 DEFINE_COMPILER;
2781 struct sljit_label *mainloop;
2782 struct sljit_label *newlinelabel = NULL;
2783 struct sljit_jump *start;
2784 struct sljit_jump *end = NULL;
2785 struct sljit_jump *nl = NULL;
2786 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2787 struct sljit_jump *singlechar;
2788 #endif
2789 jump_list *newline = NULL;
2790 BOOL newlinecheck = FALSE;
2791 BOOL readuchar = FALSE;
2792
2793 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2794 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2795 newlinecheck = TRUE;
2796
2797 if (firstline)
2798 {
2799 /* Search for the end of the first line. */
2800 SLJIT_ASSERT(common->first_line_end != 0);
     /* STR_PTR is used as the scan pointer, TMP3 keeps its original value. */
2801 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2802
2803 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2804 {
     /* Two unit fixed newline: compare the units at STR_PTR - 1 and STR_PTR
     with the high and low byte of common->newline. The value stored is one
     unit before the final STR_PTR. */
2805 mainloop = LABEL();
2806 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2807 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2808 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2809 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2810 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2811 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2812 JUMPHERE(end);
2813 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2814 }
2815 else
2816 {
2817 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2818 mainloop = LABEL();
2819 /* Continual stores do not cause data dependency. */
     /* The position is stored before each character is read, so when a newline
     is found the local holds the start of that newline and the store after
     the loop is bypassed. */
2820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2821 read_char(common);
2822 check_newlinechar(common, common->nltype, &newline, TRUE);
2823 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2824 JUMPHERE(end);
2825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2826 set_jumps(newline, LABEL());
2827 }
2828
2829 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2830 }
2831
     /* First entry: skip the advance code below. */
2832 start = JUMP(SLJIT_JUMP);
2833
2834 if (newlinecheck)
2835 {
     /* Reached from the main loop when the current unit equals the high byte
     of common->newline: step over it, and over the next unit as well when
     that one equals the low byte. */
2836 newlinelabel = LABEL();
2837 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2838 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2839 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2840 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2841 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2842 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2843 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2844 #endif
2845 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2846 nl = JUMP(SLJIT_JUMP);
2847 }
2848
     /* From here on 'mainloop' is the label that is returned. */
2849 mainloop = LABEL();
2850
2851 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2852 #ifdef SUPPORT_UTF
2853 if (common->utf) readuchar = TRUE;
2854 #endif
2855 if (newlinecheck) readuchar = TRUE;
2856
     /* The current unit is loaded only when the code below needs its value. */
2857 if (readuchar)
2858 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2859
2860 if (newlinecheck)
2861 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2862
2863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2864 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2865 #if defined COMPILE_PCRE8
2866 if (common->utf)
2867 {
     /* Lead byte >= 0xc0: add the byte read from utf8_table4[lead - 0xc0]. */
2868 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2869 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2870 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2871 JUMPHERE(singlechar);
2872 }
2873 #elif defined COMPILE_PCRE16
2874 if (common->utf)
2875 {
     /* High surrogate ((unit & 0xfc00) == 0xd800): skip one more unit. */
2876 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2877 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2878 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2879 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2880 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2881 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2882 JUMPHERE(singlechar);
2883 }
2884 #endif /* COMPILE_PCRE[8|16] */
2885 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2886 JUMPHERE(start);
2887
     /* Both exits of the newline block rejoin here, after the advance. */
2888 if (newlinecheck)
2889 {
2890 JUMPHERE(end);
2891 JUMPHERE(nl);
2892 }
2893
2894 return mainloop;
2895 }
2896
2897 #define MAX_N_CHARS 3
2898
2899 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2900 {
     /* Tries to emit a search loop for the first two or three code units of
     the pattern. The compiled pattern is scanned from
     common->start + 1 + LINK_SIZE and up to MAX_N_CHARS units are collected.
     Returns FALSE without emitting any code when the pattern has
     alternatives, when a caseless character differs from its other case in
     more than one bit, or when fewer than two units were collected. Returns
     TRUE after the loop has been emitted.
     chars[] holds pairs: chars[2 * i] is the expected unit (with the case bit
     already set) and chars[2 * i + 1] is the case bit mask, or 0 for an exact
     comparison. The emitted code uses TMP1, TMP2 and (for firstline) TMP3. */
2901 DEFINE_COMPILER;
2902 struct sljit_label *start;
2903 struct sljit_jump *quit;
2904 pcre_uint32 chars[MAX_N_CHARS * 2];
2905 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2906 int location = 0;
2907 pcre_int32 len, c, bit, caseless;
2908 int must_stop;
2909
2910 /* We do not support alternatives now. */
2911 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2912 return FALSE;
2913
2914 while (TRUE)
2915 {
     /* must_stop: 0 = plain character, scanning may continue afterwards;
     1 = the character is recorded but scanning stops after it;
     2 = unsupported opcode, stop without recording anything. */
2916 caseless = 0;
2917 must_stop = 1;
2918 switch(*cc)
2919 {
2920 case OP_CHAR:
2921 must_stop = 0;
2922 cc++;
2923 break;
2924
2925 case OP_CHARI:
2926 caseless = 1;
2927 must_stop = 0;
2928 cc++;
2929 break;
2930
2931 case OP_SOD:
2932 case OP_SOM:
2933 case OP_SET_SOM:
2934 case OP_NOT_WORD_BOUNDARY:
2935 case OP_WORD_BOUNDARY:
2936 case OP_EODN:
2937 case OP_EOD:
2938 case OP_CIRC:
2939 case OP_CIRCM:
2940 case OP_DOLL:
2941 case OP_DOLLM:
2942 /* Zero width assertions. */
2943 cc++;
2944 continue;
2945
2946 case OP_PLUS:
2947 case OP_MINPLUS:
2948 case OP_POSPLUS:
2949 cc++;
2950 break;
2951
2952 case OP_EXACT:
     /* Skips the opcode and the IMM2_SIZE units that follow it. */
2953 cc += 1 + IMM2_SIZE;
2954 break;
2955
2956 case OP_PLUSI:
2957 case OP_MINPLUSI:
2958 case OP_POSPLUSI:
2959 caseless = 1;
2960 cc++;
2961 break;
2962
2963 case OP_EXACTI:
2964 caseless = 1;
2965 cc += 1 + IMM2_SIZE;
2966 break;
2967
2968 default:
2969 must_stop = 2;
2970 break;
2971 }
2972
2973 if (must_stop == 2)
2974 break;
2975
     /* len = number of code units of the character at cc. */
2976 len = 1;
2977 #ifdef SUPPORT_UTF
2978 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2979 #endif
2980
2981 if (caseless && char_has_othercase(common, cc))
2982 {
2983 caseless = char_get_othercase_bit(common, cc);
2984 if (caseless == 0)
2985 return FALSE;
     /* Re-pack the result: the low 8 bits become the value of 'len' at which
     the affected unit is reached in the loop below, the bits above become
     the mask to OR into that unit (moved up by 8 more bits when the 0x100
     flag says the bit belongs to the upper byte of the unit). */
2986 #ifdef COMPILE_PCRE8
2987 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2988 #else
2989 if ((caseless & 0x100) != 0)
2990 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2991 else
2992 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2993 #endif
2994 }
2995 else
2996 caseless = 0;
2997
     /* Record the units of this character; len counts down, so the unit that
     carries the case bit is the one where len equals the packed low byte. */
2998 while (len > 0 && location < MAX_N_CHARS * 2)
2999 {
3000 c = *cc;
3001 bit = 0;
3002 if (len == (caseless & 0xff))
3003 {
3004 bit = caseless >> 8;
3005 c |= bit;
3006 }
3007
3008 chars[location] = c;
3009 chars[location + 1] = bit;
3010
3011 len--;
3012 location += 2;
3013 cc++;
3014 }
3015
3016 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3017 break;
3018 }
3019
3020 /* At least two characters are required. */
3021 if (location < 2 * 2)
3022 return FALSE;
3023
     /* STR_END is lowered by (number of units - 1) while the loop runs, so the
     read-ahead loads below stay inside the subject. With firstline the limit
     is derived from the first_line_end local and the real STR_END is kept in
     TMP3. */
3024 if (firstline)
3025 {
3026 SLJIT_ASSERT(common->first_line_end != 0);
3027 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3028 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3029 }
3030 else
3031 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3032
3033 start = LABEL();
3034 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3035
     /* Units 0 and 1 are loaded, then STR_PTR is advanced; every mismatch
     restarts the loop at the next position. The third unit, when present, is
     loaded into TMP1 once the first comparison no longer needs it. */
3036 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3037 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3038 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3039 if (chars[1] != 0)
3040 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3041 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3042 if (location > 2 * 2)
3043 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3044 if (chars[3] != 0)
3045 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3046 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3047 if (location > 2 * 2)
3048 {
3049 if (chars[5] != 0)
3050 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3051 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3052 }
     /* All units matched: move STR_PTR back to the first one. */
3053 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3054
3055 JUMPHERE(quit);
3056
     /* Restore the real STR_END. */
3057 if (firstline)
3058 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3059 else
3060 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3061 return TRUE;
3062 }
3063
3064 #undef MAX_N_CHARS
3065
3066 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3067 {
     /* Emits a loop that advances STR_PTR one code unit at a time until the
     unit at STR_PTR equals first_char (or its other case when caseless is
     set), or until STR_PTR reaches STR_END. With firstline the search is
     limited by the first_line_end local; the real STR_END is kept in TMP3 and
     restored afterwards. The emitted code uses TMP1 and TMP2. */
3068 DEFINE_COMPILER;
3069 struct sljit_label *start;
3070 struct sljit_jump *quit;
3071 struct sljit_jump *found;
3072 pcre_uchar oc, bit;
3073
3074 if (firstline)
3075 {
3076 SLJIT_ASSERT(common->first_line_end != 0);
3077 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3078 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3079 }
3080
3081 start = LABEL();
3082 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3083 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3084
     /* oc is the other case of first_char, or first_char itself when the
     search is case sensitive. */
3085 oc = first_char;
3086 if (caseless)
3087 {
3088 oc = TABLE_GET(first_char, common->fcc, first_char);
3089 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3090 if (first_char > 127 && common->utf)
3091 oc = UCD_OTHERCASE(first_char);
3092 #endif
3093 }
3094 if (first_char == oc)
3095 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3096 else
3097 {
3098 bit = first_char ^ oc;
3099 if (is_powerof2(bit))
3100 {
     /* The cases differ in a single bit: force that bit on and compare once. */
3101 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3102 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3103 }
3104 else
3105 {
     /* General case: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
3106 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3107 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3109 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3110 found = JUMP(SLJIT_C_NOT_ZERO);
3111 }
3112 }
3113
     /* No match at this position: try the next code unit. */
3114 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3115 JUMPTO(SLJIT_JUMP, start);
3116 JUMPHERE(found);
3117 JUMPHERE(quit);
3118
3119 if (firstline)
3120 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3121 }
3122
3123 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3124 {
3125 DEFINE_COMPILER;
3126 struct sljit_label *loop;
3127 struct sljit_jump *lastchar;
3128 struct sljit_jump *firstchar;
3129 struct sljit_jump *quit;
3130 struct sljit_jump *foundcr = NULL;
3131 struct sljit_jump *notfoundnl;
3132 jump_list *newline = NULL;
3133
3134 if (firstline)
3135 {
3136 SLJIT_ASSERT(common->first_line_end != 0);
3137 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3138 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3139 }
3140
3141 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3142 {
3143 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3144 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3146 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3147 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3148
3149 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3150 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3152 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3153 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3154 #endif
3155 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3156
3157 loop = LABEL();
3158 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3159 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3160 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3161 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3162 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3163 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3164
3165 JUMPHERE(quit);
3166 JUMPHERE(firstchar);
3167 JUMPHERE(lastchar);
3168
3169 if (firstline)
3170 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3171 return;
3172 }
3173
3174 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3175 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3176 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3177 skip_char_back(common);
3178
3179 loop = LABEL();
3180 read_char(common);
3181 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3182 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3183 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3184 check_newlinechar(common, common->nltype, &newline, FALSE);
3185 set_jumps(newline, loop);
3186
3187 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3188 {
3189 quit = JUMP(SLJIT_JUMP);
3190 JUMPHERE(foundcr);
3191 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3192 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3194 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3195 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3196 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3197 #endif
3198 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3199 JUMPHERE(notfoundnl);
3200 JUMPHERE(quit);
3201 }
3202 JUMPHERE(lastchar);
3203 JUMPHERE(firstchar);
3204
3205 if (firstline)
3206 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3207 }
3208
3209 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3210
/* Emits a scanning loop that moves STR_PTR forward until the code unit at
STR_PTR is accepted by the start_bits bitmap, or until STR_PTR reaches STR_END.
check_class_ranges() is tried first; when it returns FALSE the generated code
tests the bitmap bit directly.

When firstline is set, STR_END is replaced while the loop runs by the value
stored at common->first_line_end; the original STR_END is kept in RETURN_ADDR
and put back before leaving. */
3211 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3212 {
3213 DEFINE_COMPILER;
3214 struct sljit_label *start;
3215 struct sljit_jump *quit;
3216 struct sljit_jump *found = NULL;
3217 jump_list *matches = NULL;
3218 #ifndef COMPILE_PCRE8
3219 struct sljit_jump *jump;
3220 #endif
3221
3222 if (firstline)
3223 {
3224 SLJIT_ASSERT(common->first_line_end != 0);
3225 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3226 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3227 }
3228
3229 start = LABEL();
3230 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3231 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode a copy of the code unit is kept in TMP3; it is moved back to
TMP1 after the test below, where it decides how far STR_PTR is advanced. */
3232 #ifdef SUPPORT_UTF
3233 if (common->utf)
3234 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3235 #endif
3236
/* invert is TRUE here, and the collected "matches" jumps are bound to the exit
label at the end of this function. */
3237 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3238 {
/* Wide builds: every value of 255 and above is tested as 255. */
3239 #ifndef COMPILE_PCRE8
3240 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3242 JUMPHERE(jump);
3243 #endif
/* Test bit (TMP1 & 7) of the byte start_bits[TMP1 >> 3]. */
3244 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3245 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3246 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3247 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3248 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3249 found = JUMP(SLJIT_C_NOT_ZERO);
3250 }
3251
/* Not accepted: step over this code unit and try again. */
3252 #ifdef SUPPORT_UTF
3253 if (common->utf)
3254 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3255 #endif
3256 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3257 #ifdef SUPPORT_UTF
3258 #if defined COMPILE_PCRE8
3259 if (common->utf)
3260 {
/* For values of 0xc0 and above, STR_PTR is additionally advanced by the value
read from PRIV(utf8_table4)[TMP1 - 0xc0]. */
3261 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3262 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3263 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3264 }
3265 #elif defined COMPILE_PCRE16
3266 if (common->utf)
3267 {
/* For values of 0xd800 and above: when (TMP1 & 0xfc00) == 0xd800, STR_PTR is
advanced by 2 more (the 0/1 flag shifted left by one). */
3268 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3269 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3270 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3271 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3272 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3273 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3274 }
3275 #endif /* COMPILE_PCRE[8|16] */
3276 #endif /* SUPPORT_UTF */
3277 JUMPTO(SLJIT_JUMP, start);
/* Common exit: reached on a bitmap hit, a range hit, or end of input. */
3278 if (found != NULL)
3279 JUMPHERE(found);
3280 if (matches != NULL)
3281 set_jumps(matches, LABEL());
3282 JUMPHERE(quit);
3283
3284 if (firstline)
3285 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3286 }
3287
/* Emits the code that looks for the required character (req_char) in the
subject, scanning forward with TMP1 from STR_PTR (or from the character after
STR_PTR when has_firstchar is set).

The search is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END, or
when STR_PTR is below the position cached in the local slot
common->req_char_ptr. When the character is found, its position is stored in
that slot.

Returns the jump that is taken when the scan reaches STR_END without finding
the character; binding it is left to the caller. */
3288 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3289 {
3290 DEFINE_COMPILER;
3291 struct sljit_label *loop;
3292 struct sljit_jump *toolong;
3293 struct sljit_jump *alreadyfound;
3294 struct sljit_jump *found;
3295 struct sljit_jump *foundoc = NULL;
3296 struct sljit_jump *notfound;
3297 pcre_uint32 oc, bit;
3298
3299 SLJIT_ASSERT(common->req_char_ptr != 0);
3300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3301 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3302 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3303 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3304
3305 if (has_firstchar)
3306 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3307 else
3308 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3309
3310 loop = LABEL();
3311 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3312
3313 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the
comparison is caseful. */
3314 oc = req_char;
3315 if (caseless)
3316 {
3317 oc = TABLE_GET(req_char, common->fcc, req_char);
3318 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3319 if (req_char > 127 && common->utf)
3320 oc = UCD_OTHERCASE(req_char);
3321 #endif
3322 }
3323 if (req_char == oc)
3324 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3325 else
3326 {
3327 bit = req_char ^ oc;
/* When the two cases differ in a single bit, that bit is forced on in the
loaded value and one comparison is enough; otherwise both are compared. */
3328 if (is_powerof2(bit))
3329 {
3330 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3331 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3332 }
3333 else
3334 {
3335 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3336 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3337 }
3338 }
3339 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3340 JUMPTO(SLJIT_JUMP, loop);
3341
3342 JUMPHERE(found);
3343 if (foundoc)
3344 JUMPHERE(foundoc);
/* Remember where the character was found. */
3345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3346 JUMPHERE(alreadyfound);
3347 JUMPHERE(toolong);
3348 return notfound;
3349 }
3350
/* Emits a fast-call helper (return address kept in RETURN_ADDR) that walks a
list of records starting at STACK_TOP. TMP1 is the cursor and TMP3 is set up by
GET_LOCAL_BASE(TMP3, 0, 0) (presumably the base address of the locals - confirm
against the macro). The first word of every record selects what is done:

  > 0  the word is added to TMP3 to form a destination address, the next two
       words of the record are copied there, and the cursor moves on by three
       words;
  == 0 end of the list, the helper returns;
  < 0  the negated word is added to TMP3 to form a destination address, the
       next single word is copied there, and the cursor moves on by two words.

STACK_TOP itself is not modified; TMP1, TMP2 and TMP3 are overwritten. */
3351 static void do_revertframes(compiler_common *common)
3352 {
3353 DEFINE_COMPILER;
3354 struct sljit_jump *jump;
3355 struct sljit_label *mainloop;
3356
3357 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3358 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3359 GET_LOCAL_BASE(TMP3, 0, 0);
3360
3361 /* Drop frames until we reach STACK_TOP. */
3362 mainloop = LABEL();
3363 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3364 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3365 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3366
3367 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3368 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3369 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3370 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3371 JUMPTO(SLJIT_JUMP, mainloop);
3372
/* Zero or negative word. The second conditional jump reuses the flags of the
comparison against zero above; no flag-setting operation is emitted between
the two jumps. */
3373 JUMPHERE(jump);
3374 jump = JUMP(SLJIT_C_SIG_LESS);
3375 /* End of dropping frames. */
3376 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3377
3378 JUMPHERE(jump);
3379 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3380 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3381 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3382 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3383 JUMPTO(SLJIT_JUMP, mainloop);
3384 }
3385
/* Emits a fast-call helper that tests for a word boundary at STR_PTR. The
return address is kept in the LOCALS0 slot. The "is a word character" value
(0 or 1) of the character before STR_PTR is stored in LOCALS1, the value for
the character at STR_PTR is computed in TMP2, and the helper ends with an XOR
of the two that only sets the flags: a non-zero result means the two sides
differ. At the start of the subject (STR_PTR <= jit_arguments->begin) the
previous side counts as 0; when check_str_end() jumps out, the current side
counts as 0.

With common->use_ucp the word test is: underscore, or a type value (obtained
through the common->getucd helper) in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No.
Otherwise the ctype_word bit of the common->ctypes table is used, and
characters above 255 (wide builds, or 8 bit UTF mode) count as non-word. */
3386 static void check_wordboundary(compiler_common *common)
3387 {
3388 DEFINE_COMPILER;
3389 struct sljit_jump *skipread;
3390 jump_list *skipread_list = NULL;
3391 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3392 struct sljit_jump *jump;
3393 #endif
3394
/* The shift by 4 used below depends on this value. */
3395 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3396
3397 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3398 /* Get type of the previous char, and put it to LOCALS1. */
3399 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3401 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3402 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it
was (assuming read_char() advances over the character it reads - confirm). */
3403 skip_char_back(common);
3404 check_start_used_ptr(common);
3405 read_char(common);
3406
3407 /* Testing char type. */
3408 #ifdef SUPPORT_UCP
3409 if (common->use_ucp)
3410 {
/* TMP2 is preset to 1 for the underscore case, which skips the type lookup. */
3411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3412 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3413 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks: ucp_Ll..ucp_Lu, then ucp_Nd..ucp_No. */
3414 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3415 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3416 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3417 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3418 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3419 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3420 JUMPHERE(jump);
3421 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3422 }
3423 else
3424 #endif
3425 {
3426 #ifndef COMPILE_PCRE8
3427 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3428 #elif defined SUPPORT_UTF
3429 /* Here LOCALS1 has already been zeroed. */
3430 jump = NULL;
3431 if (common->utf)
3432 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3433 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (0x10) of the table entry as 0 or 1. */
3434 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3435 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3436 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3438 #ifndef COMPILE_PCRE8
3439 JUMPHERE(jump);
3440 #elif defined SUPPORT_UTF
3441 if (jump != NULL)
3442 JUMPHERE(jump);
3443 #endif /* COMPILE_PCRE8 */
3444 }
3445 JUMPHERE(skipread);
3446
/* Now the character at STR_PTR; TMP2 stays 0 when check_str_end() jumps to
skipread_list. */
3447 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3448 check_str_end(common, &skipread_list);
3449 peek_char(common);
3450
3451 /* Testing char type. This is a code duplication. */
3452 #ifdef SUPPORT_UCP
3453 if (common->use_ucp)
3454 {
3455 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3456 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3457 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3458 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3459 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3460 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3461 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3462 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3463 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3464 JUMPHERE(jump);
3465 }
3466 else
3467 #endif
3468 {
3469 #ifndef COMPILE_PCRE8
3470 /* TMP2 may be destroyed by peek_char. */
3471 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3472 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3473 #elif defined SUPPORT_UTF
3474 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3475 jump = NULL;
3476 if (common->utf)
3477 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3478 #endif
3479 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3480 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3481 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3482 #ifndef COMPILE_PCRE8
3483 JUMPHERE(jump);
3484 #elif defined SUPPORT_UTF
3485 if (jump != NULL)
3486 JUMPHERE(jump);
3487 #endif /* COMPILE_PCRE8 */
3488 }
3489 set_jumps(skipread_list, LABEL());
3490
/* Flags only: the XOR result is discarded (SLJIT_UNUSED). */
3491 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3492 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3493 }
3494
3495 /*
3496 range format:
3497
3498 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3499 ranges[1] = first bit (0 or 1)
3500 ranges[2 .. length + 1] = positions where the bit changes (the bit at that position differs from the previous one)
3501 */
3502
/* Emits compare-and-jump code that tests the character in TMP1 against a range
description (ranges[0] = number of change positions, ranges[1] = bit of the
first range, ranges[2...] = change positions). A jump is added to backtracks
for characters that fall into a range whose bit is 0.

Only descriptions with 0 to 4 change positions are handled: the function
returns FALSE without emitting anything when ranges[0] is negative or greater
than 4, and TRUE otherwise. When readch is set, read_char() is emitted first to
load the character.

Side effects to be aware of: the generated code modifies TMP1 (it is rebased
with SUB, or has a bit forced with OR), and in one of the four-position cases
this function rewrites ranges[4] and ranges[5] in the caller's array. */
3503 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3504 {
3505 DEFINE_COMPILER;
3506
3507 if (ranges[0] < 0 || ranges[0] > 4)
3508 return FALSE;
3509
3510 /* No character is accepted. */
3511 if (ranges[0] == 0 && ranges[1] == 0)
3512 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3513
3514 if (readch)
3515 read_char(common);
3516
3517 switch(ranges[0])
3518 {
3519 case 0:
3520 /* When ranges[1] != 0, all characters are accepted. */
3521 return TRUE;
3522
/* One change position: a single threshold comparison. */
3523 case 1:
3524 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3525 return TRUE;
3526
/* Two change positions: one interval [ranges[2], ranges[3]), tested with a
single comparison after rebasing TMP1, or with an equality test when the
interval holds exactly one value. */
3527 case 2:
3528 if (ranges[2] + 1 != ranges[3])
3529 {
3530 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3531 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3532 }
3533 else
3534 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3535 return TRUE;
3536
/* Three change positions: one threshold plus one interval. */
3537 case 3:
3538 if (ranges[1] != 0)
3539 {
3540 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3541 if (ranges[2] + 1 != ranges[3])
3542 {
3543 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3544 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3545 }
3546 else
3547 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3548 return TRUE;
3549 }
3550
3551 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2]));
3552 if (ranges[3] + 1 != ranges[4])
3553 {
3554 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]);
3555 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3556 }
3557 else
3558 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3]));
3559 return TRUE;
3560
3561 case 4:
/* Two intervals of equal length whose start positions differ in exactly one
bit (set in the second start only): that bit is forced on in TMP1 so both
intervals collapse into the second one and a single test is enough. */
3562 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])
3563 && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]
3564 && is_powerof2(ranges[4] - ranges[2]))
3565 {
3566 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3567 if (ranges[4] + 1 != ranges[5])
3568 {
3569 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3570 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3571 }
3572 else
3573 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3574 return TRUE;
3575 }
3576
3577 if (ranges[1] != 0)
3578 {
3579 if (ranges[2] + 1 != ranges[3])
3580 {
3581 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3582 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
/* TMP1 has just been rebased by ranges[2], so the second interval is rebased
the same way. This writes to the caller's array. */
3583 ranges[4] -= ranges[2];
3584 ranges[5] -= ranges[2];
3585 }
3586 else
3587 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3588
3589 if (ranges[4] + 1 != ranges[5])
3590 {
3591 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3592 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3593 }
3594 else
3595 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3596 return TRUE;
3597 }
3598
/* ranges[1] == 0: first reject everything outside [ranges[2], ranges[5]),
then reject the gap [ranges[3], ranges[4]) inside it. */
3599 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3600 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[2]));
3601 if (ranges[3] + 1 != ranges[4])
3602 {
3603 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]);
3604 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3605 }
3606 else
3607 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3608 return TRUE;
3609
3610 default:
3611 SLJIT_ASSERT_STOP();
3612 return FALSE;
3613 }
3614 }
3615
3616 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3617 {
3618 int i, bit, length;
3619 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3620
3621 bit = ctypes[0] & flag;
3622 ranges[0] = -1;
3623 ranges[1] = bit != 0 ? 1 : 0;
3624 length = 0;
3625
3626 for (i = 1; i < 256; i++)
3627 if ((ctypes[i] & flag) != bit)
3628 {
3629 if (length >= MAX_RANGE_SIZE)
3630 return;
3631 ranges[2 + length] = i;
3632 length++;
3633 bit ^= flag;
3634 }
3635
3636 if (bit != 0)
3637 {
3638 if (length >= MAX_RANGE_SIZE)
3639 return;
3640 ranges[2 + length] = 256;
3641 length++;
3642 }
3643 ranges[0] = length;
3644 }
3645
3646 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3647 {
3648 int ranges[2 + MAX_RANGE_SIZE];
3649 pcre_uint8 bit, cbit, all;
3650 int i, byte, length = 0;
3651
3652 bit = bits[0] & 0x1;
3653 ranges[1] = !invert ? bit : (bit ^ 0x1);
3654 /* All bits will be zero or one (since bit is zero or one). */
3655 all = -bit;
3656
3657 for (i = 0; i < 256; )
3658 {
3659 byte = i >> 3;
3660 if ((i & 0x7) == 0 && bits[byte] == all)
3661 i += 8;
3662 else
3663 {
3664 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3665 if (cbit != bit)
3666 {
3667 if (length >= MAX_RANGE_SIZE)
3668 return FALSE;
3669 ranges[2 + length] = i;
3670 length++;
3671 bit = cbit;
3672 all = -cbit;
3673 }
3674 i++;
3675 }
3676 }
3677
3678 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3679 {
3680 if (length >= MAX_RANGE_SIZE)
3681 return FALSE;
3682 ranges[2 + length] = 256;
3683 length++;
3684 }
3685 ranges[0] = length;
3686
3687 return check_ranges(common, ranges, backtracks, FALSE);
3688 }
3689
/* Emits a fast-call helper (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in wide
builds and in 8 bit UTF mode. The result is accumulated in TMP2 and the last
operation also sets the flags from it. TMP1 is modified as well (0x0a is
subtracted and, on the wide path, bit 0 is forced on). */
3690 static void check_anynewline(compiler_common *common)
3691 {
3692 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3693 DEFINE_COMPILER;
3694
3695 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3696
/* After rebasing by 0x0a, one unsigned comparison covers 0x0a..0x0d. */
3697 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3698 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3699 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3700 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3701 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3702 #ifdef COMPILE_PCRE8
3703 if (common->utf)
3704 {
3705 #endif
/* Record the 0x85 result, then force bit 0 so that 0x2028 and 0x2029 are both
caught by the single comparison against 0x2029. */
3706 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3707 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3708 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3709 #ifdef COMPILE_PCRE8
3710 }
3711 #endif
3712 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending comparison and leave the flags set from TMP2. */
3713 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3714 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3715 }
3716
/* Emits a fast-call helper (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x09, 0x20 and 0xa0, plus - in wide builds and in
8 bit UTF mode - 0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f and 0x3000.
The result is accumulated in TMP2 and the last operation also sets the flags
from it. On the wide path TMP1 is modified (0x2000 is subtracted). */
3717 static void check_hspace(compiler_common *common)
3718 {
3719 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed. */
3720 DEFINE_COMPILER;
3721
3722 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3723
/* Each comparison result is OR-ed into TMP2 by the OP_FLAGS that follows the
next comparison's setup; the last pending one is merged at the end. */
3724 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3725 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3726 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3727 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3729 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3730 #ifdef COMPILE_PCRE8
3731 if (common->utf)
3732 {
3733 #endif
3734 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3735 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3736 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3737 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3738 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased by 0x2000, so the constants below are given
relative to 0x2000; the first test is an unsigned range check. */
3739 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3740 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3741 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3742 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3743 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3744 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3745 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3746 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3747 #ifdef COMPILE_PCRE8
3748 }
3749 #endif
3750 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3751 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3752
3753 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3754 }
3755
/* Emits a fast-call helper (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in wide
builds and in 8 bit UTF mode. The result is accumulated in TMP2 and the last
operation also sets the flags from it. TMP1 is modified as well (0x0a is
subtracted and, on the wide path, bit 0 is forced on). */
3756 static void check_vspace(compiler_common *common)
3757 {
3758 /* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed. */
3759 DEFINE_COMPILER;
3760
3761 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3762
/* After rebasing by 0x0a, one unsigned comparison covers 0x0a..0x0d. */
3763 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3764 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3765 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3766 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3767 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3768 #ifdef COMPILE_PCRE8
3769 if (common->utf)
3770 {
3771 #endif
/* Record the 0x85 result, then force bit 0 so that 0x2028 and 0x2029 are both
caught by the single comparison against 0x2029. The flags produced by this
OP_FLAGS are overwritten by the comparison two lines below. */
3772 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3774 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3775 #ifdef COMPILE_PCRE8
3776 }
3777 #endif
3778 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending comparison and leave the flags set from TMP2. */
3779 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3780
3781 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3782 }
3783
3784 #define CHAR1 STR_END
3785 #define CHAR2 STACK_TOP
3786
/* Emits a fast-call helper (return address in RETURN_ADDR) that compares two
sequences unit by unit without case folding. On entry TMP1 addresses the first
sequence, TMP2 holds the length (counted in the same units as IN_UCHARS()), and
STR_PTR is TMP2 past the start of the second sequence: it is moved back by
TMP2 first and walks forward again during the comparison.

CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP, which are borrowed as
scratch registers here; their values are saved in TMP3 / LOCALS0 and restored
before returning. The loop stops at the first difference or when TMP2 reaches
zero. NOTE(review): the caller presumably tells the two outcomes apart by
testing TMP2 - confirm at the call sites. */
3787 static void do_casefulcmp(compiler_common *common)
3788 {
3789 DEFINE_COMPILER;
3790 struct sljit_jump *jump;
3791 struct sljit_label *label;
3792
3793 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3794 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3795 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers start one unit early because every load below addresses the
unit at offset IN_UCHARS(1) (MOVU_UCHAR is presumably a load that also updates
the base register - confirm against its definition). */
3797 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3798 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3799
3800 label = LABEL();
3801 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3802 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3803 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3804 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3805 JUMPTO(SLJIT_C_NOT_ZERO, label);
3806
3807 JUMPHERE(jump);
/* Undo the initial one-unit bias of STR_PTR and give the borrowed registers
back. */
3808 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3809 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3810 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3811 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3812 }
3813
3814 #define LCC_TABLE STACK_LIMIT
3815
/* Emits a fast-call helper (return address in RETURN_ADDR) that compares two
sequences unit by unit after translating each unit through the common->lcc
table. On entry TMP1 addresses the first sequence, TMP2 holds the length
(counted in the same units as IN_UCHARS()), and STR_PTR is TMP2 past the start
of the second sequence: it is moved back by TMP2 first.

Three registers are borrowed as scratch: LCC_TABLE (an alias of STACK_LIMIT)
holds the table address, CHAR1 and CHAR2 (aliases of STR_END and STACK_TOP)
hold the two units. Their values are saved in TMP3, LOCALS0 and LOCALS1 and
restored before returning. In wide builds, units above 255 are compared
without translation. */
3816 static void do_caselesscmp(compiler_common *common)
3817 {
3818 DEFINE_COMPILER;
3819 struct sljit_jump *jump;
3820 struct sljit_label *label;
3821
3822 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3823 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3824
3825 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3827 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3828 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one unit early because every load below addresses the
unit at offset IN_UCHARS(1) (MOVU_UCHAR is presumably a load that also updates
the base register - confirm against its definition). */
3829 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3830 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3831
3832 label = LABEL();
3833 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3834 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for units above 255 in wide builds. */
3835 #ifndef COMPILE_PCRE8
3836 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3837 #endif
3838 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3839 #ifndef COMPILE_PCRE8
3840 JUMPHERE(jump);
3841 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3842 #endif
3843 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3844 #ifndef COMPILE_PCRE8
3845 JUMPHERE(jump);
3846 #endif
3847 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3848 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3849 JUMPTO(SLJIT_C_NOT_ZERO, label);
3850
3851 JUMPHERE(jump);
/* Undo the initial one-unit bias of STR_PTR and give the borrowed registers
back. */
3852 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3853 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3854 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3855 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3856 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3857 }
3858
3859 #undef LCC_TABLE
3860 #undef CHAR1
3861 #undef CHAR2
3862
3863 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3864
3865 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3866 {
3867 /* This function would be ineffective to do in JIT level. */
3868 pcre_uint32 c1, c2;
3869 const pcre_uchar *src2 = args->uchar_ptr;
3870 const pcre_uchar *end2 = args->end;
3871 const ucd_record *ur;
3872 const pcre_uint32 *pp;
3873
3874 while (src1 < end1)
3875 {
3876 if (src2 >= end2)
3877 return (pcre_uchar*)1;
3878 GETCHARINC(c1, src1);
3879 GETCHARINC(c2, src2);
3880 ur = GET_UCD(c2);
3881 if (c1 != c2 && c1 != c2 + ur->other_case)
3882 {
3883 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3884 for (;;)
3885 {
3886 if (c1 < *pp) return NULL;
3887 if (c1 == *pp++) break;
3888 }
3889 }
3890 }
3891 return src2;
3892 }
3893
3894 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3895
/* Emits the comparison of one pattern character, starting at cc, against the
subject. Subject units are addressed at negative offsets from STR_PTR
(-context->length), and context->length is reduced by one unit for every unit
processed. In UTF mode a character that has extra units (HAS_EXTRALEN) is
processed unit by unit in the do/while loop. Every mismatch test adds a jump
to backtracks. Returns cc advanced past the units that were processed.

Caseless matching: when the character has another case, the bit that differs
between the two cases is OR-ed into the loaded subject value and into the
expected value, for the one unit (othercasechar) that contains that bit.

Two code paths exist. When unaligned loads are available (8 and 16 bit builds)
the expected units are collected in context->c / context->oc and compared 2 or
4 bytes at a time. Otherwise every unit is loaded and compared separately. In
both paths two registers (TMP1 and TMP2) are used alternately: the load for the
next comparison is emitted before the comparison of the previously loaded
value. context->sourcereg == -1 marks the first call for a sequence. */
3896 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3897 compare_context* context, jump_list **backtracks)
3898 {
3899 DEFINE_COMPILER;
3900 unsigned int othercasebit = 0;
3901 pcre_uchar *othercasechar = NULL;
3902 #ifdef SUPPORT_UTF
3903 int utflength;
3904 #endif
3905
3906 if (caseless && char_has_othercase(common, cc))
3907 {
3908 othercasebit = char_get_othercase_bit(common, cc);
3909 SLJIT_ASSERT(othercasebit);
3910 /* Extracting bit difference info. */
/* The value packs two things: the upper part selects the unit (relative to
cc) that holds the differing bit, the low byte is the bit mask. */
3911 #if defined COMPILE_PCRE8
3912 othercasechar = cc + (othercasebit >> 8);
3913 othercasebit &= 0xff;
3914 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3915 /* Note that this code only handles characters in the BMP. If there
3916 ever are characters outside the BMP whose othercase differs in only one
3917 bit from itself (there currently are none), this code will need to be
3918 revised for COMPILE_PCRE32. */
3919 othercasechar = cc + (othercasebit >> 9);
3920 if ((othercasebit & 0x100) != 0)
3921 othercasebit = (othercasebit & 0xff) << 8;
3922 else
3923 othercasebit &= 0xff;
3924 #endif /* COMPILE_PCRE[8|16|32] */
3925 }
3926
/* First call for this sequence: emit the initial load into TMP1 and make
TMP2 the register for the next load. */
3927 if (context->sourcereg == -1)
3928 {
3929 #if defined COMPILE_PCRE8
3930 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3931 if (context->length >= 4)
3932 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3933 else if (context->length >= 2)
3934 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3935 else
3936 #endif
3937 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3938 #elif defined COMPILE_PCRE16
3939 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3940 if (context->length >= 4)
3941 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3942 else
3943 #endif
3944 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3945 #elif defined COMPILE_PCRE32
3946 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3947 #endif /* COMPILE_PCRE[8|16|32] */
3948 context->sourcereg = TMP2;
3949 }
3950
3951 #ifdef SUPPORT_UTF
3952 utflength = 1;
3953 if (common->utf && HAS_EXTRALEN(*cc))
3954 utflength += GET_EXTRALEN(*cc);
3955
3956 do
3957 {
3958 #endif
3959
3960 context->length -= IN_UCHARS(1);
3961 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3962
3963 /* Unaligned read is supported. */
/* Collect the expected unit (c) and its case mask (oc) for the pending chunk. */
3964 if (othercasebit != 0 && othercasechar == cc)
3965 {
3966 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3967 context->oc.asuchars[context->ucharptr] = othercasebit;
3968 }
3969 else
3970 {
3971 context->c.asuchars[context->ucharptr] = *cc;
3972 context->oc.asuchars[context->ucharptr] = 0;
3973 }
3974 context->ucharptr++;
3975
/* Flush the pending chunk when it is full, when the sequence ends, or (8 bit
only) when two units are pending and exactly one more unit remains. */
3976 #if defined COMPILE_PCRE8
3977 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3978 #else
3979 if (context->ucharptr >= 2 || context->length == 0)
3980 #endif
3981 {
/* Emit the load for the following chunk first, then switch to the register
that holds the chunk loaded earlier and compare that one. */
3982 if (context->length >= 4)
3983 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3984 else if (context->length >= 2)
3985 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3986 #if defined COMPILE_PCRE8
3987 else if (context->length >= 1)
3988 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3989 #endif /* COMPILE_PCRE8 */
3990 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3991
/* The case labels are unit counts: a 4 byte and a 2 byte chunk, and in the
8 bit build a single byte. */
3992 switch(context->ucharptr)
3993 {
3994 case 4 / sizeof(pcre_uchar):
3995 if (context->oc.asint != 0)
3996 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3997 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3998 break;
3999
4000 case 2 / sizeof(pcre_uchar):
4001 if (context->oc.asushort != 0)
4002 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4003 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4004 break;
4005
4006 #ifdef COMPILE_PCRE8
4007 case 1:
4008 if (context->oc.asbyte != 0)
4009 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4010 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4011 break;
4012 #endif
4013
4014 default:
4015 SLJIT_ASSERT_STOP();
4016 break;
4017 }
4018 context->ucharptr = 0;
4019 }
4020
4021 #else
4022
4023 /* Unaligned read is unsupported or in 32 bit mode. */
/* Emit the load of the next unit (if any), then compare the unit that was
loaded into the other register earlier. */
4024 if (context->length >= 1)
4025 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4026
4027 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4028
4029 if (othercasebit != 0 && othercasechar == cc)
4030 {
4031 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4032 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4033 }
4034 else
4035 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4036
4037 #endif
4038
4039 cc++;
4040 #ifdef SUPPORT_UTF
4041 utflength--;
4042 }
4043 while (utflength > 0);
4044 #endif
4045
4046 return cc;
4047 }
4048
4049 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4050
/* Rebases the value held in typereg so that it equals (type - value), and
records the new bias in the local variable typeoffset. An instruction is
emitted only when the bias actually changes. Expects typereg and typeoffset
to be in scope at the point of use. Wrapped in do/while(0) so that the macro
behaves as a single statement (safe under an unbraced if/else). Note that
value is evaluated more than once. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the local
variable charoffset: after the macro TMP1 holds (character - value). */

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4070
4071 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4072 {
4073 DEFINE_COMPILER;
4074 jump_list *found = NULL;
4075 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4076 pcre_int32 c, charoffset;
4077 struct sljit_jump *jump = NULL;
4078 pcre_uchar *ccbegin;
4079 int compares, invertcmp, numberofcmps;
4080
4081 #ifdef SUPPORT_UCP
4082 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4083 BOOL charsaved = FALSE;
4084 int typereg = TMP1, scriptreg = TMP1;
4085 const pcre_uint32 *other_cases;
4086 pcre_int32 typeoffset;
4087 #endif
4088
4089 /* Although SUPPORT_UTF must be defined, we are
4090 not necessary in utf mode even in 8 bit mode. */
4091 detect_partial_match(common, backtracks);
4092 read_char(common);
4093
4094 cc++;
4095 if ((cc[-1] & XCL_HASPROP) == 0)
4096 {
4097 if ((cc[-1] & XCL_MAP) != 0)
4098 {
4099 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4100 #ifdef SUPPORT_UCP
4101 charsaved = TRUE;
4102 #endif
4103 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4104 {
4105 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4106
4107 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4108 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4109 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4110 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4111 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4112 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4113 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4114
4115 JUMPHERE(jump);
4116 }
4117 else
4118 add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4119
4120 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4121 cc += 32 / sizeof(pcre_uchar);
4122 }
4123 else
4124 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4125 }
4126 else if ((cc[-1] & XCL_MAP) != 0)
4127 {
4128 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4129 #ifdef SUPPORT_UCP
4130 charsaved = TRUE;
4131 #endif
4132 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4133 {
4134 #ifdef COMPILE_PCRE8
4135 SLJIT_ASSERT(common->utf);
4136 #endif
4137 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4138
4139 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4140 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4141 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4142 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4143 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4144 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4145
4146 JUMPHERE(jump);
4147 }
4148
4149 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4150 cc += 32 / sizeof(pcre_uchar);
4151 }
4152
4153 /* Scanning the necessary info. */
4154 ccbegin = cc;
4155 compares = 0;
4156 while (*cc != XCL_END)
4157 {
4158 compares++;
4159 if (*cc == XCL_SINGLE)
4160 {
4161 cc += 2;
4162 #ifdef SUPPORT_UTF
4163 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4164 #endif
4165 #ifdef SUPPORT_UCP
4166 needschar = TRUE;
4167 #endif
4168 }
4169 else if (*cc == XCL_RANGE)
4170 {
4171 cc += 2;
4172 #ifdef SUPPORT_UTF
4173 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4174 #endif
4175 cc++;
4176 #ifdef SUPPORT_UTF
4177 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4178 #endif
4179 #ifdef SUPPORT_UCP
4180 needschar = TRUE;
4181 #endif
4182 }
4183 #ifdef SUPPORT_UCP
4184 else
4185 {
4186 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4187 cc++;
4188 switch(*cc)
4189 {
4190 case PT_ANY:
4191 break;
4192
4193 case PT_LAMP:
4194 case PT_GC:
4195 case PT_PC:
4196 case PT_ALNUM:
4197 needstype = TRUE;
4198 break;
4199
4200 case PT_SC:
4201 needsscript = TRUE;
4202 break;
4203
4204 case PT_SPACE:
4205 case PT_PXSPACE:
4206 case PT_WORD:
4207 case PT_PXGRAPH:
4208 case PT_PXPRINT:
4209 case PT_PXPUNCT:
4210 needstype = TRUE;
4211 needschar = TRUE;
4212 break;
4213
4214 case PT_CLIST:
4215 case PT_UCNC:
4216 needschar = TRUE;
4217 break;
4218
4219 default:
4220 SLJIT_ASSERT_STOP();
4221 break;
4222 }
4223 cc += 2;
4224 }
4225 #endif
4226 }
4227
4228 #ifdef SUPPORT_UCP
4229 /* Simple register allocation. TMP1 is preferred if possible. */
4230 if (needstype || needsscript)
4231 {
4232 if (needschar && !charsaved)
4233 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4234 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4235 if (needschar)
4236 {
4237 if (needstype)
4238 {
4239 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4240 typereg = RETURN_ADDR;
4241 }
4242
4243 if (needsscript)
4244 scriptreg = TMP3;
4245 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4246 }
4247 else if (needstype && needsscript)
4248 scriptreg = TMP3;
4249 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4250
4251 if (needsscript)
4252 {
4253 if (scriptreg == TMP1)
4254 {
4255 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4256 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4257 }
4258 else
4259 {
4260 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4261 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4262 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4263 }
4264 }
4265 }
4266 #endif
4267
4268 /* Generating code. */
4269 cc = ccbegin;
4270 charoffset = 0;
4271 numberofcmps = 0;
4272 #ifdef SUPPORT_UCP
4273 typeoffset = 0;
4274 #endif
4275
4276 while (*cc != XCL_END)
4277 {
4278 compares--;
4279 invertcmp = (compares == 0 && list != backtracks);
4280 jump = NULL;
4281
4282 if (*cc == XCL_SINGLE)
4283 {
4284 cc ++;
4285 #ifdef SUPPORT_UTF
4286 if (common->utf)
4287 {
4288 GETCHARINC(c, cc);
4289 }
4290 else
4291 #endif
4292 c = *cc++;
4293
4294 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4295 {
4296 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4297 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4298 numberofcmps++;
4299 }
4300 else if (numberofcmps > 0)
4301 {
4302 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4303 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4304 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4305 numberofcmps = 0;
4306 }
4307 else
4308 {
4309 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4310 numberofcmps = 0;
4311 }
4312 }
4313 else if (*cc == XCL_RANGE)
4314 {
4315 cc ++;
4316 #ifdef SUPPORT_UTF
4317 if (common->utf)
4318 {
4319 GETCHARINC(c, cc);
4320 }
4321 else
4322 #endif
4323 c = *cc++;
4324 SET_CHAR_OFFSET(c);
4325 #ifdef SUPPORT_UTF
4326 if (common->utf)
4327 {
4328 GETCHARINC(c, cc);
4329 }
4330 else
4331 #endif
4332 c = *cc++;
4333 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4334 {
4335 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4336 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4337 numberofcmps++;
4338 }
4339 else if (numberofcmps > 0)
4340 {
4341 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4342 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4343 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4344 numberofcmps = 0;
4345 }
4346 else
4347 {
4348 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4349 numberofcmps = 0;
4350 }
4351 }
4352 #ifdef SUPPORT_UCP
4353 else
4354 {
4355 if (*cc == XCL_NOTPROP)
4356 invertcmp ^= 0x1;
4357 cc++;
4358 switch(*cc)
4359 {
4360 case PT_ANY:
4361 if (list != backtracks)
4362 {
4363 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4364 continue;
4365 }
4366 else if (cc[-1] == XCL_NOTPROP)
4367 continue;
4368 jump = JUMP(SLJIT_JUMP);
4369 break;
4370
4371 case PT_LAMP:
4372 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4373 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4374 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4375 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4376 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4377 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4378 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4379 break;
4380
4381 case PT_GC:
4382 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4383 SET_TYPE_OFFSET(c);
4384 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4385 break;
4386
4387 case PT_PC:
4388 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4389 break;
4390
4391 case PT_SC:
4392 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4393 break;
4394
4395 case PT_SPACE:
4396 case PT_PXSPACE:
4397 SET_CHAR_OFFSET(9);
4398 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4399 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4400
4401 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4402 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4403
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4405 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4406
4407 SET_TYPE_OFFSET(ucp_Zl);
4408 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4409 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4410 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4411 break;
4412
4413 case PT_WORD:
4414 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4415 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4416 /* Fall through. */
4417
4418 case PT_ALNUM:
4419 SET_TYPE_OFFSET(ucp_Ll);
4420 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4421 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4422 SET_TYPE_OFFSET(ucp_Nd);
4423 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4424 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4425 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4426 break;
4427
4428 case PT_CLIST:
4429 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4430
4431 /* At least three characters are required.
4432 Otherwise this case would be handled by the normal code path. */
4433 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4434 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4435
4436 /* Optimizing character pairs, if their difference is power of 2. */
4437 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4438 {
4439 if (charoffset == 0)
4440 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4441 else
4442 {
4443 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4444 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4445 }
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4447 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4448 other_cases += 2;
4449 }
4450 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4451 {
4452 if (charoffset == 0)
4453 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4454 else
4455 {
4456 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4457 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4458 }
4459 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4460 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4461
4462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4463 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4464
4465 other_cases += 3;
4466 }
4467 else
4468 {
4469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4470 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4471 }
4472
4473 while (*other_cases != NOTACHAR)
4474 {
4475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4476 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4477 }
4478 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4479 break;
4480
4481 case PT_UCNC:
4482 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4483 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4484 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4485 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4486 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4487 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4488
4489 SET_CHAR_OFFSET(0xa0);
4490 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4491 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4492 SET_CHAR_OFFSET(0);
4493 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4494 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4495 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4496 break;
4497
4498 case PT_PXGRAPH:
4499 /* C and Z groups are the farthest two groups. */
4500 SET_TYPE_OFFSET(ucp_Ll);
4501 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4502 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4503
4504 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4505
4506 /* In case of ucp_Cf, we overwrite the result. */
4507 SET_CHAR_OFFSET(0x2066);
4508 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4509 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4510
4511 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4512 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4513
4514 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4515 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4516
4517 JUMPHERE(jump);
4518 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4519 break;
4520
4521 case PT_PXPRINT:
4522 /* C and Z groups are the farthest two groups. */
4523 SET_TYPE_OFFSET(ucp_Ll);
4524 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4525 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4526
4527 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4528 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4529
4530 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4531
4532 /* In case of ucp_Cf, we overwrite the result. */
4533 SET_CHAR_OFFSET(0x2066);
4534 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4535 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4536
4537 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4538 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4539
4540 JUMPHERE(jump);
4541 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4542 break;
4543
4544 case PT_PXPUNCT:
4545 SET_TYPE_OFFSET(ucp_Sc);
4546 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4547 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4548
4549 SET_CHAR_OFFSET(0);
4550 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4551 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4552
4553 SET_TYPE_OFFSET(ucp_Pc);
4554 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4555 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4556 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4557 break;
4558 }
4559 cc += 2;
4560 }
4561 #endif
4562
4563 if (jump != NULL)
4564 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4565 }
4566
4567 if (found != NULL)
4568 set_jumps(found, LABEL());
4569 }
4570
4571 #undef SET_TYPE_OFFSET
4572 #undef SET_CHAR_OFFSET
4573
4574 #endif
4575
4576 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4577 {
4578 DEFINE_COMPILER;
4579 int length;
4580 unsigned int c, oc, bit;
4581 compare_context context;
4582 struct sljit_jump *jump[4];
4583 jump_list *end_list;
4584 #ifdef SUPPORT_UTF
4585 struct sljit_label *label;
4586 #ifdef SUPPORT_UCP
4587 pcre_uchar propdata[5];
4588 #endif
4589 #endif
4590
4591 switch(type)
4592 {
4593 case OP_SOD:
4594 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4596 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4597 return cc;
4598
4599 case OP_SOM:
4600 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4601 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4602 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4603 return cc;
4604
4605 case OP_NOT_WORD_BOUNDARY:
4606 case OP_WORD_BOUNDARY:
4607 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4608 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4609 return cc;
4610
4611 case OP_NOT_DIGIT:
4612 case OP_DIGIT:
4613 /* Digits are usually 0-9, so it is worth to optimize them. */
4614 if (common->digits[0] == -2)
4615 get_ctype_ranges(common, ctype_digit, common->digits);
4616 detect_partial_match(common, backtracks);
4617 /* Flip the starting bit in the negative case. */
4618 if (type == OP_NOT_DIGIT)
4619 common->digits[1] ^= 1;
4620 if (!check_ranges(common, common->digits, backtracks, TRUE))
4621 {
4622 read_char8_type(common);
4623 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4624 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4625 }
4626 if (type == OP_NOT_DIGIT)
4627 common->digits[1] ^= 1;
4628 return cc;
4629
4630 case OP_NOT_WHITESPACE:
4631 case OP_WHITESPACE:
4632 detect_partial_match(common, backtracks);
4633 read_char8_type(common);
4634 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4635 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4636 return cc;
4637
4638 case OP_NOT_WORDCHAR:
4639 case OP_WORDCHAR:
4640 detect_partial_match(common, backtracks);
4641 read_char8_type(common);
4642 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4643 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4644 return cc;
4645
4646 case OP_ANY:
4647 detect_partial_match(common, backtracks);
4648 read_char(common);
4649 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4650 {
4651 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4652 end_list = NULL;
4653 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4654 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4655 else
4656 check_str_end(common, &end_list);
4657
4658 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4659 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4660 set_jumps(end_list, LABEL());
4661 JUMPHERE(jump[0]);
4662 }
4663 else
4664 check_newlinechar(common, common->nltype, backtracks, TRUE);
4665 return cc;
4666
4667 case OP_ALLANY:
4668 detect_partial_match(common, backtracks);
4669 #ifdef SUPPORT_UTF
4670 if (common->utf)
4671 {
4672 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4674 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4675 #if defined COMPILE_PCRE8
4676 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4677 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4678 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4679 #elif defined COMPILE_PCRE16
4680 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4681 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4682 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4683 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4684 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4686 #endif
4687 JUMPHERE(jump[0]);
4688 #endif /* COMPILE_PCRE[8|16] */
4689 return cc;
4690 }
4691 #endif
4692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4693 return cc;
4694
4695 case OP_ANYBYTE:
4696 detect_partial_match(common, backtracks);
4697 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4698 return cc;
4699
4700 #ifdef SUPPORT_UTF
4701 #ifdef SUPPORT_UCP
4702 case OP_NOTPROP:
4703 case OP_PROP:
4704 propdata[0] = XCL_HASPROP;
4705 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4706 propdata[2] = cc[0];
4707 propdata[3] = cc[1];
4708 propdata[4] = XCL_END;
4709 compile_xclass_matchingpath(common, propdata, backtracks);
4710 return cc + 2;
4711 #endif
4712 #endif
4713
4714 case OP_ANYNL:
4715 detect_partial_match(common, backtracks);
4716 read_char(common);
4717 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4718 /* We don't need to handle soft partial matching case. */
4719 end_list = NULL;
4720 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4721 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4722 else
4723 check_str_end(common, &end_list);
4724 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4725 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4726 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4727 jump[2] = JUMP(SLJIT_JUMP);
4728 JUMPHERE(jump[0]);
4729 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4730 set_jumps(end_list, LABEL());
4731 JUMPHERE(jump[1]);
4732 JUMPHERE(jump[2]);
4733 return cc;
4734
4735 case OP_NOT_HSPACE:
4736 case OP_HSPACE:
4737 detect_partial_match(common, backtracks);
4738 read_char(common);
4739 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4740 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4741 return cc;
4742
4743 case OP_NOT_VSPACE:
4744 case OP_VSPACE:
4745 detect_partial_match(common, backtracks);
4746 read_char(common);
4747 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4748 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4749 return cc;
4750
4751 #ifdef SUPPORT_UCP
4752 case OP_EXTUNI:
4753 detect_partial_match(common, backtracks);
4754 read_char(common);
4755 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4756 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4757 /* Optimize register allocation: use a real register. */
4758 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4759 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4760
4761 label = LABEL();
4762 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4763 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4764 read_char(common);
4765 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4766 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4767 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4768
4769 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4770 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4771 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4772 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4773 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4774 JUMPTO(SLJIT_C_NOT_ZERO, label);
4775
4776 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4777 JUMPHERE(jump[0]);
4778 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4779
4780 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4781 {
4782 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4783 /* Since we successfully read a char above, partial matching must occure. */
4784 check_partial(common, TRUE);
4785 JUMPHERE(jump[0]);
4786 }
4787 return cc;
4788 #endif
4789
4790 case OP_EODN:
4791 /* Requires rather complex checks. */
4792 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4793 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4794 {
4795 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4796 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4797 if (common->mode == JIT_COMPILE)
4798 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4799 else
4800 {
4801 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4802 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4803 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4804 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4805 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4806 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4807 check_partial(common, TRUE);
4808 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4809 JUMPHERE(jump[1]);
4810 }
4811 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4812 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4813 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4814 }
4815 else if (common->nltype == NLTYPE_FIXED)
4816 {
4817 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4818 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4819 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4820 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4821 }
4822 else
4823 {
4824 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4825 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4826 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4827 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4828 jump[2] = JUMP(SLJIT_C_GREATER);
4829 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4830 /* Equal. */
4831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4832 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4833 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4834
4835 JUMPHERE(jump[1]);
4836 if (common->nltype == NLTYPE_ANYCRLF)
4837 {
4838 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4839 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4840 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4841 }
4842 else
4843 {
4844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4845 read_char(common);
4846 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4847 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4848 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4849 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4850 }
4851 JUMPHERE(jump[2]);
4852 JUMPHERE(jump[3]);
4853 }
4854 JUMPHERE(jump[0]);
4855 check_partial(common, FALSE);
4856 return cc;
4857
4858 case OP_EOD:
4859 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4860 check_partial(common, FALSE);
4861 return cc;
4862
4863 case OP_CIRC:
4864 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4865 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4866 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4867 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4868 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4869 return cc;
4870
4871 case OP_CIRCM:
4872 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4873 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4874 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4875 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4876 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4877 jump[0] = JUMP(SLJIT_JUMP);
4878 JUMPHERE(jump[1]);
4879
4880 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4881 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4882 {
4883 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4884 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4885 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4886 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4887 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4888 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4889 }
4890 else
4891 {
4892 skip_char_back(common);
4893 read_char(common);
4894 check_newlinechar(common, common->nltype, backtracks, FALSE);
4895 }
4896 JUMPHERE(jump[0]);
4897 return cc;
4898
4899 case OP_DOLL:
4900 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4901 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4902 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4903
4904 if (!common->endonly)
4905 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4906 else
4907 {
4908 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4909 check_partial(common, FALSE);
4910 }
4911 return cc;
4912
4913 case OP_DOLLM:
4914 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4915 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4916 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4917 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4918 check_partial(common, FALSE);
4919 jump[0] = JUMP(SLJIT_JUMP);
4920 JUMPHERE(jump[1]);
4921
4922 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4923 {
4924 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4925 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4926 if (common->mode == JIT_COMPILE)
4927 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4928 else
4929 {
4930 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4931 /* STR_PTR = STR_END - IN_UCHARS(1) */
4932 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4933 check_partial(common, TRUE);
4934 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4935 JUMPHERE(jump[1]);
4936 }
4937
4938 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4939 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4940 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4941 }
4942 else
4943 {
4944 peek_char(common);
4945 check_newlinechar(common, common->nltype, backtracks, FALSE);
4946 }
4947 JUMPHERE(jump[0]);
4948 return cc;
4949
4950 case OP_CHAR:
4951 case OP_CHARI:
4952 length = 1;
4953 #ifdef SUPPORT_UTF
4954 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4955 #endif
4956 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4957 {
4958 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4959 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4960
4961 context.length = IN_UCHARS(length);
4962 context.sourcereg = -1;
4963 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4964 context.ucharptr = 0;
4965 #endif
4966 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4967 }
4968 detect_partial_match(common, backtracks);
4969 read_char(common);
4970 #ifdef SUPPORT_UTF
4971 if (common->utf)
4972 {
4973 GETCHAR(c, cc);
4974 }
4975 else
4976 #endif
4977 c = *cc;
4978 if (type == OP_CHAR || !char_has_othercase(common, cc))
4979 {
4980 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4981 return cc + length;
4982 }
4983 oc = char_othercase(common, c);
4984 bit = c ^ oc;
4985 if (is_powerof2(bit))
4986 {
4987 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4988 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4989 return cc + length;
4990 }
4991 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4992 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4993 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4994 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4995 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4996 return cc + length;
4997
4998 case OP_NOT:
4999 case OP_NOTI:
5000 detect_partial_match(common, backtracks);
5001 length = 1;
5002 #ifdef SUPPORT_UTF
5003 if (common->utf)
5004 {
5005 #ifdef COMPILE_PCRE8
5006 c = *cc;
5007 if (c < 128)
5008 {
5009 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5010 if (type == OP_NOT || !char_has_othercase(common, cc))
5011 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5012 else
5013 {
5014 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5015 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5016 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5017 }
5018 /* Skip the variable-length character. */
5019 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5020 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5021 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5022 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5023 JUMPHERE(jump[0]);
5024 return cc + 1;
5025 }
5026 else
5027 #endif /* COMPILE_PCRE8 */
5028 {
5029 GETCHARLEN(c, cc, length);
5030 read_char(common);
5031 }
5032 }
5033 else
5034 #endif /* SUPPORT_UTF */
5035 {
5036 read_char(common);
5037 c = *cc;
5038 }
5039
5040 if (type == OP_NOT || !char_has_othercase(common, cc))
5041 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5042 else
5043 {
5044 oc = char_othercase(common, c);
5045 bit = c ^ oc;
5046 if (is_powerof2(bit))
5047 {
5048 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5049 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5050 }
5051 else
5052 {
5053 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5054 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5055 }
5056 }
5057 return cc + length;
5058
5059 case OP_CLASS:
5060 case OP_NCLASS:
5061 detect_partial_match(common, backtracks);
5062 read_char(common);
5063 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5064 return cc + 32 / sizeof(pcre_uchar);
5065
5066 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5067 jump[0] = NULL;
5068 #ifdef COMPILE_PCRE8
5069 /* This check only affects 8 bit mode. In other modes, we
5070 always need to compare the value with 255. */
5071 if (common->utf)
5072 #endif /* COMPILE_PCRE8 */
5073 {
5074 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5075 if (type == OP_CLASS)
5076 {
5077 add_jump(compiler, backtracks, jump[0]);
5078 jump[0] = NULL;
5079 }
5080 }
5081 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5082 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5083 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5084 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5085 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5086 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5087 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5088 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5089 if (jump[0] != NULL)
5090 JUMPHERE(jump[0]);
5091 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5092 return cc + 32 / sizeof(pcre_uchar);
5093
5094 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5095 case OP_XCLASS:
5096 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5097 return cc + GET(cc, 0) - 1;
5098 #endif
5099
5100 case OP_REVERSE:
5101 length = GET(cc, 0);
5102 if (length == 0)
5103 return cc + LINK_SIZE;
5104 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5105 #ifdef SUPPORT_UTF
5106 if (common->utf)
5107 {
5108 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5109 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5110 label = LABEL();
5111 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5112 skip_char_back(common);
5113 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5114 JUMPTO(SLJIT_C_NOT_ZERO, label);
5115 }
5116 else
5117 #endif
5118 {
5119 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5120 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5121 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5122 }
5123 check_start_used_ptr(common);
5124 return cc + LINK_SIZE;
5125 }
5126 SLJIT_ASSERT_STOP();
5127 return cc;
5128 }
5129
5130 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5131 {
5132 /* This function consumes at least one input character. */
5133 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5134 DEFINE_COMPILER;
5135 pcre_uchar *ccbegin = cc;
5136 compare_context context;
5137 int size;
5138
5139 context.length = 0;
5140 do
5141 {
5142 if (cc >= ccend)
5143 break;
5144
5145 if (*cc == OP_CHAR)
5146 {
5147 size = 1;
5148 #ifdef SUPPORT_UTF
5149 if (common->utf && HAS_EXTRALEN(cc[1]))
5150 size += GET_EXTRALEN(cc[1]);
5151 #endif
5152 }
5153 else if (*cc == OP_CHARI)
5154 {
5155 size = 1;
5156 #ifdef SUPPORT_UTF
5157 if (common->utf)
5158 {
5159 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5160 size = 0;
5161 else if (HAS_EXTRALEN(cc[1]))
5162 size += GET_EXTRALEN(cc[1]);
5163 }
5164 else
5165 #endif
5166 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5167 size = 0;
5168 }
5169 else
5170 size = 0;
5171
5172 cc += 1 + size;
5173 context.length += IN_UCHARS(size);
5174 }
5175 while (size > 0 && context.length <= 128);
5176
5177 cc = ccbegin;
5178 if (context.length > 0)
5179 {
5180 /* We have a fixed-length byte sequence. */
5181 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5182 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5183
5184 context.sourcereg = -1;
5185 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5186 context.ucharptr = 0;
5187 #endif
5188 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5189 return cc;
5190 }
5191
5192 /* A non-fixed length character will be checked if length == 0. */
5193 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5194 }
5195
5196 /* Forward declarations. */
5197 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5198 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5199
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
zero-fills it, links it in front of parent->top and stores `ccstart` in its
cc field.

The macro relies on three locals of the enclosing function: `compiler`,
`parent` and `backtrack` (which receives the new record). If the compiler is
in an error state after the allocation, the enclosing function returns
`error`. Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5212
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
backtrack record of `size` bytes with sljit_alloc_memory(), zero-fills it,
links it in front of parent->top and stores `ccstart` in its cc field.

The macro relies on three locals of the enclosing function: `compiler`,
`parent` and `backtrack` (which receives the new record). If the compiler is
in an error state after the allocation, the enclosing function returns.
Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5225
/* Casts the enclosing function's local `backtrack` pointer to a pointer to
the given backtrack structure type. */
5226 #define BACKTRACK_AS(type) ((type *)backtrack)
5227
5228 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5229 {
5230 /* The OVECTOR offset goes to TMP2. */
5231 DEFINE_COMPILER;
5232 int count = GET2(cc, 1 + IMM2_SIZE);
5233 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5234 unsigned int offset;
5235 jump_list *found = NULL;
5236
5237 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5238
5239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5240
5241 count--;
5242 while (count-- > 0)
5243 {
5244 offset = GET2(slot, 0) << 1;
5245 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5246 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5247 slot += common->name_entry_size;
5248 }
5249
5250 offset = GET2(slot, 0) << 1;
5251 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5252 if (backtracks != NULL && !common->jscript_compat)
5253 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5254
5255 set_jumps(found, LABEL());
5256 }
5257
/* Emits the code that matches one occurrence of a back reference.

For OP_REF / OP_REFI the group number is read from the pattern (GET2(cc, 1))
and the start pointer of the referenced string is loaded from its OVECTOR
slot. For any other opcode the start and end pointers are read through TMP2,
so the caller must have loaded TMP2 with the address of the slot pair before
calling (compile_dnref_search() leaves such an address in TMP2).

Arguments:
  common      compiler state
  cc          the reference item
  backtracks  list that receives the jumps taken when the match fails
  withchecks  when TRUE, a check for start == end (empty referenced string)
              is emitted; for OP_REF / OP_REFI outside jscript_compat mode
              the start pointer is also compared with OVECTOR(1) and equality
              backtracks
  emptyfail   selects what the empty-string check does: TRUE sends it to
              backtracks, FALSE makes it skip the comparison code */
5258 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5259 {
5260 DEFINE_COMPILER;
5261 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5262 int offset = 0;
/* Set only when withchecks is TRUE: the jump taken for an empty reference. */
5263 struct sljit_jump *jump = NULL;
5264 struct sljit_jump *partial;
5265 struct sljit_jump *nopartial;
5266
5267 if (ref)
5268 {
5269 offset = GET2(cc, 1) << 1;
5270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5271 /* OVECTOR(1) contains the "string begin - 1" constant. */
5272 if (withchecks && !common->jscript_compat)
5273 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5274 }
5275 else
5276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5277
5278 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5279 if (common->utf && *cc == OP_REFI)
5280 {
/* The comparison is done by calling do_utf_caselesscmp(). TMP1 holds the
start and TMP2 the end of the referenced string, and SLJIT_SCRATCH_REG2 is
loaded with ARGUMENTS below; presumably these three scratch registers carry
the call arguments, which is why the register mapping is asserted. */
5281 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5282 if (ref)
5283 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5284 else
5285 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5286
5287 if (withchecks)
5288 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5289
5290 /* Needed to save important temporary registers. */
5291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5292 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
/* The current subject position is handed over in jit_arguments.uchar_ptr. */
5293 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5294 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5295 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value handling: 0 always backtracks. 1 backtracks in JIT_COMPILE
mode and goes through check_partial() before backtracking in the other
modes. Any other value becomes the new STR_PTR. */
5296 if (common->mode == JIT_COMPILE)
5297 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5298 else
5299 {
5300 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5301 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5302 check_partial(common, FALSE);
5303 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5304 JUMPHERE(nopartial);
5305 }
5306 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5307 }
5308 else
5309 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5310 {
/* TMP2 = end - start of the referenced string; the flags of this
subtraction feed the zero-length jump below. */
5311 if (ref)
5312 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5313 else
5314 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5315
5316 if (withchecks)
5317 jump = JUMP(SLJIT_C_ZERO);
5318
/* Advance STR_PTR by the reference length first. If that passes STR_END,
JIT_COMPILE mode backtracks at once; the other modes handle the case at the
`partial` target further down. */
5319 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5320 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5321 if (common->mode == JIT_COMPILE)
5322 add_jump(compiler, backtracks, partial);
5323
/* Call the shared caseful or caseless compare routine; a non-zero TMP2
afterwards backtracks. */
5324 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5325 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5326
5327 if (common->mode != JIT_COMPILE)
5328 {
5329 nopartial = JUMP(SLJIT_JUMP);
5330 JUMPHERE(partial);
/* The reference would run past STR_END: shorten the length to what is left
of the subject, compare only that part, then go through check_partial() and
backtrack. */
5331 /* TMP2 -= STR_END - STR_PTR */
5332 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5333 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5334 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5335 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5336 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5337 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5338 JUMPHERE(partial);
5339 check_partial(common, FALSE);
5340 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5341 JUMPHERE(nopartial);
5342 }
5343 }
5344
/* Resolve the empty-reference jump: either a failure or a plain skip of the
comparison code above. */
5345 if (jump != NULL)
5346 {
5347 if (emptyfail)
5348 add_jump(compiler, backtracks, jump);
5349 else
5350 JUMPHERE(jump);
5351 }
5352 }
5353
/* Emits the matching path for a back reference that is followed by a repeat
item (OP_CRSTAR, OP_CRMINSTAR, OP_CRPLUS, OP_CRMINPLUS, OP_CRQUERY,
OP_CRMINQUERY, OP_CRRANGE or OP_CRMINRANGE).

min and max are derived from the repeat item. OP_CRSTAR and OP_CRPLUS set
max to 0, and with max == 0 the generated loop has no upper count check.
Repeat opcodes with the lowest bit set are handled as minimizing repeats.

Arguments:
  common  compiler state
  cc      the reference item
  parent  backtrack record to which the new iterator_backtrack is linked

Returns: pointer to the item following the repeat item, or NULL when the
compiler is in an error state after allocating the backtrack record. */
5354 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5355 {
5356 DEFINE_COMPILER;
5357 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5358 backtrack_common *backtrack;
5359 pcre_uchar type;
5360 int offset = 0;
5361 struct sljit_label *label;
5362 struct sljit_jump *zerolength;
5363 struct sljit_jump *jump = NULL;
5364 pcre_uchar *ccbegin = cc;
5365 int min = 0, max = 0;
5366 BOOL minimize;
5367
5368 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5369
/* Items other than OP_REF / OP_REFI are one IMM2 longer. Skipping it here
lets the repeat item be addressed with the same offsets in both cases. */
5370 if (ref)
5371 offset = GET2(cc, 1) << 1;
5372 else
5373 cc += IMM2_SIZE;
5374 type = cc[1 + IMM2_SIZE];
5375
5376 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5377 minimize = (type & 0x1) != 0;
5378 switch(type)
5379 {
5380 case OP_CRSTAR:
5381 case OP_CRMINSTAR:
5382 min = 0;
5383 max = 0;
5384 cc += 1 + IMM2_SIZE + 1;
5385 break;
5386 case OP_CRPLUS:
5387 case OP_CRMINPLUS:
5388 min = 1;
5389 max = 0;
5390 cc += 1 + IMM2_SIZE + 1;
5391 break;
5392 case OP_CRQUERY:
5393 case OP_CRMINQUERY:
5394 min = 0;
5395 max = 1;
5396 cc += 1 + IMM2_SIZE + 1;
5397 break;
5398 case OP_CRRANGE:
5399 case OP_CRMINRANGE:
5400 min = GET2(cc, 1 + IMM2_SIZE + 1);
5401 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5402 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5403 break;
5404 default:
5405 SLJIT_ASSERT_STOP();
5406 break;
5407 }
5408
/* Non-minimizing repeat: the loop below matches the reference repeatedly
and pushes STR_PTR on the stack after iterations. */
5409 if (!minimize)
5410 {
5411 if (min == 0)
5412 {
5413 allocate_stack(common, 2);
5414 if (ref)
5415 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5418 /* Temporary release of STR_PTR. */
5419 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5420 /* Handles both invalid and empty cases. Since the minimum repeat
5421 is zero, the invalid case is basically the same as an empty case. */
5422 if (ref)
5423 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5424 else
5425 {
5426 compile_dnref_search(common, ccbegin, NULL);
5427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
/* Keep the address left in TMP2 by the search; it is reloaded from
POSSESSIVE1 at the top of the match loop. */
5428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5429 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5430 }
5431 /* Restore if not zero length. */
5432 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5433 }
5434 else
5435 {
5436 allocate_stack(common, 1);
5437 if (ref)
5438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5440 if (ref)
5441 {
/* With min > 0, a start pointer equal to OVECTOR(1) backtracks. */
5442 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5443 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5444 }
5445 else
5446 {
5447 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5448 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5450 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5451 }
5452 }
5453
/* POSSESSIVE0 serves as the iteration counter when a count has to be
compared with min or max. */
5454 if (min > 1 || max > 1)
5455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5456
5457 label = LABEL();
/* compile_ref_matchingpath() reads the slot pair through TMP2 for
non-OP_REF/OP_REFI items, so reload the saved address on every iteration. */
5458 if (!ref)
5459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5460 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5461
5462 if (min > 1 || max > 1)
5463 {
5464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5465 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: loop again without pushing a stack entry. */
5467 if (min > 1)
5468 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5469 if (max > 1)
5470 {
/* Below max: push the current STR_PTR and loop again; at max, stop. */
5471 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5472 allocate_stack(common, 1);
5473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5474 JUMPTO(SLJIT_JUMP, label);
5475 JUMPHERE(jump);
5476 }
5477 }
5478
5479 if (max == 0)
5480 {
5481 /* Includes min > 1 case as well. */
5482 allocate_stack(common, 1);
5483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5484 JUMPTO(SLJIT_JUMP, label);
5485 }
5486
5487 JUMPHERE(zerolength);
5488 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5489
5490 count_match(common);
5491 return cc;
5492 }
5493
/* Minimizing repeat. Stack slots used below: STACK(0) holds 0 or the subject
position, STACK(1) the iteration counter, and, for non-OP_REF/OP_REFI items
only, STACK(2) the address left in TMP2 by compile_dnref_search(). */
5494 allocate_stack(common, ref ? 2 : 3);
5495 if (ref)
5496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5497 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5498 if (type != OP_CRMINSTAR)
5499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5500
5501 if (min == 0)
5502 {
5503 /* Handles both invalid and empty cases. Since the minimum repeat
5504 is zero, the invalid case is basically the same as an empty case. */
5505 if (ref)
5506 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5507 else
5508 {
5509 compile_dnref_search(common, ccbegin, NULL);
5510 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5511 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5512 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5513 }
5514 /* Length is non-zero, we can match real repeats. */
5515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass jumps over the match code emitted below. */
5516 jump = JUMP(SLJIT_JUMP);
5517 }
5518 else
5519 {
5520 if (ref)
5521 {
5522 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5523 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5524 }
5525 else
5526 {
5527 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5528 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5530 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5531 }
5532 }
5533
/* The code from this label matches one more occurrence of the reference. */
5534 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5535 if (max > 0)
5536 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5537
5538 if (!ref)
5539 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5540 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5541 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5542
5543 if (min > 1)
5544 {
/* Count the iteration and repeat immediately until min is reached. */
5545 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5546 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5547 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5548 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5549 }
5550 else if (max > 0)
5551 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5552
5553 if (jump != NULL)
5554 JUMPHERE(jump);
5555 JUMPHERE(zerolength);
5556
5557 count_match(common);
5558 return cc;
5559 }
5560
5561 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5562 {
5563 DEFINE_COMPILER;
5564 backtrack_common *backtrack;
5565 recurse_entry *entry = common->entries;
5566 recurse_entry *prev = NULL;
5567 sljit_sw start = GET(cc, 1);
5568 pcre_uchar *start_cc;
5569 BOOL needs_control_head;
5570
5571 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5572
5573 /* Inlining simple patterns. */
5574 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5575 {
5576 start_cc = common->start + start;
5577 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5578 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5579 return cc + 1 + LINK_SIZE;
5580 }
5581
5582 while (entry != NULL)
5583 {
5584 if (entry->start == start)
5585 break;
5586 prev = entry;
5587 entry = entry->next;
5588 }
5589
5590 if (entry == NULL)
5591 {
5592 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5593 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5594 return NULL;
5595 entry->next = NULL;
5596 entry->entry = NULL;
5597 entry->calls = NULL;
5598 entry->start = start;
5599
5600 if (prev != NULL)
5601 prev->next = entry;
5602 else
5603 common->entries = entry;
5604 }
5605
5606 if (common->has_set_som && common->mark_ptr != 0)
5607 {
5608 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5609 allocate_stack(common, 2);
5610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5612 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5613 }
5614 else if (common->has_set_som || common->mark_ptr != 0)
5615 {
5616 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5617 allocate_stack(common, 1);
5618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5619 }
5620
5621 if (entry->entry == NULL)
5622 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5623 else
5624 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5625 /* Leave if the match is failed. */
5626 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5627 return cc + 1 + LINK_SIZE;
5628 }
5629
5630 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5631 {
5632 const pcre_uchar *begin = arguments->begin;
5633 int *offset_vector = arguments->offsets;
5634 int offset_count = arguments->offset_count;
5635 int i;
5636
5637 if (PUBL(callout) == NULL)
5638 return 0;
5639
5640 callout_block->version = 2;
5641 callout_block->callout_data = arguments->callout_data;
5642
5643 /* Offsets in subject. */
5644 callout_block->subject_length = arguments->end - arguments->begin;
5645 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5646 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5647 #if defined COMPILE_PCRE8
5648 callout_block->subject = (PCRE_SPTR)begin;
5649 #elif defined COMPILE_PCRE16
5650 callout_block->subject = (PCRE_SPTR16)begin;
5651 #elif defined COMPILE_PCRE32
5652 callout_block->subject = (PCRE_SPTR32)begin;
5653 #endif
5654
5655 /* Convert and copy the JIT offset vector to the offset_vector array. */
5656 callout_block->capture_top = 0;
5657 callout_block->offset_vector = offset_vector;
5658 for (i = 2; i < offset_count; i += 2)
5659 {
5660 offset_vector[i] = jit_ovector[i] - begin;
5661 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5662 if (jit_ovector[i] >= begin)
5663 callout_block->capture_top = i;
5664 }
5665
5666 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5667 if (offset_count > 0)
5668 offset_vector[0] = -1;
5669 if (offset_count > 1)
5670 offset_vector[1] = -1;
5671 return (*PUBL(callout))(callout_block);
5672 }
5673
5674 /* Size of the callout block in bytes, rounded up to a multiple of 8. */
5675 #define CALLOUT_ARG_SIZE \
5676 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5677
/* Offset of the callout block field `arg`, measured from the end of a
CALLOUT_ARG_SIZE byte area that starts with the block. The result is
negative; the code below uses it relative to STACK_TOP. */
5678 #define CALLOUT_ARG_OFFSET(arg) \
5679 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5680
/* Emits the matching path for a callout item.

Stack space of CALLOUT_ARG_SIZE bytes is allocated for a callout block, some
of its fields are filled in by the generated code, and do_callout() is
called. The stack space is freed after the call. A positive return value
takes the backtrack path, a negative one jumps to the forced quit path, and
zero continues on the matching path.

Arguments:
  common  compiler state
  cc      the callout item
  parent  backtrack record to which the new record is linked

Returns: cc + 2 + 2 * LINK_SIZE, or NULL when the compiler is in an error
state after allocating the backtrack record. */
5681 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5682 {
5683 DEFINE_COMPILER;
5684 backtrack_common *backtrack;
5685
5686 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5687
5688 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5689
5690 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5691 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5692 SLJIT_ASSERT(common->capture_last_ptr != 0);
5693 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5694 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5695
5696 /* These pointer-sized fields temporarily store internal variables:
offset_vector carries STR_PTR and subject carries OVECTOR(0). do_callout()
reads both before it overwrites the fields. */
5697 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5700
/* The mark field is loaded from jit_arguments.mark_ptr when the pattern uses
marks; otherwise an immediate 0 is stored. */
5701 if (common->mark_ptr != 0)
5702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5703 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5704 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5706
5707 /* Needed to save important temporary registers. */
5708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* SLJIT_SCRATCH_REG2 = address of the callout block; SLJIT_SCRATCH_REG3 =
local base address of OVECTOR_START. TMP1 already holds ARGUMENTS. */
5709 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5710 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5711 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5712 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5713 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5714 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5715
5716 /* Check return value: greater than zero backtracks, less than zero quits. */
5717 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5718 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5719 if (common->forced_quit_label == NULL)
5720 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5721 else
5722 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5723 return cc + 2 + 2 * LINK_SIZE;
5724 }
5725
5726 #undef CALLOUT_ARG_SIZE
5727 #undef CALLOUT_ARG_OFFSET
5728
5729 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5730 {
5731 DEFINE_COMPILER;
5732 int framesize;
5733 int extrasize;
5734 BOOL needs_control_head;
5735 int private_data_ptr;
5736 backtrack_common altbacktrack;
5737 pcre_uchar *ccbegin;
5738 pcre_uchar opcode;
5739 pcre_uchar bra = OP_BRA;
5740 jump_list *tmp = NULL;
5741 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5742 jump_list **found;
5743 /* Saving previous accept variables. */
5744 BOOL save_local_exit = common->local_exit;
5745 BOOL save_positive_assert = common->positive_assert;
5746 then_trap_backtrack *save_then_trap = common->then_trap;
5747 struct sljit_label *save_quit_label = common->quit_label;
5748 struct sljit_label *save_accept_label = common->accept_label;
5749 jump_list *save_quit = common->quit;
5750 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5751 jump_list *save_accept = common->accept;
5752 struct sljit_jump *jump;
5753 struct sljit_jump *brajump = NULL;
5754
5755 /* Assert captures then. */
5756 common->then_trap = NULL;
5757
5758 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5759 {
5760 SLJIT_ASSERT(!conditional);
5761 bra = *cc;
5762 cc++;
5763 }
5764 private_data_ptr = PRIVATE_DATA(cc);
5765 SLJIT_ASSERT(private_data_ptr != 0);
5766 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5767 backtrack->framesize = framesize;
5768 backtrack->private_data_ptr = private_data_ptr;
5769 opcode = *cc;
5770 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5771 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5772 ccbegin = cc;
5773 cc += GET(cc, 1);
5774
5775 if (bra == OP_BRAMINZERO)
5776 {
5777 /* This is a braminzero backtrack path. */
5778 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5779 free_stack(common, 1);
5780 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5781 }
5782
5783 if (framesize < 0)
5784 {
5785 extrasize = needs_control_head ? 2 : 1;
5786 if (framesize == no_frame)
5787 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5788 allocate_stack(common, extrasize);
5789 if (needs_control_head)
5790 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5791 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5792 if (needs_control_head)
5793 {
5794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5795 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5796 }
5797 }
5798 else
5799 {
5800 extrasize = needs_control_head ? 3 : 2;
5801 allocate_stack(common, framesize + extrasize);
5802 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5803 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5805 if (needs_control_head)
5806 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5807 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5808 if (needs_control_head)
5809 {
5810 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5811 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5812 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5813 }
5814 else
5815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5816 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5817 }
5818
5819 memset(&altbacktrack, 0, sizeof(backtrack_common));
5820 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5821 {
5822 /* Negative assert is stronger than positive assert. */
5823 common->local_exit = TRUE;
5824 common->quit_label = NULL;
5825 common->quit = NULL;
5826 common->positive_assert = FALSE;
5827 }
5828 else
5829 common->positive_assert = TRUE;
5830 common->positive_assert_quit = NULL;
5831
5832 while (1)
5833 {
5834 common->accept_label = NULL;
5835 common->accept = NULL;
5836 altbacktrack.top = NULL;
5837 altbacktrack.topbacktracks = NULL;
5838
5839 if (*ccbegin == OP_ALT)
5840 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5841
5842 altbacktrack.cc = ccbegin;
5843 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5844 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5845 {
5846 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5847 {
5848 common->local_exit = save_local_exit;
5849 common->quit_label = save_quit_label;
5850 common->quit = save_quit;
5851 }
5852 common->positive_assert = save_positive_assert;
5853 common->then_trap = save_then_trap;
5854 common->accept_label = save_accept_label;
5855 common->positive_assert_quit = save_positive_assert_quit;
5856 common->accept = save_accept;
5857 return NULL;
5858 }
5859 common->accept_label = LABEL();
5860 if (common->accept != NULL)
5861 set_jumps(common->accept, common->accept_label);
5862
5863 /* Reset stack. */
5864 if (framesize < 0)
5865 {
5866 if (framesize == no_frame)
5867 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5868 else
5869 free_stack(common, extrasize);
5870 if (needs_control_head)
5871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5872 }
5873 else
5874 {
5875 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5876 {
5877 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5878 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5879 if (needs_control_head)
5880 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5881 }
5882 else
5883 {
5884 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5885 if (needs_control_head)
5886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5887 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5888 }
5889 }
5890
5891 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5892 {
5893 /* We know that STR_PTR was stored on the top of the stack. */
5894 if (conditional)
5895 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5896 else if (bra == OP_BRAZERO)
5897 {
5898 if (framesize < 0)
5899 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5900 else
5901 {
5902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5903 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5904 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5905 }
5906 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5908 }
5909 else if (framesize >= 0)
5910 {
5911 /* For OP_BRA and OP_BRAMINZERO. */
5912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5913 }
5914 }
5915 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5916
5917 compile_backtrackingpath(common, altbacktrack.top);
5918 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5919 {
5920 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5921 {
5922 common->local_exit = save_local_exit;
5923 common->quit_label = save_quit_label;
5924 common->quit = save_quit;
5925 }
5926 common->positive_assert = save_positive_assert;
5927 common->then_trap = save_then_trap;
5928 common->accept_label = save_accept_label;
5929 common->positive_assert_quit = save_positive_assert_quit;
5930 common->accept = save_accept;
5931 return NULL;
5932 }
5933 set_jumps(altbacktrack.topbacktracks, LABEL());
5934
5935 if (*cc != OP_ALT)
5936 break;
5937
5938 ccbegin = cc;
5939 cc += GET(cc, 1);
5940 }
5941
5942 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5943 {
5944 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5945 /* Makes the check less complicated below. */
5946 common->positive_assert_quit = common->quit;
5947 }
5948
5949 /* None of them matched. */
5950 if (common->positive_assert_quit != NULL)
5951 {
5952 jump = JUMP(SLJIT_JUMP);
5953 set_jumps(common->positive_assert_quit, LABEL());
5954 SLJIT_ASSERT(framesize != no_stack);
5955 if (framesize < 0)
5956 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5957 else
5958 {
5959 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5960 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5961 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5962 }
5963 JUMPHERE(jump);
5964 }
5965
5966 if (needs_control_head)
5967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5968
5969 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5970 {
5971 /* Assert is failed. */
5972 if (conditional || bra == OP_BRAZERO)
5973 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5974
5975 if (framesize < 0)
5976 {
5977 /* The topmost item should be 0. */
5978 if (bra == OP_BRAZERO)
5979 {
5980 if (extrasize == 2)
5981 free_stack(common, 1);
5982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5983 }
5984 else
5985 free_stack(common, extrasize);
5986 }
5987 else
5988 {
5989 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5990 /* The topmost item should be 0. */
5991 if (bra == OP_BRAZERO)
5992 {
5993 free_stack(common, framesize + extrasize - 1);
5994 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5995 }
5996 else
5997 free_stack(common, framesize + extrasize);
5998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5999 }
6000 jump = JUMP(SLJIT_JUMP);
6001 if (bra != OP_BRAZERO)
6002 add_jump(compiler, target, jump);
6003
6004 /* Assert is successful. */
6005 set_jumps(tmp, LABEL());
6006 if (framesize < 0)
6007 {
6008 /* We know that STR_PTR was stored on the top of the stack. */
6009 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6010 /* Keep the STR_PTR on the top of the stack. */
6011 if (bra == OP_BRAZERO)
6012 {
6013 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6014 if (extrasize == 2)
6015 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6016 }
6017 else if (bra == OP_BRAMINZERO)
6018 {
6019 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6020 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6021 }
6022 }
6023 else
6024 {
6025 if (bra == OP_BRA)
6026 {
6027 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6028 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6029 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6030 }
6031 else
6032 {
6033 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6034 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6035 if (extrasize == 2)
6036 {
6037 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6038 if (bra == OP_BRAMINZERO)
6039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6040 }
6041 else
6042 {
6043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6044 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6045 }
6046 }
6047 }
6048
6049 if (bra == OP_BRAZERO)
6050 {
6051 backtrack->matchingpath = LABEL();
6052 SET_LABEL(jump, backtrack->matchingpath);
6053 }
6054 else if (bra == OP_BRAMINZERO)
6055 {
6056 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6057 JUMPHERE(brajump);
6058 if (framesize >= 0)
6059 {
6060 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6061 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6062 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6063 }
6064 set_jumps(backtrack->common.topbacktracks, LABEL());
6065 }
6066 }
6067 else
6068 {
6069 /* AssertNot is successful. */
6070 if (framesize < 0)
6071 {
6072 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6073 if (bra != OP_BRA)
6074 {
6075 if (extrasize == 2)
6076 free_stack(common, 1);
6077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6078 }
6079 else
6080 free_stack(common, extrasize);
6081 }
6082 else
6083 {
6084 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6085 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6086 /* The topmost item should be 0. */
6087 if (bra != OP_BRA)
6088 {
6089 free_stack(common, framesize + extrasize - 1);
6090 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6091 }
6092 else
6093 free_stack(common, framesize + extrasize);
6094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6095 }
6096
6097 if (bra == OP_BRAZERO)
6098 backtrack->matchingpath = LABEL();
6099 else if (bra == OP_BRAMINZERO)
6100 {
6101 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6102 JUMPHERE(brajump);
6103 }
6104
6105 if (bra != OP_BRA)
6106 {
6107 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6108 set_jumps(backtrack->common.topbacktracks, LABEL());
6109 backtrack->common.topbacktracks = NULL;
6110 }
6111 }
6112
6113 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6114 {
6115 common->local_exit = save_local_exit;
6116 common->quit_label = save_quit_label;
6117 common->quit = save_quit;
6118 }
6119 common->positive_assert = save_positive_assert;
6120 common->then_trap = save_then_trap;
6121 common->accept_label = save_accept_label;
6122 common->positive_assert_quit = save_positive_assert_quit;
6123 common->accept = save_accept;
6124 return cc + 1 + LINK_SIZE;
6125 }
6126
/* Called by compile_bracket_matchingpath() for OP_ONCE brackets, right after
the code of the bracket body has been generated. It moves STACK_TOP back and
reloads values from the stack that are needed once the bracket has matched.

common              compiler state; the local at common->control_head_ptr
                    is written when needs_control_head is TRUE
ket                 closing opcode of the bracket (OP_KET, OP_KETRMAX or
                    OP_KETRMIN)
framesize           frame size computed by get_framesize(); negative when no
                    frame is kept, no_frame being one of the negative values
private_data_ptr    offset of the local slot that belongs to this bracket
has_alternatives    set by the caller when the bracket contains OP_ALT
needs_control_head  when TRUE, a value is loaded into TMP1 and stored into
                    the local at common->control_head_ptr as the last step

For OP_KETRMAX a value is also loaded into TMP2; according to the original
comments it is consumed by the OP_KETRMAX handling that follows the call.

NOTE(review): OP1(), OP2() and free_stack() are defined outside this chunk;
presumably each call appends instructions in call order, so the order of the
statements below should be treated as significant. */
6127 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6128 {
6129 DEFINE_COMPILER;
6130 int stacksize;
6131
/* Negative framesize: no frame was stored, only a few single entries. */
6132 if (framesize < 0)
6133 {
6134 if (framesize == no_frame)
/* STACK_TOP is reloaded from the local slot of the bracket. */
6135 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6136 else
6137 {
/* One entry when needs_control_head is set, plus one more when the bracket
is repeated (ket != OP_KET) or has alternatives. */
6138 stacksize = needs_control_head ? 1 : 0;
6139 if (ket != OP_KET || has_alternatives)
6140 stacksize++;
6141 free_stack(common, stacksize);
6142 }
6143
/* TMP1 receives the value that is written to control_head_ptr at the end of
this function. It is read one word away from STACK_TOP when the extra entry
tested above is present, otherwise directly at STACK_TOP. */
6144 if (needs_control_head)
6145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6146
6147 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6148 if (ket == OP_KETRMAX)
6149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6150 else if (ket == OP_KETRMIN)
6151 {
6152 /* Move the STR_PTR stored on the stack to the private_data_ptr local. */
6153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6154 }
6155 }
6156 else
6157 {
/* A frame was stored: STACK_TOP is computed from the value kept in the
private_data_ptr local plus framesize and one or two extra entries. */
6158 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6159 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6160 if (needs_control_head)
6161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6162
6163 if (ket == OP_KETRMAX)
6164 {
6165 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6166 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6167 }
6168 }
/* Write back the value loaded into TMP1 by either branch above. */
6169 if (needs_control_head)
6170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6171 }
6172
/* Generates the stores that save the previous state of a capturing bracket
on the stack and record the new capture. Returns the index of the next unused
stack slot, i.e. stacksize advanced by the number of slots written here.

common            compiler state (capture_last_ptr and optimized_cbracket
                  are read)
stacksize         index of the first stack slot to fill, as used with STACK()
offset            twice the capture number; offset >> 1 indexes
                  optimized_cbracket and is the value stored into the local
                  at capture_last_ptr
private_data_ptr  offset of the local whose value becomes OVECTOR(offset)

No stack space is allocated here. The caller visible in
compile_bracket_matchingpath() counts the same one and two slots and calls
allocate_stack() before calling this function.

NOTE(review): OP1() is defined outside this chunk; presumably each call
appends one instruction, so the load/store order below (which reuses TMP1)
should be treated as significant. */
6173 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6174 {
6175 DEFINE_COMPILER;
6176
6177 if (common->capture_last_ptr != 0)
6178 {
/* Save the old value of the capture_last local in one stack slot and replace
it with the number of this capture. */
6179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6182 stacksize++;
6183 }
6184 if (common->optimized_cbracket[offset >> 1] == 0)
6185 {
/* Not an optimized bracket: the old OVECTOR(offset) and OVECTOR(offset + 1)
go into two stack slots. Afterwards OVECTOR(offset) takes the value of the
private_data_ptr local and OVECTOR(offset + 1) takes STR_PTR. TMP1 is reused
for the private value only after its first content has been stored. */
6186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6187 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6193 stacksize += 2;
6194 }
6195 return stacksize;
6196 }
6197
6198 /*
6199 Handling bracketed expressions is probably the most complex part.
6200
6201 Stack layout naming characters:
6202 S - Push the current STR_PTR
6203 0 - Push a 0 (NULL)
6204 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6205 before the next alternative. Not pushed if there are no alternatives.
6206 M - Any values pushed by the current alternative. Can be empty, or anything.
6207 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6208 L - Push the previous local (pointed to by localptr) to the stack
6209 () - optional values stored on the stack
6210 ()* - optional, can be stored multiple times
6211
6212 The following list shows the regular expression templates, their PCRE byte codes
6213 and stack layout supported by pcre-sljit.
6214
6215 (?:) OP_BRA | OP_KET A M
6216 () OP_CBRA | OP_KET C M
6217 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6218 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6219 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6220 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6221 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6222 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6223 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6224 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6225 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6226 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6227 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6228 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6229 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6230 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6231 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6232 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6233 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6234 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6235 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6236 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6237
6238
6239 Stack layout naming characters:
6240 A - Push the alternative index (starting from 0) on the stack.
6241 Not pushed if there are no alternatives.
6242 M - Any values pushed by the current alternative. Can be empty, or anything.
6243
6244 The next list shows the possible content of a bracket:
6245 (|) OP_*BRA | OP_ALT ... M A
6246 (?()|) OP_*COND | OP_ALT M A
6247 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6248 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6249 Or nothing, if trace is unnecessary
6250 */
6251
6252 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6253 {
6254 DEFINE_COMPILER;
6255 backtrack_common *backtrack;
6256 pcre_uchar opcode;
6257 int private_data_ptr = 0;
6258 int offset = 0;
6259 int i, stacksize;
6260 int repeat_ptr = 0, repeat_length = 0;
6261 int repeat_type = 0, repeat_count = 0;
6262 pcre_uchar *ccbegin;
6263 pcre_uchar *matchingpath;
6264 pcre_uchar *slot;
6265 pcre_uchar bra = OP_BRA;
6266 pcre_uchar ket;
6267 assert_backtrack *assert;
6268 BOOL has_alternatives;
6269 BOOL needs_control_head = FALSE;
6270 struct sljit_jump *jump;
6271 struct sljit_jump *skip;
6272 struct sljit_label *rmax_label = NULL;
6273 struct sljit_jump *braminzero = NULL;
6274
6275 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6276
6277 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6278 {
6279 bra = *cc;
6280 cc++;
6281 opcode = *cc;
6282 }
6283
6284 opcode = *cc;
6285 ccbegin = cc;
6286 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6287 ket = *matchingpath;
6288 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6289 {
6290 repeat_ptr = PRIVATE_DATA(matchingpath);
6291 repeat_length = PRIVATE_DATA(matchingpath + 1);
6292 repeat_type = PRIVATE_DATA(matchingpath + 2);
6293 repeat_count = PRIVATE_DATA(matchingpath + 3);
6294 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6295 if (repeat_type == OP_UPTO)
6296 ket = OP_KETRMAX;
6297 if (repeat_type == OP_MINUPTO)
6298 ket = OP_KETRMIN;
6299 }
6300
6301 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6302 {
6303 /* Drop this bracket_backtrack. */
6304 parent->top = backtrack->prev;
6305 return matchingpath + 1 + LINK_SIZE + repeat_length;
6306 }
6307
6308 matchingpath = ccbegin + 1 + LINK_SIZE;
6309 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6310 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6311 cc += GET(cc, 1);
6312
6313 has_alternatives = *cc == OP_ALT;
6314 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6315 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6316
6317 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6318 opcode = OP_SCOND;
6319 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6320 opcode = OP_ONCE;
6321
6322 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6323 {
6324 /* Capturing brackets has a pre-allocated space. */
6325 offset = GET2(ccbegin, 1 + LINK_SIZE);
6326 if (common->optimized_cbracket[offset] == 0)
6327 {
6328 private_data_ptr = OVECTOR_PRIV(offset);
6329 offset <<= 1;
6330 }
6331 else
6332 {
6333 offset <<= 1;
6334 private_data_ptr = OVECTOR(offset);
6335 }
6336 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6337 matchingpath += IMM2_SIZE;
6338 }
6339 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6340 {
6341 /* Other brackets simply allocate the next entry. */
6342 private_data_ptr = PRIVATE_DATA(ccbegin);
6343 SLJIT_ASSERT(private_data_ptr != 0);
6344 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6345 if (opcode == OP_ONCE)
6346 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6347 }
6348
6349 /* Instructions before the first alternative. */
6350 stacksize = 0;
6351 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6352 stacksize++;
6353 if (bra == OP_BRAZERO)
6354 stacksize++;
6355
6356 if (stacksize > 0)
6357 allocate_stack(common, stacksize);
6358
6359 stacksize = 0;
6360 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6361 {
6362 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6363 stacksize++;
6364 }
6365
6366 if (bra == OP_BRAZERO)
6367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6368
6369 if (bra == OP_BRAMINZERO)
6370 {
6371 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6372 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6373 if (ket != OP_KETRMIN)
6374 {
6375 free_stack(common, 1);
6376 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6377 }
6378 else
6379 {
6380 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6381 {
6382 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6383 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6384 /* Nothing stored during the first run. */
6385 skip = JUMP(SLJIT_JUMP);
6386 JUMPHERE(jump);
6387 /* Checking zero-length iteration. */
6388 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6389 {
6390 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6391 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6392 }
6393 else
6394 {
6395 /* Except when the whole stack frame must be saved. */
6396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6397 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6398 }
6399 JUMPHERE(skip);
6400 }
6401 else
6402 {
6403 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6404 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6405 JUMPHERE(jump);
6406 }
6407 }
6408 }
6409
6410 if (repeat_type != 0)
6411 {
6412 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6413 if (repeat_type == OP_EXACT)
6414 rmax_label = LABEL();
6415 }
6416
6417 if (ket == OP_KETRMIN)
6418 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6419
6420 if (ket == OP_KETRMAX)
6421 {
6422 rmax_label = LABEL();
6423 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6424 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6425 }
6426
6427 /* Handling capturing brackets and alternatives. */
6428 if (opcode == OP_ONCE)
6429 {
6430 stacksize = 0;
6431 if (needs_control_head)
6432 {
6433 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6434 stacksize++;
6435 }
6436
6437 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6438 {
6439 /* Neither capturing brackets nor recursions are found in the block. */
6440 if (ket == OP_KETRMIN)
6441 {
6442 stacksize += 2;
6443 if (!needs_control_head)
6444 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6445 }
6446 else
6447 {
6448 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6450 if (ket == OP_KETRMAX || has_alternatives)
6451 stacksize++;
6452 }
6453
6454 if (stacksize > 0)
6455 allocate_stack(common, stacksize);
6456
6457 stacksize = 0;
6458 if (needs_control_head)
6459 {
6460 stacksize++;
6461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6462 }
6463
6464 if (ket == OP_KETRMIN)
6465 {
6466 if (needs_control_head)
6467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6469 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6470 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6472 }
6473 else if (ket == OP_KETRMAX || has_alternatives)
6474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6475 }
6476 else
6477 {
6478 if (ket != OP_KET || has_alternatives)
6479 stacksize++;
6480
6481 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6482 allocate_stack(common, stacksize);
6483
6484 if (needs_control_head)
6485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6486
6487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6488 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6489
6490 stacksize = needs_control_head ? 1 : 0;
6491 if (ket != OP_KET || has_alternatives)
6492 {
6493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6495 stacksize++;
6496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6497 }
6498 else
6499 {
6500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6502 }
6503 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6504 }
6505 }
6506 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6507 {
6508 /* Saving the previous values. */
6509 if (common->optimized_cbracket[offset >> 1] != 0)
6510 {
6511 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6512 allocate_stack(common, 2);
6513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6514 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6518 }
6519 else
6520 {
6521 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6522 allocate_stack(common, 1);
6523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6525 }
6526 }
6527 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6528 {
6529 /* Saving the previous value. */
6530 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6531 allocate_stack(common, 1);
6532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6534 }
6535 else if (has_alternatives)
6536 {
6537 /* Pushing the starting string pointer. */
6538 allocate_stack(common, 1);
6539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6540 }
6541
6542 /* Generating code for the first alternative. */
6543 if (opcode == OP_COND || opcode == OP_SCOND)
6544 {
6545 if (*matchingpath == OP_CREF)
6546 {
6547 SLJIT_ASSERT(has_alternatives);
6548 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6549 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6550 matchingpath += 1 + IMM2_SIZE;
6551 }
6552 else if (*matchingpath == OP_DNCREF)
6553 {
6554 SLJIT_ASSERT(has_alternatives);
6555
6556 i = GET2(matchingpath, 1 + IMM2_SIZE);
6557 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6558 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6560 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6561 slot += common->name_entry_size;
6562 i--;
6563 while (i-- > 0)
6564 {
6565 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6566 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
6567 slot += common->name_entry_size;
6568 }
6569 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6570 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
6571 matchingpath += 1 + 2 * IMM2_SIZE;
6572 }
6573 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
6574 {
6575 /* Never has other case. */
6576 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6577 SLJIT_ASSERT(!has_alternatives);
6578
6579 if (*matchingpath == OP_RREF)
6580 {
6581 stacksize = GET2(matchingpath, 1);
6582 if (common->currententry == NULL)
6583 stacksize = 0;
6584 else if (stacksize == RREF_ANY)
6585 stacksize = 1;
6586 else if (common->currententry->start == 0)
6587 stacksize = stacksize == 0;
6588 else
6589 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6590
6591 if (stacksize != 0)
6592 matchingpath += 1 + IMM2_SIZE;
6593 }
6594 else
6595 {
6596 if (common->currententry == NULL || common->currententry->start == 0)
6597 stacksize = 0;
6598 else
6599 {
6600 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
6601 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6602 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6603 while (stacksize > 0)
6604 {
6605 if ((int)GET2(slot, 0) == i)
6606 break;
6607 slot += common->name_entry_size;
6608 stacksize--;
6609 }
6610 }
6611
6612 if (stacksize != 0)
6613 matchingpath += 1 + 2 * IMM2_SIZE;
6614 }
6615
6616 /* The stacksize == 0 is a common "else" case. */
6617 if (stacksize == 0)
6618 {
6619 if (*cc == OP_ALT)
6620 {
6621 matchingpath = cc + 1 + LINK_SIZE;
6622 cc += GET(cc, 1);
6623 }
6624 else
6625 matchingpath = cc;
6626 }
6627 }
6628 else
6629 {
6630 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6631 /* Similar code as PUSH_BACKTRACK macro. */
6632 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6633 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6634 return NULL;
6635 memset(assert, 0, sizeof(assert_backtrack));
6636 assert->common.cc = matchingpath;
6637 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6638 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6639 }
6640 }
6641
6642 compile_matchingpath(common, matchingpath, cc, backtrack);
6643 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6644 return NULL;
6645
6646 if (opcode == OP_ONCE)
6647 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6648
6649 stacksize = 0;
6650 if (repeat_type == OP_MINUPTO)
6651 {
6652 /* We need to preserve the counter. TMP2 will be used below. */
6653 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6654 stacksize++;
6655 }
6656 if (ket != OP_KET || bra != OP_BRA)
6657 stacksize++;
6658 if (offset != 0)
6659 {
6660 if (common->capture_last_ptr != 0)
6661 stacksize++;
6662 if (common->optimized_cbracket[offset >> 1] == 0)
6663 stacksize += 2;
6664 }
6665 if (has_alternatives && opcode != OP_ONCE)
6666 stacksize++;
6667
6668 if (stacksize > 0)
6669 allocate_stack(common, stacksize);
6670
6671 stacksize = 0;
6672 if (repeat_type == OP_MINUPTO)
6673 {
6674 /* TMP2 was set above. */
6675 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6676 stacksize++;
6677 }
6678
6679 if (ket != OP_KET || bra != OP_BRA)
6680 {
6681 if (ket != OP_KET)
6682 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6683 else
6684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6685 stacksize++;
6686 }
6687
6688 if (offset != 0)
6689 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6690
6691 if (has_alternatives)
6692 {
6693 if (opcode != OP_ONCE)
6694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6695 if (ket != OP_KETRMAX)
6696 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6697 }
6698
6699 /* Must be after the matchingpath label. */
6700 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6701 {
6702 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6703 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6704 }
6705
6706 if (ket == OP_KETRMAX)
6707 {
6708 if (repeat_type != 0)
6709 {
6710 if (has_alternatives)
6711 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6712 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6713 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6714 /* Drop STR_PTR for greedy plus quantifier. */
6715 if (opcode != OP_ONCE)
6716 free_stack(common, 1);
6717 }
6718 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6719 {
6720 if (has_alternatives)
6721 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6722 /* Checking zero-length iteration. */
6723 if (opcode != OP_ONCE)
6724 {
6725 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6726 /* Drop STR_PTR for greedy plus quantifier. */