/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1572 - (show annotations)
Thu Jun 25 04:02:53 2015 UTC (4 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 331083 byte(s)
Minor JIT fixes.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* Allocation hooks for the included sljit sources: both forward to
PUBL(malloc) / PUBL(free) and ignore the allocator_data argument. */
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
/* sljit build configuration, set before sljitLir.c is included. Verbose
output and debug checks are switched off. */
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited in size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for the stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for a jit_function call (jit_function takes a pointer to
one of these). Members are grouped with all pointers first, followed by
the integer and byte-sized members. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* Byte-sized flags. NOTE(review): presumably mirror the PCRE_NOTBOL,
PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART options - confirm
against the code that fills this structure. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds, for each of the JIT_NUMBER_OF_COMPILE_MODES compile modes, a
code pointer, a read-only data head and a size, plus data shared by all
modes (callback, user data, top bracket and match limit). */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list pairing a jump (start) with a label (quit).
NOTE(review): the users are outside this chunk; presumably each node
describes one out-of-line stub - confirm. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list pairing a label with a pointer to an
sljit_uw slot. NOTE(review): presumably the slot receives the address of
the label once it is known - confirm against the users. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably used where a frame
size is expected (the framesize members below are documented as "less
than 0" when no frame is needed) - confirm against get_framesize. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Tags distinguishing two kinds of control entries. NOTE(review): the
users are outside this chunk; presumably these tag entries of the control
verb chain (see control_head_ptr in compiler_common) - confirm. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer to a function using the SLJIT_CALL calling convention that takes
a jit_arguments block and returns an int. NOTE(review): presumably the
type of the generated code's entry point - confirm at the call site. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
/* Common header of every backtrack descriptor. It links the descriptor
into two chains (prev for concatenation, top for nested operators), each
with its own list of pending backtrack jumps, and remembers the opcode
the descriptor belongs to. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack descriptor for assertions. Starts with backtrack_common, as
required for all descendants of that structure. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
/* Jumps collected for a failed condition. */
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack descriptor for bracket groups. Starts with backtrack_common.
Only one member of the union u is meaningful at a time; which one
depends on the bracket opcode, as noted on each member. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack descriptor with stack bookkeeping. Starts with
backtrack_common. NOTE(review): by its name this serves the possessive
bracket opcodes (OP_BRAPOS and friends) - confirm against its users. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack descriptor that adds a single matching path label to
backtrack_common. NOTE(review): by its name this serves OP_BRAMINZERO -
confirm against its users. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack descriptor for iterators: backtrack_common plus the label at
which the next iteration starts. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Node of a singly linked list with one entry per recursion target. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls made before the function has been created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack descriptor for recursion: backtrack_common plus a flag.
NOTE(review): presumably inlined_pattern is TRUE when the recursed
pattern was compiled in place rather than called - confirm. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* Pseudo opcode value for then traps; defined as OP_TABLE_LENGTH.
NOTE(review): presumably that is one past the last real opcode, so it
cannot clash with one - confirm in pcre_internal.h. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack descriptor for a then trap. Starts with backtrack_common. */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
/* NOTE(review): the users of this limit are outside this chunk; presumably
the maximum number of range entries handled by a range optimization -
confirm. */
316 #define MAX_RANGE_SIZE 4
317
/* Shared state of one JIT compilation. The functions in this file receive
it as "common". The int members whose names end in _ptr hold offsets
rather than C pointers: check_opcode_types assigns several of them from
ovector_start and then advances ovector_start by sizeof(sljit_sw). */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. check_opcode_types sets
this to 1 as a "needed" marker when it sees OP_THEN, OP_THEN_ARG or
OP_SKIP_ARG. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
361 /* TRUE, when minlength is greater than 0.
NOTE(review): this description contradicts the member name, and
check_opcode_types sets the flag to TRUE when it finds OP_SET_SOM.
It presumably means "the match may be empty" (minlength == 0) -
confirm where the flag is initialised. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
/* The members below exist only in builds with UTF and/or UCP support. */
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
434 /* For byte_sequence_compare. */
435
/* State used by byte_sequence_compare. When SLJIT_UNALIGNED is enabled it
also carries two unions, c and oc, of identical layout: each overlays an
int and a short with an array of code units whose element type and count
depend on the PCRE width being compiled (4 x 8 bit, 2 x 16 bit or
1 x 32 bit). NOTE(review): oc presumably holds the other-case variant of
c - confirm in byte_sequence_compare. */
436 typedef struct compare_context {
437 int length;
438 int sourcereg;
439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
440 int ucharptr;
441 union {
442 sljit_si asint;
443 sljit_uh asushort;
444 #if defined COMPILE_PCRE8
445 sljit_ub asbyte;
446 sljit_ub asuchars[4];
447 #elif defined COMPILE_PCRE16
448 sljit_uh asuchars[2];
449 #elif defined COMPILE_PCRE32
450 sljit_ui asuchars[1];
451 #endif
452 } c;
453 union {
454 sljit_si asint;
455 sljit_uh asushort;
456 #if defined COMPILE_PCRE8
457 sljit_ub asbyte;
458 sljit_ub asuchars[4];
459 #elif defined COMPILE_PCRE16
460 sljit_uh asuchars[2];
461 #elif defined COMPILE_PCRE32
462 sljit_ui asuchars[1];
463 #endif
464 } oc;
465 #endif
466 } compare_context;
467
468 /* Undefine sljit macros. CMP is redefined further down as a shortcut
for sljit_emit_cmp. */
469 #undef CMP
470
471 /* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
473
/* Fixed register assignment: symbolic names for the sljit registers
(SLJIT_Rn / SLJIT_Sn) used throughout the generated code. */
474 #define TMP1 SLJIT_R0
475 #define TMP2 SLJIT_R2
476 #define TMP3 SLJIT_R3
477 #define STR_PTR SLJIT_S0
478 #define STR_END SLJIT_S1
479 #define STACK_TOP SLJIT_R1
480 #define STACK_LIMIT SLJIT_S2
481 #define COUNT_MATCH SLJIT_S3
482 #define ARGUMENTS SLJIT_S4
483 #define RETURN_ADDR SLJIT_R4
484
485 /* Local space layout. The first five words are fixed slots; each macro
below expands to a byte offset (word index times sizeof(sljit_sw)). */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0 (0 * sizeof(sljit_sw))
488 #define LOCALS1 (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet been reached. */
498 #define OVECTOR_START (common->ovector_start)
499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc, indexed by the
distance of cc from the first byte code. All four macros above and
this one require a variable named "common" in scope. */
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
/* Select the sljit move opcodes that match the code unit width being
compiled: unsigned byte for the 8-bit library, unsigned half word for
16-bit and unsigned int for 32-bit. The build fails if none of the
COMPILE_PCRE* macros is defined. */
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
516 /* Shortcuts. Thin wrappers around the sljit emitter functions. Every
macro except DEFINE_COMPILER and SET_LABEL expects a local variable
named "compiler", which DEFINE_COMPILER declares from common->compiler. */
517 #define DEFINE_COMPILER \
518 struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
523 #define LABEL() \
524 sljit_emit_label(compiler)
525 #define JUMP(type) \
526 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
527 #define JUMPTO(type, label) \
528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
529 #define JUMPHERE(jump) \
530 sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532 sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540 sljit_get_local_base(compiler, (dst), (dstw), (offset))
541
/* NOTE(review): the users are outside this chunk; presumably the "no
limit" value for the character readers - confirm. */
542 #define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Lookup table: entry i is the number of bits set in the 4-bit value i. */
static int ones_in_half_byte[16] = {
  0, /* 0000 */
  1, /* 0001 */
  1, /* 0010 */
  2, /* 0011 */
  1, /* 0100 */
  2, /* 0101 */
  2, /* 0110 */
  3, /* 0111 */
  1, /* 1000 */
  2, /* 1001 */
  2, /* 1010 */
  3, /* 1011 */
  2, /* 1100 */
  3, /* 1101 */
  3, /* 1110 */
  4  /* 1111 */
};
571
572 /* Functions which might need modification for all new supported opcodes:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
675 return cc + PRIV(OP_lengths)[*cc];
676
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
/* Detects a bracket group that is followed by identical copies of itself
and records the repeat so that it can be handled as a counted iteration.

begin must point at the opening opcode of a bracket group. Copies are
recognised by comparing the code units of the group with memcmp. The
result is stored in three consecutive private_data_ptrs slots addressed
relative to a closing OP_KET: a distance to skip, the repeat kind
(OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

Returns TRUE when a repeat was recorded now or had been recorded before,
FALSE otherwise. */
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
/* Count the identical groups that follow each other directly; min
includes the original group. */
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
/* Exactly two copies are not treated as a repeat. */
954 if (min == 2)
955 return FALSE;
956
/* Look for optional copies: BRAZERO / BRAMINZERO followed by an OP_BRA
that starts with another copy of the group. max counts these. */
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
961 type = *next;
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
/* The last optional copy follows the zero opcode directly, without an
OP_BRA in front of it. */
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
/* It must be followed by max OP_KET opcodes (NOTE(review): presumably
the ones closing the OP_BRA groups opened above - confirm). */
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
/* The last fixed copy is now part of the recorded optional repeat, so
take it out of the fixed count and move max_end back by one group. */
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
/* Record the fixed part when at least three copies remain. */
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of case labels for the single character and type repeat opcodes.
Each macro expands to a run of "case X:" labels, to be used inside a
switch directly before the statements handling the group. */

/* Character repeats for which set_private_data_ptrs uses space = 1. */
1008 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1009 case OP_MINSTAR: \
1010 case OP_MINPLUS: \
1011 case OP_QUERY: \
1012 case OP_MINQUERY: \
1013 case OP_MINSTARI: \
1014 case OP_MINPLUSI: \
1015 case OP_QUERYI: \
1016 case OP_MINQUERYI: \
1017 case OP_NOTMINSTAR: \
1018 case OP_NOTMINPLUS: \
1019 case OP_NOTQUERY: \
1020 case OP_NOTMINQUERY: \
1021 case OP_NOTMINSTARI: \
1022 case OP_NOTMINPLUSI: \
1023 case OP_NOTQUERYI: \
1024 case OP_NOTMINQUERYI:
1025
/* Greedy star and plus character repeats (space = 2). */
1026 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1027 case OP_STAR: \
1028 case OP_PLUS: \
1029 case OP_STARI: \
1030 case OP_PLUSI: \
1031 case OP_NOTSTAR: \
1032 case OP_NOTPLUS: \
1033 case OP_NOTSTARI: \
1034 case OP_NOTPLUSI:
1035
/* Character repeats with an upper bound (space = 2; the opcode carries an
extra IMM2_SIZE count). */
1036 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1037 case OP_UPTO: \
1038 case OP_MINUPTO: \
1039 case OP_UPTOI: \
1040 case OP_MINUPTOI: \
1041 case OP_NOTUPTO: \
1042 case OP_NOTMINUPTO: \
1043 case OP_NOTUPTOI: \
1044 case OP_NOTMINUPTOI:
1045
/* Type repeats for which set_private_data_ptrs uses space = 1. */
1046 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1047 case OP_TYPEMINSTAR: \
1048 case OP_TYPEMINPLUS: \
1049 case OP_TYPEQUERY: \
1050 case OP_TYPEMINQUERY:
1051
/* Greedy star and plus type repeats. */
1052 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1053 case OP_TYPESTAR: \
1054 case OP_TYPEPLUS:
1055
/* Type repeats with an upper bound. */
1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1057 case OP_TYPEUPTO: \
1058 case OP_TYPEMINUPTO:
1059
/* Walks the compiled pattern from common->start up to ccend and records, in
common->private_data_ptrs[], the private data offset of every opcode that
gets its own storage. Offsets are handed out sequentially, starting at
*private_data_start, in units of sizeof(sljit_sw); the first unused offset is
written back through private_data_start on return. The walk is abandoned
once the running offset exceeds SLJIT_MAX_LOCAL_SIZE.

Per-iteration working values:
  space      - number of sljit_sw words wanted by a character iterator
  size       - how far to advance cc; a negative value means "advance by
               -size and, in UTF mode, also skip the extra units of the
               character that ends at cc[-1]"
  bracketlen - length of a bracket header to step over
  end        - character iterators located before this address do not get
               a private slot (see the "cc >= end" tests below) */
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067
1068 while (cc < ccend)
1069 {
1070 space = 0;
1071 size = 0;
1072 bracketlen = 0;
/* Out of local space: give up, the caller sees the oversized offset. */
1073 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1074 break;
1075
1076 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1077 if (detect_repeat(common, cc))
1078 {
1079 /* These brackets are converted to repeats, so no global
1080 based single character repeat is allowed. */
1081 if (cc >= end)
1082 end = bracketend(cc);
1083 }
1084
1085 switch(*cc)
1086 {
1087 case OP_KET:
/* A non-zero entry at cc + 1 gives this OP_KET its own slot and is also
used as an extra skip distance. NOTE(review): presumably that entry is
written by detect_repeat() - confirm there. */
1088 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 cc += common->private_data_ptrs[cc + 1 - common->start];
1093 }
1094 cc += 1 + LINK_SIZE;
1095 break;
1096
/* Brackets that always receive exactly one private word. */
1097 case OP_ASSERT:
1098 case OP_ASSERT_NOT:
1099 case OP_ASSERTBACK:
1100 case OP_ASSERTBACK_NOT:
1101 case OP_ONCE:
1102 case OP_ONCE_NC:
1103 case OP_BRAPOS:
1104 case OP_SBRA:
1105 case OP_SBRAPOS:
1106 case OP_SCOND:
1107 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1108 private_data_ptr += sizeof(sljit_sw);
1109 bracketlen = 1 + LINK_SIZE;
1110 break;
1111
/* Same, but the header also carries an IMM2_SIZE field. */
1112 case OP_CBRAPOS:
1113 case OP_SCBRAPOS:
1114 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1115 private_data_ptr += sizeof(sljit_sw);
1116 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1117 break;
1118
1119 case OP_COND:
1120 /* Might be a hidden SCOND. */
1121 alternative = cc + GET(cc, 1);
1122 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1123 {
1124 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1125 private_data_ptr += sizeof(sljit_sw);
1126 }
1127 bracketlen = 1 + LINK_SIZE;
1128 break;
1129
/* Plain and capturing brackets get no slot here; only their header is
skipped. */
1130 case OP_BRA:
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_CBRA:
1135 case OP_SCBRA:
1136 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1137 break;
1138
/* Single character iterators: negative size, see the skip logic below. */
1139 CASE_ITERATOR_PRIVATE_DATA_1
1140 space = 1;
1141 size = -2;
1142 break;
1143
1144 CASE_ITERATOR_PRIVATE_DATA_2A
1145 space = 2;
1146 size = -2;
1147 break;
1148
1149 CASE_ITERATOR_PRIVATE_DATA_2B
1150 space = 2;
1151 size = -(2 + IMM2_SIZE);
1152 break;
1153
/* Character type iterators: positive size, no UTF extra-length skip. */
1154 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1155 space = 1;
1156 size = 1;
1157 break;
1158
1159 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
/* No slot is reserved when the repeated type is OP_ANYNL or OP_EXTUNI. */
1160 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1161 space = 2;
1162 size = 1;
1163 break;
1164
1165 case OP_TYPEUPTO:
1166 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1167 space = 2;
1168 size = 1 + IMM2_SIZE;
1169 break;
1170
1171 case OP_TYPEMINUPTO:
1172 space = 2;
1173 size = 1 + IMM2_SIZE;
1174 break;
1175
/* Classes: the number of words depends on the repeat opcode that follows
the class data, as reported by get_class_iterator_size(). */
1176 case OP_CLASS:
1177 case OP_NCLASS:
1178 size += 1 + 32 / sizeof(pcre_uchar);
1179 space = get_class_iterator_size(cc + size);
1180 break;
1181
1182 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1183 case OP_XCLASS:
1184 size = GET(cc, 1);
1185 space = get_class_iterator_size(cc + size);
1186 break;
1187 #endif
1188
1189 default:
1190 cc = next_opcode(common, cc);
1191 SLJIT_ASSERT(cc != NULL);
1192 break;
1193 }
1194
1195 /* Character iterators, which are not inside a repeated bracket,
1196 gets a private slot instead of allocating it on the stack. */
1197 if (space > 0 && cc >= end)
1198 {
1199 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1200 private_data_ptr += sizeof(sljit_sw) * space;
1201 }
1202
/* Step over the opcode that was just classified. */
1203 if (size != 0)
1204 {
1205 if (size < 0)
1206 {
1207 cc += -size;
1208 #ifdef SUPPORT_UTF
1209 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1210 #endif
1211 }
1212 else
1213 cc += size;
1214 }
1215
/* Entering a bracket while outside any restricted region: restrict up to
the bracket's end, unless the bracket is closed by a plain OP_KET, in which
case the restriction is cleared again. */
1216 if (bracketlen > 0)
1217 {
1218 if (cc >= end)
1219 {
1220 end = bracketend(cc);
1221 if (end[-1 - LINK_SIZE] == OP_KET)
1222 end = NULL;
1223 }
1224 cc += bracketlen;
1225 }
1226 }
1227 *private_data_start = private_data_ptr;
1228 }
1229
1230 /* Returns with a frame_types (always < 0) if no need for frame. */
/* Scans the opcodes in [cc, ccend) and counts how many stack words a frame
for that range needs. When ccend is NULL, cc must point to a bracket and the
range becomes the bracket's contents. The per-opcode word counts match the
stores emitted by init_frame(): two words for the start-of-match, mark and
last-capture items (each counted at most once) and three words per capturing
bracket.

Return value: length + 1 when at least one item has to be saved; otherwise
no_frame if any opcode that sets stack_restore was seen, else no_stack.
*needs_control_head is set to TRUE when a MARK / PRUNE_ARG / THEN_ARG opcode
is found while common->control_head_ptr is non-zero (or unconditionally when
DEBUG_FORCE_CONTROL_HEAD is enabled). */
1231 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1232 {
1233 int length = 0;
1234 int possessive = 0;
1235 BOOL stack_restore = FALSE;
/* For recursions these two items are treated as already accounted for. */
1236 BOOL setsom_found = recursive;
1237 BOOL setmark_found = recursive;
1238 /* The last capture is a local variable even for recursions. */
1239 BOOL capture_last_found = FALSE;
1240
1241 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1242 SLJIT_ASSERT(common->control_head_ptr != 0);
1243 *needs_control_head = TRUE;
1244 #else
1245 *needs_control_head = FALSE;
1246 #endif
1247
1248 if (ccend == NULL)
1249 {
1250 ccend = bracketend(cc) - (1 + LINK_SIZE);
1251 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1252 {
/* The possessive capturing bracket itself: 3 words, plus 2 when the last
capture index is tracked. Remembered so that a frame consisting of only
this item can be detected after the scan. */
1253 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1254 /* This is correct regardless of common->capture_last_ptr. */
1255 capture_last_found = TRUE;
1256 }
1257 cc = next_opcode(common, cc);
1258 }
1259
1260 SLJIT_ASSERT(cc != NULL);
1261 while (cc < ccend)
1262 switch(*cc)
1263 {
1264 case OP_SET_SOM:
1265 SLJIT_ASSERT(common->has_set_som);
1266 stack_restore = TRUE;
1267 if (!setsom_found)
1268 {
1269 length += 2;
1270 setsom_found = TRUE;
1271 }
1272 cc += 1;
1273 break;
1274
1275 case OP_MARK:
1276 case OP_PRUNE_ARG:
1277 case OP_THEN_ARG:
1278 SLJIT_ASSERT(common->mark_ptr != 0);
1279 stack_restore = TRUE;
1280 if (!setmark_found)
1281 {
1282 length += 2;
1283 setmark_found = TRUE;
1284 }
1285 if (common->control_head_ptr != 0)
1286 *needs_control_head = TRUE;
/* Skip the opcode, the length unit in cc[1], the name and one more unit. */
1287 cc += 1 + 2 + cc[1];
1288 break;
1289
1290 case OP_RECURSE:
/* A recursion may touch every tracked item, so each one that the pattern
uses and that has not been counted yet is added. */
1291 stack_restore = TRUE;
1292 if (common->has_set_som && !setsom_found)
1293 {
1294 length += 2;
1295 setsom_found = TRUE;
1296 }
1297 if (common->mark_ptr != 0 && !setmark_found)
1298 {
1299 length += 2;
1300 setmark_found = TRUE;
1301 }
1302 if (common->capture_last_ptr != 0 && !capture_last_found)
1303 {
1304 length += 2;
1305 capture_last_found = TRUE;
1306 }
1307 cc += 1 + LINK_SIZE;
1308 break;
1309
1310 case OP_CBRA:
1311 case OP_CBRAPOS:
1312 case OP_SCBRA:
1313 case OP_SCBRAPOS:
1314 stack_restore = TRUE;
1315 if (common->capture_last_ptr != 0 && !capture_last_found)
1316 {
1317 length += 2;
1318 capture_last_found = TRUE;
1319 }
1320 length += 3;
1321 cc += 1 + LINK_SIZE + IMM2_SIZE;
1322 break;
1323
1324 default:
1325 stack_restore = TRUE;
1326 /* Fall through. */
1327
/* The opcodes listed below are stepped over without setting stack_restore;
every opcode not named anywhere in this switch sets it via the default
label above. */
1328 case OP_NOT_WORD_BOUNDARY:
1329 case OP_WORD_BOUNDARY:
1330 case OP_NOT_DIGIT:
1331 case OP_DIGIT:
1332 case OP_NOT_WHITESPACE:
1333 case OP_WHITESPACE:
1334 case OP_NOT_WORDCHAR:
1335 case OP_WORDCHAR:
1336 case OP_ANY:
1337 case OP_ALLANY:
1338 case OP_ANYBYTE:
1339 case OP_NOTPROP:
1340 case OP_PROP:
1341 case OP_ANYNL:
1342 case OP_NOT_HSPACE:
1343 case OP_HSPACE:
1344 case OP_NOT_VSPACE:
1345 case OP_VSPACE:
1346 case OP_EXTUNI:
1347 case OP_EODN:
1348 case OP_EOD:
1349 case OP_CIRC:
1350 case OP_CIRCM:
1351 case OP_DOLL:
1352 case OP_DOLLM:
1353 case OP_CHAR:
1354 case OP_CHARI:
1355 case OP_NOT:
1356 case OP_NOTI:
1357
1358 case OP_EXACT:
1359 case OP_POSSTAR:
1360 case OP_POSPLUS:
1361 case OP_POSQUERY:
1362 case OP_POSUPTO:
1363
1364 case OP_EXACTI:
1365 case OP_POSSTARI:
1366 case OP_POSPLUSI:
1367 case OP_POSQUERYI:
1368 case OP_POSUPTOI:
1369
1370 case OP_NOTEXACT:
1371 case OP_NOTPOSSTAR:
1372 case OP_NOTPOSPLUS:
1373 case OP_NOTPOSQUERY:
1374 case OP_NOTPOSUPTO:
1375
1376 case OP_NOTEXACTI:
1377 case OP_NOTPOSSTARI:
1378 case OP_NOTPOSPLUSI:
1379 case OP_NOTPOSQUERYI:
1380 case OP_NOTPOSUPTOI:
1381
1382 case OP_TYPEEXACT:
1383 case OP_TYPEPOSSTAR:
1384 case OP_TYPEPOSPLUS:
1385 case OP_TYPEPOSQUERY:
1386 case OP_TYPEPOSUPTO:
1387
1388 case OP_CLASS:
1389 case OP_NCLASS:
1390 case OP_XCLASS:
1391
1392 cc = next_opcode(common, cc);
1393 SLJIT_ASSERT(cc != NULL);
1394 break;
1395 }
1396
1397 /* Possessive quantifiers can use a special case. */
/* Nothing beyond the possessive bracket's own item was added (this test is
also true when both values are still 0). */
1398 if (SLJIT_UNLIKELY(possessive == length))
1399 return stack_restore ? no_frame : no_stack;
1400
/* One extra word is reported on top of the counted items; init_frame()
ends the frame with a single zero word. */
1401 if (length > 0)
1402 return length + 1;
1403 return stack_restore ? no_frame : no_stack;
1404 }
1405
/* Emits the machine code that fills in a frame on the backtracking stack
for the opcodes in [cc, ccend) (or for the bracket at cc when ccend is
NULL). The frame is written at STACK_TOP-relative offsets starting at
STACK(stackpos) and consists of a sequence of items followed by a zero word:

  - start-of-match, mark and last-capture items: the negated local offset
    followed by the current value of that local (two words);
  - capturing bracket items: the positive OVECTOR offset followed by the two
    current ovector values (three words).

Each of the first three kinds is stored at most once. The opcode walk has to
visit the same items as get_framesize(); the final assertion checks that the
write position ended exactly at STACK(stacktop). */
1406 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1407 {
1408 DEFINE_COMPILER;
1409 BOOL setsom_found = recursive;
1410 BOOL setmark_found = recursive;
1411 /* The last capture is a local variable even for recursions. */
1412 BOOL capture_last_found = FALSE;
1413 int offset;
1414
1415 /* >= 1 + shortest item size (2) */
1416 SLJIT_UNUSED_ARG(stacktop);
1417 SLJIT_ASSERT(stackpos >= stacktop + 2);
1418
/* From here on stackpos is a byte offset rather than a slot index. */
1419 stackpos = STACK(stackpos);
1420 if (ccend == NULL)
1421 {
1422 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive possessive capturing bracket is not skipped here, so the
loop below stores its capture item as well. */
1423 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1424 cc = next_opcode(common, cc);
1425 }
1426
1427 SLJIT_ASSERT(cc != NULL);
1428 while (cc < ccend)
1429 switch(*cc)
1430 {
1431 case OP_SET_SOM:
1432 SLJIT_ASSERT(common->has_set_som);
1433 if (!setsom_found)
1434 {
/* Item: -OVECTOR(0) tag, then the current OVECTOR(0) value. */
1435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1437 stackpos += (int)sizeof(sljit_sw);
1438 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1439 stackpos += (int)sizeof(sljit_sw);
1440 setsom_found = TRUE;
1441 }
1442 cc += 1;
1443 break;
1444
1445 case OP_MARK:
1446 case OP_PRUNE_ARG:
1447 case OP_THEN_ARG:
1448 SLJIT_ASSERT(common->mark_ptr != 0);
1449 if (!setmark_found)
1450 {
/* Item: -mark_ptr tag, then the current mark value. */
1451 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1453 stackpos += (int)sizeof(sljit_sw);
1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1455 stackpos += (int)sizeof(sljit_sw);
1456 setmark_found = TRUE;
1457 }
1458 cc += 1 + 2 + cc[1];
1459 break;
1460
1461 case OP_RECURSE:
/* Store every tracked item that the pattern uses and that has not been
stored yet, in the order start-of-match, mark, last capture. */
1462 if (common->has_set_som && !setsom_found)
1463 {
1464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1465 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1466 stackpos += (int)sizeof(sljit_sw);
1467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1468 stackpos += (int)sizeof(sljit_sw);
1469 setsom_found = TRUE;
1470 }
1471 if (common->mark_ptr != 0 && !setmark_found)
1472 {
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1475 stackpos += (int)sizeof(sljit_sw);
1476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1477 stackpos += (int)sizeof(sljit_sw);
1478 setmark_found = TRUE;
1479 }
1480 if (common->capture_last_ptr != 0 && !capture_last_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 capture_last_found = TRUE;
1488 }
1489 cc += 1 + LINK_SIZE;
1490 break;
1491
1492 case OP_CBRA:
1493 case OP_CBRAPOS:
1494 case OP_SCBRA:
1495 case OP_SCBRAPOS:
1496 if (common->capture_last_ptr != 0 && !capture_last_found)
1497 {
1498 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1500 stackpos += (int)sizeof(sljit_sw);
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1502 stackpos += (int)sizeof(sljit_sw);
1503 capture_last_found = TRUE;
1504 }
/* Capture item: the bracket number read from the opcode is doubled to index
the ovector pair; the (positive) OVECTOR offset is stored as the tag,
followed by both current values of the pair. */
1505 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1506 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1507 stackpos += (int)sizeof(sljit_sw);
1508 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1509 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1511 stackpos += (int)sizeof(sljit_sw);
1512 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1513 stackpos += (int)sizeof(sljit_sw);
1514
1515 cc += 1 + LINK_SIZE + IMM2_SIZE;
1516 break;
1517
1518 default:
1519 cc = next_opcode(common, cc);
1520 SLJIT_ASSERT(cc != NULL);
1521 break;
1522 }
1523
/* Zero word terminating the frame. */
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1525 SLJIT_ASSERT(stackpos == STACK(stacktop));
1526 }
1527
/* Returns the number of machine words that copy_private_data() transfers
for the opcodes in [cc, ccend). The count starts at 2, or at 3 when
needs_control_head is set, and grows by the number of private words owned by
each opcode in the range. The opcode walk must end exactly at ccend (checked
by the final assertion) and has to visit opcodes in the same way as
copy_private_data(). */
1528 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1529 {
1530 int private_data_length = needs_control_head ? 3 : 2;
1531 int size;
1532 pcre_uchar *alternative;
1533 /* Calculate the sum of the private machine words. */
1534 while (cc < ccend)
1535 {
1536 size = 0;
1537 switch(*cc)
1538 {
1539 case OP_KET:
/* An OP_KET with a private slot also has a non-zero entry at cc + 1, which
is used as an additional skip distance. */
1540 if (PRIVATE_DATA(cc) != 0)
1541 {
1542 private_data_length++;
1543 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1544 cc += PRIVATE_DATA(cc + 1);
1545 }
1546 cc += 1 + LINK_SIZE;
1547 break;
1548
/* Brackets that always own one private word. */
1549 case OP_ASSERT:
1550 case OP_ASSERT_NOT:
1551 case OP_ASSERTBACK:
1552 case OP_ASSERTBACK_NOT:
1553 case OP_ONCE:
1554 case OP_ONCE_NC:
1555 case OP_BRAPOS:
1556 case OP_SBRA:
1557 case OP_SBRAPOS:
1558 case OP_SCOND:
1559 private_data_length++;
1560 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1561 cc += 1 + LINK_SIZE;
1562 break;
1563
1564 case OP_CBRA:
1565 case OP_SCBRA:
/* Counted only when the bracket's optimized_cbracket entry is zero. */
1566 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1567 private_data_length++;
1568 cc += 1 + LINK_SIZE + IMM2_SIZE;
1569 break;
1570
1571 case OP_CBRAPOS:
1572 case OP_SCBRAPOS:
/* Two words: copy_private_data() transfers both the private slot and the
OVECTOR_PRIV entry of the bracket. */
1573 private_data_length += 2;
1574 cc += 1 + LINK_SIZE + IMM2_SIZE;
1575 break;
1576
1577 case OP_COND:
1578 /* Might be a hidden SCOND. */
1579 alternative = cc + GET(cc, 1);
1580 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1581 private_data_length++;
1582 cc += 1 + LINK_SIZE;
1583 break;
1584
/* Iterators contribute only when a private slot was assigned to them. */
1585 CASE_ITERATOR_PRIVATE_DATA_1
1586 if (PRIVATE_DATA(cc))
1587 private_data_length++;
1588 cc += 2;
1589 #ifdef SUPPORT_UTF
1590 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1591 #endif
1592 break;
1593
1594 CASE_ITERATOR_PRIVATE_DATA_2A
1595 if (PRIVATE_DATA(cc))
1596 private_data_length += 2;
1597 cc += 2;
1598 #ifdef SUPPORT_UTF
1599 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1600 #endif
1601 break;
1602
1603 CASE_ITERATOR_PRIVATE_DATA_2B
1604 if (PRIVATE_DATA(cc))
1605 private_data_length += 2;
1606 cc += 2 + IMM2_SIZE;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1613 if (PRIVATE_DATA(cc))
1614 private_data_length++;
1615 cc += 1;
1616 break;
1617
1618 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1619 if (PRIVATE_DATA(cc))
1620 private_data_length += 2;
1621 cc += 1;
1622 break;
1623
1624 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1625 if (PRIVATE_DATA(cc))
1626 private_data_length += 2;
1627 cc += 1 + IMM2_SIZE;
1628 break;
1629
/* Classes: size is the length of the class item itself; the word count comes
from the repeat opcode that follows it. */
1630 case OP_CLASS:
1631 case OP_NCLASS:
1632 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1633 case OP_XCLASS:
1634 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1635 #else
1636 size = 1 + 32 / (int)sizeof(pcre_uchar);
1637 #endif
1638 if (PRIVATE_DATA(cc))
1639 private_data_length += get_class_iterator_size(cc + size);
1640 cc += size;
1641 break;
1642
1643 default:
1644 cc = next_opcode(common, cc);
1645 SLJIT_ASSERT(cc != NULL);
1646 break;
1647 }
1648 }
1649 SLJIT_ASSERT(cc == ccend);
1650 return private_data_length;
1651 }
1652
/* Emits code that copies the private data words belonging to the opcodes in
[cc, ccend) between the local (SLJIT_SP based) area and the backtracking
stack (STACK_TOP based).

  save == TRUE  : locals -> stack. The first words saved are
                  common->recursive_head_ptr and, if needs_control_head is
                  set, common->control_head_ptr.
  save == FALSE : stack -> locals. The leading head word(s) are skipped
                  rather than restored.

Words travel through TMP1 and TMP2 alternately, one step delayed: in save
mode a register is written to the stack just before it is reloaded with the
next local, and whatever is still held in the registers is flushed after the
loop; in restore mode both registers are preloaded from the stack before the
loop. tmp1next selects the register used for the next word and
tmp1empty / tmp2empty record whether a register currently holds a pending
value. The final assertion checks that the opcode walk ended at ccend and
that exactly the slots between the initial stackptr and stacktop were used. */
1653 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1654 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1655 {
1656 DEFINE_COMPILER;
/* Local offsets of the (at most two) words produced by the current step. */
1657 int srcw[2];
1658 int count, size;
1659 BOOL tmp1next = TRUE;
1660 BOOL tmp1empty = TRUE;
1661 BOOL tmp2empty = TRUE;
1662 pcre_uchar *alternative;
1663 enum {
1664 start,
1665 loop,
1666 end
1667 } status;
1668
/* The "start" step (head pointers) only exists when saving. */
1669 status = save ? start : loop;
/* Convert the slot indexes into byte offsets. */
1670 stackptr = STACK(stackptr - 2);
1671 stacktop = STACK(stacktop - 1);
1672
1673 if (!save)
1674 {
/* Skip the saved head word(s), then preload up to two words. */
1675 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1676 if (stackptr < stacktop)
1677 {
1678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1679 stackptr += sizeof(sljit_sw);
1680 tmp1empty = FALSE;
1681 }
1682 if (stackptr < stacktop)
1683 {
1684 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1685 stackptr += sizeof(sljit_sw);
1686 tmp2empty = FALSE;
1687 }
1688 /* The tmp1next must be TRUE in either way. */
1689 }
1690
/* Each pass determines the local offsets (srcw[0..count-1]) owned by one
opcode, then the inner while loop emits the transfers for them. */
1691 do
1692 {
1693 count = 0;
1694 switch(status)
1695 {
1696 case start:
1697 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1698 count = 1;
1699 srcw[0] = common->recursive_head_ptr;
1700 if (needs_control_head)
1701 {
1702 SLJIT_ASSERT(common->control_head_ptr != 0);
1703 count = 2;
1704 srcw[1] = common->control_head_ptr;
1705 }
1706 status = loop;
1707 break;
1708
1709 case loop:
1710 if (cc >= ccend)
1711 {
1712 status = end;
1713 break;
1714 }
1715
/* Same opcode classification as in get_private_data_copy_length(). */
1716 switch(*cc)
1717 {
1718 case OP_KET:
1719 if (PRIVATE_DATA(cc) != 0)
1720 {
1721 count = 1;
1722 srcw[0] = PRIVATE_DATA(cc);
1723 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1724 cc += PRIVATE_DATA(cc + 1);
1725 }
1726 cc += 1 + LINK_SIZE;
1727 break;
1728
1729 case OP_ASSERT:
1730 case OP_ASSERT_NOT:
1731 case OP_ASSERTBACK:
1732 case OP_ASSERTBACK_NOT:
1733 case OP_ONCE:
1734 case OP_ONCE_NC:
1735 case OP_BRAPOS:
1736 case OP_SBRA:
1737 case OP_SBRAPOS:
1738 case OP_SCOND:
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 SLJIT_ASSERT(srcw[0] != 0);
1742 cc += 1 + LINK_SIZE;
1743 break;
1744
1745 case OP_CBRA:
1746 case OP_SCBRA:
1747 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1748 {
1749 count = 1;
1750 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1751 }
1752 cc += 1 + LINK_SIZE + IMM2_SIZE;
1753 break;
1754
1755 case OP_CBRAPOS:
1756 case OP_SCBRAPOS:
1757 count = 2;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1760 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1761 cc += 1 + LINK_SIZE + IMM2_SIZE;
1762 break;
1763
1764 case OP_COND:
1765 /* Might be a hidden SCOND. */
1766 alternative = cc + GET(cc, 1);
1767 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1768 {
1769 count = 1;
1770 srcw[0] = PRIVATE_DATA(cc);
1771 SLJIT_ASSERT(srcw[0] != 0);
1772 }
1773 cc += 1 + LINK_SIZE;
1774 break;
1775
1776 CASE_ITERATOR_PRIVATE_DATA_1
1777 if (PRIVATE_DATA(cc))
1778 {
1779 count = 1;
1780 srcw[0] = PRIVATE_DATA(cc);
1781 }
1782 cc += 2;
1783 #ifdef SUPPORT_UTF
1784 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1785 #endif
1786 break;
1787
/* Two-word iterators: the second word directly follows the first. */
1788 CASE_ITERATOR_PRIVATE_DATA_2A
1789 if (PRIVATE_DATA(cc))
1790 {
1791 count = 2;
1792 srcw[0] = PRIVATE_DATA(cc);
1793 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1794 }
1795 cc += 2;
1796 #ifdef SUPPORT_UTF
1797 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1798 #endif
1799 break;
1800
1801 CASE_ITERATOR_PRIVATE_DATA_2B
1802 if (PRIVATE_DATA(cc))
1803 {
1804 count = 2;
1805 srcw[0] = PRIVATE_DATA(cc);
1806 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1807 }
1808 cc += 2 + IMM2_SIZE;
1809 #ifdef SUPPORT_UTF
1810 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1811 #endif
1812 break;
1813
1814 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1815 if (PRIVATE_DATA(cc))
1816 {
1817 count = 1;
1818 srcw[0] = PRIVATE_DATA(cc);
1819 }
1820 cc += 1;
1821 break;
1822
1823 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1824 if (PRIVATE_DATA(cc))
1825 {
1826 count = 2;
1827 srcw[0] = PRIVATE_DATA(cc);
1828 srcw[1] = srcw[0] + sizeof(sljit_sw);
1829 }
1830 cc += 1;
1831 break;
1832
1833 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1834 if (PRIVATE_DATA(cc))
1835 {
1836 count = 2;
1837 srcw[0] = PRIVATE_DATA(cc);
1838 srcw[1] = srcw[0] + sizeof(sljit_sw);
1839 }
1840 cc += 1 + IMM2_SIZE;
1841 break;
1842
1843 case OP_CLASS:
1844 case OP_NCLASS:
1845 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1846 case OP_XCLASS:
1847 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1848 #else
1849 size = 1 + 32 / (int)sizeof(pcre_uchar);
1850 #endif
/* A class that owns a private slot is expected to need one or two words. */
1851 if (PRIVATE_DATA(cc))
1852 switch(get_class_iterator_size(cc + size))
1853 {
1854 case 1:
1855 count = 1;
1856 srcw[0] = PRIVATE_DATA(cc);
1857 break;
1858
1859 case 2:
1860 count = 2;
1861 srcw[0] = PRIVATE_DATA(cc);
1862 srcw[1] = srcw[0] + sizeof(sljit_sw);
1863 break;
1864
1865 default:
1866 SLJIT_ASSERT_STOP();
1867 break;
1868 }
1869 cc += size;
1870 break;
1871
1872 default:
1873 cc = next_opcode(common, cc);
1874 SLJIT_ASSERT(cc != NULL);
1875 break;
1876 }
1877 break;
1878
1879 case end:
1880 SLJIT_ASSERT_STOP();
1881 break;
1882 }
1883
/* Transfer the collected words; srcw[] is consumed from the highest index
down to 0. */
1884 while (count > 0)
1885 {
1886 count--;
1887 if (save)
1888 {
/* Save: write out the register's pending value (if any) to the stack, then
reload the register from the next local. */
1889 if (tmp1next)
1890 {
1891 if (!tmp1empty)
1892 {
1893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1894 stackptr += sizeof(sljit_sw);
1895 }
1896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1897 tmp1empty = FALSE;
1898 tmp1next = FALSE;
1899 }
1900 else
1901 {
1902 if (!tmp2empty)
1903 {
1904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1905 stackptr += sizeof(sljit_sw);
1906 }
1907 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1908 tmp2empty = FALSE;
1909 tmp1next = TRUE;
1910 }
1911 }
1912 else
1913 {
/* Restore: store the preloaded register into the local, then refill the
register from the stack while unread slots remain. */
1914 if (tmp1next)
1915 {
1916 SLJIT_ASSERT(!tmp1empty);
1917 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1918 tmp1empty = stackptr >= stacktop;
1919 if (!tmp1empty)
1920 {
1921 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1922 stackptr += sizeof(sljit_sw);
1923 }
1924 tmp1next = FALSE;
1925 }
1926 else
1927 {
1928 SLJIT_ASSERT(!tmp2empty);
1929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1930 tmp2empty = stackptr >= stacktop;
1931 if (!tmp2empty)
1932 {
1933 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1934 stackptr += sizeof(sljit_sw);
1935 }
1936 tmp1next = TRUE;
1937 }
1938 }
1939 }
1940 }
1941 while (status != end);
1942
/* Save mode: flush the values still held in the registers. The register
that would have been used next holds the older value, so it goes first. */
1943 if (save)
1944 {
1945 if (tmp1next)
1946 {
1947 if (!tmp1empty)
1948 {
1949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1950 stackptr += sizeof(sljit_sw);
1951 }
1952 if (!tmp2empty)
1953 {
1954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1955 stackptr += sizeof(sljit_sw);
1956 }
1957 }
1958 else
1959 {
1960 if (!tmp2empty)
1961 {
1962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1963 stackptr += sizeof(sljit_sw);
1964 }
1965 if (!tmp1empty)
1966 {
1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1968 stackptr += sizeof(sljit_sw);
1969 }
1970 }
1971 }
1972 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1973 }
1974
1975 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1976 {
1977 pcre_uchar *end = bracketend(cc);
1978 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1979
1980 /* Assert captures then. */
1981 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1982 current_offset = NULL;
1983 /* Conditional block does not. */
1984 if (*cc == OP_COND || *cc == OP_SCOND)
1985 has_alternatives = FALSE;
1986
1987 cc = next_opcode(common, cc);
1988 if (has_alternatives)
1989 current_offset = common->then_offsets + (cc - common->start);
1990
1991 while (cc < end)
1992 {
1993 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1994 cc = set_then_offsets(common, cc, current_offset);
1995 else
1996 {
1997 if (*cc == OP_ALT && has_alternatives)
1998 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1999 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2000 *current_offset = 1;
2001 cc = next_opcode(common, cc);
2002 }
2003 }
2004
2005 return end;
2006 }
2007
/* The case-label helper macros are not needed past this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2014
2015 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2016 {
2017 return (value & (value - 1)) == 0;
2018 }
2019
2020 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2021 {
2022 while (list)
2023 {
2024 /* sljit_set_label is clever enough to do nothing
2025 if either the jump or the label is NULL. */
2026 SET_LABEL(list->jump, label);
2027 list = list->next;
2028 }
2029 }
2030
2031 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2032 {
2033 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2034 if (list_item)
2035 {
2036 list_item->next = *list;
2037 list_item->jump = jump;
2038 *list = list_item;
2039 }
2040 }
2041
2042 static void add_stub(compiler_common *common, struct sljit_jump *start)
2043 {
2044 DEFINE_COMPILER;
2045 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2046
2047 if (list_item)
2048 {
2049 list_item->start = start;
2050 list_item->quit = LABEL();
2051 list_item->next = common->stubs;
2052 common->stubs = list_item;
2053 }
2054 }
2055
2056 static void flush_stubs(compiler_common *common)
2057 {
2058 DEFINE_COMPILER;
2059 stub_list *list_item = common->stubs;
2060
2061 while (list_item)
2062 {
2063 JUMPHERE(list_item->start);
2064 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2065 JUMPTO(SLJIT_JUMP, list_item->quit);
2066 list_item = list_item->next;
2067 }
2068 common->stubs = NULL;
2069 }
2070
2071 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2072 {
2073 DEFINE_COMPILER;
2074 label_addr_list *label_addr;
2075
2076 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2077 if (label_addr == NULL)
2078 return;
2079 label_addr->label = LABEL();
2080 label_addr->update_addr = update_addr;
2081 label_addr->next = common->label_addrs;
2082 common->label_addrs = label_addr;
2083 }
2084
2085 static SLJIT_INLINE void count_match(compiler_common *common)
2086 {
2087 DEFINE_COMPILER;
2088
2089 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2090 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2091 }
2092
2093 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2094 {
2095 /* May destroy all locals and registers except TMP2. */
2096 DEFINE_COMPILER;
2097
2098 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2099 #ifdef DESTROY_REGISTERS
2100 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2101 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2102 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2103 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2105 #endif
2106 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2107 }
2108
2109 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2110 {
2111 DEFINE_COMPILER;
2112 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2113 }
2114
2115 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2116 {
2117 DEFINE_COMPILER;
2118 sljit_uw *result;
2119
2120 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2121 return NULL;
2122
2123 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2124 if (SLJIT_UNLIKELY(result == NULL))
2125 {
2126 sljit_set_compiler_memory_error(compiler);
2127 return NULL;
2128 }
2129
2130 *(void**)result = common->read_only_data_head;
2131 common->read_only_data_head = (void *)result;
2132 return result + 1;
2133 }
2134
2135 static void free_read_only_data(void *current, void *allocator_data)
2136 {
2137 void *next;
2138
2139 SLJIT_UNUSED_ARG(allocator_data);
2140
2141 while (current != NULL)
2142 {
2143 next = *(void**)current;
2144 SLJIT_FREE(current, allocator_data);
2145 current = next;
2146 }
2147 }
2148
2149 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2150 {
2151 DEFINE_COMPILER;
2152 struct sljit_label *loop;
2153 int i;
2154
2155 /* At this point we can freely use all temporary registers. */
2156 SLJIT_ASSERT(length > 1);
2157 /* TMP1 returns with begin - 1. */
2158 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2159 if (length < 8)
2160 {
2161 for (i = 1; i < length; i++)
2162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2163 }
2164 else
2165 {
2166 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2167 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2168 loop = LABEL();
2169 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2171 JUMPTO(SLJIT_NOT_ZERO, loop);
2172 }
2173 }
2174
2175 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2176 {
2177 DEFINE_COMPILER;
2178 struct sljit_label *loop;
2179 int i;
2180
2181 SLJIT_ASSERT(length > 1);
2182 /* OVECTOR(1) contains the "string begin - 1" constant. */
2183 if (length > 2)
2184 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2185 if (length < 8)
2186 {
2187 for (i = 2; i < length; i++)
2188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2189 }
2190 else
2191 {
2192 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2193 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2194 loop = LABEL();
2195 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2196 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2197 JUMPTO(SLJIT_NOT_ZERO, loop);
2198 }
2199
2200 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2201 if (common->mark_ptr != 0)
2202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2203 if (common->control_head_ptr != 0)
2204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2205 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2207 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2208 }
2209
2210 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2211 {
2212 while (current != NULL)
2213 {
2214 switch (current[-2])
2215 {
2216 case type_then_trap:
2217 break;
2218
2219 case type_mark:
2220 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2221 return current[-4];
2222 break;
2223
2224 default:
2225 SLJIT_ASSERT_STOP();
2226 break;
2227 }
2228 current = (sljit_sw*)current[-1];
2229 }
2230 return -1;
2231 }
2232
/* Emits the code that publishes a successful match. The entries kept in the
local ovector area (starting at OVECTOR_START) are converted to offsets
relative to jit_arguments.begin and stored as ints into jit_arguments.offsets
(offset_count entries are written). When a mark pointer is tracked, it is
copied into jit_arguments.mark_ptr. The match count is left in
SLJIT_RETURN_REG.
  common      compiler state
  topbracket  selects where the backward scan for the last set pair starts;
              when it is not greater than 1 the return value is simply 1 */
2233 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2234 {
2235 DEFINE_COMPILER;
2236 struct sljit_label *loop;
2237 struct sljit_jump *early_quit;
2238
2239 /* At this point we can freely use all registers. */
/* S2 keeps the previous content of OVECTOR(1) before it is overwritten with
the current STR_PTR; the second loop below compares entries against S2.
NOTE(review): S2 presumably is the "unset" marker value - confirm where
OVECTOR(1) is initialised. */
2240 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2242
/* R0 = arguments, R1 = offset_count, R2 = offsets - sizeof(int) (the store in
the loop uses SLJIT_MOVU_SI with a sizeof(int) displacement), then
R0 = begin, S0 = address of the first local ovector entry. */
2243 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2244 if (common->mark_ptr != 0)
2245 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2246 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2247 if (common->mark_ptr != 0)
2248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2249 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2250 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2251 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2252 /* Unlikely, but possible */
2253 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop: one entry per iteration, R1 counts down to zero. */
2254 loop = LABEL();
2255 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2256 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2257 /* Copy the integer value to the output buffer */
2258 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte distance into a code unit count. */
2259 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2260 #endif
2261 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2262 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2263 JUMPTO(SLJIT_NOT_ZERO, loop);
2264 JUMPHERE(early_quit);
2265
2266 /* Calculate the return value, which is the maximum ovector value. */
2267 if (topbracket > 1)
2268 {
/* R0 walks backwards over the local pairs (two words per step) while R1,
starting at topbracket + 1, is decremented on every step. The walk stops at
the first entry that differs from S2 and R1 becomes the return value. */
2269 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2270 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2271
2272 /* OVECTOR(0) is never equal to SLJIT_S2. */
2273 loop = LABEL();
2274 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2275 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2276 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2277 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2278 }
2279 else
2280 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2281 }
2282
/* Emits the exit sequence for a partial match. SLJIT_RETURN_REG is set to
PCRE_ERROR_PARTIAL and, depending on jit_arguments.real_offset_count, up to
three values (all relative to jit_arguments.begin, in code units) are stored
into jit_arguments.offsets before jumping to 'quit':
  offsets[0]  start_used_ptr (hard partial) or hit_start (soft partial)
  offsets[1]  STR_END
  offsets[2]  start_ptr (hard partial) or the word following hit_start (soft
              partial); only written when real_offset_count >= 3
Nothing is stored when real_offset_count is less than 2.
  common  compiler state
  quit    label of the common function exit */
2283 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2284 {
2285 DEFINE_COMPILER;
2286 struct sljit_jump *jump;
2287
/* S1 is reused as a scratch register below after its STR_END value is read. */
2288 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2289 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2290 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2291
2292 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2293 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2294 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Not enough room even for one pair: return without storing anything. */
2295 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2296
2297 /* Store match begin and end. */
2298 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2299 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2300
/* The third slot is filled only when the caller provided at least 3 ints. */
2301 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2302 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2303 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2304 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2305 #endif
2306 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2307 JUMPHERE(jump);
2308
/* R2 = start of the partial match; it is loaded before S1 (STR_END) is
overwritten by the end offset computed next. */
2309 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2310 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2311 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2312 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2313 #endif
2314 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2315
2316 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2317 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2318 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2319 #endif
2320 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2321
2322 JUMPTO(SLJIT_JUMP, quit);
2323 }
2324
/* Emits code that lowers the stored start_used_ptr to STR_PTR when STR_PTR is
smaller than it. Only the two partial matching modes emit anything; in every
other mode the function generates no code. */
2325 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2326 {
2327 /* May destroy TMP1. */
2328 DEFINE_COMPILER;
2329 struct sljit_jump *jump;
2330
2331 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2332 {
2333 /* The value of -1 must be kept for start_used_ptr! */
/* Adding 1 turns -1 into 0, which always satisfies the comparison below, so
the -1 value is never overwritten. */
2334 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2335 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2336 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2337 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2339 JUMPHERE(jump);
2340 }
2341 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2342 {
/* No -1 special case here: the stored value is compared directly. */
2343 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2345 JUMPHERE(jump);
2346 }
2347 }
2348
2349 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2350 {
2351 /* Detects if the character has an othercase. */
2352 unsigned int c;
2353
2354 #ifdef SUPPORT_UTF
2355 if (common->utf)
2356 {
2357 GETCHAR(c, cc);
2358 if (c > 127)
2359 {
2360 #ifdef SUPPORT_UCP
2361 return c != UCD_OTHERCASE(c);
2362 #else
2363 return FALSE;
2364 #endif
2365 }
2366 #ifndef COMPILE_PCRE8
2367 return common->fcc[c] != c;
2368 #endif
2369 }
2370 else
2371 #endif
2372 c = *cc;
2373 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2374 }
2375
2376 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2377 {
2378 /* Returns with the othercase. */
2379 #ifdef SUPPORT_UTF
2380 if (common->utf && c > 127)
2381 {
2382 #ifdef SUPPORT_UCP
2383 return UCD_OTHERCASE(c);
2384 #else
2385 return c;
2386 #endif
2387 }
2388 #endif
2389 return TABLE_GET(c, common->fcc, c);
2390 }
2391
2392 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2393 {
2394 /* Detects if the character and its othercase has only 1 bit difference. */
2395 unsigned int c, oc, bit;
2396 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2397 int n;
2398 #endif
2399
2400 #ifdef SUPPORT_UTF
2401 if (common->utf)
2402 {
2403 GETCHAR(c, cc);
2404 if (c <= 127)
2405 oc = common->fcc[c];
2406 else
2407 {
2408 #ifdef SUPPORT_UCP
2409 oc = UCD_OTHERCASE(c);
2410 #else
2411 oc = c;
2412 #endif
2413 }
2414 }
2415 else
2416 {
2417 c = *cc;
2418 oc = TABLE_GET(c, common->fcc, c);
2419 }
2420 #else
2421 c = *cc;
2422 oc = TABLE_GET(c, common->fcc, c);
2423 #endif
2424
2425 SLJIT_ASSERT(c != oc);
2426
2427 bit = c ^ oc;
2428 /* Optimized for English alphabet. */
2429 if (c <= 127 && bit == 0x20)
2430 return (0 << 8) | 0x20;
2431
2432 /* Since c != oc, they must have at least 1 bit difference. */
2433 if (!is_powerof2(bit))
2434 return 0;
2435
2436 #if defined COMPILE_PCRE8
2437
2438 #ifdef SUPPORT_UTF
2439 if (common->utf && c > 127)
2440 {
2441 n = GET_EXTRALEN(*cc);
2442 while ((bit & 0x3f) == 0)
2443 {
2444 n--;
2445 bit >>= 6;
2446 }
2447 return (n << 8) | bit;
2448 }
2449 #endif /* SUPPORT_UTF */
2450 return (0 << 8) | bit;
2451
2452 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2453
2454 #ifdef SUPPORT_UTF
2455 if (common->utf && c > 65535)
2456 {
2457 if (bit >= (1 << 10))
2458 bit >>= 10;
2459 else
2460 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2461 }
2462 #endif /* SUPPORT_UTF */
2463 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2464
2465 #endif /* COMPILE_PCRE[8|16|32] */
2466 }
2467
/* Emits the partial match test used when the end of the subject has been
reached. Nothing is emitted in JIT_COMPILE (non-partial) mode.
  common  compiler state
  force   when FALSE the action is skipped if start_used_ptr >= STR_PTR;
          when TRUE it is skipped only in soft partial mode and only if
          start_used_ptr is -1 (in hard partial mode it is unconditional)
The action is: soft partial mode stores 0 into hit_start, hard partial mode
jumps to the partial match exit. */
2468 static void check_partial(compiler_common *common, BOOL force)
2469 {
2470 /* Checks whether a partial match has occurred. Does not modify registers. */
2471 DEFINE_COMPILER;
2472 struct sljit_jump *jump = NULL;
2473
2474 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2475
2476 if (common->mode == JIT_COMPILE)
2477 return;
2478
/* 'jump' (when set) bypasses the action emitted below. */
2479 if (!force)
2480 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2481 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2482 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2483
2484 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2485 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2486 else
2487 {
/* Jump straight to the partial match label if it already exists, otherwise
queue the jump so it can be bound later. */
2488 if (common->partialmatchlabel != NULL)
2489 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2490 else
2491 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2492 }
2493
2494 if (jump != NULL)
2495 JUMPHERE(jump);
2496 }
2497
/* Emits an end-of-subject test. Execution falls through when
STR_PTR < STR_END. Otherwise:
  - normal mode: jump to end_reached;
  - soft partial mode: jump to end_reached, storing 0 into hit_start first
    when start_used_ptr < STR_PTR;
  - hard partial mode: jump to end_reached when start_used_ptr >= STR_PTR,
    else jump to the partial match exit.
  common       compiler state
  end_reached  list collecting the "end reached" jumps */
2498 static void check_str_end(compiler_common *common, jump_list **end_reached)
2499 {
2500 /* Does not affect registers. Usually used in a tight spot. */
2501 DEFINE_COMPILER;
2502 struct sljit_jump *jump;
2503
2504 if (common->mode == JIT_COMPILE)
2505 {
2506 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2507 return;
2508 }
2509
/* Partial matching modes: 'jump' is the fall-through path for the case when
the end has not been reached. */
2510 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2511 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2512 {
2513 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2515 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2516 }
2517 else
2518 {
2519 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2520 if (common->partialmatchlabel != NULL)
2521 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2522 else
2523 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2524 }
2525 JUMPHERE(jump);
2526 }
2527
/* Emits an end-of-subject test whose failure path is a backtrack. Execution
falls through when STR_PTR < STR_END. Otherwise:
  - normal mode: jump to backtracks;
  - partial modes: jump to backtracks when start_used_ptr >= STR_PTR; if not,
    soft mode stores 0 into hit_start and then jumps to backtracks, while
    hard mode jumps to the partial match exit.
  common      compiler state
  backtracks  list collecting the backtrack jumps */
2528 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2529 {
2530 DEFINE_COMPILER;
2531 struct sljit_jump *jump;
2532
2533 if (common->mode == JIT_COMPILE)
2534 {
2535 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2536 return;
2537 }
2538
2539 /* Partial matching mode. */
2540 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2541 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2542 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2543 {
2544 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2545 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2546 }
2547 else
2548 {
/* Use the partial match label directly when it is already known. */
2549 if (common->partialmatchlabel != NULL)
2550 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2551 else
2552 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2553 }
2554 JUMPHERE(jump);
2555 }
2556
2557 static void peek_char(compiler_common *common, pcre_uint32 max)
2558 {
2559 /* Reads the character into TMP1, keeps STR_PTR.
2560 Does not check STR_END. TMP2 Destroyed. */
2561 DEFINE_COMPILER;
2562 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2563 struct sljit_jump *jump;
2564 #endif
2565
2566 SLJIT_UNUSED_ARG(max);
2567
2568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2569 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2570 if (common->utf)
2571 {
2572 if (max < 128) return;
2573
2574 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2575 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2576 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2577 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2578 JUMPHERE(jump);
2579 }
2580 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2581
2582 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2583 if (common->utf)
2584 {
2585 if (max < 0xd800) return;
2586
2587 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2588 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2589 /* TMP2 contains the high surrogate. */
2590 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2591 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2592 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2593 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2594 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2595 JUMPHERE(jump);
2596 }
2597 #endif
2598 }
2599
2600 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2601
2602 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2603 {
2604 /* Tells whether the character codes below 128 are enough
2605 to determine a match. */
2606 const pcre_uint8 value = nclass ? 0xff : 0;
2607 const pcre_uint8 *end = bitset + 32;
2608
2609 bitset += 16;
2610 do
2611 {
2612 if (*bitset++ != value)
2613 return FALSE;
2614 }
2615 while (bitset < end);
2616 return TRUE;
2617 }
2618
/* Emits the character type read used in UTF-8 mode when only character codes
below 128 matter. TMP2 is used as a scratch register. */
2619 static void read_char7_type(compiler_common *common, BOOL full_read)
2620 {
2621 /* Reads the precise character type of a character into TMP1, if the character
2622 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2623 full_read argument tells whether characters above max are accepted or not. */
2624 DEFINE_COMPILER;
2625 struct sljit_jump *jump;
2626
2627 SLJIT_ASSERT(common->utf);
2628
/* TMP2 = first byte, STR_PTR is moved past it. */
2629 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2630 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2631
/* The type is fetched from the ctypes table using the byte value as index. */
2632 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2633
2634 if (full_read)
2635 {
/* For lead bytes (>= 0xc0) STR_PTR is additionally advanced by the value
stored in utf8_table4 for that byte, which skips the rest of the sequence. */
2636 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2637 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2638 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2639 JUMPHERE(jump);
2640 }
2641 }
2642
2643 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2644
/* Emits a character read that is specialised for the range the caller cares
about, so that only the necessary part of the UTF decoding is generated.
  common          compiler state
  min, max        bounds of the interesting range (min <= max is asserted)
  update_str_ptr  when TRUE, STR_PTR is moved past the whole character even
                  if its value is not decoded precisely
TMP2 is used as a scratch register; the 8 bit UTF code also uses RETURN_ADDR
as a scratch register when update_str_ptr is set. */
2645 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2646 {
2647 /* Reads the precise value of a character into TMP1, if the character is
2648 between min and max (c >= min && c <= max). Otherwise it returns with a value
2649 outside the range. Does not check STR_END. */
2650 DEFINE_COMPILER;
2651 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2652 struct sljit_jump *jump;
2653 #endif
2654 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2655 struct sljit_jump *jump2;
2656 #endif
2657
2658 SLJIT_UNUSED_ARG(update_str_ptr);
2659 SLJIT_UNUSED_ARG(min);
2660 SLJIT_UNUSED_ARG(max);
2661 SLJIT_ASSERT(min <= max);
2662
/* TMP1 = first code unit; from here on STR_PTR points to the unit after it. */
2663 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2665
2666 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2667 if (common->utf)
2668 {
2669 if (max < 128 && !update_str_ptr) return;
2670
/* Bytes below 0xc0 need no further decoding: 'jump' skips everything below. */
2671 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2672 if (min >= 0x10000)
2673 {
/* Only lead bytes 0xf0..0xf7 are decoded (inline, using three more bytes).
For other lead bytes jump2 leaves the second byte in TMP1, which is below
min. With update_str_ptr the sequence length comes from utf8_table4. */
2674 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2675 if (update_str_ptr)
2676 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2677 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2678 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2679 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2680 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2681 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2682 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2683 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2684 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2685 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2686 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2687 if (!update_str_ptr)
2688 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2689 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2690 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2691 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2692 JUMPHERE(jump2);
2693 if (update_str_ptr)
2694 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2695 }
2696 else if (min >= 0x800 && max <= 0xffff)
2697 {
/* Only lead bytes 0xe0..0xef are decoded (inline, using two more bytes).
For other lead bytes jump2 leaves the second byte in TMP1, which is below
min. */
2698 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2699 if (update_str_ptr)
2700 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2701 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2702 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2703 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2704 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2705 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2706 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2707 if (!update_str_ptr)
2708 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2709 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2710 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2711 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2712 JUMPHERE(jump2);
2713 if (update_str_ptr)
2714 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2715 }
/* General case: call the shared decoder, the shorter utfreadchar16 variant
is enough when max is below 0x10000. */
2716 else if (max >= 0x800)
2717 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2718 else if (max < 128)
2719 {
/* update_str_ptr must be set here (see the early return above): the value is
not decoded, STR_PTR is only advanced by the utf8_table4 entry. */
2720 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2722 }
2723 else
2724 {
/* 128 <= max < 0x800: the first two bytes are combined the way a two byte
sequence is decoded. */
2725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2726 if (!update_str_ptr)
2727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2728 else
2729 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2730 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2731 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2732 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2733 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2734 if (update_str_ptr)
2735 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2736 }
2737 JUMPHERE(jump);
2738 }
2739 #endif
2740
2741 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2742 if (common->utf)
2743 {
2744 if (max >= 0x10000)
2745 {
/* Full decoding: when TMP1 is a high surrogate (0xd800..0xdbff) it is
combined with the following unit and STR_PTR is moved past that unit. */
2746 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2747 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2748 /* TMP2 contains the high surrogate. */
2749 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2750 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2751 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2752 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2753 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2754 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2755 JUMPHERE(jump);
2756 return;
2757 }
2758
2759 if (max < 0xd800 && !update_str_ptr) return;
2760
2761 /* Skip low surrogate if necessary. */
/* The pair is not decoded here: when max >= 0xd800, TMP1 is replaced by
0x10000, a value above max (max is below 0x10000 at this point). */
2762 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2763 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2764 if (update_str_ptr)
2765 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2766 if (max >= 0xd800)
2767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2768 JUMPHERE(jump);
2769 }
2770 #endif
2771 }
2772
/* Emits an unrestricted character read: read_char_range() is called with the
range 0..READ_CHAR_MAX and with STR_PTR updating enabled, so the character
value is returned in TMP1 and STR_PTR is moved past the character. Does not
check STR_END. */
2773 static SLJIT_INLINE void read_char(compiler_common *common)
2774 {
2775 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2776 }
2777
/* Emits a character type read. The type comes from the ctypes table, which
only has entries for character codes below 256; the type of any other
character is 0. TMP2 is used as a scratch register.
  common          compiler state
  update_str_ptr  selects, in the UTF modes, which of the code sequences
                  below is generated for multi-unit characters */
2778 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2779 {
2780 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2781 DEFINE_COMPILER;
2782 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2783 struct sljit_jump *jump;
2784 #endif
2785 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2786 struct sljit_jump *jump2;
2787 #endif
2788
2789 SLJIT_UNUSED_ARG(update_str_ptr);
2790
/* TMP2 = first code unit, STR_PTR is moved past it. */
2791 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2792 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2793
2794 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2795 if (common->utf)
2796 {
2797 /* This can be an extra read in some situations, but hopefully
2798 it is needed in most cases. */
2799 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Bytes below 0xc0 are done: the type is already in TMP1. */
2800 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2801 if (!update_str_ptr)
2802 {
/* Inline variant: consumes exactly one more byte, combines it with the
first one, and looks the result up in ctypes unless it is above 255 (then
the type is 0). */
2803 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2804 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2805 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2806 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2807 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2808 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2809 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2810 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2811 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2812 JUMPHERE(jump2);
2813 }
2814 else
/* Out-of-line variant: the shared utfreadtype8 routine is called. */
2815 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2816 JUMPHERE(jump);
2817 return;
2818 }
2819 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2820
2821 #if !defined COMPILE_PCRE8
2822 /* The ctypes array contains only 256 values. */
2823 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2824 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2825 #endif
2826 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2827 #if !defined COMPILE_PCRE8
2828 JUMPHERE(jump);
2829 #endif
2830
2831 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2832 if (common->utf && update_str_ptr)
2833 {
2834 /* Skip low surrogate if necessary. */
/* TMP2 still holds the first unit: one more unit is skipped when it is in
the 0xd800..0xdbff range. */
2835 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2836 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2837 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2838 JUMPHERE(jump);
2839 }
2840 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2841 }
2842
/* Emits code that moves STR_PTR back by one character. Outside the UTF-8 and
UTF-16 cases this is a plain subtraction of one code unit. */
2843 static void skip_char_back(compiler_common *common)
2844 {
2845 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2846 DEFINE_COMPILER;
2847 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2848 #if defined COMPILE_PCRE8
2849 struct sljit_label *label;
2850
2851 if (common->utf)
2852 {
/* Step back byte by byte while the byte just passed has the form 10xxxxxx
((byte & 0xc0) == 0x80). */
2853 label = LABEL();
2854 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2855 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2856 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2857 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2858 return;
2859 }
2860 #elif defined COMPILE_PCRE16
2861 if (common->utf)
2862 {
2863 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2864 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2865 /* Skip low surrogate if necessary. */
/* TMP1 becomes 1 when the unit just passed is in 0xdc00..0xdfff and 0
otherwise; shifted left by one it is the byte size of the extra unit to
step over. */
2866 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2867 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2868 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2869 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2870 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2871 return;
2872 }
2873 #endif /* COMPILE_PCRE[8|16] */
2874 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2875 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2876 }
2877
/* Emits a newline test for the character held in TMP1.
  common       compiler state
  nltype       NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED
  backtracks   list receiving the conditional jumps
  jumpifmatch  when TRUE the jumps are taken if the character is a newline,
               when FALSE they are taken if it is not */
2878 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2879 {
2880 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2881 DEFINE_COMPILER;
2882 struct sljit_jump *jump;
2883
2884 if (nltype == NLTYPE_ANY)
2885 {
/* The shared anynewline routine is called; the jump that follows tests the
zero flag (NOT_ZERO for jumpifmatch, ZERO otherwise). */
2886 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2887 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2888 }
2889 else if (nltype == NLTYPE_ANYCRLF)
2890 {
2891 if (jumpifmatch)
2892 {
2893 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2894 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2895 }
2896 else
2897 {
/* CR skips the second test; any other character jumps unless it is NL. */
2898 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2899 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2900 JUMPHERE(jump);
2901 }
2902 }
2903 else
2904 {
/* Fixed newline that fits into a single comparison (asserted below). */
2905 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2906 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2907 }
2908 }
2909
2910 #ifdef SUPPORT_UTF
2911
2912 #if defined COMPILE_PCRE8
/* Emits the shared UTF-8 decoder routine (entered with a fast call, returns
through RETURN_ADDR). On entry STR_PTR points to the byte after the first
byte; on return it points after the last byte of the sequence. */
2913 static void do_utfreadchar(compiler_common *common)
2914 {
2915 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2916 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2917 DEFINE_COMPILER;
2918 struct sljit_jump *jump;
2919
2920 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2921 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2922 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2923 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2924 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2925 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2926
2927 /* Searching for the first zero. */
/* 0x800 is bit 5 (0x20) of the first byte after the shift: it is clear for
110xxxxx lead bytes. */
2928 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2929 jump = JUMP(SLJIT_NOT_ZERO);
2930 /* Two byte sequence. */
2931 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2932 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2933 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2934
2935 JUMPHERE(jump);
/* Remove the marker bit and append the 6 payload bits of the third byte. */
2936 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2937 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2938 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2939 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2940 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2941
/* 0x10000 is bit 4 (0x10) of the first byte after both shifts. */
2942 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2943 jump = JUMP(SLJIT_NOT_ZERO);
2944 /* Three byte sequence. */
2945 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2946 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2947 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2948
2949 /* Four byte sequence. */
2950 JUMPHERE(jump);
2951 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2952 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2953 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2955 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2956 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2957 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2958 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2959 }
2960
/* Emits the shorter UTF-8 decoder routine (entered with a fast call, returns
through RETURN_ADDR). Two and three byte sequences are decoded precisely.
For a lead byte with bit 4 (0x10) set, one more byte is skipped and the
0x400 bit is left in TMP1 before the final shift, so the returned value is
at least 0x10000. Unlike do_utfreadchar, no length is returned in TMP2. */
2961 static void do_utfreadchar16(compiler_common *common)
2962 {
2963 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2964 of the character (>= 0xc0). Return value in TMP1. */
2965 DEFINE_COMPILER;
2966 struct sljit_jump *jump;
2967
2968 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2969 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2970 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2971 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2972 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2973 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2974
2975 /* Searching for the first zero. */
2976 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2977 jump = JUMP(SLJIT_NOT_ZERO);
2978 /* Two byte sequence. */
2979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2981
2982 JUMPHERE(jump);
/* 0x400 is bit 4 (0x10) of the first byte after the shift. TMP2 becomes 1
when it is set and STR_PTR is advanced by that amount. */
2983 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2984 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
2985 /* This code runs only in 8 bit mode. No need to shift the value. */
2986 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2987 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2988 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2989 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2990 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2991 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2992 /* Three byte sequence. */
2993 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2994 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2995 }
2996
/* Emits the shared routine that reads the type of a multi-byte UTF-8
character (entered with a fast call, returns through RETURN_ADDR). On entry
STR_PTR points to the byte after the first byte; on return it points after
the sequence. The type is taken from the ctypes table for characters below
256 and is 0 for every other character. */
2997 static void do_utfreadtype8(compiler_common *common)
2998 {
2999 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3000 of the character (>= 0xc0). Return value in TMP1. */
3001 DEFINE_COMPILER;
3002 struct sljit_jump *jump;
3003 struct sljit_jump *compare;
3004
3005 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3006
/* Bit 5 (0x20) of the first byte is set for sequences longer than two bytes. */
3007 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3008 jump = JUMP(SLJIT_NOT_ZERO);
3009 /* Two byte sequence. */
3010 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3011 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3012 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3013 /* The upper 5 bits are known at this point. */
/* A value above 3 would give a character of 256 or more after the shift. */
3014 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3015 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3016 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3017 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3018 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3019 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3020
3021 JUMPHERE(compare);
3022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3023 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3024
3025 /* We only have types for characters less than 256. */
3026 JUMPHERE(jump);
/* Longer sequences: the type is 0 and STR_PTR is advanced by the
utf8_table4 entry of the first byte. */
3027 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3028 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3029 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3030 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3031 }
3032
3033 #endif /* COMPILE_PCRE8 */
3034
3035 #endif /* SUPPORT_UTF */
3036
3037 #ifdef SUPPORT_UCP
3038
3039 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3040 #define UCD_BLOCK_MASK 127
3041 #define UCD_BLOCK_SHIFT 7
3042
/* Emits the shared Unicode property lookup routine (entered with a fast
call, returns through RETURN_ADDR). It performs the two stage table lookup
ucd_stage1 -> ucd_stage2 -> ucd_records in generated code. */
3043 static void do_getucd(compiler_common *common)
3044 {
3045 /* Search the UCD record for the character comes in TMP1.
3046 Returns chartype in TMP1 and UCD offset in TMP2. */
3047 DEFINE_COMPILER;
3048
/* The shift amounts used below (7 for the block size, 3 for the record
size) rely on these values. */
3049 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3050
3051 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[c >> 7] (one byte per block). */
3052 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3053 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (stage1 value << 7) + (c & 127): index into ucd_stage2. */
3054 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3055 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3056 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], a 16 bit entry (index scaled by 2). */
3057 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3058 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of ucd_records[TMP2] (index scaled by 8). */
3059 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3060 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3061 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3062 }
3063 #endif
3064
/* Emits the entry of the main matching loop and the code that advances
STR_PTR by one character. The generated code is entered at the end (the
'start' jump bypasses the advance step), while the returned label marks the
advance step itself.
  common     compiler state
  hascrorlf  together with firstline, disables the special newline handling
             of the advance step when set
  firstline  when TRUE, code is emitted first that searches for the end of
             the first line and stores it in first_line_end
Returns the label of the advance step. */
3065 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3066 {
3067 DEFINE_COMPILER;
3068 struct sljit_label *mainloop;
3069 struct sljit_label *newlinelabel = NULL;
3070 struct sljit_jump *start;
3071 struct sljit_jump *end = NULL;
3072 struct sljit_jump *nl = NULL;
3073 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3074 struct sljit_jump *singlechar;
3075 #endif
3076 jump_list *newline = NULL;
3077 BOOL newlinecheck = FALSE;
3078 BOOL readuchar = FALSE;
3079
/* The newline aware advance step is needed only for the newline types
listed here, and only when neither flag is set. */
3080 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3081 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3082 newlinecheck = TRUE;
3083
3084 if (firstline)
3085 {
3086 /* Search for the end of the first line. */
3087 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 and restored after the search. */
3088 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3089
3090 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3091 {
/* Two unit fixed newline: scan for the pair (newline >> 8, newline & 0xff)
by comparing the units at STR_PTR - 1 and STR_PTR. */
3092 mainloop = LABEL();
3093 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3094 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3095 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3096 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3097 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3098 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3099 JUMPHERE(end);
3100 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3101 }
3102 else
3103 {
/* Other newline types: read characters one by one until a newline is found
or STR_END is reached. */
3104 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3105 mainloop = LABEL();
3106 /* Continual stores does not cause data dependency. */
3107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3108 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3109 check_newlinechar(common, common->nltype, &newline, TRUE);
3110 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3111 JUMPHERE(end);
3112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3113 set_jumps(newline, LABEL());
3114 }
3115
3116 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3117 }
3118
/* The first entry skips the advance step. */
3119 start = JUMP(SLJIT_JUMP);
3120
3121 if (newlinecheck)
3122 {
/* Reached from the advance step when the current unit equals the first
unit of the newline: STR_PTR moves past it and, if the next unit equals
the second newline unit, past that one as well. */
3123 newlinelabel = LABEL();
3124 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3125 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3126 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3127 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3128 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3129 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3130 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3131 #endif
3132 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3133 nl = JUMP(SLJIT_JUMP);
3134 }
3135
3136 mainloop = LABEL();
3137
3138 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3139 #ifdef SUPPORT_UTF
3140 if (common->utf) readuchar = TRUE;
3141 #endif
3142 if (newlinecheck) readuchar = TRUE;
3143
3144 if (readuchar)
3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3146
3147 if (newlinecheck)
3148 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3149
3150 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3151 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3152 #if defined COMPILE_PCRE8
3153 if (common->utf)
3154 {
/* Lead bytes (>= 0xc0): skip the rest of the sequence using utf8_table4. */
3155 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3156 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3157 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3158 JUMPHERE(singlechar);
3159 }
3160 #elif defined COMPILE_PCRE16
3161 if (common->utf)
3162 {
/* Units in 0xd800..0xdbff are followed by a second unit: TMP1 becomes 1 for
them (0 otherwise) and is scaled to bytes before being added to STR_PTR. */
3163 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3164 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3165 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3166 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3167 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3169 JUMPHERE(singlechar);
3170 }
3171 #endif /* COMPILE_PCRE[8|16] */
3172 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* All paths (first entry, normal advance, newline advance) join here. */
3173 JUMPHERE(start);
3174
3175 if (newlinecheck)
3176 {
3177 JUMPHERE(end);
3178 JUMPHERE(nl);
3179 }
3180
3181 return mainloop;
3182 }
3183
/* Limits of the prefix scanner below. fast_forward_first_n_chars asks
   scan_prefix for at most MAX_N_CHARS character positions, and every position
   owns a MAX_N_BYTES sized slot in the byte set array: element 0 of a slot is
   the number of stored bytes (255 = overflowed), the remaining elements are
   the bytes themselves. Both macros are undefined again after
   fast_forward_first_n_chars. */
3184 #define MAX_N_CHARS 16
3185 #define MAX_N_BYTES 8
3186
3187 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3188 {
3189 pcre_uint8 len = bytes[0];
3190 int i;
3191
3192 if (len == 255)
3193 return;
3194
3195 if (len == 0)
3196 {
3197 bytes[0] = 1;
3198 bytes[1] = byte;
3199 return;
3200 }
3201
3202 for (i = len; i > 0; i--)
3203 if (bytes[i] == byte)
3204 return;
3205
3206 if (len >= MAX_N_BYTES - 1)
3207 {
3208 bytes[0] = 255;
3209 return;
3210 }
3211
3212 len++;
3213 bytes[len] = byte;
3214 bytes[0] = len;
3215 }
3216
/* Scans the compiled pattern starting at cc and collects what is known about
   the first characters every match has to start with.

   Outputs (both are merged into, never reset, so several branches can be
   scanned over the same arrays):
     chars - two entries per character position: chars[0] is the character
             value with every "don't care" bit set, chars[1] is the mask of
             those bits. The caller pre-fills NOTACHAR / 0 to mark a position
             as untouched.
     bytes - one MAX_N_BYTES sized slot per position, see add_prefix_byte.

   max_chars is the number of positions that may still be filled in. The
   return value is the number of positions consumed by this invocation.
   Recursive calls (for the part after an optional character and for the
   alternatives of a group) assign their result to max_chars, so the scan of
   the current branch never goes further than the recursive scan did. */
3217 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3218 {
3219 /* Recursive function, which scans prefix literals. */
3220 BOOL last, any, caseless;
3221 int len, repeat, len_save, consumed = 0;
3222 pcre_uint32 chr, mask;
3223 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the encoded other case of a caseless literal. */
3224 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3225 pcre_uchar othercase[8];
3226 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3227 pcre_uchar othercase[2];
3228 #else
3229 pcre_uchar othercase[1];
3230 #endif
3231
3232 repeat = 1;
3233 while (TRUE)
3234 {
/* Per item state:
     last     - stop scanning after this item has been recorded
     any      - the item is recorded as "matches any value"
     caseless - the item is a literal which is matched caselessly */
3235 last = TRUE;
3236 any = FALSE;
3237 caseless = FALSE;
3238 switch (*cc)
3239 {
3240 case OP_CHARI:
3241 caseless = TRUE;
/* Fall through. */
3242 case OP_CHAR:
3243 last = FALSE;
3244 cc++;
3245 break;
3246
3247 case OP_SOD:
3248 case OP_SOM:
3249 case OP_SET_SOM:
3250 case OP_NOT_WORD_BOUNDARY:
3251 case OP_WORD_BOUNDARY:
3252 case OP_EODN:
3253 case OP_EOD:
3254 case OP_CIRC:
3255 case OP_CIRCM:
3256 case OP_DOLL:
3257 case OP_DOLLM:
3258 /* Zero width assertions. */
3259 cc++;
3260 continue;
3261
/* Assertion groups are skipped as a whole. */
3262 case OP_ASSERT:
3263 case OP_ASSERT_NOT:
3264 case OP_ASSERTBACK:
3265 case OP_ASSERTBACK_NOT:
3266 cc = bracketend(cc);
3267 continue;
3268
/* "One or more" repeats: the literal is recorded once and, since last
   keeps its TRUE value, the scan ends after it. */
3269 case OP_PLUSI:
3270 case OP_MINPLUSI:
3271 case OP_POSPLUSI:
3272 caseless = TRUE;
/* Fall through. */
3273 case OP_PLUS:
3274 case OP_MINPLUS:
3275 case OP_POSPLUS:
3276 cc++;
3277 break;
3278
/* Exact repeat: the literal is recorded "repeat" times. */
3279 case OP_EXACTI:
3280 caseless = TRUE;
/* Fall through. */
3281 case OP_EXACT:
3282 repeat = GET2(cc, 1);
3283 last = FALSE;
3284 cc += 1 + IMM2_SIZE;
3285 break;
3286
/* Optional literal: first scan the code after the literal (the literal is
   absent) into the same positions, then break out of the switch to record
   the literal itself on top of that. */
3287 case OP_QUERYI:
3288 case OP_MINQUERYI:
3289 case OP_POSQUERYI:
3290 caseless = TRUE;
/* Fall through. */
3291 case OP_QUERY:
3292 case OP_MINQUERY:
3293 case OP_POSQUERY:
3294 len = 1;
3295 cc++;
3296 #ifdef SUPPORT_UTF
3297 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3298 #endif
3299 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3300 if (max_chars == 0)
3301 return consumed;
3302 last = FALSE;
3303 break;
3304
/* OP_KET is stepped over; OP_ALT jumps ahead by its link value. */
3305 case OP_KET:
3306 cc += 1 + LINK_SIZE;
3307 continue;
3308
3309 case OP_ALT:
3310 cc += GET(cc, 1);
3311 continue;
3312
/* Groups: every further alternative is scanned recursively into the same
   positions, then scanning continues inside the first alternative. */
3313 case OP_ONCE:
3314 case OP_ONCE_NC:
3315 case OP_BRA:
3316 case OP_BRAPOS:
3317 case OP_CBRA:
3318 case OP_CBRAPOS:
3319 alternative = cc + GET(cc, 1);
3320 while (*alternative == OP_ALT)
3321 {
3322 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3323 if (max_chars == 0)
3324 return consumed;
3325 alternative += GET(alternative, 1);
3326 }
3327
/* Capturing brackets carry an extra IMM2_SIZE sized field after the link. */
3328 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3329 cc += IMM2_SIZE;
3330 cc += 1 + LINK_SIZE;
3331 continue;
3332
/* Character classes and character types are recorded as "any value". In
   UTF mode of the 8 bit library this is only done when is_char7_bitset
   accepts the bit set, otherwise the scan stops here. */
3333 case OP_CLASS:
3334 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3335 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3336 #endif
3337 any = TRUE;
3338 cc += 1 + 32 / sizeof(pcre_uchar);
3339 break;
3340
3341 case OP_NCLASS:
3342 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3343 if (common->utf) return consumed;
3344 #endif
3345 any = TRUE;
3346 cc += 1 + 32 / sizeof(pcre_uchar);
3347 break;
3348
3349 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3350 case OP_XCLASS:
3351 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3352 if (common->utf) return consumed;
3353 #endif
3354 any = TRUE;
3355 cc += GET(cc, 1);
3356 break;
3357 #endif
3358
3359 case OP_DIGIT:
3360 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3361 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3362 return consumed;
3363 #endif
3364 any = TRUE;
3365 cc++;
3366 break;
3367
3368 case OP_WHITESPACE:
3369 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3370 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3371 return consumed;
3372 #endif
3373 any = TRUE;
3374 cc++;
3375 break;
3376
3377 case OP_WORDCHAR:
3378 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3379 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3380 return consumed;
3381 #endif
3382 any = TRUE;
3383 cc++;
3384 break;
3385
/* Negated literal: the extra increment steps over one more code unit than
   the character types below. */
3386 case OP_NOT:
3387 case OP_NOTI:
3388 cc++;
3389 /* Fall through. */
3390 case OP_NOT_DIGIT:
3391 case OP_NOT_WHITESPACE:
3392 case OP_NOT_WORDCHAR:
3393 case OP_ANY:
3394 case OP_ALLANY:
3395 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3396 if (common->utf) return consumed;
3397 #endif
3398 any = TRUE;
3399 cc++;
3400 break;
3401
3402 #ifdef SUPPORT_UCP
3403 case OP_NOTPROP:
3404 case OP_PROP:
3405 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3406 if (common->utf) return consumed;
3407 #endif
3408 any = TRUE;
3409 cc += 1 + 2;
3410 break;
3411 #endif
3412
/* Exact repeat of a character type: only the count is stored here, the
   type opcode that follows is processed by the next iteration. */
3413 case OP_TYPEEXACT:
3414 repeat = GET2(cc, 1);
3415 cc += 1 + IMM2_SIZE;
3416 continue;
3417
3418 case OP_NOTEXACT:
3419 case OP_NOTEXACTI:
3420 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3421 if (common->utf) return consumed;
3422 #endif
3423 any = TRUE;
3424 repeat = GET2(cc, 1);
3425 cc += 1 + IMM2_SIZE + 1;
3426 break;
3427
/* Every other opcode ends the prefix. */
3428 default:
3429 return consumed;
3430 }
3431
3432 if (any)
3433 {
/* An "any value" position: all bits of the code unit are "don't care". */
3434 #if defined COMPILE_PCRE8
3435 mask = 0xff;
3436 #elif defined COMPILE_PCRE16
3437 mask = 0xffff;
3438 #elif defined COMPILE_PCRE32
3439 mask = 0xffffffff;
3440 #else
3441 SLJIT_ASSERT_STOP();
3442 #endif
3443
/* Mark "repeat" consecutive positions as unconstrained; the byte set is
   put into the overflowed state (255). */
3444 do
3445 {
3446 chars[0] = mask;
3447 chars[1] = mask;
3448 bytes[0] = 255;
3449
3450 consumed++;
3451 if (--max_chars == 0)
3452 return consumed;
3453 chars += 2;
3454 bytes += MAX_N_BYTES;
3455 }
3456 while (--repeat > 0);
3457
3458 repeat = 1;
3459 continue;
3460 }
3461
/* From here on the item is a literal and cc points to its first code unit.
   len is the number of code units of the literal. */
3462 len = 1;
3463 #ifdef SUPPORT_UTF
3464 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3465 #endif
3466
3467 if (caseless && char_has_othercase(common, cc))
3468 {
3469 #ifdef SUPPORT_UTF
3470 if (common->utf)
3471 {
/* The other case is only usable when its encoding has the same length. */
3472 GETCHAR(chr, cc);
3473 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3474 return consumed;
3475 }
3476 else
3477 #endif
3478 {
3479 chr = *cc;
3480 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3481 }
3482 }
3483 else
3484 caseless = FALSE;
3485
/* Remember the literal so that it can be replayed "repeat" times. */
3486 len_save = len;
3487 cc_save = cc;
3488 while (TRUE)
3489 {
3490 oc = othercase;
/* One iteration per code unit of the literal; each code unit occupies one
   position of the output arrays. */
3491 do
3492 {
3493 chr = *cc;
3494 #ifdef COMPILE_PCRE32
3495 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3496 return consumed;
3497 #endif
3498 add_prefix_byte((pcre_uint8)chr, bytes);
3499
3500 mask = 0;
3501 if (caseless)
3502 {
/* The bits in which the two cases differ become "don't care" bits. */
3503 add_prefix_byte((pcre_uint8)*oc, bytes);
3504 mask = *cc ^ *oc;
3505 chr |= mask;
3506 }
3507
/* An untouched position (NOTACHAR, see the function header) simply takes
   the new value. In the 32 bit library chars[1] is tested as well,
   presumably because an "any value" position stores 0xffffffff in chars[0]
   too - NOTE(review): confirm against the definition of NOTACHAR. */
3508 #ifdef COMPILE_PCRE32
3509 if (chars[0] == NOTACHAR && chars[1] == 0)
3510 #else
3511 if (chars[0] == NOTACHAR)
3512 #endif
3513 {
3514 chars[0] = chr;
3515 chars[1] = mask;
3516 }
3517 else
3518 {
/* Merge with the value recorded by another branch: every bit in which the
   two values differ is added to the mask. */
3519 mask |= chars[0] ^ chr;
3520 chr |= mask;
3521 chars[0] = chr;
3522 chars[1] |= mask;
3523 }
3524
3525 len--;
3526 consumed++;
3527 if (--max_chars == 0)
3528 return consumed;
3529 chars += 2;
3530 bytes += MAX_N_BYTES;
3531 cc++;
3532 oc++;
3533 }
3534 while (len > 0);
3535
3536 if (--repeat == 0)
3537 break;
3538
/* Replay the same literal for the next repetition. */
3539 len = len_save;
3540 cc = cc_save;
3541 }
3542
3543 repeat = 1;
3544 if (last)
3545 return consumed;
3546 }
3547 }
3548
/* Emits code which moves STR_PTR forward to the next position where the
   prefix collected by scan_prefix (at most MAX_N_CHARS positions) may match.

   Two checks can be generated, alone or together:
     - a 256 entry skip table (update_table) indexed by one subject byte at
       position range_right. It is built from a stretch of more than two
       consecutive positions whose byte sets did not overflow.
     - a direct comparison of up to three positions (offsets[0..2]) whose
       "don't care" mask has at most two bits set.

   When firstline is TRUE the search is limited by the pointer stored at
   common->first_line_end.

   Returns FALSE, without emitting anything, when the prefix is too short or
   no usable check was found. Returns TRUE otherwise; note that TRUE is also
   returned when the skip table cannot be allocated. */
3549 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3550 {
3551 DEFINE_COMPILER;
3552 struct sljit_label *start;
3553 struct sljit_jump *quit;
3554 pcre_uint32 chars[MAX_N_CHARS * 2];
3555 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3556 pcre_uint8 ones[MAX_N_CHARS];
3557 int offsets[3];
3558 pcre_uint32 mask;
3559 pcre_uint8 *byte_set, *byte_set_end;
3560 int i, max, from;
3561 int range_right = -1, range_len = 3 - 1;
3562 sljit_ub *update_table = NULL;
3563 BOOL in_range;
3564
/* Mark every position as untouched and every byte set as empty. */
3565 for (i = 0; i < MAX_N_CHARS; i++)
3566 {
3567 chars[i << 1] = NOTACHAR;
3568 chars[(i << 1) + 1] = 0;
3569 bytes[i * MAX_N_BYTES] = 0;
3570 }
3571
3572 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3573
3574 if (max <= 1)
3575 return FALSE;
3576
/* ones[i] is the number of set bits in the mask of position i, computed
   four bits at a time with the ones_in_half_byte table. */
3577 for (i = 0; i < max; i++)
3578 {
3579 mask = chars[(i << 1) + 1];
3580 ones[i] = ones_in_half_byte[mask & 0xf];
3581 mask >>= 4;
3582 while (mask != 0)
3583 {
3584 ones[i] += ones_in_half_byte[mask & 0xf];
3585 mask >>= 4;
3586 }
3587 }
3588
/* Search for the stretch of positions used by the skip table. A stretch
   from..i-1 consists of positions whose byte set did not overflow; it
   replaces the best one found so far when it is longer (at least three
   positions because of the initial range_len) and the byte set of its last
   position has at most four entries. range_right is the index of the last
   position of the chosen stretch. The loop runs up to i == max so that a
   stretch reaching the end of the prefix is evaluated as well. */
3589 in_range = FALSE;
3590 from = 0; /* Prevent compiler "uninitialized" warning */
3591 for (i = 0; i <= max; i++)
3592 {
3593 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3594 {
3595 range_len = i - from;
3596 range_right = i - 1;
3597 }
3598
3599 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3600 {
3601 if (!in_range)
3602 {
3603 in_range = TRUE;
3604 from = i;
3605 }
3606 }
3607 else if (in_range)
3608 in_range = FALSE;
3609 }
3610
3611 if (range_right >= 0)
3612 {
/* Build the skip table. Every entry starts as the full stretch length (in
   bytes) and is lowered to IN_UCHARS(i) for each byte which can occur i
   positions to the left of range_right. An entry of zero therefore belongs
   to a byte that is acceptable at range_right itself. */
3613 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3614 if (update_table == NULL)
3615 return TRUE;
3616 memset(update_table, IN_UCHARS(range_len), 256);
3617
3618 for (i = 0; i < range_len; i++)
3619 {
3620 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3621 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3622 byte_set_end = byte_set + byte_set[0];
3623 byte_set++;
3624 while (byte_set <= byte_set_end)
3625 {
3626 if (update_table[*byte_set] > IN_UCHARS(i))
3627 update_table[*byte_set] = IN_UCHARS(i);
3628 byte_set++;
3629 }
3630 }
3631 }
3632
/* offsets[0]: the first position whose mask has at most two bits set. */
3633 offsets[0] = -1;
3634 /* Scan forward. */
3635 for (i = 0; i < max; i++)
3636 if (ones[i] <= 2) {
3637 offsets[0] = i;
3638 break;
3639 }
3640
3641 if (offsets[0] < 0 && range_right < 0)
3642 return FALSE;
3643
3644 if (offsets[0] >= 0)
3645 {
/* offsets[1]: the last such position after offsets[0], excluding the
   position already tested through the skip table. */
3646 /* Scan backward. */
3647 offsets[1] = -1;
3648 for (i = max - 1; i > offsets[0]; i--)
3649 if (ones[i] <= 2 && i != range_right)
3650 {
3651 offsets[1] = i;
3652 break;
3653 }
3654
3655 /* This case is handled better by fast_forward_first_char. */
3656 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3657 return FALSE;
3658
3659 offsets[2] = -1;
3660 /* We only search for a middle character if there is no range check. */
3661 if (offsets[1] >= 0 && range_right == -1)
3662 {
3663 /* Scan from middle. */
3664 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3665 if (ones[i] <= 2)
3666 {
3667 offsets[2] = i;
3668 break;
3669 }
3670
3671 if (offsets[2] == -1)
3672 {
3673 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3674 if (ones[i] <= 2)
3675 {
3676 offsets[2] = i;
3677 break;
3678 }
3679 }
3680 }
3681
3682 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3683 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3684
/* Move the value / mask pairs of the selected positions to the front of
   chars: [0,1] belongs to offsets[0], [2,3] to offsets[2] and [4,5] to
   offsets[1]. */
3685 chars[0] = chars[offsets[0] << 1];
3686 chars[1] = chars[(offsets[0] << 1) + 1];
3687 if (offsets[2] >= 0)
3688 {
3689 chars[2] = chars[offsets[2] << 1];
3690 chars[3] = chars[(offsets[2] << 1) + 1];
3691 }
3692 if (offsets[1] >= 0)
3693 {
3694 chars[4] = chars[offsets[1] << 1];
3695 chars[5] = chars[(offsets[1] << 1) + 1];
3696 }
3697 }
3698
/* From now on max is the largest offset that the emitted code may read.
   STR_END is lowered by that many characters for the duration of the
   search; with firstline it is additionally limited by first_line_end and
   its original value is kept in TMP3. */
3699 max -= 1;
3700 if (firstline)
3701 {
3702 SLJIT_ASSERT(common->first_line_end != 0);
3703 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3704 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3705 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3706 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3707 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3708 JUMPHERE(quit);
3709 }
3710 else
3711 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3712
/* Except on 32 bit x86 the table address is kept in RETURN_ADDR. */
3713 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3714 if (range_right >= 0)
3715 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3716 #endif
3717
/* Head of the search loop: leave when the (lowered) end is reached. */
3718 start = LABEL();
3719 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3720
3721 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3722
3723 if (range_right >= 0)
3724 {
/* Load one byte of the character at range_right. In the big endian
   16 / 32 bit case the byte at the highest address of the code unit is
   loaded instead of the one at the lowest address. */
3725 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3726 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3727 #else
3728 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3729 #endif
3730
/* Look up the skip distance, advance by it, and retry from the loop head
   unless the distance is zero. */
3731 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3732 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3733 #else
3734 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3735 #endif
3736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3737 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3738 }
3739
3740 if (offsets[0] >= 0)
3741 {
/* Load the characters at offsets[0] and offsets[1], then step STR_PTR
   forward by one character so that a failed comparison can jump straight
   back to the loop head. */
3742 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3743 if (offsets[1] >= 0)
3744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3746
/* Each comparison ORs the "don't care" bits into the subject character
   first (when there are any) and compares with the stored value. */
3747 if (chars[1] != 0)
3748 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3749 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been advanced, hence the offset is one less. */
3750 if (offsets[2] >= 0)
3751 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3752
3753 if (offsets[1] >= 0)
3754 {
3755 if (chars[5] != 0)
3756 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3757 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3758 }
3759
3760 if (offsets[2] >= 0)
3761 {
3762 if (chars[3] != 0)
3763 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3764 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3765 }
/* Every comparison succeeded: undo the early increment of STR_PTR. */
3766 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3767 }
3768
3769 JUMPHERE(quit);
3770
/* Restore STR_END. With firstline and a skip table, STR_PTR is also
   clamped so that it does not end up beyond first_line_end. */
3771 if (firstline)
3772 {
3773 if (range_right >= 0)
3774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3775 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3776 if (range_right >= 0)
3777 {
3778 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3779 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3780 JUMPHERE(quit);
3781 }
3782 }
3783 else
3784 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3785 return TRUE;
3786 }
3787
3788 #undef MAX_N_CHARS
3789 #undef MAX_N_BYTES
3790
/* Emits a loop which advances STR_PTR one character unit at a time until the
   unit at STR_PTR equals first_char (or, when caseless is TRUE, its other
   case) or until STR_PTR reaches STR_END.

   When firstline is TRUE, STR_END is replaced by the pointer stored at
   common->first_line_end while the loop runs; the original value is kept in
   TMP3 and restored at the end. */
3791 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3792 {
3793 DEFINE_COMPILER;
3794 struct sljit_label *start;
3795 struct sljit_jump *quit;
3796 struct sljit_jump *found;
3797 pcre_uchar oc, bit;
3798
3799 if (firstline)
3800 {
3801 SLJIT_ASSERT(common->first_line_end != 0);
3802 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3803 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3804 }
3805
/* Loop head: leave at the end of the subject, otherwise load the current
   character unit into TMP1. */
3806 start = LABEL();
3807 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3808 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3809
/* oc is the other case of first_char; it stays equal to first_char when
   the search is caseful. */
3810 oc = first_char;
3811 if (caseless)
3812 {
3813 oc = TABLE_GET(first_char, common->fcc, first_char);
3814 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3815 if (first_char > 127 && common->utf)
3816 oc = UCD_OTHERCASE(first_char);
3817 #endif
3818 }
3819 if (first_char == oc)
3820 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3821 else
3822 {
3823 bit = first_char ^ oc;
3824 if (is_powerof2(bit))
3825 {
/* The two cases differ in a single bit: force that bit on in a copy of
   the subject character and do one comparison. */
3826 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3827 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3828 }
3829 else
3830 {
/* General case: TMP2 = (TMP1 == first_char) | (TMP1 == oc), computed from
   the flags of two subtractions. */
3831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3832 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3833 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3834 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3835 found = JUMP(SLJIT_NOT_ZERO);
3836 }
3837 }
3838
/* No match at this position: step forward and try again. */
3839 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3840 JUMPTO(SLJIT_JUMP, start);
3841 JUMPHERE(found);
3842 JUMPHERE(quit);
3843
3844 if (firstline)
3845 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3846 }
3847
/* Emits code which moves STR_PTR forward until it follows a newline. Nothing
   is moved when STR_PTR is not beyond the pointer loaded from the str field
   of jit_arguments.

   Two code shapes are generated:
     - for a fixed newline with common->newline > 255 (a two unit sequence)
       a dedicated loop compares the two units before STR_PTR;
     - otherwise the previous character is re-read and tested with
       check_newlinechar, with CR followed by NL treated as one newline for
       the NLTYPE_ANY / NLTYPE_ANYCRLF types.

   When firstline is TRUE, STR_END is replaced by the pointer stored at
   common->first_line_end and restored from TMP3 before leaving. */
3848 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3849 {
3850 DEFINE_COMPILER;
3851 struct sljit_label *loop;
3852 struct sljit_jump *lastchar;
3853 struct sljit_jump *firstchar;
3854 struct sljit_jump *quit;
3855 struct sljit_jump *foundcr = NULL;
3856 struct sljit_jump *notfoundnl;
3857 jump_list *newline = NULL;
3858
3859 if (firstline)
3860 {
3861 SLJIT_ASSERT(common->first_line_end != 0);
3862 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3863 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3864 }
3865
3866 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3867 {
/* Nothing to do at the end of the subject, or when STR_PTR is not beyond
   the str pointer of the arguments. */
3868 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3869 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3870 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3871 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3872 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3873
/* Step back by one character when STR_PTR is at least two characters
   after begin; the loop below starts by stepping forward again. */
3874 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3875 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3876 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3877 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3878 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3879 #endif
3880 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3881
/* Advance until the two units before STR_PTR are the two newline units
   (high byte of common->newline first), or the end is reached. */
3882 loop = LABEL();
3883 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3884 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3885 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3886 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3887 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3888 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3889
3890 JUMPHERE(quit);
3891 JUMPHERE(firstchar);
3892 JUMPHERE(lastchar);
3893
3894 if (firstline)
3895 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3896 return;
3897 }
3898
/* Generic newline types: skip everything when STR_PTR is not beyond the
   str pointer, otherwise step back one character so that the loop re-reads
   the character before the current position. */
3899 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3901 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3902 skip_char_back(common);
3903
/* The loop label is also published in common->ff_newline_shortcut. */
3904 loop = LABEL();
3905 common->ff_newline_shortcut = loop;
3906
/* Read a character and leave at the end of the subject. The jumps that
   check_newlinechar adds to the "newline" list are bound to the loop head;
   with the FALSE argument these are presumably the "not a newline" exits -
   NOTE(review): confirm against check_newlinechar. */
3907 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3908 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3909 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3910 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3911 check_newlinechar(common, common->nltype, &newline, FALSE);
3912 set_jumps(newline, loop);
3913
3914 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3915 {
/* A CR was found: when the next unit is NL, step over it as well.
   TMP1 becomes 1 (scaled to the unit size) only in that case. */
3916 quit = JUMP(SLJIT_JUMP);
3917 JUMPHERE(foundcr);
3918 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3919 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3920 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3921 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3922 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3923 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3924 #endif
3925 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3926 JUMPHERE(notfoundnl);
3927 JUMPHERE(quit);
3928 }
3929 JUMPHERE(lastchar);
3930 JUMPHERE(firstchar);
3931
3932 if (firstline)
3933 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3934 }
3935
3936 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3937
/* Emits a loop which advances STR_PTR until the character unit at STR_PTR is
   accepted by the start_bits bit map (one bit per value, 0..255) or until
   STR_PTR reaches STR_END.

   The test is generated by check_class_ranges when it can express the bit map
   as a few ranges; otherwise an explicit bit map lookup is emitted. In UTF
   mode a rejected character is skipped as a whole (lead unit plus its extra
   units).

   When firstline is TRUE, STR_END is replaced by the pointer stored at
   common->first_line_end; the original value is kept in RETURN_ADDR. */
3938 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3939 {
3940 DEFINE_COMPILER;
3941 struct sljit_label *start;
3942 struct sljit_jump *quit;
3943 struct sljit_jump *found = NULL;
3944 jump_list *matches = NULL;
3945 #ifndef COMPILE_PCRE8
3946 struct sljit_jump *jump;
3947 #endif
3948
3949 if (firstline)
3950 {
3951 SLJIT_ASSERT(common->first_line_end != 0);
3952 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3953 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3954 }
3955
3956 start = LABEL();
3957 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3958 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The tests below overwrite TMP1, but the UTF skipping code needs the
   original unit again, so a copy is kept in TMP3. */
3959 #ifdef SUPPORT_UTF
3960 if (common->utf)
3961 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3962 #endif
3963
/* The last bit of the map (value 255) is passed as the nclass argument and
   the test is inverted, so the jumps collected in "matches" are taken for
   accepted characters. */
3964 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3965 {
/* Bit map lookup. Values above 255 are replaced by 255 in the 16 / 32 bit
   libraries. TMP1 = start_bits[chr >> 3], TMP2 = 1 << (chr & 7). */
3966 #ifndef COMPILE_PCRE8
3967 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3969 JUMPHERE(jump);
3970 #endif
3971 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3972 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3973 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3974 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3975 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3976 found = JUMP(SLJIT_NOT_ZERO);
3977 }
3978
/* Not accepted: skip this character and go back to the loop head. */
3979 #ifdef SUPPORT_UTF
3980 if (common->utf)
3981 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3982 #endif
3983 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3984 #ifdef SUPPORT_UTF
3985 #if defined COMPILE_PCRE8
3986 if (common->utf)
3987 {
/* UTF-8: for units >= 0xc0 the number of extra bytes is read from
   utf8_table4 and skipped as well. */
3988 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3989 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3990 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3991 }
3992 #elif defined COMPILE_PCRE16
3993 if (common->utf)
3994 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 one more unit (two bytes) is
   skipped. */
3995 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3996 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3997 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3998 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3999 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4000 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4001 }
4002 #endif /* COMPILE_PCRE[8|16] */
4003 #endif /* SUPPORT_UTF */
4004 JUMPTO(SLJIT_JUMP, start);
/* Exit points: an accepted character or the end of the subject. */
4005 if (found != NULL)
4006 JUMPHERE(found);
4007 if (matches != NULL)
4008 set_jumps(matches, LABEL());
4009 JUMPHERE(quit);
4010
4011 if (firstline)
4012 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4013 }
4014
/* Emits code which checks that req_char (or its other case when caseless is
   TRUE) occurs in the subject at or after the current position; the search
   starts one character later when has_firstchar is TRUE.

   The position where the character was found is cached in the local variable
   at common->req_char_ptr, and the search is skipped while STR_PTR is still
   below the cached position. It is also skipped when STR_PTR + REQ_BYTE_MAX
   is below STR_END.

   Returns the jump which is taken when the character is not found before
   STR_END; the jump is left unbound here, so binding it is up to the
   caller. TMP1 and TMP2 are overwritten by the emitted code. */
4015 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4016 {
4017 DEFINE_COMPILER;
4018 struct sljit_label *loop;
4019 struct sljit_jump *toolong;
4020 struct sljit_jump *alreadyfound;
4021 struct sljit_jump *found;
4022 struct sljit_jump *foundoc = NULL;
4023 struct sljit_jump *notfound;
4024 pcre_uint32 oc, bit;
4025
4026 SLJIT_ASSERT(common->req_char_ptr != 0);
/* Skip conditions: too much subject left, or a cached hit ahead of us. */
4027 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4028 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4029 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4030 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4031
/* TMP1 is the search cursor. */
4032 if (has_firstchar)
4033 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4034 else
4035 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4036
4037 loop = LABEL();
4038 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4039
4040 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the
   search is caseful. */
4041 oc = req_char;
4042 if (caseless)
4043 {
4044 oc = TABLE_GET(req_char, common->fcc, req_char);
4045 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4046 if (req_char > 127 && common->utf)
4047 oc = UCD_OTHERCASE(req_char);
4048 #endif
4049 }
4050 if (req_char == oc)
4051 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4052 else
4053 {
4054 bit = req_char ^ oc;
4055 if (is_powerof2(bit))
4056 {
/* The two cases differ in a single bit: force it on and compare once. */
4057 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4058 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4059 }
4060 else
4061 {
/* Otherwise both cases are compared separately. */
4062 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4063 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4064 }
4065 }
4066 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4067 JUMPTO(SLJIT_JUMP, loop);
4068
/* Found: remember the position for later calls. */
4069 JUMPHERE(found);
4070 if (foundoc)
4071 JUMPHERE(foundoc);
4072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4073 JUMPHERE(alreadyfound);
4074 JUMPHERE(toolong);
4075 return notfound;
4076 }
4077
/* Emits a helper routine (entered with a fast call, return address kept in
   RETURN_ADDR) which walks the frame records starting at STACK_TOP and writes
   the saved values back to the local variables they came from.

   Each record starts with a signed word, used as an offset from the local
   base held in TMP3:
     > 0 : two saved words follow; they are stored at offset and
           offset + sizeof(sljit_sw). The record is three words long.
     < 0 : one saved word follows; it is stored at -offset. The record is two
           words long.
     = 0 : end of the records, the routine returns.
   TMP1 is the read cursor; TMP2 and TMP3 are overwritten as well. */
4078 static void do_revertframes(compiler_common *common)
4079 {
4080 DEFINE_COMPILER;
4081 struct sljit_jump *jump;
4082 struct sljit_label *mainloop;
4083
4084 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4085 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4086 GET_LOCAL_BASE(TMP3, 0, 0);
4087
4088 /* Drop frames until we reach STACK_TOP. */
4089 mainloop = LABEL();
4090 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the record header with zero. */
4091 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4092 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4093
/* Positive header: restore two words. */
4094 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4095 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4096 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4097 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4098 JUMPTO(SLJIT_JUMP, mainloop);
4099
/* No instruction has been emitted on this path since the comparison, so
   its flags are tested a second time to separate "negative" from "zero". */
4100 JUMPHERE(jump);
4101 jump = JUMP(SLJIT_SIG_LESS);
4102 /* End of dropping frames. */
4103 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4104
/* Negative header: restore a single word. */
4105 JUMPHERE(jump);
4106 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4107 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4108 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4109 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4110 JUMPTO(SLJIT_JUMP, mainloop);
4111 }
4112
/* Emits a helper routine (entered with a fast call, return address kept in
   LOCALS0) which tests whether STR_PTR is at a word boundary.

   The routine computes two 0 / 1 values: whether the character before
   STR_PTR is a word character (stored in LOCALS1; 0 when STR_PTR is not
   beyond the begin pointer of the arguments) and whether the character at
   STR_PTR is a word character (TMP2; 0 when check_str_end takes its exit).
   The last instruction XORs the two values with SLJIT_SET_E, so the result
   is delivered to the caller in the flags.

   With common->use_ucp a word character is an underscore or a character
   whose type returned by the getucd helper is in the ucp_Ll..ucp_Lu or
   ucp_Nd..ucp_No range; otherwise the ctype_word bit of common->ctypes is
   used and characters above 255 are not word characters. */
4113 static void check_wordboundary(compiler_common *common)
4114 {
4115 DEFINE_COMPILER;
4116 struct sljit_jump *skipread;
4117 jump_list *skipread_list = NULL;
4118 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4119 struct sljit_jump *jump;
4120 #endif
4121
/* The shift by 4 below relies on this value. */
4122 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4123
4124 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4125 /* Get type of the previous char, and put it to LOCALS1. */
4126 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4127 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4128 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4129 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; read_char moves STR_PTR
   forward, so it is presumably back at the tested position afterwards -
   NOTE(review): confirm against skip_char_back / read_char. */
4130 skip_char_back(common);
4131 check_start_used_ptr(common);
4132 read_char(common);
4133
4134 /* Testing char type. */
4135 #ifdef SUPPORT_UCP
4136 if (common->use_ucp)
4137 {
/* TMP2 is preset to 1 for the underscore shortcut; otherwise it becomes
   the OR of two unsigned range tests on the character type. */
4138 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4139 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4140 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4141 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4142 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4143 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4144 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4145 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4146 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4147 JUMPHERE(jump);
4148 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4149 }
4150 else
4151 #endif
4152 {
/* Table lookup: characters above 255 skip the lookup and keep the zero
   already stored in LOCALS1. */
4153 #ifndef COMPILE_PCRE8
4154 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4155 #elif defined SUPPORT_UTF
4156 /* Here LOCALS1 has already been zeroed. */
4157 jump = NULL;
4158 if (common->utf)
4159 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4160 #endif /* COMPILE_PCRE8 */
4161 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4162 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4163 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4165 #ifndef COMPILE_PCRE8
4166 JUMPHERE(jump);
4167 #elif defined SUPPORT_UTF
4168 if (jump != NULL)
4169 JUMPHERE(jump);
4170 #endif /* COMPILE_PCRE8 */
4171 }
4172 JUMPHERE(skipread);
4173
/* Type of the character at STR_PTR, left in TMP2. TMP2 is zero when
   check_str_end jumps to the end of this section. */
4174 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4175 check_str_end(common, &skipread_list);
4176 peek_char(common, READ_CHAR_MAX);
4177
4178 /* Testing char type. This is a code duplication. */
4179 #ifdef SUPPORT_UCP
4180 if (common->use_ucp)
4181 {
4182 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4183 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4184 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4185 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4186 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4187 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4188 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4189 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4190 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4191 JUMPHERE(jump);
4192 }
4193 else
4194 #endif
4195 {
4196 #ifndef COMPILE_PCRE8
4197 /* TMP2 may be destroyed by peek_char. */
4198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4199 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4200 #elif defined SUPPORT_UTF
4201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4202 jump = NULL;
4203 if (common->utf)
4204 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4205 #endif
4206 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4207 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4208 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4209 #ifndef COMPILE_PCRE8
4210 JUMPHERE(jump);
4211 #elif defined SUPPORT_UTF
4212 if (jump != NULL)
4213 JUMPHERE(jump);
4214 #endif /* COMPILE_PCRE8 */
4215 }
4216 set_jumps(skipread_list, LABEL());
4217
/* The result is returned in the flags: they describe the XOR of the two
   word character values. */
4218 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4219 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4220 }
4221
4222 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4223 {
4224 DEFINE_COMPILER;
4225 int ranges[MAX_RANGE_SIZE];
4226 pcre_uint8 bit, cbit, all;
4227 int i, byte, length = 0;
4228
4229 bit = bits[0] & 0x1;
4230 /* All bits will be zero or one (since bit is zero or one). */
4231 all = -bit;
4232
4233 for (i = 0; i < 256; )
4234 {
4235 byte = i >> 3;
4236 if ((i & 0x7) == 0 && bits[byte] == all)
4237 i += 8;
4238 else
4239 {
4240 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4241 if (cbit != bit)
4242 {
4243 if (length >= MAX_RANGE_SIZE)
4244 return FALSE;
4245 ranges[length] = i;
4246 length++;
4247 bit = cbit;
4248 all = -cbit;
4249 }
4250 i++;
4251 }
4252 }
4253
4254 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4255 {
4256 if (length >= MAX_RANGE_SIZE)
4257 return FALSE;
4258 ranges[length] = 256;
4259 length++;
4260 }
4261
4262 if (length < 0 || length > 4)
4263 return FALSE;
4264
4265 bit = bits[0] & 0x1;
4266 if (invert) bit ^= 0x1;
4267
4268 /* No character is accepted. */
4269 if (length == 0 && bit == 0)
4270 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4271
4272 switch(length)
4273 {
4274 case 0:
4275 /* When bit != 0, all characters are accepted. */
4276 return TRUE;
4277
4278 case 1:
4279 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4280 return TRUE;
4281
4282 case 2:
4283 if (ranges[0] + 1 != ranges[1])
4284 {
4285 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4286 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4287 }
4288 else
4289 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4290 return TRUE;
4291
4292 case 3:
4293 if (bit != 0)
4294 {
4295 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4296 if (ranges[0] + 1 != ranges[1])
4297 {
4298 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4299 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4300 }
4301 else
4302 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4303 return TRUE;
4304 }
4305
4306 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4307 if (ranges[1] + 1 != ranges[2])
4308 {
4309 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4310 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4311 }
4312 else
4313 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4314 return TRUE;
4315
4316 case 4:
4317 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4318 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4319 && is_powerof2(ranges[2] - ranges[0]))
4320 {
4321 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4322 if (ranges[2] + 1 != ranges[3])
4323 {
4324 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4325 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4326 }
4327 else
4328 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4329 return TRUE;
4330 }
4331
4332 if (bit != 0)
4333 {
4334 i = 0;
4335 if (ranges[0] + 1 != ranges[1])
4336 {
4337 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4338 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4339 i = ranges[0];
4340 }
4341 else
4342 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4343
4344 if (ranges[2] + 1 != ranges[3])
4345 {
4346 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4347 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4348 }
4349 else
4350 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4351 return TRUE;
4352 }
4353
4354 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4355 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4356 if (ranges[1] + 1 != ranges[2])
4357 {
4358 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4359 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4360 }
4361 else
4362 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4363 return TRUE;
4364
4365 default:
4366 SLJIT_ASSERT_STOP();
4367 return FALSE;
4368 }
4369 }
4370
4371 static void check_anynewline(compiler_common *common)
4372 {
4373 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4374 DEFINE_COMPILER;
4375
4376 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4377
4378 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4379 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4380 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4381 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4382 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4383 #ifdef COMPILE_PCRE8
4384 if (common->utf)
4385 {
4386 #endif
4387 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4388 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4389 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4390 #ifdef COMPILE_PCRE8
4391 }
4392 #endif
4393 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4394 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4395 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4396 }
4397
4398 static void check_hspace(compiler_common *common)
4399 {
4400 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4401 DEFINE_COMPILER;
4402
4403 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4404
4405 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4406 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4407 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4408 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4409 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4410 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4411 #ifdef COMPILE_PCRE8
4412 if (common->utf)
4413 {
4414 #endif
4415 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4416 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4417 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4418 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4419 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4420 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4421 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4422 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4423 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4424 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4425 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4426 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4427 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4428 #ifdef COMPILE_PCRE8
4429 }
4430 #endif
4431 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4432 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4433
4434 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4435 }
4436
4437 static void check_vspace(compiler_common *common)
4438 {
4439 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4440 DEFINE_COMPILER;
4441
4442 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4443
4444 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4445 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4446 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4447 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4448 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4449 #ifdef COMPILE_PCRE8
4450 if (common->utf)
4451 {
4452 #endif
4453 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4454 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4455 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4456 #ifdef COMPILE_PCRE8
4457 }
4458 #endif
4459 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4460 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4461
4462 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4463 }
4464
4465 #define CHAR1 STR_END
4466 #define CHAR2 STACK_TOP
4467
4468 static void do_casefulcmp(compiler_common *common)
4469 {
4470 DEFINE_COMPILER;
4471 struct sljit_jump *jump;
4472 struct sljit_label *label;
4473
4474 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4475 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4476 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4477 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4478 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4479 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4480
4481 label = LABEL();
4482 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4483 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4484 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4485 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4486 JUMPTO(SLJIT_NOT_ZERO, label);
4487
4488 JUMPHERE(jump);
4489 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4490 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4491 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4492 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4493 }
4494
4495 #define LCC_TABLE STACK_LIMIT
4496
4497 static void do_caselesscmp(compiler_common *common)
4498 {
4499 DEFINE_COMPILER;
4500 struct sljit_jump *jump;
4501 struct sljit_label *label;
4502
4503 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4504 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4505
4506 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4509 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4510 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4511 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4512
4513 label = LABEL();
4514 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4515 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4516 #ifndef COMPILE_PCRE8
4517 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4518 #endif
4519 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4520 #ifndef COMPILE_PCRE8
4521 JUMPHERE(jump);
4522 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4523 #endif
4524 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4525 #ifndef COMPILE_PCRE8
4526 JUMPHERE(jump);
4527 #endif
4528 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4529 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4530 JUMPTO(SLJIT_NOT_ZERO, label);
4531
4532 JUMPHERE(jump);
4533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4534 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4535 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4536 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4537 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4538 }
4539
4540 #undef LCC_TABLE
4541 #undef CHAR1
4542 #undef CHAR2
4543
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF character sequences, done in C because it
would be ineffective to do at JIT level.

The first sequence is [src1, end1). The second one starts at
args->uchar_ptr and is limited by args->end. Returns
  - the position in the second sequence after the compared characters when
    all characters of the first sequence have been matched,
  - NULL when a character pair does not match,
  - (pcre_uchar*)1 when the second sequence ends first. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
  const pcre_uchar *other = args->uchar_ptr;
  const pcre_uchar *other_end = args->end;
  pcre_uint32 lhs, rhs;
  const ucd_record *record;
  const pcre_uint32 *set;

  while (src1 < end1)
  {
    if (other >= other_end)
      return (pcre_uchar*)1;

    GETCHARINC(lhs, src1);
    GETCHARINC(rhs, other);
    record = GET_UCD(rhs);

    /* Same character, or the simple other case of it. */
    if (lhs == rhs || lhs == rhs + record->other_case)
      continue;

    /* Otherwise lhs must be a member of the caseless set of rhs. The set
    is scanned until lhs is found or an entry greater than lhs is seen. */
    set = PRIV(ucd_caseless_sets) + record->caseset;
    for (;; set++)
    {
      if (lhs < *set)
        return NULL;
      if (lhs == *set)
        break;
    }
  }

  return other;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4576
4577 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4578 compare_context *context, jump_list **backtracks)
4579 {
4580 DEFINE_COMPILER;
4581 unsigned int othercasebit = 0;
4582 pcre_uchar *othercasechar = NULL;
4583 #ifdef SUPPORT_UTF
4584 int utflength;
4585 #endif
4586
4587 if (caseless && char_has_othercase(common, cc))
4588 {
4589 othercasebit = char_get_othercase_bit(common, cc);
4590 SLJIT_ASSERT(othercasebit);
4591 /* Extracting bit difference info. */
4592 #if defined COMPILE_PCRE8
4593 othercasechar = cc + (othercasebit >> 8);
4594 othercasebit &= 0xff;
4595 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4596 /* Note that this code only handles characters in the BMP. If there
4597 ever are characters outside the BMP whose othercase differs in only one
4598 bit from itself (there currently are none), this code will need to be
4599 revised for COMPILE_PCRE32. */
4600 othercasechar = cc + (othercasebit >> 9);
4601 if ((othercasebit & 0x100) != 0)
4602 othercasebit = (othercasebit & 0xff) << 8;
4603 else
4604 othercasebit &= 0xff;
4605 #endif /* COMPILE_PCRE[8|16|32] */
4606 }
4607
4608 if (context->sourcereg == -1)
4609 {
4610 #if defined COMPILE_PCRE8
4611 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4612 if (context->length >= 4)
4613 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4614 else if (context->length >= 2)
4615 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4616 else
4617 #endif
4618 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4619 #elif defined COMPILE_PCRE16
4620 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4621 if (context->length >= 4)
4622 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4623 else
4624 #endif
4625 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4626 #elif defined COMPILE_PCRE32
4627 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4628 #endif /* COMPILE_PCRE[8|16|32] */
4629 context->sourcereg = TMP2;
4630 }
4631
4632 #ifdef SUPPORT_UTF
4633 utflength = 1;
4634 if (common->utf && HAS_EXTRALEN(*cc))
4635 utflength += GET_EXTRALEN(*cc);
4636
4637 do
4638 {
4639 #endif
4640
4641 context->length -= IN_UCHARS(1);
4642 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4643
4644 /* Unaligned read is supported. */
4645 if (othercasebit != 0 && othercasechar == cc)
4646 {
4647 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4648 context->oc.asuchars[context->ucharptr] = othercasebit;
4649 }
4650 else
4651 {
4652 context->c.asuchars[context->ucharptr] = *cc;
4653 context->oc.asuchars[context->ucharptr] = 0;
4654 }
4655 context->ucharptr++;
4656
4657 #if defined COMPILE_PCRE8
4658 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4659 #else
4660 if (context->ucharptr >= 2 || context->length == 0)
4661 #endif
4662 {
4663 if (context->length >= 4)
4664 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4665 else if (context->length >= 2)
4666 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4667 #if defined COMPILE_PCRE8
4668 else if (context->length >= 1)
4669 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4670 #endif /* COMPILE_PCRE8 */
4671 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4672
4673 switch(context->ucharptr)
4674 {
4675 case 4 / sizeof(pcre_uchar):
4676 if (context->oc.asint != 0)
4677 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4678 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4679 break;
4680
4681 case 2 / sizeof(pcre_uchar):
4682 if (context->oc.asushort != 0)
4683 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4684 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4685 break;
4686
4687 #ifdef COMPILE_PCRE8
4688 case 1:
4689 if (context->oc.asbyte != 0)
4690 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4691 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4692 break;
4693 #endif
4694
4695 default:
4696 SLJIT_ASSERT_STOP();
4697 break;
4698 }
4699 context->ucharptr = 0;
4700 }
4701
4702 #else
4703
4704 /* Unaligned read is unsupported or in 32 bit mode. */
4705 if (context->length >= 1)
4706 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4707
4708 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4709
4710 if (othercasebit != 0 && othercasechar == cc)
4711 {
4712 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4713 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4714 }
4715 else
4716 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4717
4718 #endif
4719
4720 cc++;
4721 #ifdef SUPPORT_UTF
4722 utflength--;
4723 }
4724 while (utflength > 0);
4725 #endif
4726
4727 return cc;
4728 }
4729
4730 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4731
/* Makes the emitted code hold (type - value) in typereg instead of
(type - typeoffset): emits one ADD or SUB on typereg when `value` differs
from the current `typeoffset`, then records `value` as the new offset.

Expects `typereg` and `typeoffset` to be in scope at the point of use.
`value` is evaluated more than once, so it must be free of side effects.
Wrapped in do { } while (0) so that the macro behaves as a single statement,
also as the body of an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4741
/* Makes the emitted code hold (character - value) in TMP1 instead of
(character - charoffset): emits one ADD or SUB on TMP1 when `value` differs
from the current `charoffset`, then records `value` as the new offset.

Expects `charoffset` to be in scope at the point of use. `value` is
evaluated more than once, so it must be free of side effects. Wrapped in
do { } while (0) so that the macro behaves as a single statement, also as
the body of an unbraced if / else. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4751
4752 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4753 {
4754 DEFINE_COMPILER;
4755 jump_list *found = NULL;
4756 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4757 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4758 struct sljit_jump *jump = NULL;
4759 pcre_uchar *ccbegin;
4760 int compares, invertcmp, numberofcmps;
4761 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4762 BOOL utf = common->utf;
4763 #endif
4764
4765 #ifdef SUPPORT_UCP
4766 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4767 BOOL charsaved = FALSE;
4768 int typereg = TMP1, scriptreg = TMP1;
4769 const pcre_uint32 *other_cases;
4770 sljit_uw typeoffset;
4771 #endif
4772
4773 /* Scanning the necessary info. */
4774 cc++;
4775 ccbegin = cc;
4776 compares = 0;
4777 if (cc[-1] & XCL_MAP)
4778 {
4779 min = 0;
4780 cc += 32 / sizeof(pcre_uchar);
4781 }
4782
4783 while (*cc != XCL_END)
4784 {
4785 compares++;
4786 if (*cc == XCL_SINGLE)
4787 {
4788 cc ++;
4789 GETCHARINCTEST(c, cc);
4790 if (c > max) max = c;
4791 if (c < min) min = c;
4792 #ifdef SUPPORT_UCP
4793 needschar = TRUE;
4794 #endif
4795 }
4796 else if (*cc == XCL_RANGE)
4797 {
4798 cc ++;
4799 GETCHARINCTEST(c, cc);
4800 if (c < min) min = c;
4801 GETCHARINCTEST(c, cc);
4802 if (c > max) max = c;
4803 #ifdef SUPPORT_UCP
4804 needschar = TRUE;
4805 #endif
4806 }
4807 #ifdef SUPPORT_UCP
4808 else
4809 {
4810 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4811 cc++;
4812 if (*cc == PT_CLIST)
4813 {
4814 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4815 while (*other_cases != NOTACHAR)
4816 {
4817 if (*other_cases > max) max = *other_cases;
4818 if (*other_cases < min) min = *other_cases;
4819 other_cases++;
4820 }
4821 }
4822 else
4823 {
4824 max = READ_CHAR_MAX;
4825 min = 0;
4826 }
4827
4828 switch(*cc)
4829 {
4830 case PT_ANY:
4831 break;
4832
4833 case PT_LAMP:
4834 case PT_GC:
4835 case PT_PC:
4836 case PT_ALNUM:
4837 needstype = TRUE;
4838 break;
4839
4840 case PT_SC:
4841 needsscript = TRUE;
4842 break;
4843
4844 case PT_SPACE:
4845 case PT_PXSPACE:
4846 case PT_WORD:
4847 case PT_PXGRAPH:
4848 case PT_PXPRINT:
4849 case PT_PXPUNCT:
4850 needstype = TRUE;
4851 needschar = TRUE;
4852 break;
4853
4854 case PT_CLIST:
4855 case PT_UCNC:
4856 needschar = TRUE;
4857 break;
4858
4859 default:
4860 SLJIT_ASSERT_STOP();
4861 break;
4862 }
4863 cc += 2;
4864 }
4865 #endif
4866 }
4867
4868 /* We are not necessary in utf mode even in 8 bit mode. */
4869 cc = ccbegin;
4870 detect_partial_match(common, backtracks);
4871 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4872
4873 if ((cc[-1] & XCL_HASPROP) == 0)
4874 {
4875 if ((cc[-1] & XCL_MAP) != 0)
4876 {
4877 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4878 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4879 {
4880 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4881 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4882 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4883 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4884 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4885 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4886 }
4887
4888 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4889 JUMPHERE(jump);
4890
4891 cc += 32 / sizeof(pcre_uchar);
4892 }
4893 else
4894 {
4895 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4896 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4897 }
4898 }
4899 else if ((cc[-1] & XCL_MAP) != 0)
4900 {
4901 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4902 #ifdef SUPPORT_UCP
4903 charsaved = TRUE;
4904 #endif
4905 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4906 {
4907 #ifdef COMPILE_PCRE8
4908 SLJIT_ASSERT(common->utf);
4909 #endif
4910 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4911
4912 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4913 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4914 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4915 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4916 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4917 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4918
4919 JUMPHERE(jump);
4920 }
4921
4922 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4923 cc += 32 / sizeof(pcre_uchar);
4924 }
4925
4926 #ifdef SUPPORT_UCP
4927 /* Simple register allocation. TMP1 is preferred if possible. */
4928 if (needstype || needsscript)
4929 {
4930 if (needschar && !charsaved)
4931 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4932 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4933 if (needschar)
4934 {
4935 if (needstype)
4936 {
4937 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4938 typereg = RETURN_ADDR;
4939 }
4940
4941 if (needsscript)
4942 scriptreg = TMP3;
4943 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4944 }
4945 else if (needstype && needsscript)
4946 scriptreg = TMP3;
4947 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4948
4949 if (needsscript)
4950 {
4951 if (scriptreg == TMP1)
4952 {
4953 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4954 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4955 }
4956 else
4957 {
4958 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4959 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4960 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4961 }
4962 }
4963 }
4964 #endif
4965
4966 /* Generating code. */
4967 charoffset = 0;
4968 numberofcmps = 0;
4969 #ifdef SUPPORT_UCP
4970 typeoffset = 0;
4971 #endif
4972
4973 while (*cc != XCL_END)
4974 {
4975 compares--;
4976 invertcmp = (compares == 0 && list != backtracks);
4977 jump = NULL;
4978
4979 if (*cc == XCL_SINGLE)
4980 {
4981 cc ++;
4982 GETCHARINCTEST(c, cc);
4983
4984 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4985 {
4986 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4987 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
4988 numberofcmps++;
4989 }
4990 else if (numberofcmps > 0)
4991 {
4992 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4993 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4994 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
4995 numberofcmps = 0;
4996 }
4997 else
4998 {
4999 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5000 numberofcmps = 0;
5001 }
5002 }
5003 else if (*cc == XCL_RANGE)
5004 {
5005 cc ++;
5006 GETCHARINCTEST(c, cc);
5007 SET_CHAR_OFFSET(c);
5008 GETCHARINCTEST(c, cc);
5009
5010 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5011 {
5012 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5013 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5014 numberofcmps++;
5015 }
5016 else if (numberofcmps > 0)
5017 {
5018 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5019 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5020 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5021 numberofcmps = 0;
5022 }
5023 else
5024 {
5025 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5026 numberofcmps = 0;
5027 }
5028 }
5029 #ifdef SUPPORT_UCP
5030 else
5031 {
5032 if (*cc == XCL_NOTPROP)
5033 invertcmp ^= 0x1;
5034 cc++;
5035 switch(*cc)
5036 {
5037 case PT_ANY:
5038 if (list != backtracks)
5039 {
5040 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5041 continue;
5042 }
5043 else if (cc[-1] == XCL_NOTPROP)
5044 continue;
5045 jump = JUMP(SLJIT_JUMP);
5046 break;
5047
5048 case PT_LAMP:
5049 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5050 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5051 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5052 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5054 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5055 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5056 break;
5057
5058 case PT_GC:
5059 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5060 SET_TYPE_OFFSET(c);
5061 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5062 break;
5063
5064 case PT_PC:
5065 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5066 break;
5067
5068 case PT_SC:
5069 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5070 break;
5071
5072 case PT_SPACE:
5073 case PT_PXSPACE:
5074 SET_CHAR_OFFSET(9);
5075 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5076 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5077
5078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5079 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5080
5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5082 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5083
5084 SET_TYPE_OFFSET(ucp_Zl);
5085 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5086 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5087 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5088 break;
5089
5090 case PT_WORD:
5091 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5092 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5093 /* Fall through. */
5094
5095 case PT_ALNUM:
5096 SET_TYPE_OFFSET(ucp_Ll);
5097 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5098 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5099 SET_TYPE_OFFSET(ucp_Nd);
5100 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5101 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5102 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5103 break;
5104
5105 case PT_CLIST:
5106 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5107
5108 /* At least three characters are required.
5109 Otherwise this case would be handled by the normal code path. */
5110 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5111 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5112
5113 /* Optimizing character pairs, if their difference is power of 2. */
5114 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5115 {
5116 if (charoffset == 0)
5117 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5118 else
5119 {
5120 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5121 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5122 }
5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5125 other_cases += 2;
5126 }
5127 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5128 {
5129 if (charoffset == 0)
5130 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5131 else
5132 {
5133 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5134 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5135 }
5136 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5137 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5138
5139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5140 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5141
5142 other_cases += 3;
5143 }
5144 else
5145 {
5146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5148 }
5149
5150 while (*other_cases != NOTACHAR)
5151 {
5152 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5153 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5154 }
5155 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5156 break;
5157
5158 case PT_UCNC:
5159 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5160 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5162 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5163 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5164 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5165
5166 SET_CHAR_OFFSET(0xa0);
5167 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5168 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5169 SET_CHAR_OFFSET(0);
5170 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5171 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5172 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5173 break;
5174
5175 case PT_PXGRAPH:
5176 /* C and Z groups are the farthest two groups. */
5177 SET_TYPE_OFFSET(ucp_Ll);
5178 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5179 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5180
5181 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5182
5183 /* In case of ucp_Cf, we overwrite the result. */
5184 SET_CHAR_OFFSET(0x2066);
5185 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5186 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5187
5188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5189 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5190
5191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5192 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5193
5194 JUMPHERE(jump);
5195 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5196 break;
5197
5198 case PT_PXPRINT:
5199 /* C and Z groups are the farthest two groups. */
5200 SET_TYPE_OFFSET(ucp_Ll);
5201 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5202 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5203
5204 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5205 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5206
5207 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5208
5209 /* In case of ucp_Cf, we overwrite the result. */
5210 SET_CHAR_OFFSET(0x2066);
5211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5212 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5213
5214 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5215 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5216
5217 JUMPHERE(jump);
5218 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5219 break;
5220
5221 case PT_PXPUNCT:
5222 SET_TYPE_OFFSET(ucp_Sc);
5223 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5224 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5225
5226 SET_CHAR_OFFSET(0);
5227 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5228 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5229
5230 SET_TYPE_OFFSET(ucp_Pc);
5231 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5232 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5233 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5234 break;
5235 }
5236 cc += 2;
5237 }
5238 #endif
5239
5240 if (jump != NULL)
5241 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5242 }
5243
5244 if (found != NULL)
5245 set_jumps(found, LABEL());
5246 }
5247
5248 #undef SET_TYPE_OFFSET
5249 #undef SET_CHAR_OFFSET
5250
5251 #endif
5252
/* Emits the matching-path code for the single opcode "type".

Arguments:
  common      the JIT compiler state
  type        the opcode to compile; the switch below dispatches on it
  cc          the pattern data that follows the opcode
  backtracks  list collecting the jumps that are taken when the item fails

Returns: pointer to the pattern data that follows the compiled item. Opcodes
  that carry no inline data return cc unchanged.
*/
5253 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5254 {
5255 DEFINE_COMPILER;
5256 int length;
5257 unsigned int c, oc, bit;
5258 compare_context context;
5259 struct sljit_jump *jump[4];
5260 jump_list *end_list;
5261 #ifdef SUPPORT_UTF
5262 struct sljit_label *label;
5263 #ifdef SUPPORT_UCP
5264 pcre_uchar propdata[5];
5265 #endif
5266 #endif /* SUPPORT_UTF */
5267
5268 switch(type)
5269 {
5270 case OP_SOD:
/* Fails unless STR_PTR equals the "begin" field of jit_arguments. */
5271 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5272 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5273 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5274 return cc;
5275
5276 case OP_SOM:
/* Fails unless STR_PTR equals the "str" field of jit_arguments. */
5277 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5279 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5280 return cc;
5281
5282 case OP_NOT_WORD_BOUNDARY:
5283 case OP_WORD_BOUNDARY:
/* The shared wordboundary routine is entered by a fast call. Afterwards
OP_WORD_BOUNDARY fails on a zero result, OP_NOT_WORD_BOUNDARY on non-zero. */
5284 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5285 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5286 return cc;
5287
5288 case OP_NOT_DIGIT:
5289 case OP_DIGIT:
5290 /* Digits are usually 0-9, so they are worth optimizing. */
5291 detect_partial_match(common, backtracks);
5292 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5293 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5294 read_char7_type(common, type == OP_NOT_DIGIT);
5295 else
5296 #endif
5297 read_char8_type(common, type == OP_NOT_DIGIT);
5298 /* Flip the starting bit in the negative case. */
5299 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5300 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5301 return cc;
5302
5303 case OP_NOT_WHITESPACE:
5304 case OP_WHITESPACE:
/* Same scheme as the digit case above, testing the ctype_space bit. */
5305 detect_partial_match(common, backtracks);
5306 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5307 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5308 read_char7_type(common, type == OP_NOT_WHITESPACE);
5309 else
5310 #endif
5311 read_char8_type(common, type == OP_NOT_WHITESPACE);
5312 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5313 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5314 return cc;
5315
5316 case OP_NOT_WORDCHAR:
5317 case OP_WORDCHAR:
/* Same scheme as the digit case above, testing the ctype_word bit. */
5318 detect_partial_match(common, backtracks);
5319 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5320 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5321 read_char7_type(common, type == OP_NOT_WORDCHAR);
5322 else
5323 #endif
5324 read_char8_type(common, type == OP_NOT_WORDCHAR);
5325 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5326 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5327 return cc;
5328
5329 case OP_ANY:
5330 detect_partial_match(common, backtracks);
5331 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5332 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5333 {
/* Two-unit fixed newline: the item fails only when the character just read
equals the first newline unit and the next input unit (if there is one)
equals the second. */
5334 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5335 end_list = NULL;
5336 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5337 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5338 else
5339 check_str_end(common, &end_list);
5340
5341 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5342 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5343 set_jumps(end_list, LABEL());
5344 JUMPHERE(jump[0]);
5345 }
5346 else
5347 check_newlinechar(common, common->nltype, backtracks, TRUE);
5348 return cc;
5349
5350 case OP_ALLANY:
5351 detect_partial_match(common, backtracks);
5352 #ifdef SUPPORT_UTF
5353 if (common->utf)
5354 {
/* Step over the first code unit, then over the remaining units of a
multi-unit character. No code point value is computed. */
5355 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5357 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5358 #if defined COMPILE_PCRE8
/* Lead bytes >= 0xc0 index utf8_table4 (presumably the number of
trailing bytes - confirm against the table definition). */
5359 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5360 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5361 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5362 #elif defined COMPILE_PCRE16
/* When (unit & 0xfc00) == 0xd800 the flag value 1 is shifted left by one
and added to STR_PTR, which skips one more 16-bit unit. */
5363 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5364 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5365 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5366 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5367 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5369 #endif
5370 JUMPHERE(jump[0]);
5371 #endif /* COMPILE_PCRE[8|16] */
5372 return cc;
5373 }
5374 #endif
5375 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5376 return cc;
5377
5378 case OP_ANYBYTE:
/* Always advances by exactly one code unit. */
5379 detect_partial_match(common, backtracks);
5380 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5381 return cc;
5382
5383 #ifdef SUPPORT_UTF
5384 #ifdef SUPPORT_UCP
5385 case OP_NOTPROP:
5386 case OP_PROP:
/* A temporary extended class holding only this property is built and
handed to the xclass compiler. */
5387 propdata[0] = XCL_HASPROP;
5388 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5389 propdata[2] = cc[0];
5390 propdata[3] = cc[1];
5391 propdata[4] = XCL_END;
5392 compile_xclass_matchingpath(common, propdata, backtracks);
5393 return cc + 2;
5394 #endif
5395 #endif
5396
5397 case OP_ANYNL:
/* A CR that is followed by NL consumes both units, a CR alone is accepted
as it is, and any other character is checked by check_newlinechar(). */
5398 detect_partial_match(common, backtracks);
5399 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5400 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5401 /* We don't need to handle soft partial matching case. */
5402 end_list = NULL;
5403 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5404 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5405 else
5406 check_str_end(common, &end_list);
5407 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5408 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5409 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5410 jump[2] = JUMP(SLJIT_JUMP);
5411 JUMPHERE(jump[0]);
5412 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5413 set_jumps(end_list, LABEL());
5414 JUMPHERE(jump[1]);
5415 JUMPHERE(jump[2]);
5416 return cc;
5417
5418 case OP_NOT_HSPACE:
5419 case OP_HSPACE:
/* The character is classified by the shared hspace routine (fast call);
the zero flag tested afterwards decides the outcome. */
5420 detect_partial_match(common, backtracks);
5421 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5422 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5423 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5424 return cc;
5425
5426 case OP_NOT_VSPACE:
5427 case OP_VSPACE:
/* Same as the horizontal case, using the shared vspace routine. */
5428 detect_partial_match(common, backtracks);
5429 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5430 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5431 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5432 return cc;
5433
5434 #ifdef SUPPORT_UCP
5435 case OP_EXTUNI:
5436 detect_partial_match(common, backtracks);
5437 read_char(common);
5438 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5440 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and then holds the gbprop value of the
previous character until it is restored after the loop. */
5441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5442 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5443
/* Loop: TMP3 remembers the position before the next character. The loop
continues while the ucp_gbtable entry selected by the previous gbprop has
the bit numbered by the next character's gbprop set; otherwise STR_PTR is
moved back to TMP3. */
5444 label = LABEL();
5445 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5446 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5447 read_char(common);
5448 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5450 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5451
5452 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5453 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5454 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5455 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5456 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5457 JUMPTO(SLJIT_NOT_ZERO, label);
5458
5459 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5460 JUMPHERE(jump[0]);
5461 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5462
5463 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5464 {
5465 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5466 /* Since we successfully read a char above, partial matching must occur. */
5467 check_partial(common, TRUE);
5468 JUMPHERE(jump[0]);
5469 }
5470 return cc;
5471 #endif
5472
5473 case OP_EODN:
5474 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; every
other path must find exactly one newline sequence ending at STR_END. */
5475 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5476 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5477 {
5478 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5480 if (common->mode == JIT_COMPILE)
5481 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5482 else
5483 {
/* Partial matching: if fewer than two units remain and the available unit
equals the first newline unit, a partial match is reported. */
5484 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5485 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5486 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5487 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5488 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5489 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5490 check_partial(common, TRUE);
5491 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5492 JUMPHERE(jump[1]);
5493 }
5494 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5495 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5496 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5497 }
5498 else if (common->nltype == NLTYPE_FIXED)
5499 {
/* Single-unit fixed newline: it must be the last unit of the subject. */
5500 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5501 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5502 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5503 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5504 }
5505 else
5506 {
/* Remaining newline types. A leading CR is handled first: jump[2] is
taken when CR is the last unit, jump[3] when CR NL ends the subject. */
5507 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5508 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5509 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5510 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5511 jump[2] = JUMP(SLJIT_GREATER);
5512 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5513 /* Equal. */
5514 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5515 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5516 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5517
5518 JUMPHERE(jump[1]);
5519 if (common->nltype == NLTYPE_ANYCRLF)
5520 {
5521 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5522 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5523 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5524 }
5525 else
5526 {
/* STR_PTR is kept in LOCALS1 while one character is read and classified
by the shared anynewline routine, and is restored afterwards. */
5527 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5528 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5529 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5530 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5531 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5532 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5533 }
5534 JUMPHERE(jump[2]);
5535 JUMPHERE(jump[3]);
5536 }
5537 JUMPHERE(jump[0]);
5538 check_partial(common, FALSE);
5539 return cc;
5540
5541 case OP_EOD:
/* Fails while any input remains. */
5542 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5543 check_partial(common, FALSE);
5544 return cc;
5545
5546 case OP_CIRC:
/* Fails when STR_PTR is beyond "begin" or when the notbol field is set. */
5547 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5548 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5549 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5550 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5551 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5552 return cc;
5553
5554 case OP_CIRCM:
/* At "begin" only the notbol field is checked. Elsewhere STR_PTR must be
before STR_END and the preceding input must be a newline. */
5555 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5556 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5557 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5558 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5559 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5560 jump[0] = JUMP(SLJIT_JUMP);
5561 JUMPHERE(jump[1]);
5562
5563 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5564 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5565 {
5566 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5567 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5569 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5570 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5571 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5572 }
5573 else
5574 {
5575 skip_char_back(common);
5576 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5577 check_newlinechar(common, common->nltype, backtracks, FALSE);
5578 }
5579 JUMPHERE(jump[0]);
5580 return cc;
5581
5582 case OP_DOLL:
/* Fails when the noteol field is set. Otherwise the check is the OP_EODN
one, or a plain end-of-subject test when common->endonly is set. */
5583 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5584 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5585 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5586
5587 if (!common->endonly)
5588 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5589 else
5590 {
5591 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5592 check_partial(common, FALSE);
5593 }
5594 return cc;
5595
5596 case OP_DOLLM:
/* At the end of the subject only the noteol field is checked; before the
end the following input must be a newline. */
5597 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5598 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5599 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5600 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5601 check_partial(common, FALSE);
5602 jump[0] = JUMP(SLJIT_JUMP);
5603 JUMPHERE(jump[1]);
5604
5605 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5606 {
5607 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5608 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5609 if (common->mode == JIT_COMPILE)
5610 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5611 else
5612 {
5613 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5614 /* STR_PTR = STR_END - IN_UCHARS(1) */
5615 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5616 check_partial(common, TRUE);
5617 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5618 JUMPHERE(jump[1]);
5619 }
5620
5621 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5622 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5623 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5624 }
5625 else
5626 {
5627 peek_char(common, common->nlmax);
5628 check_newlinechar(common, common->nltype, backtracks, FALSE);
5629 }
5630 JUMPHERE(jump[0]);
5631 return cc;
5632
5633 case OP_CHAR:
5634 case OP_CHARI:
5635 length = 1;
5636 #ifdef SUPPORT_UTF
5637 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5638 #endif
/* Fast path, normal (non-partial) mode only: the character is compared as
a raw code unit sequence when it is caseful, has no other case, or when
char_get_othercase_bit() returns non-zero (presumably the two cases then
differ in a single bit - confirm against that helper). */
5639 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5640 {
5641 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5642 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5643
5644 context.length = IN_UCHARS(length);
5645 context.sourcereg = -1;
5646 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5647 context.ucharptr = 0;
5648 #endif
5649 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5650 }
5651
/* General path: read the subject character and compare it with c and,
for a caseless match, with its other case oc. */
5652 detect_partial_match(common, backtracks);
5653 #ifdef SUPPORT_UTF
5654 if (common->utf)
5655 {
5656 GETCHAR(c, cc);
5657 }
5658 else
5659 #endif
5660 c = *cc;
5661
5662 if (type == OP_CHAR || !char_has_othercase(common, cc))
5663 {
5664 read_char_range(common, c, c, FALSE);
5665 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5666 return cc + length;
5667 }
5668 oc = char_othercase(common, c);
5669 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5670 bit = c ^ oc;
5671 if (is_powerof2(bit))
5672 {
/* The two cases differ in one bit: set that bit, then compare once. */
5673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5674 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5675 return cc + length;
5676 }
5677 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5678 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5679 JUMPHERE(jump[0]);
5680 return cc + length;
5681
5682 case OP_NOT:
5683 case OP_NOTI:
5684 detect_partial_match(common, backtracks);
5685 length = 1;
5686 #ifdef SUPPORT_UTF
5687 if (common->utf)
5688 {
5689 #ifdef COMPILE_PCRE8
5690 c = *cc;
5691 if (c < 128)
5692 {
/* The excluded character is a single byte, so only the first subject
byte has to be compared; the subject character is then skipped without
being decoded. */
5693 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5694 if (type == OP_NOT || !char_has_othercase(common, cc))
5695 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5696 else
5697 {
5698 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5699 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5700 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5701 }
5702 /* Skip the variable-length character. */
5703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5704 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5706 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5707 JUMPHERE(jump[0]);
5708 return cc + 1;
5709 }
5710 else
5711 #endif /* COMPILE_PCRE8 */
5712 {
5713 GETCHARLEN(c, cc, length);
5714 }
5715 }
5716 else
5717 #endif /* SUPPORT_UTF */
5718 c = *cc;
5719
/* General path: read the subject character and fail when it equals c
(or, for a caseless match, its other case oc). */
5720 if (type == OP_NOT || !char_has_othercase(common, cc))
5721 {
5722 read_char_range(common, c, c, TRUE);
5723 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5724 }
5725 else
5726 {
5727 oc = char_othercase(common, c);
5728 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5729 bit = c ^ oc;
5730 if (is_powerof2(bit))
5731 {
5732 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5733 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5734 }
5735 else
5736 {
5737 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5738 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5739 }
5740 }
5741 return cc + length;
5742
5743 case OP_CLASS:
5744 case OP_NCLASS:
5745 detect_partial_match(common, backtracks);
5746
5747 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5748 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5749 read_char_range(common, 0, bit, type == OP_NCLASS);
5750 #else
5751 read_char_range(common, 0, 255, type == OP_NCLASS);
5752 #endif
5753
/* check_class_ranges() returns non-zero when it has emitted the whole
test itself; the bitmap lookup below is the fallback. */
5754 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5755 return cc + 32 / sizeof(pcre_uchar);
5756
/* Characters above the bitmap range fail for OP_CLASS; for OP_NCLASS
they skip the bitmap test through jump[0] and are accepted. */
5757 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5758 jump[0] = NULL;
5759 if (common->utf)
5760 {
5761 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5762 if (type == OP_CLASS)
5763 {
5764 add_jump(compiler, backtracks, jump[0]);
5765 jump[0] = NULL;
5766 }
5767 }
5768 #elif !defined COMPILE_PCRE8
5769 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5770 if (type == OP_CLASS)
5771 {
5772 add_jump(compiler, backtracks, jump[0]);
5773 jump[0] = NULL;
5774 }
5775 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5776
/* Bitmap test: the byte at cc[character >> 3] is masked with
1 << (character & 7); the item fails when that bit is clear. */
5777 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5778 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5780 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5781 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5782 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5783
5784 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5785 if (jump[0] != NULL)
5786 JUMPHERE(jump[0]);
5787 #endif
5788
5789 return cc + 32 / sizeof(pcre_uchar);
5790
5791 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5792 case OP_XCLASS:
/* The class data starts after the LINK_SIZE length field; GET(cc, 0) is
used to step over the whole item. */
5793 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5794 return cc + GET(cc, 0) - 1;
5795 #endif
5796
5797 case OP_REVERSE:
/* Moves STR_PTR back by "length" characters and fails when that would go
before the "begin" field of jit_arguments. */
5798 length = GET(cc, 0);
5799 if (length == 0)
5800 return cc + LINK_SIZE;
5801 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5802 #ifdef SUPPORT_UTF
5803 if (common->utf)
5804 {
/* Characters have variable length here, so step back one at a time with
TMP2 as the countdown. */
5805 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5806 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5807 label = LABEL();
5808 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5809 skip_char_back(common);
5810 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5811 JUMPTO(SLJIT_NOT_ZERO, label);
5812 }
5813 else
5814 #endif
5815 {
5816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5817 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5818 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5819 }
5820 check_start_used_ptr(common);
5821 return cc + LINK_SIZE;
5822 }
/* Every opcode handled above returns from inside the switch, so this point
is reached only for an opcode that has no case. */
5823 SLJIT_ASSERT_STOP();
5824 return cc;
5825 }
5826
5827 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5828 {
5829 /* This function consumes at least one input character. */
5830 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5831 DEFINE_COMPILER;
5832 pcre_uchar *ccbegin = cc;
5833 compare_context context;
5834 int size;
5835
5836 context.length = 0;
5837 do
5838 {
5839 if (cc >= ccend)
5840 break;
5841
5842 if (*cc == OP_CHAR)
5843 {
5844 size = 1;
5845 #ifdef SUPPORT_UTF
5846 if (common->utf && HAS_EXTRALEN(cc[1]))
5847 size += GET_EXTRALEN(cc[1]);
5848 #endif
5849 }
5850 else if (*cc == OP_CHARI)
5851 {
5852 size = 1;
5853 #ifdef SUPPORT_UTF
5854 if (common->utf)
5855 {
5856 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5857 size = 0;
5858 else if (HAS_EXTRALEN(cc[1]))
5859 size += GET_EXTRALEN(cc[1]);
5860 }
5861 else
5862 #endif
5863 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5864 size = 0;
5865 }
5866 else
5867 size = 0;
5868
5869 cc += 1 + size;
5870 context.length += IN_UCHARS(size);
5871 }
5872 while (size > 0 && context.length <= 128);
5873
5874 cc = ccbegin;
5875 if (context.length > 0)
5876 {
5877 /* We have a fixed-length byte sequence. */
5878 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5879 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5880
5881 context.sourcereg = -1;
5882 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5883 context.ucharptr = 0;
5884 #endif
5885 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5886 return cc;
5887 }
5888
5889 /* A non-fixed length character will be checked if length == 0. */
5890 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5891 }
5892
5893 /* Forward declarations. */
5894 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5895 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5896
/* Shared body of the two PUSH_BACKTRACK macros below.

Allocates "size" bytes from the compiler, executes "bailout" (a return
statement) when the compiler reports an error, clears the new record and makes
it the new parent->top, linked to the previous top through ->prev. The
expansion uses the local names "compiler", "backtrack" and "parent" of the
calling function. */
#define PUSH_BACKTRACK_IMPL(size, ccstart, bailout) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      bailout; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* For functions returning a value: "error" is returned on allocation failure. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  PUSH_BACKTRACK_IMPL(size, ccstart, return error)

/* For void functions: a plain return is executed on allocation failure. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  PUSH_BACKTRACK_IMPL(size, ccstart, return)

/* Views the local "backtrack" pointer as a pointer to "type". */
#define BACKTRACK_AS(type) ((type *)backtrack)
5924
5925 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5926 {
5927 /* The OVECTOR offset goes to TMP2. */
5928 DEFINE_COMPILER;
5929 int count = GET2(cc, 1 + IMM2_SIZE);
5930 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5931 unsigned int offset;
5932 jump_list *found = NULL;
5933
5934 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5935
5936 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5937
5938 count--;
5939 while (count-- > 0)
5940 {
5941 offset = GET2(slot, 0) << 1;
5942 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5943 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5944 slot += common->name_entry_size;
5945 }
5946
5947 offset = GET2(slot, 0) << 1;
5948 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5949 if (backtracks != NULL && !common->jscript_compat)
5950 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5951
5952 set_jumps(found, LABEL());
5953 }
5954
5955 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5956 {
5957 DEFINE_COMPILER;
5958 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5959 int offset = 0;
5960 struct sljit_jump *jump = NULL;
5961 struct sljit_jump *partial;
5962 struct sljit_jump *nopartial;
5963
5964 if (ref)
5965 {
5966 offset = GET2(cc, 1) << 1;
5967 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5968 /* OVECTOR(1) contains the "string begin - 1" constant. */
5969 if (withchecks && !common->jscript_compat)
5970 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
5971 }
5972 else
5973 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5974
5975 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5976 if (common->utf && *cc == OP_REFI)
5977 {
5978 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
5979 if (ref)
5980 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
5981 else
5982 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5983
5984 if (withchecks)
5985 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
5986
5987 /* Needed to save important temporary registers. */
5988 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5989 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
5990 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5991 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5992 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5993 if (common->mode == JIT_COMPILE)
5994 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5995 else
5996 {
5997 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5998 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5999 check_partial(common, FALSE);
6000 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6001 JUMPHERE(nopartial);
6002 }
6003 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6004 }
6005 else
6006 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6007 {
6008 if (ref)
6009 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6010 else
6011 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6012
6013 if (withchecks)
6014 jump = JUMP(SLJIT_ZERO);
6015
6016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6017 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6018 if (common->mode == JIT_COMPILE)
6019 add_jump(compiler, backtracks, partial);
6020
6021 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6022 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6023
6024 if (common->mode != JIT_COMPILE)
6025 {
6026 nopartial = JUMP(SLJIT_JUMP);
6027 JUMPHERE(partial);
6028 /* TMP2 -= STR_END - STR_PTR */
6029 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6030 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6031 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6032 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6033 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6034 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6035 JUMPHERE(partial);
6036 check_partial(common, FALSE);
6037 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6038 JUMPHERE(nopartial);
6039 }
6040 }
6041
6042 if (jump != NULL)
6043 {
6044 if (emptyfail)
6045 add_jump(compiler, backtracks, jump);
6046 else
6047 JUMPHERE(jump);
6048 }
6049 }
6050
6051 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6052 {
6053 DEFINE_COMPILER;
6054 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6055 backtrack_common *backtrack;
6056 pcre_uchar type;
6057 int offset = 0;
6058 struct sljit_label *label;
6059 struct sljit_jump *zerolength;
6060 struct sljit_jump *jump = NULL;
6061 pcre_uchar *ccbegin = cc;
6062 int min = 0, max = 0;
6063 BOOL minimize;
6064
6065 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6066
6067 if (ref)
6068 offset = GET2(cc, 1) << 1;
6069 else
6070 cc += IMM2_SIZE;
6071 type = cc[1 + IMM2_SIZE];
6072
6073 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6074 minimize = (type & 0x1) != 0;
6075 switch(type)
6076 {
6077 case OP_CRSTAR:
6078 case OP_CRMINSTAR:
6079 min = 0;
6080 max = 0;
6081 cc += 1 + IMM2_SIZE + 1;
6082 break;
6083 case OP_CRPLUS:
6084 case OP_CRMINPLUS:
6085 min = 1;
6086 max = 0;
6087 cc += 1 + IMM2_SIZE + 1;
6088 break;
6089 case OP_CRQUERY:
6090 case OP_CRMINQUERY:
6091 min = 0;
6092 max = 1;
6093 cc += 1 + IMM2_SIZE + 1;
6094 break;
6095 case OP_CRRANGE:
6096 case OP_CRMINRANGE:
6097 min = GET2(cc, 1 + IMM2_SIZE + 1);
6098 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6099 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6100 break;
6101 default:
6102 SLJIT_ASSERT_STOP();
6103 break;
6104 }
6105
6106 if (!minimize)
6107 {
6108 if (min == 0)
6109 {
6110 allocate_stack(common, 2);
6111 if (ref)
6112 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6113 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6114 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6115 /* Temporary release of STR_PTR. */
6116 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6117 /* Handles both invalid and empty cases. Since the minimum repeat,
6118 is zero the invalid case is basically the same as an empty case. */
6119 if (ref)
6120 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6121 else
6122 {
6123 compile_dnref_search(common, ccbegin, NULL);
6124 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6126 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6127 }
6128 /* Restore if not zero length. */
6129 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6130 }
6131 else
6132 {
6133 allocate_stack(common, 1);
6134 if (ref)
6135 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6136 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6137 if (ref)
6138 {
6139 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6140 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6141 }
6142 else
6143 {
6144 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6147 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6148 }
6149 }
6150
6151 if (min > 1 || max > 1)
6152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6153
6154 label = LABEL();
6155 if (!ref)
6156 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6157 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6158
6159 if (min > 1 || max > 1)
6160 {
6161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6162 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6164 if (min > 1)
6165 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6166 if (max > 1)
6167 {
6168 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6169 allocate_stack(common, 1);
6170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6171 JUMPTO(SLJIT_JUMP, label);
6172 JUMPHERE(jump);
6173 }
6174 }
6175
6176 if (max == 0)
6177 {
6178 /* Includes min > 1 case as well. */
6179 allocate_stack(common, 1);
6180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6181 JUMPTO(SLJIT_JUMP, label);
6182 }
6183
6184 JUMPHERE(zerolength);
6185 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6186
6187 count_match(common);
6188 return cc;
6189 }
6190
6191 allocate_stack(common, ref ? 2 : 3);
6192 if (ref)
6193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6195 if (type != OP_CRMINSTAR)
6196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6197
6198 if (min == 0)
6199 {
6200 /* Handles both invalid and empty cases. Since the minimum repeat,
6201 is zero the invalid case is basically the same as an empty case. */
6202 if (ref)
6203 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6204 else
6205 {
6206 compile_dnref_search(common, ccbegin, NULL);
6207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6209 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6210 }
6211 /* Length is non-zero, we can match real repeats. */
6212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6213 jump = JUMP(SLJIT_JUMP);
6214 }
6215 else
6216 {
6217 if (ref)
6218 {
6219 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6220 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6221 }
6222 else
6223 {
6224 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6227 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6228 }
6229 }
6230
6231 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6232 if (max > 0)
6233 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6234
6235 if (!ref)
6236 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6237 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6239
6240 if (min > 1)
6241 {
6242 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6243 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6245 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6246 }
6247 else if (max > 0)
6248 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6249
6250 if (jump != NULL)
6251 JUMPHERE(jump);
6252 JUMPHERE(zerolength);
6253
6254 count_match(common);
6255 return cc;
6256 }
6257
6258 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6259 {
6260 DEFINE_COMPILER;
6261 backtrack_common *backtrack;
6262 recurse_entry *entry = common->entries;
6263 recurse_entry *prev = NULL;
6264 sljit_sw start = GET(cc, 1);
6265 pcre_uchar *start_cc;
6266 BOOL needs_control_head;
6267
6268 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6269
6270 /* Inlining simple patterns. */
6271 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6272 {
6273 start_cc = common->start + start;
6274 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6275 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6276 return cc + 1 + LINK_SIZE;
6277 }
6278
6279 while (entry != NULL)
6280 {
6281 if (entry->start == start)
6282 break;
6283 prev = entry;
6284 entry = entry->next;
6285 }
6286
6287 if (entry == NULL)
6288 {
6289 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6290 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6291 return NULL;
6292 entry->next = NULL;
6293 entry->entry = NULL;
6294 entry->calls = NULL;
6295 entry->start = start;
6296
6297 if (prev != NULL)
6298 prev->next = entry;
6299 else
6300 common->entries = entry;
6301 }
6302
6303 if (common->has_set_som && common->mark_ptr != 0)
6304 {
6305 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6306 allocate_stack(common, 2);
6307 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6310 }
6311 else if (common->has_set_som || common->mark_ptr != 0)
6312 {
6313 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6314 allocate_stack(common, 1);
6315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6316 }
6317
6318 if (entry->entry == NULL)
6319 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6320 else
6321 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6322 /* Leave if the match is failed. */
6323 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6324 return cc + 1 + LINK_SIZE;
6325 }
6326
6327 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6328 {
6329 const pcre_uchar *begin = arguments->begin;
6330 int *offset_vector = arguments->offsets;
6331 int offset_count = arguments->offset_count;
6332 int i;
6333
6334 if (PUBL(callout) == NULL)
6335 return 0;
6336
6337 callout_block->version = 2;
6338 callout_block->callout_data = arguments->callout_data;
6339
6340 /* Offsets in subject. */
6341 callout_block->subject_length = arguments->end - arguments->begin;
6342 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6343 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6344 #if defined COMPILE_PCRE8
6345 callout_block->subject = (PCRE_SPTR)begin;
6346 #elif defined COMPILE_PCRE16
6347 callout_block->subject = (PCRE_SPTR16)begin;
6348 #elif defined COMPILE_PCRE32
6349 callout_block->subject = (PCRE_SPTR32)begin;
6350 #endif
6351
6352 /* Convert and copy the JIT offset vector to the offset_vector array. */
6353 callout_block->capture_top = 0;
6354 callout_block->offset_vector = offset_vector;
6355 for (i = 2; i < offset_count; i += 2)
6356 {
6357 offset_vector[i] = jit_ovector[i] - begin;
6358 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6359 if (jit_ovector[i] >= begin)
6360 callout_block->capture_top = i;
6361 }
6362
6363 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6364 if (offset_count > 0)
6365 offset_vector[0] = -1;
6366 if (offset_count > 1)
6367 offset_vector[1] = -1;
6368 return (*PUBL(callout))(callout_block);
6369 }
6370
6371 /* Aligning to 8 byte. */
6372 #define CALLOUT_ARG_SIZE \
6373 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6374
6375 #define CALLOUT_ARG_OFFSET(arg) \
6376 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6377
6378 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6379 {
6380 DEFINE_COMPILER;
6381 backtrack_common *backtrack;
6382
6383 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6384
6385 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6386
6387 SLJIT_ASSERT(common->capture_last_ptr != 0);
6388 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6389 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6390 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6391 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6392
6393 /* These pointer sized fields temporarly stores internal variables. */
6394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6397
6398 if (common->mark_ptr != 0)
6399 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6400 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6401 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6402 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6403
6404 /* Needed to save important temporary registers. */
6405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6406 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6407 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6408 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6409 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6410 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6411 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6412
6413 /* Check return value. */
6414 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6415 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6416 if (common->forced_quit_label == NULL)
6417 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6418 else
6419 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6420 return cc + 2 + 2 * LINK_SIZE;
6421 }
6422
6423 #undef CALLOUT_ARG_SIZE
6424 #undef CALLOUT_ARG_OFFSET
6425
6426 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6427 {
6428 DEFINE_COMPILER;
6429 int framesize;
6430 int extrasize;
6431 BOOL needs_control_head;
6432 int private_data_ptr;
6433 backtrack_common altbacktrack;
6434 pcre_uchar *ccbegin;
6435 pcre_uchar opcode;
6436 pcre_uchar bra = OP_BRA;
6437 jump_list *tmp = NULL;
6438 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6439 jump_list **found;
6440 /* Saving previous accept variables. */
6441 BOOL save_local_exit = common->local_exit;
6442 BOOL save_positive_assert = common->positive_assert;
6443 then_trap_backtrack *save_then_trap = common->then_trap;
6444 struct sljit_label *save_quit_label = common->quit_label;
6445 struct sljit_label *save_accept_label = common->accept_label;
6446 jump_list *save_quit = common->quit;
6447 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6448 jump_list *save_accept = common->accept;
6449 struct sljit_jump *jump;
6450 struct sljit_jump *brajump = NULL;
6451
6452 /* Assert captures then. */
6453 common->then_trap = NULL;
6454
6455 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6456 {
6457 SLJIT_ASSERT(!conditional);
6458 bra = *cc;
6459 cc++;
6460 }
6461 private_data_ptr = PRIVATE_DATA(cc);
6462 SLJIT_ASSERT(private_data_ptr != 0);
6463 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6464 backtrack->framesize = framesize;
6465 backtrack->private_data_ptr = private_data_ptr;
6466 opcode = *cc;
6467 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6468 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6469 ccbegin = cc;
6470 cc += GET(cc, 1);
6471
6472 if (bra == OP_BRAMINZERO)
6473 {
6474 /* This is a braminzero backtrack path. */
6475 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6476 free_stack(common, 1);
6477 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6478 }
6479
6480 if (framesize < 0)
6481 {
6482 extrasize = needs_control_head ? 2 : 1;
6483 if (framesize == no_frame)
6484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6485 allocate_stack(common, extrasize);
6486 if (needs_control_head)
6487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6489 if (needs_control_head)
6490 {
6491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6493 }
6494 }
6495 else
6496 {
6497 extrasize = needs_control_head ? 3 : 2;
6498 allocate_stack(common, framesize + extrasize);
6499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6500 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6501 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6502 if (needs_control_head)
6503 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6505 if (needs_control_head)
6506 {
6507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6510 }
6511 else
6512 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6513 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6514 }
6515
6516 memset(&altbacktrack, 0, sizeof(backtrack_common));
6517 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6518 {
6519 /* Negative assert is stronger than positive assert. */
6520 common->local_exit = TRUE;
6521 common->quit_label = NULL;
6522 common->quit = NULL;
6523 common->positive_assert = FALSE;
6524 }
6525 else
6526 common->positive_assert = TRUE;
6527 common->positive_assert_quit = NULL;
6528
6529 while (1)
6530 {
6531 common->accept_label = NULL;
6532 common->accept = NULL;
6533 altbacktrack.top = NULL;
6534 altbacktrack.topbacktracks = NULL;
6535
6536 if (*ccbegin == OP_ALT)
6537 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6538
6539 altbacktrack.cc = ccbegin;
6540 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6541 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6542 {
6543 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6544 {
6545 common->local_exit = save_local_exit;
6546 common->quit_label = save_quit_label;
6547 common->quit = save_quit;
6548 }
6549 common->positive_assert = save_positive_assert;
6550 common->then_trap = save_then_trap;
6551 common->accept_label = save_accept_label;
6552 common->positive_assert_quit = save_positive_assert_quit;
6553 common->accept = save_accept;
6554 return NULL;
6555 }
6556 common->accept_label = LABEL();
6557 if (common->accept != NULL)
6558 set_jumps(common->accept, common->accept_label);
6559
6560 /* Reset stack. */
6561 if (framesize < 0)
6562 {
6563 if (framesize == no_frame)
6564 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6565 else
6566 free_stack(common, extrasize);
6567 if (needs_control_head)
6568 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6569 }
6570 else
6571 {
6572 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6573 {
6574 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6575 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6576 if (needs_control_head)
6577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6578 }
6579 else
6580 {
6581 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6582 if (needs_control_head)
6583 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6584 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6585 }
6586 }
6587
6588 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6589 {
6590 /* We know that STR_PTR was stored on the top of the stack. */
6591 if (conditional)
6592 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6593 else if (bra == OP_BRAZERO)
6594 {
6595 if (framesize < 0)
6596 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6597 else
6598 {
6599 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6600 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6601 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6602 }
6603 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6604 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6605 }
6606 else if (framesize >= 0)
6607 {
6608 /* For OP_BRA and OP_BRAMINZERO. */
6609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6610 }
6611 }
6612 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6613
6614 compile_backtrackingpath(common, altbacktrack.top);
6615 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6616 {
6617 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6618 {
6619 common->local_exit = save_local_exit;
6620 common->quit_label = save_quit_label;
6621 common->quit = save_quit;
6622 }
6623 common->positive_assert = save_positive_assert;
6624 common->then_trap = save_then_trap;
6625 common->accept_label = save_accept_label;
6626 common->positive_assert_quit = save_positive_assert_quit;
6627 common->accept = save_accept;
6628 return NULL;
6629 }
6630 set_jumps(altbacktrack.topbacktracks, LABEL());
6631
6632 if (*cc != OP_ALT)
6633 break;
6634
6635 ccbegin = cc;
6636 cc += GET(cc, 1);
6637 }
6638
6639 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6640 {
6641 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6642 /* Makes the check less complicated below. */
6643 common->positive_assert_quit = common->quit;
6644 }
6645
6646 /* None of them matched. */
6647 if (common->positive_assert_quit != NULL)
6648 {
6649 jump = JUMP(SLJIT_JUMP);
6650 set_jumps(common->positive_assert_quit, LABEL());
6651 SLJIT_ASSERT(framesize != no_stack);
6652 if (framesize < 0)
6653 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6654 else
6655 {
6656 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6657 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6658 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6659 }
6660 JUMPHERE(jump);
6661 }
6662
6663 if (needs_control_head)
6664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6665
6666 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6667 {
6668 /* Assert is failed. */
6669 if (conditional || bra == OP_BRAZERO)
6670 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6671
6672 if (framesize < 0)
6673 {
6674 /* The topmost item should be 0. */
6675 if (bra == OP_BRAZERO)
6676 {
6677 if (extrasize == 2)
6678 free_stack(common, 1);
6679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6680 }
6681 else
6682 free_stack(common, extrasize);
6683 }
6684 else
6685 {
6686 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6687 /* The topmost item should be 0. */
6688 if (bra == OP_BRAZERO)
6689 {
6690 free_stack(common, framesize + extrasize - 1);
6691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6692 }
6693 else
6694 free_stack(common, framesize + extrasize);
6695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6696 }
6697 jump = JUMP(SLJIT_JUMP);
6698 if (bra != OP_BRAZERO)
6699 add_jump(compiler, target, jump);
6700
6701 /* Assert is successful. */
6702 set_jumps(tmp, LABEL());
6703 if (framesize < 0)
6704 {
6705 /* We know that STR_PTR was stored on the top of the stack. */
6706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6707 /* Keep the STR_PTR on the top of the stack. */
6708 if (bra == OP_BRAZERO)
6709 {
6710 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6711 if (extrasize == 2)
6712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6713 }
6714 else if (bra == OP_BRAMINZERO)
6715 {
6716 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6718 }
6719 }
6720 else
6721 {
6722 if (bra == OP_BRA)
6723 {
6724 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6725 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6726 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6727 }
6728 else
6729 {
6730 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6731 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6732 if (extrasize == 2)
6733 {
6734 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6735 if (bra == OP_BRAMINZERO)
6736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6737 }
6738 else
6739 {
6740 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6741 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6742 }
6743 }
6744 }
6745
6746 if (bra == OP_BRAZERO)
6747 {
6748 backtrack->matchingpath = LABEL();
6749 SET_LABEL(jump, backtrack->matchingpath);
6750 }
6751 else if (bra == OP_BRAMINZERO)
6752 {
6753 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6754 JUMPHERE(brajump);
6755 if (framesize >= 0)
6756 {
6757 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6758 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6760 }
6761 set_jumps(backtrack->common.topbacktracks, LABEL());
6762 }
6763 }
6764 else
6765 {
6766 /* AssertNot is successful. */
6767 if (framesize < 0)
6768 {
6769 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6770 if (bra != OP_BRA)
6771 {
6772 if (extrasize == 2)
6773 free_stack(common, 1);
6774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6775 }
6776 else
6777 free_stack(common, extrasize);
6778 }
6779