/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1624 - (show annotations)
Fri Feb 5 13:47:43 2016 UTC (3 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 334099 byte(s)
Assertion code generator in JIT has been optimized.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Defines for debugging purposes. Both debug switches below are left
commented out, so neither is active unless one is enabled by hand. */

/* 1 - Use unoptimized capturing brackets.
2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size.
NOTE(review): the unit is presumably bytes - confirm at the point of use. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero; this file defines
SLJIT_DEBUG as 0 before including sljit. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for a compiled matcher: the jit_function type declared
later in this file takes a pointer to this structure. Pointer members are
grouped first and the remaining members follow, as the markers below say.
NOTE(review): the meaning of the individual members is inferred from their
names only - confirm against the code that fills the structure in. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  /* Presumably the current position, the start and the end of the subject. */
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  /* Presumably the caller's offset (output) vector. */
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  pcre_uint32 limit_match;
  int real_offset_count;
  int offset_count;
  /* Presumably the matching option bits, one byte each. */
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
179
/* Bookkeeping for generated code. The three arrays have one entry per
compile mode (JIT_NUMBER_OF_COMPILE_MODES entries each); the remaining
members are single values.
NOTE(review): member meanings below are inferred from their names -
confirm against the code that creates and frees this structure. */
typedef struct executable_functions {
  /* Per mode: presumably the entry point of the generated function. */
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  /* Per mode: presumably the head of the read-only data chain. */
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  /* Per mode: presumably the size of the generated code. */
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* User callback and the opaque pointer stored next to it. */
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  pcre_uint32 limit_match;
} executable_functions;
189
/* Node of a singly linked list of sljit jumps. Lists of this type are
kept in the backtrack structures and in compiler_common. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194
/* Node of a singly linked list pairing a jump with a label.
NOTE(review): presumably "start" jumps into an out-of-line stub and "quit"
is where execution resumes afterwards - confirm at the point of use. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
/* Node of a singly linked list pairing a label with a pointer to a word.
NOTE(review): presumably *update_addr receives the address of "label" once
code generation has finished - confirm at the point of use. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
206
/* Negative sentinel values. The framesize members in this file are
documented as "less than 0 if a frame is not needed", so these are
presumably the values stored there - confirm where the difference between
the two matters. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Two numeric tags, 0 and 1.
NOTE(review): presumably they identify entries of the control verb chain
(see control_head_ptr in compiler_common) - confirm at the point of use. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
/* Pointer to a function that takes a jit_arguments block and returns an
int, declared with the SLJIT_CALL calling-convention macro. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants, so that a pointer to a descendant can also be used
as a pointer to this structure. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
233
/* Descendant of backtrack_common (which is its first member).
NOTE(review): by its name this record serves the assertion opcodes -
confirm against the code that allocates it. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
244
/* Descendant of backtrack_common (which is its first member) carrying
the labels and per-opcode data described by the member comments below. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. Only one member of the
  union is meaningful at a time; which one depends on the opcode. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
264
/* Descendant of backtrack_common (which is its first member).
NOTE(review): by its name this record serves the possessive bracket
opcodes (OP_BRAPOS and relatives) - confirm against its allocation site. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
274
/* Descendant of backtrack_common (which is its first member) that adds a
single matching-path label.
NOTE(review): by its name this record serves OP_BRAMINZERO - confirm. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
279
/* Descendant of backtrack_common (which is its first member) that adds
the label of the next iteration. */
typedef struct iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
285
/* Node of a singly linked list with one entry per generated function
(compiler_common keeps the list head in "entries"). */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode.
  NOTE(review): stored as an integer, presumably an offset rather than a
  pointer - confirm at the point of use. */
  sljit_sw start;
} recurse_entry;
295
/* Descendant of backtrack_common (which is its first member) that adds a
single flag.
NOTE(review): presumably the flag is TRUE when the recursed pattern was
generated in place instead of being called - confirm. */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
300
/* Extra opcode value, equal to OP_TABLE_LENGTH.
NOTE(review): presumably one past the last real opcode, so it cannot clash
with any of them and has no entry in the opcode tables - confirm. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Descendant of backtrack_common (which is its first member) used for
the then-trap bookkeeping described by the member comments below. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
315
/* NOTE(review): not referenced in the part of the file reviewed here;
the meaning of the limit has to be taken from its users. */
#define MAX_RANGE_SIZE 4

/* State shared by all code generator functions: almost every function
in this file receives a pointer to this structure as "common". The *_ptr
members of type int hold offsets into the local space (see the OVECTOR
and PRIVATE_DATA macros); a value of 0 means "not allocated" in the
functions that reserve them. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  pcre_uchar *start;
  /* Maps private data offset to each opcode. Indexed by the distance of
  the opcode from "start". */
  sljit_si *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. Indexed by the
  capturing bracket number. */
  pcre_uint8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  pcre_uint8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  int cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. */
  int recursive_head_ptr;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. */
  int mark_ptr;
  /* Recursive control verb management chain. */
  int control_head_ptr;
  /* Points to the last matched capture block index. */
  int capture_last_ptr;
  /* Points to the starting position of the current match. */
  int start_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): this inherited comment looks inverted - the member name,
  and the fact that check_opcode_types sets it to TRUE for OP_SET_SOM,
  suggest it means "the match may be empty". Confirm where it is set. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* Needs to know the start position anytime. */
  BOOL needs_start_ptr;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  pcre_uint32 nlmax;
  pcre_uint32 nlmin;
  int newline;
  int bsr_nltype;
  pcre_uint32 bsr_nlmax;
  pcre_uint32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. name_entry_size is the distance between two
  consecutive entries of name_table. */
  pcre_uchar *name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL jscript_compat;
  /* The members below exist only in builds with the matching SUPPORT_* /
  COMPILE_PCRE8 settings. */
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
433
/* For byte_sequence_compare. */

/* "length" and "sourcereg" are always present. The remaining members
exist only when SLJIT_UNALIGNED is defined and non-zero: "c" and "oc" are
two unions of the same layout that overlay an int, a 16 bit value and an
array of character units whose element type depends on the compiling mode
(8, 16 or 32 bit).
NOTE(review): presumably "c" collects the characters to compare and "oc"
their other-case variants, with "ucharptr" as the fill position -
confirm in byte_sequence_compare. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
467
/* Undefine sljit macros. CMP is removed here because this file defines
its own CMP shortcut further down. */
#undef CMP

/* Used for accessing the elements of the stack. The offsets are
negative: STACK(0) is one word below the base register, STACK(1) two
words below, and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by the generated code.
NOTE(review): in sljit the R registers are presumably scratch registers
and the S registers are preserved across calls - confirm in sljitLir.h. */
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
484
/* Local space layout. Every offset below is a multiple of the machine
word size, sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. All four macros expect a variable named "common" (a
compiler_common pointer) to be in scope. */
#define OVECTOR_START (common->ovector_start)
/* Offset of the i-th word of the output vector. */
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
/* Offset of the i-th word of the second group, which starts at cbra_ptr. */
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data entry of the opcode at cc (indexed by its distance from
the first opcode). */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
/* Select the sljit move opcodes that match the character unit width of
the library being built: unsigned byte, unsigned half word or unsigned
int for the 8, 16 and 32 bit libraries. Exactly one COMPILE_PCREx macro
must be defined, otherwise the build stops with an error.
NOTE(review): the MOVU_ forms are presumably sljit's "move with address
update" variants - confirm in sljitLir.h. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif
515
/* Shortcuts. Every emitter macro below expands to an sljit call whose
first argument is a local variable named "compiler"; DEFINE_COMPILER
declares that variable from common->compiler, so a function has to use
DEFINE_COMPILER (with "common" in scope) before any other shortcut. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
/* Emit a one-operand / two-operand instruction. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emit a label at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)
/* Emit a jump whose target is set later. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emit a label at the current position and make "jump" target it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
/* Make "jump" target an existing label. */
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
/* Emit a compare-and-jump whose target is set later. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* NOTE(review): presumably the "no upper limit" value passed to the
character reading helpers - confirm at the point of use. */
#define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Number of set bits in every 4 bit value: entry i holds the population
count of i for i = 0..15. The table is a pure lookup table, so it is
declared const to keep it read-only. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
572 /* Functions which might need modification for all new supported opcodes:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
/* Returns the address of the opcode that follows the one at cc, or NULL
when the opcode cannot be handled (OP_ANYBYTE in UTF mode, or an opcode
that is not listed below).

common  only read for the "utf" flag, and only in SUPPORT_UTF builds
cc      points to the opcode to step over

Note that opening brackets are in the fixed-length group, so stepping
over one enters the group instead of skipping it. */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
  SLJIT_UNUSED_ARG(common);
  switch(*cc)
  {
    /* Opcodes whose full length is given by the OP_lengths table. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CRSTAR:
    case OP_CRMINSTAR:
    case OP_CRPLUS:
    case OP_CRMINPLUS:
    case OP_CRQUERY:
    case OP_CRMINQUERY:
    case OP_CRRANGE:
    case OP_CRMINRANGE:
    case OP_CRPOSSTAR:
    case OP_CRPOSPLUS:
    case OP_CRPOSQUERY:
    case OP_CRPOSRANGE:
    case OP_CLASS:
    case OP_NCLASS:
    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    case OP_RECURSE:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_KET:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_KETRPOS:
    case OP_REVERSE:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_COND:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    case OP_SCOND:
    case OP_CREF:
    case OP_DNCREF:
    case OP_RREF:
    case OP_DNRREF:
    case OP_DEF:
    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    case OP_PRUNE:
    case OP_SKIP:
    case OP_THEN:
    case OP_COMMIT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    case OP_CLOSE:
    case OP_SKIPZERO:
    return cc + PRIV(OP_lengths)[*cc];

    /* Opcodes that end with a literal character. The table length is
    applied first; in UTF mode the unit just before the new position is
    then inspected and, if it announces extra units, those are skipped
    as well. */
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    return cc;

    /* Special cases. */
    /* Type repeats stop one unit short of the table length.
    NOTE(review): presumably the last unit counted by the table is the
    character type opcode, which is then visited as an opcode of its own -
    confirm against the OP_lengths table. */
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    return cc + PRIV(OP_lengths)[*cc] - 1;

    /* Rejected (NULL) in UTF mode, a single unit otherwise. */
    case OP_ANYBYTE:
#ifdef SUPPORT_UTF
    if (common->utf) return NULL;
#endif
    return cc + 1;

    /* The length of an extended class is stored right after the opcode. */
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    return cc + GET(cc, 1);
#endif

    /* Verbs with an argument: three fixed units plus cc[1] further units.
    NOTE(review): presumably opcode, length unit and terminator, with
    cc[1] holding the length of the name - confirm against the compiler. */
    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
    return cc + 1 + 2 + cc[1];

    default:
    /* All opcodes are supported now! */
    SLJIT_ASSERT_STOP();
    return NULL;
  }
}
778
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
/* Checks whether the bracket at "begin" is the first of several
identical copies that form an expanded repeat, and if so records the
repeat in common->private_data_ptrs.

Two shapes are recognised:
  - at least three consecutive identical copies (recorded as OP_EXACT);
  - a tail of nested optional copies, each introduced by OP_BRAZERO or
    OP_BRAMINZERO (recorded as OP_UPTO or OP_MINUPTO).

A record occupies three consecutive slots, starting with the slot of the
code unit that follows the closing OP_KET opcode of a copy: the distance
to skip, the repeat opcode and the repeat count.

Returns TRUE if a repeat was recorded now or by an earlier call for the
same bracket, FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
  pcre_uchar *end = bracketend(begin);
  pcre_uchar *next;
  pcre_uchar *next_end;
  pcre_uchar *max_end;
  pcre_uchar type;
  /* Length of one copy, from its opening opcode to the end of its KET. */
  sljit_sw length = end - begin;
  int min, max, i;

  /* Detect fixed iterations first. Only groups closed by a plain OP_KET
  are candidates. */
  if (end[-(1 + LINK_SIZE)] != OP_KET)
    return FALSE;

  /* Already detected repeat. */
  if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
    return TRUE;

  /* Count the copies that directly follow and are identical unit by unit. */
  next = end;
  min = 1;
  while (1)
  {
    if (*next != *begin)
      break;
    next_end = bracketend(next);
    if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    min++;
  }

  /* Exactly two copies are never converted. */
  if (min == 2)
    return FALSE;

  max = 0;
  /* End of the last mandatory copy. */
  max_end = next;
  if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
    type = *next;
    /* Count the nested levels of the form: type, OP_BRA, copy. */
    while (1)
    {
      if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
        break;
      next_end = bracketend(next + 2 + LINK_SIZE);
      if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
        break;
      next = next_end;
      max++;
    }

    /* The innermost level is the type opcode directly followed by a copy,
    without a wrapping OP_BRA. */
    if (next[0] == type && next[1] == *begin && max >= 1)
    {
      next_end = bracketend(next + 1);
      if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
        /* One OP_KET must close each of the max wrapping OP_BRA groups. */
        for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
          if (*next_end != OP_KET)
            break;

        if (i == max)
        {
          /* Record the optional tail after the last mandatory copy. */
          common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
          common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
          /* +2 the original and the last. */
          common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
          if (min == 1)
            return TRUE;
          /* The last mandatory copy now belongs to the tail record, so it
          no longer counts as a fixed iteration; step max_end back using
          the link stored in its KET.
          NOTE(review): this lands on the start of that copy only if the
          KET link spans the whole group - confirm for groups that
          contain alternatives. */
          min--;
          max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

  /* Record the fixed part when at least three copies remain. */
  if (min >= 3)
  {
    common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
    common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
    common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
    return TRUE;
  }

  return FALSE;
}
1007
/* Groups of case labels for single character and character type repeat
opcodes, meant to be placed inside a switch statement. In
set_private_data_ptrs the groups are handled as follows: the _1 groups
get one word of private data, the _2A groups two words, and the
PRIVATE_DATA_2B group two words with a larger opcode size that includes
the repeat count. */

/* Minimizing star / plus and both query forms of character repeats. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: \
  case OP_MINPLUS: \
  case OP_QUERY: \
  case OP_MINQUERY: \
  case OP_MINSTARI: \
  case OP_MINPLUSI: \
  case OP_QUERYI: \
  case OP_MINQUERYI: \
  case OP_NOTMINSTAR: \
  case OP_NOTMINPLUS: \
  case OP_NOTQUERY: \
  case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: \
  case OP_NOTMINPLUSI: \
  case OP_NOTQUERYI: \
  case OP_NOTMINQUERYI:

/* Greedy star / plus forms of character repeats. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: \
  case OP_PLUS: \
  case OP_STARI: \
  case OP_PLUSI: \
  case OP_NOTSTAR: \
  case OP_NOTPLUS: \
  case OP_NOTSTARI: \
  case OP_NOTPLUSI:

/* Bounded (upto) forms of character repeats. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: \
  case OP_MINUPTO: \
  case OP_UPTOI: \
  case OP_MINUPTOI: \
  case OP_NOTUPTO: \
  case OP_NOTMINUPTO: \
  case OP_NOTUPTOI: \
  case OP_NOTMINUPTOI:

/* Minimizing star / plus and both query forms of character type repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: \
  case OP_TYPEMINPLUS: \
  case OP_TYPEQUERY: \
  case OP_TYPEMINQUERY:

/* Greedy star / plus forms of character type repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: \
  case OP_TYPEPLUS:

/* Bounded (upto) forms of character type repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: \
  case OP_TYPEMINUPTO:
1059
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067 BOOL repeat_check = TRUE;
1068
1069 while (cc < ccend)
1070 {
1071 space = 0;
1072 size = 0;
1073 bracketlen = 0;
1074 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1075 break;
1076
1077 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1078 {
1079 if (detect_repeat(common, cc))
1080 {
1081 /* These brackets are converted to repeats, so no global
1082 based single character repeat is allowed. */
1083 if (cc >= end)
1084 end = bracketend(cc);
1085 }
1086 }
1087 repeat_check = TRUE;
1088
1089 switch(*cc)
1090 {
1091 case OP_KET:
1092 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1093 {
1094 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1095 private_data_ptr += sizeof(sljit_sw);
1096 cc += common->private_data_ptrs[cc + 1 - common->start];
1097 }
1098 cc += 1 + LINK_SIZE;
1099 break;
1100
1101 case OP_ASSERT:
1102 case OP_ASSERT_NOT:
1103 case OP_ASSERTBACK:
1104 case OP_ASSERTBACK_NOT:
1105 case OP_ONCE:
1106 case OP_ONCE_NC:
1107 case OP_BRAPOS:
1108 case OP_SBRA:
1109 case OP_SBRAPOS:
1110 case OP_SCOND:
1111 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1112 private_data_ptr += sizeof(sljit_sw);
1113 bracketlen = 1 + LINK_SIZE;
1114 break;
1115
1116 case OP_CBRAPOS:
1117 case OP_SCBRAPOS:
1118 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1119 private_data_ptr += sizeof(sljit_sw);
1120 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1121 break;
1122
1123 case OP_COND:
1124 /* Might be a hidden SCOND. */
1125 alternative = cc + GET(cc, 1);
1126 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1127 {
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 }
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_BRA:
1135 bracketlen = 1 + LINK_SIZE;
1136 break;
1137
1138 case OP_CBRA:
1139 case OP_SCBRA:
1140 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1141 break;
1142
1143 case OP_BRAZERO:
1144 case OP_BRAMINZERO:
1145 case OP_BRAPOSZERO:
1146 repeat_check = FALSE;
1147 size = 1;
1148 break;
1149
1150 CASE_ITERATOR_PRIVATE_DATA_1
1151 space = 1;
1152 size = -2;
1153 break;
1154
1155 CASE_ITERATOR_PRIVATE_DATA_2A
1156 space = 2;
1157 size = -2;
1158 break;
1159
1160 CASE_ITERATOR_PRIVATE_DATA_2B
1161 space = 2;
1162 size = -(2 + IMM2_SIZE);
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1166 space = 1;
1167 size = 1;
1168 break;
1169
1170 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1171 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1172 space = 2;
1173 size = 1;
1174 break;
1175
1176 case OP_TYPEUPTO:
1177 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1178 space = 2;
1179 size = 1 + IMM2_SIZE;
1180 break;
1181
1182 case OP_TYPEMINUPTO:
1183 space = 2;
1184 size = 1 + IMM2_SIZE;
1185 break;
1186
1187 case OP_CLASS:
1188 case OP_NCLASS:
1189 size += 1 + 32 / sizeof(pcre_uchar);
1190 space = get_class_iterator_size(cc + size);
1191 break;
1192
1193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1194 case OP_XCLASS:
1195 size = GET(cc, 1);
1196 space = get_class_iterator_size(cc + size);
1197 break;
1198 #endif
1199
1200 default:
1201 cc = next_opcode(common, cc);
1202 SLJIT_ASSERT(cc != NULL);
1203 break;
1204 }
1205
1206 /* Character iterators, which are not inside a repeated bracket,
1207 gets a private slot instead of allocating it on the stack. */
1208 if (space > 0 && cc >= end)
1209 {
1210 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1211 private_data_ptr += sizeof(sljit_sw) * space;
1212 }
1213
1214 if (size != 0)
1215 {
1216 if (size < 0)
1217 {
1218 cc += -size;
1219 #ifdef SUPPORT_UTF
1220 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1221 #endif
1222 }
1223 else
1224 cc += size;
1225 }
1226
1227 if (bracketlen > 0)
1228 {
1229 if (cc >= end)
1230 {
1231 end = bracketend(cc);
1232 if (end[-1 - LINK_SIZE] == OP_KET)
1233 end = NULL;
1234 }
1235 cc += bracketlen;
1236 }
1237 }
1238 *private_data_start = private_data_ptr;
1239 }
1240
1241 /* Returns with a frame_types (always < 0) if no need for frame. */
1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1243 {
1244 int length = 0;
1245 int possessive = 0;
1246 BOOL stack_restore = FALSE;
1247 BOOL setsom_found = recursive;
1248 BOOL setmark_found = recursive;
1249 /* The last capture is a local variable even for recursions. */
1250 BOOL capture_last_found = FALSE;
1251
1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1253 SLJIT_ASSERT(common->control_head_ptr != 0);
1254 *needs_control_head = TRUE;
1255 #else
1256 *needs_control_head = FALSE;
1257 #endif
1258
1259 if (ccend == NULL)
1260 {
1261 ccend = bracketend(cc) - (1 + LINK_SIZE);
1262 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1263 {
1264 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1265 /* This is correct regardless of common->capture_last_ptr. */
1266 capture_last_found = TRUE;
1267 }
1268 cc = next_opcode(common, cc);
1269 }
1270
1271 SLJIT_ASSERT(cc != NULL);
1272 while (cc < ccend)
1273 switch(*cc)
1274 {
1275 case OP_SET_SOM:
1276 SLJIT_ASSERT(common->has_set_som);
1277 stack_restore = TRUE;
1278 if (!setsom_found)
1279 {
1280 length += 2;
1281 setsom_found = TRUE;
1282 }
1283 cc += 1;
1284 break;
1285
1286 case OP_MARK:
1287 case OP_PRUNE_ARG:
1288 case OP_THEN_ARG:
1289 SLJIT_ASSERT(common->mark_ptr != 0);
1290 stack_restore = TRUE;
1291 if (!setmark_found)
1292 {
1293 length += 2;
1294 setmark_found = TRUE;
1295 }
1296 if (common->control_head_ptr != 0)
1297 *needs_control_head = TRUE;
1298 cc += 1 + 2 + cc[1];
1299 break;
1300
1301 case OP_RECURSE:
1302 stack_restore = TRUE;
1303 if (common->has_set_som && !setsom_found)
1304 {
1305 length += 2;
1306 setsom_found = TRUE;
1307 }
1308 if (common->mark_ptr != 0 && !setmark_found)
1309 {
1310 length += 2;
1311 setmark_found = TRUE;
1312 }
1313 if (common->capture_last_ptr != 0 && !capture_last_found)
1314 {
1315 length += 2;
1316 capture_last_found = TRUE;
1317 }
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 case OP_CBRA:
1322 case OP_CBRAPOS:
1323 case OP_SCBRA:
1324 case OP_SCBRAPOS:
1325 stack_restore = TRUE;
1326 if (common->capture_last_ptr != 0 && !capture_last_found)
1327 {
1328 length += 2;
1329 capture_last_found = TRUE;
1330 }
1331 length += 3;
1332 cc += 1 + LINK_SIZE + IMM2_SIZE;
1333 break;
1334
1335 case OP_THEN:
1336 stack_restore = TRUE;
1337 if (common->control_head_ptr != 0)
1338 *needs_control_head = TRUE;
1339 cc ++;
1340 break;
1341
1342 default:
1343 stack_restore = TRUE;
1344 /* Fall through. */
1345
1346 case OP_NOT_WORD_BOUNDARY:
1347 case OP_WORD_BOUNDARY:
1348 case OP_NOT_DIGIT:
1349 case OP_DIGIT:
1350 case OP_NOT_WHITESPACE:
1351 case OP_WHITESPACE:
1352 case OP_NOT_WORDCHAR:
1353 case OP_WORDCHAR:
1354 case OP_ANY:
1355 case OP_ALLANY:
1356 case OP_ANYBYTE:
1357 case OP_NOTPROP:
1358 case OP_PROP:
1359 case OP_ANYNL:
1360 case OP_NOT_HSPACE:
1361 case OP_HSPACE:
1362 case OP_NOT_VSPACE:
1363 case OP_VSPACE:
1364 case OP_EXTUNI:
1365 case OP_EODN:
1366 case OP_EOD:
1367 case OP_CIRC:
1368 case OP_CIRCM:
1369 case OP_DOLL:
1370 case OP_DOLLM:
1371 case OP_CHAR:
1372 case OP_CHARI:
1373 case OP_NOT:
1374 case OP_NOTI:
1375
1376 case OP_EXACT:
1377 case OP_POSSTAR:
1378 case OP_POSPLUS:
1379 case OP_POSQUERY:
1380 case OP_POSUPTO:
1381
1382 case OP_EXACTI:
1383 case OP_POSSTARI:
1384 case OP_POSPLUSI:
1385 case OP_POSQUERYI:
1386 case OP_POSUPTOI:
1387
1388 case OP_NOTEXACT:
1389 case OP_NOTPOSSTAR:
1390 case OP_NOTPOSPLUS:
1391 case OP_NOTPOSQUERY:
1392 case OP_NOTPOSUPTO:
1393
1394 case OP_NOTEXACTI:
1395 case OP_NOTPOSSTARI:
1396 case OP_NOTPOSPLUSI:
1397 case OP_NOTPOSQUERYI:
1398 case OP_NOTPOSUPTOI:
1399
1400 case OP_TYPEEXACT:
1401 case OP_TYPEPOSSTAR:
1402 case OP_TYPEPOSPLUS:
1403 case OP_TYPEPOSQUERY:
1404 case OP_TYPEPOSUPTO:
1405
1406 case OP_CLASS:
1407 case OP_NCLASS:
1408 case OP_XCLASS:
1409 case OP_CALLOUT:
1410
1411 cc = next_opcode(common, cc);
1412 SLJIT_ASSERT(cc != NULL);
1413 break;
1414 }
1415
1416 /* Possessive quantifiers can use a special case. */
1417 if (SLJIT_UNLIKELY(possessive == length))
1418 return stack_restore ? no_frame : no_stack;
1419
1420 if (length > 0)
1421 return length + 1;
1422 return stack_restore ? no_frame : no_stack;
1423 }
1424
1425 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1426 {
1427 DEFINE_COMPILER;
1428 BOOL setsom_found = recursive;
1429 BOOL setmark_found = recursive;
1430 /* The last capture is a local variable even for recursions. */
1431 BOOL capture_last_found = FALSE;
1432 int offset;
1433
1434 /* >= 1 + shortest item size (2) */
1435 SLJIT_UNUSED_ARG(stacktop);
1436 SLJIT_ASSERT(stackpos >= stacktop + 2);
1437
1438 stackpos = STACK(stackpos);
1439 if (ccend == NULL)
1440 {
1441 ccend = bracketend(cc) - (1 + LINK_SIZE);
1442 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1443 cc = next_opcode(common, cc);
1444 }
1445
1446 SLJIT_ASSERT(cc != NULL);
1447 while (cc < ccend)
1448 switch(*cc)
1449 {
1450 case OP_SET_SOM:
1451 SLJIT_ASSERT(common->has_set_som);
1452 if (!setsom_found)
1453 {
1454 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1456 stackpos += (int)sizeof(sljit_sw);
1457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1458 stackpos += (int)sizeof(sljit_sw);
1459 setsom_found = TRUE;
1460 }
1461 cc += 1;
1462 break;
1463
1464 case OP_MARK:
1465 case OP_PRUNE_ARG:
1466 case OP_THEN_ARG:
1467 SLJIT_ASSERT(common->mark_ptr != 0);
1468 if (!setmark_found)
1469 {
1470 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1472 stackpos += (int)sizeof(sljit_sw);
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475 setmark_found = TRUE;
1476 }
1477 cc += 1 + 2 + cc[1];
1478 break;
1479
1480 case OP_RECURSE:
1481 if (common->has_set_som && !setsom_found)
1482 {
1483 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1485 stackpos += (int)sizeof(sljit_sw);
1486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1487 stackpos += (int)sizeof(sljit_sw);
1488 setsom_found = TRUE;
1489 }
1490 if (common->mark_ptr != 0 && !setmark_found)
1491 {
1492 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1494 stackpos += (int)sizeof(sljit_sw);
1495 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1496 stackpos += (int)sizeof(sljit_sw);
1497 setmark_found = TRUE;
1498 }
1499 if (common->capture_last_ptr != 0 && !capture_last_found)
1500 {
1501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1502 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1503 stackpos += (int)sizeof(sljit_sw);
1504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1505 stackpos += (int)sizeof(sljit_sw);
1506 capture_last_found = TRUE;
1507 }
1508 cc += 1 + LINK_SIZE;
1509 break;
1510
1511 case OP_CBRA:
1512 case OP_CBRAPOS:
1513 case OP_SCBRA:
1514 case OP_SCBRAPOS:
1515 if (common->capture_last_ptr != 0 && !capture_last_found)
1516 {
1517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1519 stackpos += (int)sizeof(sljit_sw);
1520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1521 stackpos += (int)sizeof(sljit_sw);
1522 capture_last_found = TRUE;
1523 }
1524 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1525 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1526 stackpos += (int)sizeof(sljit_sw);
1527 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1528 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1530 stackpos += (int)sizeof(sljit_sw);
1531 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1532 stackpos += (int)sizeof(sljit_sw);
1533
1534 cc += 1 + LINK_SIZE + IMM2_SIZE;
1535 break;
1536
1537 default:
1538 cc = next_opcode(common, cc);
1539 SLJIT_ASSERT(cc != NULL);
1540 break;
1541 }
1542
1543 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1544 SLJIT_ASSERT(stackpos == STACK(stacktop));
1545 }
1546
1547 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1548 {
1549 int private_data_length = needs_control_head ? 3 : 2;
1550 int size;
1551 pcre_uchar *alternative;
1552 /* Calculate the sum of the private machine words. */
1553 while (cc < ccend)
1554 {
1555 size = 0;
1556 switch(*cc)
1557 {
1558 case OP_KET:
1559 if (PRIVATE_DATA(cc) != 0)
1560 {
1561 private_data_length++;
1562 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1563 cc += PRIVATE_DATA(cc + 1);
1564 }
1565 cc += 1 + LINK_SIZE;
1566 break;
1567
1568 case OP_ASSERT:
1569 case OP_ASSERT_NOT:
1570 case OP_ASSERTBACK:
1571 case OP_ASSERTBACK_NOT:
1572 case OP_ONCE:
1573 case OP_ONCE_NC:
1574 case OP_BRAPOS:
1575 case OP_SBRA:
1576 case OP_SBRAPOS:
1577 case OP_SCOND:
1578 private_data_length++;
1579 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1580 cc += 1 + LINK_SIZE;
1581 break;
1582
1583 case OP_CBRA:
1584 case OP_SCBRA:
1585 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1586 private_data_length++;
1587 cc += 1 + LINK_SIZE + IMM2_SIZE;
1588 break;
1589
1590 case OP_CBRAPOS:
1591 case OP_SCBRAPOS:
1592 private_data_length += 2;
1593 cc += 1 + LINK_SIZE + IMM2_SIZE;
1594 break;
1595
1596 case OP_COND:
1597 /* Might be a hidden SCOND. */
1598 alternative = cc + GET(cc, 1);
1599 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1600 private_data_length++;
1601 cc += 1 + LINK_SIZE;
1602 break;
1603
1604 CASE_ITERATOR_PRIVATE_DATA_1
1605 if (PRIVATE_DATA(cc))
1606 private_data_length++;
1607 cc += 2;
1608 #ifdef SUPPORT_UTF
1609 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1610 #endif
1611 break;
1612
1613 CASE_ITERATOR_PRIVATE_DATA_2A
1614 if (PRIVATE_DATA(cc))
1615 private_data_length += 2;
1616 cc += 2;
1617 #ifdef SUPPORT_UTF
1618 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1619 #endif
1620 break;
1621
1622 CASE_ITERATOR_PRIVATE_DATA_2B
1623 if (PRIVATE_DATA(cc))
1624 private_data_length += 2;
1625 cc += 2 + IMM2_SIZE;
1626 #ifdef SUPPORT_UTF
1627 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1628 #endif
1629 break;
1630
1631 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1632 if (PRIVATE_DATA(cc))
1633 private_data_length++;
1634 cc += 1;
1635 break;
1636
1637 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1638 if (PRIVATE_DATA(cc))
1639 private_data_length += 2;
1640 cc += 1;
1641 break;
1642
1643 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1644 if (PRIVATE_DATA(cc))
1645 private_data_length += 2;
1646 cc += 1 + IMM2_SIZE;
1647 break;
1648
1649 case OP_CLASS:
1650 case OP_NCLASS:
1651 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1652 case OP_XCLASS:
1653 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1654 #else
1655 size = 1 + 32 / (int)sizeof(pcre_uchar);
1656 #endif
1657 if (PRIVATE_DATA(cc))
1658 private_data_length += get_class_iterator_size(cc + size);
1659 cc += size;
1660 break;
1661
1662 default:
1663 cc = next_opcode(common, cc);
1664 SLJIT_ASSERT(cc != NULL);
1665 break;
1666 }
1667 }
1668 SLJIT_ASSERT(cc == ccend);
1669 return private_data_length;
1670 }
1671
1672 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1673 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1674 {
1675 DEFINE_COMPILER;
1676 int srcw[2];
1677 int count, size;
1678 BOOL tmp1next = TRUE;
1679 BOOL tmp1empty = TRUE;
1680 BOOL tmp2empty = TRUE;
1681 pcre_uchar *alternative;
1682 enum {
1683 start,
1684 loop,
1685 end
1686 } status;
1687
1688 status = save ? start : loop;
1689 stackptr = STACK(stackptr - 2);
1690 stacktop = STACK(stacktop - 1);
1691
1692 if (!save)
1693 {
1694 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1695 if (stackptr < stacktop)
1696 {
1697 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1698 stackptr += sizeof(sljit_sw);
1699 tmp1empty = FALSE;
1700 }
1701 if (stackptr < stacktop)
1702 {
1703 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1704 stackptr += sizeof(sljit_sw);
1705 tmp2empty = FALSE;
1706 }
1707 /* The tmp1next must be TRUE in either way. */
1708 }
1709
1710 do
1711 {
1712 count = 0;
1713 switch(status)
1714 {
1715 case start:
1716 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1717 count = 1;
1718 srcw[0] = common->recursive_head_ptr;
1719 if (needs_control_head)
1720 {
1721 SLJIT_ASSERT(common->control_head_ptr != 0);
1722 count = 2;
1723 srcw[1] = common->control_head_ptr;
1724 }
1725 status = loop;
1726 break;
1727
1728 case loop:
1729 if (cc >= ccend)
1730 {
1731 status = end;
1732 break;
1733 }
1734
1735 switch(*cc)
1736 {
1737 case OP_KET:
1738 if (PRIVATE_DATA(cc) != 0)
1739 {
1740 count = 1;
1741 srcw[0] = PRIVATE_DATA(cc);
1742 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1743 cc += PRIVATE_DATA(cc + 1);
1744 }
1745 cc += 1 + LINK_SIZE;
1746 break;
1747
1748 case OP_ASSERT:
1749 case OP_ASSERT_NOT:
1750 case OP_ASSERTBACK:
1751 case OP_ASSERTBACK_NOT:
1752 case OP_ONCE:
1753 case OP_ONCE_NC:
1754 case OP_BRAPOS:
1755 case OP_SBRA:
1756 case OP_SBRAPOS:
1757 case OP_SCOND:
1758 count = 1;
1759 srcw[0] = PRIVATE_DATA(cc);
1760 SLJIT_ASSERT(srcw[0] != 0);
1761 cc += 1 + LINK_SIZE;
1762 break;
1763
1764 case OP_CBRA:
1765 case OP_SCBRA:
1766 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1767 {
1768 count = 1;
1769 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1770 }
1771 cc += 1 + LINK_SIZE + IMM2_SIZE;
1772 break;
1773
1774 case OP_CBRAPOS:
1775 case OP_SCBRAPOS:
1776 count = 2;
1777 srcw[0] = PRIVATE_DATA(cc);
1778 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1779 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1780 cc += 1 + LINK_SIZE + IMM2_SIZE;
1781 break;
1782
1783 case OP_COND:
1784 /* Might be a hidden SCOND. */
1785 alternative = cc + GET(cc, 1);
1786 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1787 {
1788 count = 1;
1789 srcw[0] = PRIVATE_DATA(cc);
1790 SLJIT_ASSERT(srcw[0] != 0);
1791 }
1792 cc += 1 + LINK_SIZE;
1793 break;
1794
1795 CASE_ITERATOR_PRIVATE_DATA_1
1796 if (PRIVATE_DATA(cc))
1797 {
1798 count = 1;
1799 srcw[0] = PRIVATE_DATA(cc);
1800 }
1801 cc += 2;
1802 #ifdef SUPPORT_UTF
1803 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1804 #endif
1805 break;
1806
1807 CASE_ITERATOR_PRIVATE_DATA_2A
1808 if (PRIVATE_DATA(cc))
1809 {
1810 count = 2;
1811 srcw[0] = PRIVATE_DATA(cc);
1812 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1813 }
1814 cc += 2;
1815 #ifdef SUPPORT_UTF
1816 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1817 #endif
1818 break;
1819
1820 CASE_ITERATOR_PRIVATE_DATA_2B
1821 if (PRIVATE_DATA(cc))
1822 {
1823 count = 2;
1824 srcw[0] = PRIVATE_DATA(cc);
1825 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1826 }
1827 cc += 2 + IMM2_SIZE;
1828 #ifdef SUPPORT_UTF
1829 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1830 #endif
1831 break;
1832
1833 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1834 if (PRIVATE_DATA(cc))
1835 {
1836 count = 1;
1837 srcw[0] = PRIVATE_DATA(cc);
1838 }
1839 cc += 1;
1840 break;
1841
1842 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1843 if (PRIVATE_DATA(cc))
1844 {
1845 count = 2;
1846 srcw[0] = PRIVATE_DATA(cc);
1847 srcw[1] = srcw[0] + sizeof(sljit_sw);
1848 }
1849 cc += 1;
1850 break;
1851
1852 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1853 if (PRIVATE_DATA(cc))
1854 {
1855 count = 2;
1856 srcw[0] = PRIVATE_DATA(cc);
1857 srcw[1] = srcw[0] + sizeof(sljit_sw);
1858 }
1859 cc += 1 + IMM2_SIZE;
1860 break;
1861
1862 case OP_CLASS:
1863 case OP_NCLASS:
1864 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1865 case OP_XCLASS:
1866 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1867 #else
1868 size = 1 + 32 / (int)sizeof(pcre_uchar);
1869 #endif
1870 if (PRIVATE_DATA(cc))
1871 switch(get_class_iterator_size(cc + size))
1872 {
1873 case 1:
1874 count = 1;
1875 srcw[0] = PRIVATE_DATA(cc);
1876 break;
1877
1878 case 2:
1879 count = 2;
1880 srcw[0] = PRIVATE_DATA(cc);
1881 srcw[1] = srcw[0] + sizeof(sljit_sw);
1882 break;
1883
1884 default:
1885 SLJIT_ASSERT_STOP();
1886 break;
1887 }
1888 cc += size;
1889 break;
1890
1891 default:
1892 cc = next_opcode(common, cc);
1893 SLJIT_ASSERT(cc != NULL);
1894 break;
1895 }
1896 break;
1897
1898 case end:
1899 SLJIT_ASSERT_STOP();
1900 break;
1901 }
1902
1903 while (count > 0)
1904 {
1905 count--;
1906 if (save)
1907 {
1908 if (tmp1next)
1909 {
1910 if (!tmp1empty)
1911 {
1912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1913 stackptr += sizeof(sljit_sw);
1914 }
1915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1916 tmp1empty = FALSE;
1917 tmp1next = FALSE;
1918 }
1919 else
1920 {
1921 if (!tmp2empty)
1922 {
1923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1924 stackptr += sizeof(sljit_sw);
1925 }
1926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1927 tmp2empty = FALSE;
1928 tmp1next = TRUE;
1929 }
1930 }
1931 else
1932 {
1933 if (tmp1next)
1934 {
1935 SLJIT_ASSERT(!tmp1empty);
1936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1937 tmp1empty = stackptr >= stacktop;
1938 if (!tmp1empty)
1939 {
1940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1941 stackptr += sizeof(sljit_sw);
1942 }
1943 tmp1next = FALSE;
1944 }
1945 else
1946 {
1947 SLJIT_ASSERT(!tmp2empty);
1948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1949 tmp2empty = stackptr >= stacktop;
1950 if (!tmp2empty)
1951 {
1952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1953 stackptr += sizeof(sljit_sw);
1954 }
1955 tmp1next = TRUE;
1956 }
1957 }
1958 }
1959 }
1960 while (status != end);
1961
1962 if (save)
1963 {
1964 if (tmp1next)
1965 {
1966 if (!tmp1empty)
1967 {
1968 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1969 stackptr += sizeof(sljit_sw);
1970 }
1971 if (!tmp2empty)
1972 {
1973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1974 stackptr += sizeof(sljit_sw);
1975 }
1976 }
1977 else
1978 {
1979 if (!tmp2empty)
1980 {
1981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1982 stackptr += sizeof(sljit_sw);
1983 }
1984 if (!tmp1empty)
1985 {
1986 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1987 stackptr += sizeof(sljit_sw);
1988 }
1989 }
1990 }
1991 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1992 }
1993
1994 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1995 {
1996 pcre_uchar *end = bracketend(cc);
1997 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1998
1999 /* Assert captures then. */
2000 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2001 current_offset = NULL;
2002 /* Conditional block does not. */
2003 if (*cc == OP_COND || *cc == OP_SCOND)
2004 has_alternatives = FALSE;
2005
2006 cc = next_opcode(common, cc);
2007 if (has_alternatives)
2008 current_offset = common->then_offsets + (cc - common->start);
2009
2010 while (cc < end)
2011 {
2012 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2013 cc = set_then_offsets(common, cc, current_offset);
2014 else
2015 {
2016 if (*cc == OP_ALT && has_alternatives)
2017 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2018 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2019 *current_offset = 1;
2020 cc = next_opcode(common, cc);
2021 }
2022 }
2023
2024 return end;
2025 }
2026
/* The case list helper macros are not available past this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
2033
2034 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2035 {
2036 return (value & (value - 1)) == 0;
2037 }
2038
2039 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2040 {
2041 while (list)
2042 {
2043 /* sljit_set_label is clever enough to do nothing
2044 if either the jump or the label is NULL. */
2045 SET_LABEL(list->jump, label);
2046 list = list->next;
2047 }
2048 }
2049
2050 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2051 {
2052 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2053 if (list_item)
2054 {
2055 list_item->next = *list;
2056 list_item->jump = jump;
2057 *list = list_item;
2058 }
2059 }
2060
2061 static void add_stub(compiler_common *common, struct sljit_jump *start)
2062 {
2063 DEFINE_COMPILER;
2064 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2065
2066 if (list_item)
2067 {
2068 list_item->start = start;
2069 list_item->quit = LABEL();
2070 list_item->next = common->stubs;
2071 common->stubs = list_item;
2072 }
2073 }
2074
2075 static void flush_stubs(compiler_common *common)
2076 {
2077 DEFINE_COMPILER;
2078 stub_list *list_item = common->stubs;
2079
2080 while (list_item)
2081 {
2082 JUMPHERE(list_item->start);
2083 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2084 JUMPTO(SLJIT_JUMP, list_item->quit);
2085 list_item = list_item->next;
2086 }
2087 common->stubs = NULL;
2088 }
2089
2090 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2091 {
2092 DEFINE_COMPILER;
2093 label_addr_list *label_addr;
2094
2095 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2096 if (label_addr == NULL)
2097 return;
2098 label_addr->label = LABEL();
2099 label_addr->update_addr = update_addr;
2100 label_addr->next = common->label_addrs;
2101 common->label_addrs = label_addr;
2102 }
2103
2104 static SLJIT_INLINE void count_match(compiler_common *common)
2105 {
2106 DEFINE_COMPILER;
2107
2108 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2109 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2110 }
2111
2112 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2113 {
2114 /* May destroy all locals and registers except TMP2. */
2115 DEFINE_COMPILER;
2116
2117 SLJIT_ASSERT(size > 0);
2118 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2119 #ifdef DESTROY_REGISTERS
2120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2121 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2122 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2124 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2125 #endif
2126 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2127 }
2128
2129 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2130 {
2131 DEFINE_COMPILER;
2132
2133 SLJIT_ASSERT(size > 0);
2134 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2135 }
2136
2137 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2138 {
2139 DEFINE_COMPILER;
2140 sljit_uw *result;
2141
2142 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2143 return NULL;
2144
2145 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2146 if (SLJIT_UNLIKELY(result == NULL))
2147 {
2148 sljit_set_compiler_memory_error(compiler);
2149 return NULL;
2150 }
2151
2152 *(void**)result = common->read_only_data_head;
2153 common->read_only_data_head = (void *)result;
2154 return result + 1;
2155 }
2156
2157 static void free_read_only_data(void *current, void *allocator_data)
2158 {
2159 void *next;
2160
2161 SLJIT_UNUSED_ARG(allocator_data);
2162
2163 while (current != NULL)
2164 {
2165 next = *(void**)current;
2166 SLJIT_FREE(current, allocator_data);
2167 current = next;
2168 }
2169 }
2170
2171 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2172 {
2173 DEFINE_COMPILER;
2174 struct sljit_label *loop;
2175 int i;
2176
2177 /* At this point we can freely use all temporary registers. */
2178 SLJIT_ASSERT(length > 1);
2179 /* TMP1 returns with begin - 1. */
2180 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2181 if (length < 8)
2182 {
2183 for (i = 1; i < length; i++)
2184 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2185 }
2186 else
2187 {
2188 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2189 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2190 loop = LABEL();
2191 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2192 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2193 JUMPTO(SLJIT_NOT_ZERO, loop);
2194 }
2195 }
2196
2197 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2198 {
2199 DEFINE_COMPILER;
2200 struct sljit_label *loop;
2201 int i;
2202
2203 SLJIT_ASSERT(length > 1);
2204 /* OVECTOR(1) contains the "string begin - 1" constant. */
2205 if (length > 2)
2206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2207 if (length < 8)
2208 {
2209 for (i = 2; i < length; i++)
2210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2211 }
2212 else
2213 {
2214 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2215 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2216 loop = LABEL();
2217 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2218 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2219 JUMPTO(SLJIT_NOT_ZERO, loop);
2220 }
2221
2222 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2223 if (common->mark_ptr != 0)
2224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2225 if (common->control_head_ptr != 0)
2226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2227 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2229 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2230 }
2231
2232 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2233 {
2234 while (current != NULL)
2235 {
2236 switch (current[-2])
2237 {
2238 case type_then_trap:
2239 break;
2240
2241 case type_mark:
2242 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2243 return current[-4];
2244 break;
2245
2246 default:
2247 SLJIT_ASSERT_STOP();
2248 break;
2249 }
2250 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2251 current = (sljit_sw*)current[-1];
2252 }
2253 return -1;
2254 }
2255
2256 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2257 {
2258 DEFINE_COMPILER;
2259 struct sljit_label *loop;
2260 struct sljit_jump *early_quit;
2261
2262 /* At this point we can freely use all registers. */
2263 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2265
2266 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2267 if (common->mark_ptr != 0)
2268 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2269 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2270 if (common->mark_ptr != 0)
2271 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2272 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2273 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2274 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2275 /* Unlikely, but possible */
2276 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2277 loop = LABEL();
2278 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2279 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2280 /* Copy the integer value to the output buffer */
2281 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2282 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2283 #endif
2284 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2285 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2286 JUMPTO(SLJIT_NOT_ZERO, loop);
2287 JUMPHERE(early_quit);
2288
2289 /* Calculate the return value, which is the maximum ovector value. */
2290 if (topbracket > 1)
2291 {
2292 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2293 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2294
2295 /* OVECTOR(0) is never equal to SLJIT_S2. */
2296 loop = LABEL();
2297 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2298 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2299 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2300 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2301 }
2302 else
2303 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2304 }
2305
2306 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2307 {
2308 DEFINE_COMPILER;
2309 struct sljit_jump *jump;
2310
2311 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2312 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2313 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2314
2315 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2316 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2317 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2318 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2319
2320 /* Store match begin and end. */
2321 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2322 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2323
2324 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2325 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2326 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2327 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2328 #endif
2329 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2330 JUMPHERE(jump);
2331
2332 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2333 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2334 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2335 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2336 #endif
2337 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2338
2339 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2340 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2341 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2342 #endif
2343 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2344
2345 JUMPTO(SLJIT_JUMP, quit);
2346 }
2347
2348 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2349 {
2350 /* May destroy TMP1. */
2351 DEFINE_COMPILER;
2352 struct sljit_jump *jump;
2353
2354 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2355 {
2356 /* The value of -1 must be kept for start_used_ptr! */
2357 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2358 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2359 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2360 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2361 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2362 JUMPHERE(jump);
2363 }
2364 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2365 {
2366 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2368 JUMPHERE(jump);
2369 }
2370 }
2371
2372 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2373 {
2374 /* Detects if the character has an othercase. */
2375 unsigned int c;
2376
2377 #ifdef SUPPORT_UTF
2378 if (common->utf)
2379 {
2380 GETCHAR(c, cc);
2381 if (c > 127)
2382 {
2383 #ifdef SUPPORT_UCP
2384 return c != UCD_OTHERCASE(c);
2385 #else
2386 return FALSE;
2387 #endif
2388 }
2389 #ifndef COMPILE_PCRE8
2390 return common->fcc[c] != c;
2391 #endif
2392 }
2393 else
2394 #endif
2395 c = *cc;
2396 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2397 }
2398
2399 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2400 {
2401 /* Returns with the othercase. */
2402 #ifdef SUPPORT_UTF
2403 if (common->utf && c > 127)
2404 {
2405 #ifdef SUPPORT_UCP
2406 return UCD_OTHERCASE(c);
2407 #else
2408 return c;
2409 #endif
2410 }
2411 #endif
2412 return TABLE_GET(c, common->fcc, c);
2413 }
2414
2415 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2416 {
2417 /* Detects if the character and its othercase has only 1 bit difference. */
2418 unsigned int c, oc, bit;
2419 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2420 int n;
2421 #endif
2422
2423 #ifdef SUPPORT_UTF
2424 if (common->utf)
2425 {
2426 GETCHAR(c, cc);
2427 if (c <= 127)
2428 oc = common->fcc[c];
2429 else
2430 {
2431 #ifdef SUPPORT_UCP
2432 oc = UCD_OTHERCASE(c);
2433 #else
2434 oc = c;
2435 #endif
2436 }
2437 }
2438 else
2439 {
2440 c = *cc;
2441 oc = TABLE_GET(c, common->fcc, c);
2442 }
2443 #else
2444 c = *cc;
2445 oc = TABLE_GET(c, common->fcc, c);
2446 #endif
2447
2448 SLJIT_ASSERT(c != oc);
2449
2450 bit = c ^ oc;
2451 /* Optimized for English alphabet. */
2452 if (c <= 127 && bit == 0x20)
2453 return (0 << 8) | 0x20;
2454
2455 /* Since c != oc, they must have at least 1 bit difference. */
2456 if (!is_powerof2(bit))
2457 return 0;
2458
2459 #if defined COMPILE_PCRE8
2460
2461 #ifdef SUPPORT_UTF
2462 if (common->utf && c > 127)
2463 {
2464 n = GET_EXTRALEN(*cc);
2465 while ((bit & 0x3f) == 0)
2466 {
2467 n--;
2468 bit >>= 6;
2469 }
2470 return (n << 8) | bit;
2471 }
2472 #endif /* SUPPORT_UTF */
2473 return (0 << 8) | bit;
2474
2475 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2476
2477 #ifdef SUPPORT_UTF
2478 if (common->utf && c > 65535)
2479 {
2480 if (bit >= (1 << 10))
2481 bit >>= 10;
2482 else
2483 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2484 }
2485 #endif /* SUPPORT_UTF */
2486 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2487
2488 #endif /* COMPILE_PCRE[8|16|32] */
2489 }
2490
2491 static void check_partial(compiler_common *common, BOOL force)
2492 {
2493 /* Checks whether a partial matching is occurred. Does not modify registers. */
2494 DEFINE_COMPILER;
2495 struct sljit_jump *jump = NULL;
2496
2497 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2498
2499 if (common->mode == JIT_COMPILE)
2500 return;
2501
2502 if (!force)
2503 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2504 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2505 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2506
2507 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2509 else
2510 {
2511 if (common->partialmatchlabel != NULL)
2512 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2513 else
2514 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2515 }
2516
2517 if (jump != NULL)
2518 JUMPHERE(jump);
2519 }
2520
2521 static void check_str_end(compiler_common *common, jump_list **end_reached)
2522 {
2523 /* Does not affect registers. Usually used in a tight spot. */
2524 DEFINE_COMPILER;
2525 struct sljit_jump *jump;
2526
2527 if (common->mode == JIT_COMPILE)
2528 {
2529 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2530 return;
2531 }
2532
2533 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2534 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2535 {
2536 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2537 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2538 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2539 }
2540 else
2541 {
2542 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2543 if (common->partialmatchlabel != NULL)
2544 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2545 else
2546 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2547 }
2548 JUMPHERE(jump);
2549 }
2550
2551 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2552 {
2553 DEFINE_COMPILER;
2554 struct sljit_jump *jump;
2555
2556 if (common->mode == JIT_COMPILE)
2557 {
2558 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2559 return;
2560 }
2561
2562 /* Partial matching mode. */
2563 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2564 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2565 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2566 {
2567 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2568 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2569 }
2570 else
2571 {
2572 if (common->partialmatchlabel != NULL)
2573 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2574 else
2575 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2576 }
2577 JUMPHERE(jump);
2578 }
2579
2580 static void peek_char(compiler_common *common, pcre_uint32 max)
2581 {
2582 /* Reads the character into TMP1, keeps STR_PTR.
2583 Does not check STR_END. TMP2 Destroyed. */
2584 DEFINE_COMPILER;
2585 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2586 struct sljit_jump *jump;
2587 #endif
2588
2589 SLJIT_UNUSED_ARG(max);
2590
2591 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2592 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2593 if (common->utf)
2594 {
2595 if (max < 128) return;
2596
2597 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2600 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2601 JUMPHERE(jump);
2602 }
2603 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2604
2605 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2606 if (common->utf)
2607 {
2608 if (max < 0xd800) return;
2609
2610 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2611 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2612 /* TMP2 contains the high surrogate. */
2613 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2614 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2615 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2616 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2617 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2618 JUMPHERE(jump);
2619 }
2620 #endif
2621 }
2622
2623 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2624
2625 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2626 {
2627 /* Tells whether the character codes below 128 are enough
2628 to determine a match. */
2629 const pcre_uint8 value = nclass ? 0xff : 0;
2630 const pcre_uint8 *end = bitset + 32;
2631
2632 bitset += 16;
2633 do
2634 {
2635 if (*bitset++ != value)
2636 return FALSE;
2637 }
2638 while (bitset < end);
2639 return TRUE;
2640 }
2641
2642 static void read_char7_type(compiler_common *common, BOOL full_read)
2643 {
2644 /* Reads the precise character type of a character into TMP1, if the character
2645 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2646 full_read argument tells whether characters above max are accepted or not. */
2647 DEFINE_COMPILER;
2648 struct sljit_jump *jump;
2649
2650 SLJIT_ASSERT(common->utf);
2651
2652 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2653 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2654
2655 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2656
2657 if (full_read)
2658 {
2659 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2660 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2661 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2662 JUMPHERE(jump);
2663 }
2664 }
2665
2666 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2667
2668 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2669 {
2670 /* Reads the precise value of a character into TMP1, if the character is
2671 between min and max (c >= min && c <= max). Otherwise it returns with a value
2672 outside the range. Does not check STR_END. */
2673 DEFINE_COMPILER;
2674 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2675 struct sljit_jump *jump;
2676 #endif
2677 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2678 struct sljit_jump *jump2;
2679 #endif
2680
2681 SLJIT_UNUSED_ARG(update_str_ptr);
2682 SLJIT_UNUSED_ARG(min);
2683 SLJIT_UNUSED_ARG(max);
2684 SLJIT_ASSERT(min <= max);
2685
2686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2688
2689 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2690 if (common->utf)
2691 {
2692 if (max < 128 && !update_str_ptr) return;
2693
2694 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2695 if (min >= 0x10000)
2696 {
2697 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2698 if (update_str_ptr)
2699 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2700 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2701 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2702 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2703 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2704 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2705 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2706 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2707 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2708 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2709 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2710 if (!update_str_ptr)
2711 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2712 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2713 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2714 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2715 JUMPHERE(jump2);
2716 if (update_str_ptr)
2717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2718 }
2719 else if (min >= 0x800 && max <= 0xffff)
2720 {
2721 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2722 if (update_str_ptr)
2723 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2724 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2725 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2726 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2727 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2728 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2729 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2730 if (!update_str_ptr)
2731 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2732 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2733 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2734 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2735 JUMPHERE(jump2);
2736 if (update_str_ptr)
2737 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2738 }
2739 else if (max >= 0x800)
2740 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2741 else if (max < 128)
2742 {
2743 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2744 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2745 }
2746 else
2747 {
2748 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2749 if (!update_str_ptr)
2750 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2751 else
2752 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2753 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2754 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2755 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2756 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2757 if (update_str_ptr)
2758 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2759 }
2760 JUMPHERE(jump);
2761 }
2762 #endif
2763
2764 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2765 if (common->utf)
2766 {
2767 if (max >= 0x10000)
2768 {
2769 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2770 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2771 /* TMP2 contains the high surrogate. */
2772 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2773 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2774 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2775 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2776 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2777 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2778 JUMPHERE(jump);
2779 return;
2780 }
2781
2782 if (max < 0xd800 && !update_str_ptr) return;
2783
2784 /* Skip low surrogate if necessary. */
2785 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2786 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2787 if (update_str_ptr)
2788 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2789 if (max >= 0xd800)
2790 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2791 JUMPHERE(jump);
2792 }
2793 #endif
2794 }
2795
2796 static SLJIT_INLINE void read_char(compiler_common *common)
2797 {
2798 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2799 }
2800
2801 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2802 {
2803 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2804 DEFINE_COMPILER;
2805 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2806 struct sljit_jump *jump;
2807 #endif
2808 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2809 struct sljit_jump *jump2;
2810 #endif
2811
2812 SLJIT_UNUSED_ARG(update_str_ptr);
2813
2814 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2815 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2816
2817 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2818 if (common->utf)
2819 {
2820 /* This can be an extra read in some situations, but hopefully
2821 it is needed in most cases. */
2822 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2823 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2824 if (!update_str_ptr)
2825 {
2826 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2827 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2829 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2830 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2831 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2832 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2833 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2834 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2835 JUMPHERE(jump2);
2836 }
2837 else
2838 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2839 JUMPHERE(jump);
2840 return;
2841 }
2842 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2843
2844 #if !defined COMPILE_PCRE8
2845 /* The ctypes array contains only 256 values. */
2846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2847 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2848 #endif
2849 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2850 #if !defined COMPILE_PCRE8
2851 JUMPHERE(jump);
2852 #endif
2853
2854 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2855 if (common->utf && update_str_ptr)
2856 {
2857 /* Skip low surrogate if necessary. */
2858 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2859 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2860 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2861 JUMPHERE(jump);
2862 }
2863 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2864 }
2865
2866 static void skip_char_back(compiler_common *common)
2867 {
2868 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2869 DEFINE_COMPILER;
2870 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2871 #if defined COMPILE_PCRE8
2872 struct sljit_label *label;
2873
2874 if (common->utf)
2875 {
2876 label = LABEL();
2877 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2878 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2879 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2880 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2881 return;
2882 }
2883 #elif defined COMPILE_PCRE16
2884 if (common->utf)
2885 {
2886 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2887 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2888 /* Skip low surrogate if necessary. */
2889 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2890 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2891 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2892 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2893 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2894 return;
2895 }
2896 #endif /* COMPILE_PCRE[8|16] */
2897 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2898 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2899 }
2900
2901 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2902 {
2903 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2904 DEFINE_COMPILER;
2905 struct sljit_jump *jump;
2906
2907 if (nltype == NLTYPE_ANY)
2908 {
2909 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2910 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2911 }
2912 else if (nltype == NLTYPE_ANYCRLF)
2913 {
2914 if (jumpifmatch)
2915 {
2916 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2917 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2918 }
2919 else
2920 {
2921 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2922 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2923 JUMPHERE(jump);
2924 }
2925 }
2926 else
2927 {
2928 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2929 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2930 }
2931 }
2932
2933 #ifdef SUPPORT_UTF
2934
2935 #if defined COMPILE_PCRE8
2936 static void do_utfreadchar(compiler_common *common)
2937 {
2938 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2939 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2940 DEFINE_COMPILER;
2941 struct sljit_jump *jump;
2942
2943 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2944 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2945 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2946 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2947 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2948 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2949
2950 /* Searching for the first zero. */
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2952 jump = JUMP(SLJIT_NOT_ZERO);
2953 /* Two byte sequence. */
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2955 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2956 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2957
2958 JUMPHERE(jump);
2959 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2960 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2961 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2962 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2963 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2964
2965 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2966 jump = JUMP(SLJIT_NOT_ZERO);
2967 /* Three byte sequence. */
2968 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2969 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2970 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2971
2972 /* Four byte sequence. */
2973 JUMPHERE(jump);
2974 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2975 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2976 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2977 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2978 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2979 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2980 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2981 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2982 }
2983
2984 static void do_utfreadchar16(compiler_common *common)
2985 {
2986 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2987 of the character (>= 0xc0). Return value in TMP1. */
2988 DEFINE_COMPILER;
2989 struct sljit_jump *jump;
2990
2991 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2992 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2993 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2994 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2995 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2996 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2997
2998 /* Searching for the first zero. */
2999 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3000 jump = JUMP(SLJIT_NOT_ZERO);
3001 /* Two byte sequence. */
3002 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3003 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3004
3005 JUMPHERE(jump);
3006 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3007 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3008 /* This code runs only in 8 bit mode. No need to shift the value. */
3009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3010 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3011 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3012 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3013 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3014 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3015 /* Three byte sequence. */
3016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3017 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3018 }
3019
3020 static void do_utfreadtype8(compiler_common *common)
3021 {
3022 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3023 of the character (>= 0xc0). Return value in TMP1. */
3024 DEFINE_COMPILER;
3025 struct sljit_jump *jump;
3026 struct sljit_jump *compare;
3027
3028 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3029
3030 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3031 jump = JUMP(SLJIT_NOT_ZERO);
3032 /* Two byte sequence. */
3033 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3034 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3035 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3036 /* The upper 5 bits are known at this point. */
3037 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3038 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3039 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3040 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3041 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3042 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3043
3044 JUMPHERE(compare);
3045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3046 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3047
3048 /* We only have types for characters less than 256. */
3049 JUMPHERE(jump);
3050 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3052 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3053 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3054 }
3055
3056 #endif /* COMPILE_PCRE8 */
3057
3058 #endif /* SUPPORT_UTF */
3059
3060 #ifdef SUPPORT_UCP
3061
3062 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3063 #define UCD_BLOCK_MASK 127
3064 #define UCD_BLOCK_SHIFT 7
3065
3066 static void do_getucd(compiler_common *common)
3067 {
3068 /* Search the UCD record for the character comes in TMP1.
3069 Returns chartype in TMP1 and UCD offset in TMP2. */
3070 DEFINE_COMPILER;
3071
3072 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3073
3074 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3075 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3076 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3077 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3078 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3079 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3080 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3081 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3082 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3083 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3084 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3085 }
3086 #endif
3087
3088 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3089 {
3090 DEFINE_COMPILER;
3091 struct sljit_label *mainloop;
3092 struct sljit_label *newlinelabel = NULL;
3093 struct sljit_jump *start;
3094 struct sljit_jump *end = NULL;
3095 struct sljit_jump *nl = NULL;
3096 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3097 struct sljit_jump *singlechar;
3098 #endif
3099 jump_list *newline = NULL;
3100 BOOL newlinecheck = FALSE;
3101 BOOL readuchar = FALSE;
3102
3103 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3104 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3105 newlinecheck = TRUE;
3106
3107 if (firstline)
3108 {
3109 /* Search for the end of the first line. */
3110 SLJIT_ASSERT(common->first_line_end != 0);
3111 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3112
3113 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3114 {
3115 mainloop = LABEL();
3116 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3117 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3118 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3119 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3120 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3121 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3122 JUMPHERE(end);
3123 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3124 }
3125 else
3126 {
3127 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3128 mainloop = LABEL();
3129 /* Continual stores does not cause data dependency. */
3130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3131 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3132 check_newlinechar(common, common->nltype, &newline, TRUE);
3133 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3134 JUMPHERE(end);
3135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3136 set_jumps(newline, LABEL());
3137 }
3138
3139 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3140 }
3141
3142 start = JUMP(SLJIT_JUMP);
3143
3144 if (newlinecheck)
3145 {
3146 newlinelabel = LABEL();
3147 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3148 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3149 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3150 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3151 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3152 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3153 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3154 #endif
3155 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3156 nl = JUMP(SLJIT_JUMP);
3157 }
3158
3159 mainloop = LABEL();
3160
3161 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3162 #ifdef SUPPORT_UTF
3163 if (common->utf) readuchar = TRUE;
3164 #endif
3165 if (newlinecheck) readuchar = TRUE;
3166
3167 if (readuchar)
3168 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3169
3170 if (newlinecheck)
3171 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3172
3173 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3174 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3175 #if defined COMPILE_PCRE8
3176 if (common->utf)
3177 {
3178 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3179 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3180 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3181 JUMPHERE(singlechar);
3182 }
3183 #elif defined COMPILE_PCRE16
3184 if (common->utf)
3185 {
3186 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3187 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3189 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3190 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3191 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3192 JUMPHERE(singlechar);
3193 }
3194 #endif /* COMPILE_PCRE[8|16] */
3195 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3196 JUMPHERE(start);
3197
3198 if (newlinecheck)
3199 {
3200 JUMPHERE(end);
3201 JUMPHERE(nl);
3202 }
3203
3204 return mainloop;
3205 }
3206
3207 #define MAX_N_CHARS 16
3208 #define MAX_N_BYTES 8
3209
3210 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3211 {
3212 pcre_uint8 len = bytes[0];
3213 int i;
3214
3215 if (len == 255)
3216 return;
3217
3218 if (len == 0)
3219 {
3220 bytes[0] = 1;
3221 bytes[1] = byte;
3222 return;
3223 }
3224
3225 for (i = len; i > 0; i--)
3226 if (bytes[i] == byte)
3227 return;
3228
3229 if (len >= MAX_N_BYTES - 1)
3230 {
3231 bytes[0] = 255;
3232 return;
3233 }
3234
3235 len++;
3236 bytes[len] = byte;
3237 bytes[0] = len;
3238 }
3239
/* Walks the compiled pattern starting at cc and records, for each of the
first max_chars code unit positions of a match, which values may occur there.

  common     compiler state (utf flag, character tables)
  cc         current position in the compiled pattern
  chars      two entries per position: chars[0] is the expected value with
             every "don't care" bit set, chars[1] is the mask of those bits;
             a position that has not been written yet holds NOTACHAR / 0
  bytes      MAX_N_BYTES entries per position, a byte set maintained by
             add_prefix_byte(); bytes[0] == 255 means the set is unbounded
  max_chars  number of positions still available in chars / bytes
  rec_count  work budget shared by all recursion levels; one unit is used
             per loop iteration

Returns the number of positions recorded on the path followed by this call
(consumed), or 0 if the budget is already exhausted at the top of the loop.
When the function calls itself for an alternative or for the code after an
optional character, the value returned by that call becomes the new
max_chars of the caller. */
3240 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3241 {
3242 /* Recursive function, which scans prefix literals. */
3243 BOOL last, any, caseless;
3244 int len, repeat, len_save, consumed = 0;
3245 pcre_uint32 chr, mask;
3246 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other-case form of one character; its size depends on how
many code units a character may need in the current build. */
3247 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3248 pcre_uchar othercase[8];
3249 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3250 pcre_uchar othercase[2];
3251 #else
3252 pcre_uchar othercase[1];
3253 #endif
3254
/* repeat is the number of times the next item has to be recorded. It is set
by the EXACT opcodes and reset to 1 after each recorded item. */
3255 repeat = 1;
3256 while (TRUE)
3257 {
/* Give up completely once the shared budget is used up. */
3258 if (*rec_count == 0)
3259 return 0;
3260 (*rec_count)--;
3261
/* Per-item flags:
   last     - stop scanning after this item has been recorded
   any      - the item is recorded as "any value" instead of a literal
   caseless - the literal is matched caselessly */
3262 last = TRUE;
3263 any = FALSE;
3264 caseless = FALSE;
3265
3266 switch (*cc)
3267 {
3268 case OP_CHARI:
3269 caseless = TRUE;
/* Fall through. */
3270 case OP_CHAR:
3271 last = FALSE;
3272 cc++;
3273 break;
3274
3275 case OP_SOD:
3276 case OP_SOM:
3277 case OP_SET_SOM:
3278 case OP_NOT_WORD_BOUNDARY:
3279 case OP_WORD_BOUNDARY:
3280 case OP_EODN:
3281 case OP_EOD:
3282 case OP_CIRC:
3283 case OP_CIRCM:
3284 case OP_DOLL:
3285 case OP_DOLLM:
3286 /* Zero width assertions. */
3287 cc++;
3288 continue;
3289
/* Assertions are skipped as a whole; nothing is recorded for them. */
3290 case OP_ASSERT:
3291 case OP_ASSERT_NOT:
3292 case OP_ASSERTBACK:
3293 case OP_ASSERTBACK_NOT:
3294 cc = bracketend(cc);
3295 continue;
3296
3297 case OP_PLUSI:
3298 case OP_MINPLUSI:
3299 case OP_POSPLUSI:
3300 caseless = TRUE;
/* Fall through. */
/* last stays TRUE here: the character is recorded once and the scan ends. */
3301 case OP_PLUS:
3302 case OP_MINPLUS:
3303 case OP_POSPLUS:
3304 cc++;
3305 break;
3306
3307 case OP_EXACTI:
3308 caseless = TRUE;
/* Fall through. */
3309 case OP_EXACT:
3310 repeat = GET2(cc, 1);
3311 last = FALSE;
3312 cc += 1 + IMM2_SIZE;
3313 break;
3314
3315 case OP_QUERYI:
3316 case OP_MINQUERYI:
3317 case OP_POSQUERYI:
3318 caseless = TRUE;
/* Fall through. */
/* Optional character: the code following it (cc + len) is scanned first,
into the same chars / bytes positions, and the count it returns limits
max_chars. The character itself is then merged into the current position
by the literal handling below. */
3319 case OP_QUERY:
3320 case OP_MINQUERY:
3321 case OP_POSQUERY:
3322 len = 1;
3323 cc++;
3324 #ifdef SUPPORT_UTF
3325 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3326 #endif
3327 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3328 if (max_chars == 0)
3329 return consumed;
3330 last = FALSE;
3331 break;
3332
3333 case OP_KET:
3334 cc += 1 + LINK_SIZE;
3335 continue;
3336
/* Advance by the link value stored in the OP_ALT item. */
3337 case OP_ALT:
3338 cc += GET(cc, 1);
3339 continue;
3340
/* Groups: every alternative introduced by OP_ALT is scanned by a recursive
call (each returned count limits max_chars); afterwards this loop goes on
with the first alternative. */
3341 case OP_ONCE:
3342 case OP_ONCE_NC:
3343 case OP_BRA:
3344 case OP_BRAPOS:
3345 case OP_CBRA:
3346 case OP_CBRAPOS:
3347 alternative = cc + GET(cc, 1);
3348 while (*alternative == OP_ALT)
3349 {
3350 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3351 if (max_chars == 0)
3352 return consumed;
3353 alternative += GET(alternative, 1);
3354 }
3355
/* Capturing groups carry an extra IMM2_SIZE field after the link. */
3356 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3357 cc += IMM2_SIZE;
3358 cc += 1 + LINK_SIZE;
3359 continue;
3360
/* Classes and character types are recorded as "any value". In UTF mode
the scan stops instead where the guards below say so: for the 8-bit
library only when is_char7_bitset() rejects the bitmap, for the other
cases unconditionally (8 and 16 bit builds). */
3361 case OP_CLASS:
3362 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3363 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3364 #endif
3365 any = TRUE;
3366 cc += 1 + 32 / sizeof(pcre_uchar);
3367 break;
3368
3369 case OP_NCLASS:
3370 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3371 if (common->utf) return consumed;
3372 #endif
3373 any = TRUE;
3374 cc += 1 + 32 / sizeof(pcre_uchar);
3375 break;
3376
3377 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3378 case OP_XCLASS:
3379 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3380 if (common->utf) return consumed;
3381 #endif
3382 any = TRUE;
3383 cc += GET(cc, 1);
3384 break;
3385 #endif
3386
3387 case OP_DIGIT:
3388 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3389 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3390 return consumed;
3391 #endif
3392 any = TRUE;
3393 cc++;
3394 break;
3395
3396 case OP_WHITESPACE:
3397 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3398 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3399 return consumed;
3400 #endif
3401 any = TRUE;
3402 cc++;
3403 break;
3404
3405 case OP_WORDCHAR:
3406 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3407 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3408 return consumed;
3409 #endif
3410 any = TRUE;
3411 cc++;
3412 break;
3413
/* OP_NOT / OP_NOTI are followed by the excluded character; the extra
increment here together with the one below skips opcode and character. */
3414 case OP_NOT:
3415 case OP_NOTI:
3416 cc++;
3417 /* Fall through. */
3418 case OP_NOT_DIGIT:
3419 case OP_NOT_WHITESPACE:
3420 case OP_NOT_WORDCHAR:
3421 case OP_ANY:
3422 case OP_ALLANY:
3423 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3424 if (common->utf) return consumed;
3425 #endif
3426 any = TRUE;
3427 cc++;
3428 break;
3429
3430 #ifdef SUPPORT_UCP
3431 case OP_NOTPROP:
3432 case OP_PROP:
3433 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3434 if (common->utf) return consumed;
3435 #endif
3436 any = TRUE;
3437 cc += 1 + 2;
3438 break;
3439 #endif
3440
/* Only the repeat count is taken here; the type opcode that follows is
handled by the next loop iteration. */
3441 case OP_TYPEEXACT:
3442 repeat = GET2(cc, 1);
3443 cc += 1 + IMM2_SIZE;
3444 continue;
3445
3446 case OP_NOTEXACT:
3447 case OP_NOTEXACTI:
3448 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3449 if (common->utf) return consumed;
3450 #endif
3451 any = TRUE;
3452 repeat = GET2(cc, 1);
3453 cc += 1 + IMM2_SIZE + 1;
3454 break;
3455
/* Any other opcode ends the prefix. */
3456 default:
3457 return consumed;
3458 }
3459
/* "Any value" item: mark `repeat` consecutive positions as unconstrained
(all mask bits set, byte set marked with 255). */
3460 if (any)
3461 {
3462 #if defined COMPILE_PCRE8
3463 mask = 0xff;
3464 #elif defined COMPILE_PCRE16
3465 mask = 0xffff;
3466 #elif defined COMPILE_PCRE32
3467 mask = 0xffffffff;
3468 #else
3469 SLJIT_ASSERT_STOP();
3470 #endif
3471
3472 do
3473 {
3474 chars[0] = mask;
3475 chars[1] = mask;
3476 bytes[0] = 255;
3477
3478 consumed++;
3479 if (--max_chars == 0)
3480 return consumed;
3481 chars += 2;
3482 bytes += MAX_N_BYTES;
3483 }
3484 while (--repeat > 0);
3485
3486 repeat = 1;
3487 continue;
3488 }
3489
/* Literal character at cc: len is its length in code units. */
3490 len = 1;
3491 #ifdef SUPPORT_UTF
3492 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3493 #endif
3494
/* For a caseless literal that has another case, put the code units of that
other case into othercase[]. In UTF mode the scan stops if the two cases
need a different number of code units. */
3495 if (caseless && char_has_othercase(common, cc))
3496 {
3497 #ifdef SUPPORT_UTF
3498 if (common->utf)
3499 {
3500 GETCHAR(chr, cc);
3501 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3502 return consumed;
3503 }
3504 else
3505 #endif
3506 {
3507 chr = *cc;
3508 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3509 }
3510 }
3511 else
3512 caseless = FALSE;
3513
/* Record the literal `repeat` times; cc and len are rewound to the saved
values before each repetition. */
3514 len_save = len;
3515 cc_save = cc;
3516 while (TRUE)
3517 {
3518 oc = othercase;
3519 do
3520 {
3521 chr = *cc;
3522 #ifdef COMPILE_PCRE32
3523 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3524 return consumed;
3525 #endif
/* Only the low 8 bits of the code unit go into the byte set. */
3526 add_prefix_byte((pcre_uint8)chr, bytes);
3527
/* mask collects the bits in which the accepted values differ; those bits
are forced to one in the stored value. */
3528 mask = 0;
3529 if (caseless)
3530 {
3531 add_prefix_byte((pcre_uint8)*oc, bytes);
3532 mask = *cc ^ *oc;
3533 chr |= mask;
3534 }
3535
/* First value seen for this position: store it as is. Otherwise merge it
with what another alternative has already stored. */
3536 #ifdef COMPILE_PCRE32
3537 if (chars[0] == NOTACHAR && chars[1] == 0)
3538 #else
3539 if (chars[0] == NOTACHAR)
3540 #endif
3541 {
3542 chars[0] = chr;
3543 chars[1] = mask;
3544 }
3545 else
3546 {
3547 mask |= chars[0] ^ chr;
3548 chr |= mask;
3549 chars[0] = chr;
3550 chars[1] |= mask;
3551 }
3552
3553 len--;
3554 consumed++;
3555 if (--max_chars == 0)
3556 return consumed;
3557 chars += 2;
3558 bytes += MAX_N_BYTES;
3559 cc++;
3560 oc++;
3561 }
3562 while (len > 0);
3563
3564 if (--repeat == 0)
3565 break;
3566
3567 len = len_save;
3568 cc = cc_save;
3569 }
3570
3571 repeat = 1;
3572 if (last)
3573 return consumed;
3574 }
3575 }
3576
/* Emits a fast-forward search that is based on the first few code units of
the pattern.

scan_prefix() collects up to MAX_N_CHARS prefix positions. From these the
function selects
  - optionally a "range": a run of range_len consecutive positions ending at
    range_right whose byte sets are known. It is turned into a 256 entry
    table giving, for the byte read at position range_right, the distance
    STR_PTR can be advanced;
  - up to three positions (offsets[0], offsets[2], offsets[1], in increasing
    order) whose mask has at most two bits set; these are compared directly
    with the subject.

Returns FALSE before any instruction is emitted when no usable prefix
exists. Returns TRUE after the search loop has been emitted; TRUE is also
returned when the table cannot be allocated. When firstline is set the
search does not go beyond the position stored in common->first_line_end. */
3577 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3578 {
3579 DEFINE_COMPILER;
3580 struct sljit_label *start;
3581 struct sljit_jump *quit;
3582 pcre_uint32 chars[MAX_N_CHARS * 2];
3583 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3584 pcre_uint8 ones[MAX_N_CHARS];
3585 int offsets[3];
3586 pcre_uint32 mask;
3587 pcre_uint8 *byte_set, *byte_set_end;
3588 int i, max, from;
/* range_len starts at 2, so only a run of at least 3 positions is accepted. */
3589 int range_right = -1, range_len = 3 - 1;
3590 sljit_ub *update_table = NULL;
3591 BOOL in_range;
3592 pcre_uint32 rec_count;
3593
/* Mark every position as "not written yet" with an empty byte set. */
3594 for (i = 0; i < MAX_N_CHARS; i++)
3595 {
3596 chars[i << 1] = NOTACHAR;
3597 chars[(i << 1) + 1] = 0;
3598 bytes[i * MAX_N_BYTES] = 0;
3599 }
3600
/* Budget of scan_prefix() loop iterations, shared by all recursion levels. */
3601 rec_count = 10000;
3602 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3603
3604 if (max <= 1)
3605 return FALSE;
3606
/* ones[i] = number of set bits in the mask of position i, summed four bits
at a time (ones_in_half_byte is presumably a 16 entry bit count table
defined elsewhere in the file). */
3607 for (i = 0; i < max; i++)
3608 {
3609 mask = chars[(i << 1) + 1];
3610 ones[i] = ones_in_half_byte[mask & 0xf];
3611 mask >>= 4;
3612 while (mask != 0)
3613 {
3614 ones[i] += ones_in_half_byte[mask & 0xf];
3615 mask >>= 4;
3616 }
3617 }
3618
/* Look for the longest run of consecutive positions whose byte set is not
marked with 255. A run replaces the current candidate only if it is longer
and the byte set of its last position has at most 4 entries. The loop runs
up to i == max so that a run reaching the end of the prefix is also
evaluated. */
3619 in_range = FALSE;
3620 from = 0; /* Prevent compiler "uninitialized" warning */
3621 for (i = 0; i <= max; i++)
3622 {
3623 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3624 {
3625 range_len = i - from;
3626 range_right = i - 1;
3627 }
3628
3629 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3630 {
3631 if (!in_range)
3632 {
3633 in_range = TRUE;
3634 from = i;
3635 }
3636 }
3637 else if (in_range)
3638 in_range = FALSE;
3639 }
3640
/* Build the table: every entry starts as the full range length (in bytes);
a byte value that may occur i positions before range_right lowers its entry
to i code units. Entries of bytes allowed at range_right itself become 0. */
3641 if (range_right >= 0)
3642 {
3643 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3644 if (update_table == NULL)
3645 return TRUE;
3646 memset(update_table, IN_UCHARS(range_len), 256);
3647
3648 for (i = 0; i < range_len; i++)
3649 {
3650 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3651 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3652 byte_set_end = byte_set + byte_set[0];
3653 byte_set++;
3654 while (byte_set <= byte_set_end)
3655 {
3656 if (update_table[*byte_set] > IN_UCHARS(i))
3657 update_table[*byte_set] = IN_UCHARS(i);
3658 byte_set++;
3659 }
3660 }
3661 }
3662
/* offsets[0]: first position whose mask has at most two bits set. */
3663 offsets[0] = -1;
3664 /* Scan forward. */
3665 for (i = 0; i < max; i++)
3666 if (ones[i] <= 2) {
3667 offsets[0] = i;
3668 break;
3669 }
3670
/* Neither a comparable position nor a range: nothing to emit. */
3671 if (offsets[0] < 0 && range_right < 0)
3672 return FALSE;
3673
3674 if (offsets[0] >= 0)
3675 {
/* offsets[1]: last such position after offsets[0], excluding range_right. */
3676 /* Scan backward. */
3677 offsets[1] = -1;
3678 for (i = max - 1; i > offsets[0]; i--)
3679 if (ones[i] <= 2 && i != range_right)
3680 {
3681 offsets[1] = i;
3682 break;
3683 }
3684
3685 /* This case is handled better by fast_forward_first_char. */
3686 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3687 return FALSE;
3688
/* offsets[2]: a position strictly between offsets[0] and offsets[1], taken
as close to the middle as possible (upper half is searched first). */
3689 offsets[2] = -1;
3690 /* We only search for a middle character if there is no range check. */
3691 if (offsets[1] >= 0 && range_right == -1)
3692 {
3693 /* Scan from middle. */
3694 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3695 if (ones[i] <= 2)
3696 {
3697 offsets[2] = i;
3698 break;
3699 }
3700
3701 if (offsets[2] == -1)
3702 {
3703 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3704 if (ones[i] <= 2)
3705 {
3706 offsets[2] = i;
3707 break;
3708 }
3709 }
3710 }
3711
3712 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3713 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3714
/* Copy the value / mask pairs of the chosen positions to the front of
chars[]: [0],[1] for offsets[0], [2],[3] for offsets[2], [4],[5] for
offsets[1]. */
3715 chars[0] = chars[offsets[0] << 1];
3716 chars[1] = chars[(offsets[0] << 1) + 1];
3717 if (offsets[2] >= 0)
3718 {
3719 chars[2] = chars[offsets[2] << 1];
3720 chars[3] = chars[(offsets[2] << 1) + 1];
3721 }
3722 if (offsets[1] >= 0)
3723 {
3724 chars[4] = chars[offsets[1] << 1];
3725 chars[5] = chars[(offsets[1] << 1) + 1];
3726 }
3727 }
3728
/* From here on max is the largest position index. STR_END is lowered by
that many code units for the duration of the loop. With firstline, TMP3
keeps the original STR_END and the temporary end is the smaller of the
lowered STR_END and first_line_end. */
3729 max -= 1;
3730 if (firstline)
3731 {
3732 SLJIT_ASSERT(common->first_line_end != 0);
3733 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3734 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3735 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3736 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3737 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3738 JUMPHERE(quit);
3739 }
3740 else
3741 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3742
/* Except on 32 bit x86, the table address is kept in RETURN_ADDR. */
3743 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3744 if (range_right >= 0)
3745 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3746 #endif
3747
/* Head of the emitted search loop. */
3748 start = LABEL();
3749 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3750
3751 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3752
/* Range step: load one byte of the code unit at range_right (the second
form of the load picks the last byte of the unit on big endian non 8 bit
builds), look up the distance in the table, advance STR_PTR by it and
restart the loop unless the distance is zero. */
3753 if (range_right >= 0)
3754 {
3755 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3756 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3757 #else
3758 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3759 #endif
3760
3761 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3762 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3763 #else
3764 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3765 #endif
3766 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3767 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3768 }
3769
/* Direct comparisons. STR_PTR is incremented right after the first loads,
so a failed comparison restarts the loop one code unit further on; this is
also why the later load of offsets[2] uses offsets[2] - 1. Each loaded
value is ORed with its mask before being compared with the stored value. */
3770 if (offsets[0] >= 0)
3771 {
3772 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3773 if (offsets[1] >= 0)
3774 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3775 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3776
3777 if (chars[1] != 0)
3778 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3779 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3780 if (offsets[2] >= 0)
3781 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3782
3783 if (offsets[1] >= 0)
3784 {
3785 if (chars[5] != 0)
3786 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3787 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3788 }
3789
3790 if (offsets[2] >= 0)
3791 {
3792 if (chars[3] != 0)
3793 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3794 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3795 }
/* Every comparison succeeded: undo the early increment. */
3796 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3797 }
3798
3799 JUMPHERE(quit);
3800
/* Restore STR_END. With firstline and a range step, STR_PTR is additionally
limited to first_line_end (the table lookup may have moved it further). */
3801 if (firstline)
3802 {
3803 if (range_right >= 0)
3804 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3805 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3806 if (range_right >= 0)
3807 {
3808 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3809 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3810 JUMPHERE(quit);
3811 }
3812 }
3813 else
3814 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3815 return TRUE;
3816 }
3817
3818 #undef MAX_N_CHARS
3819 #undef MAX_N_BYTES
3820
3821 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3822 {
3823 DEFINE_COMPILER;
3824 struct sljit_label *start;
3825 struct sljit_jump *quit;
3826 struct sljit_jump *found;
3827 pcre_uchar oc, bit;
3828
3829 if (firstline)
3830 {
3831 SLJIT_ASSERT(common->first_line_end != 0);
3832 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3833 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3834 }
3835
3836 start = LABEL();
3837 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3838 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3839
3840 oc = first_char;
3841 if (caseless)
3842 {
3843 oc = TABLE_GET(first_char, common->fcc, first_char);
3844 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3845 if (first_char > 127 && common->utf)
3846 oc = UCD_OTHERCASE(first_char);
3847 #endif
3848 }
3849 if (first_char == oc)
3850 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3851 else
3852 {
3853 bit = first_char ^ oc;
3854 if (is_powerof2(bit))
3855 {
3856 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3857 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3858 }
3859 else
3860 {
3861 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3862 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3863 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3864 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3865 found = JUMP(SLJIT_NOT_ZERO);
3866 }
3867 }
3868
3869 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3870 JUMPTO(SLJIT_JUMP, start);
3871 JUMPHERE(found);
3872 JUMPHERE(quit);
3873
3874 if (firstline)
3875 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3876 }
3877
3878 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3879 {
3880 DEFINE_COMPILER;
3881 struct sljit_label *loop;
3882 struct sljit_jump *lastchar;
3883 struct sljit_jump *firstchar;
3884 struct sljit_jump *quit;
3885 struct sljit_jump *foundcr = NULL;
3886 struct sljit_jump *notfoundnl;
3887 jump_list *newline = NULL;
3888
3889 if (firstline)
3890 {
3891 SLJIT_ASSERT(common->first_line_end != 0);
3892 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3893 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3894 }
3895
3896 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3897 {
3898 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3899 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3901 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3902 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3903
3904 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3905 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3906 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3907 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3908 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3909 #endif
3910 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3911
3912 loop = LABEL();
3913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3914 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3915 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3916 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3917 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3918 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3919
3920 JUMPHERE(quit);
3921 JUMPHERE(firstchar);
3922 JUMPHERE(lastchar);
3923
3924 if (firstline)
3925 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3926 return;
3927 }
3928
3929 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3931 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3932 skip_char_back(common);
3933
3934 loop = LABEL();
3935 common->ff_newline_shortcut = loop;
3936
3937 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3938 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3939 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3940 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3941 check_newlinechar(common, common->nltype, &newline, FALSE);
3942 set_jumps(newline, loop);
3943
3944 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3945 {
3946 quit = JUMP(SLJIT_JUMP);
3947 JUMPHERE(foundcr);
3948 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3949 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3950 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3951 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3952 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3953 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3954 #endif
3955 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3956 JUMPHERE(notfoundnl);
3957 JUMPHERE(quit);
3958 }
3959 JUMPHERE(lastchar);
3960 JUMPHERE(firstchar);
3961
3962 if (firstline)
3963 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3964 }
3965
3966 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3967
3968 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3969 {
3970 DEFINE_COMPILER;
3971 struct sljit_label *start;
3972 struct sljit_jump *quit;
3973 struct sljit_jump *found = NULL;
3974 jump_list *matches = NULL;
3975 #ifndef COMPILE_PCRE8
3976 struct sljit_jump *jump;
3977 #endif
3978
3979 if (firstline)
3980 {
3981 SLJIT_ASSERT(common->first_line_end != 0);
3982 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3983 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3984 }
3985
3986 start = LABEL();
3987 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3988 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3989 #ifdef SUPPORT_UTF
3990 if (common->utf)
3991 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3992 #endif
3993
3994 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3995 {
3996 #ifndef COMPILE_PCRE8
3997 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3998 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3999 JUMPHERE(jump);
4000 #endif
4001 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4002 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4003 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4004 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4005 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4006 found = JUMP(SLJIT_NOT_ZERO);
4007 }
4008
4009 #ifdef SUPPORT_UTF
4010 if (common->utf)
4011 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4012 #endif
4013 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4014 #ifdef SUPPORT_UTF
4015 #if defined COMPILE_PCRE8
4016 if (common->utf)
4017 {
4018 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4019 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4020 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4021 }
4022 #elif defined COMPILE_PCRE16
4023 if (common->utf)
4024 {
4025 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4026 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4027 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4028 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4029 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4031 }
4032 #endif /* COMPILE_PCRE[8|16] */
4033 #endif /* SUPPORT_UTF */
4034 JUMPTO(SLJIT_JUMP, start);
4035 if (found != NULL)
4036 JUMPHERE(found);
4037 if (matches != NULL)
4038 set_jumps(matches, LABEL());
4039 JUMPHERE(quit);
4040
4041 if (firstline)
4042 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4043 }
4044
4045 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4046 {
4047 DEFINE_COMPILER;
4048 struct sljit_label *loop;
4049 struct sljit_jump *toolong;
4050 struct sljit_jump *alreadyfound;
4051 struct sljit_jump *found;
4052 struct sljit_jump *foundoc = NULL;
4053 struct sljit_jump *notfound;
4054 pcre_uint32 oc, bit;
4055
4056 SLJIT_ASSERT(common->req_char_ptr != 0);
4057 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4058 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4059 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4060 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4061
4062 if (has_firstchar)
4063 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4064 else
4065 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4066
4067 loop = LABEL();
4068 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4069
4070 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4071 oc = req_char;
4072 if (caseless)
4073 {
4074 oc = TABLE_GET(req_char, common->fcc, req_char);
4075 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4076 if (req_char > 127 && common->utf)
4077 oc = UCD_OTHERCASE(req_char);
4078 #endif
4079 }
4080 if (req_char == oc)
4081 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4082 else
4083 {
4084 bit = req_char ^ oc;
4085 if (is_powerof2(bit))
4086 {
4087 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4088 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4089 }
4090 else
4091 {
4092 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4093 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4094 }
4095 }
4096 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4097 JUMPTO(SLJIT_JUMP, loop);
4098
4099 JUMPHERE(found);
4100 if (foundoc)
4101 JUMPHERE(foundoc);
4102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4103 JUMPHERE(alreadyfound);
4104 JUMPHERE(toolong);
4105 return notfound;
4106 }
4107
4108 static void do_revertframes(compiler_common *common)
4109 {
4110 DEFINE_COMPILER;
4111 struct sljit_jump *jump;
4112 struct sljit_label *mainloop;
4113
4114 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4115 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4116 GET_LOCAL_BASE(TMP3, 0, 0);
4117
4118 /* Drop frames until we reach STACK_TOP. */
4119 mainloop = LABEL();
4120 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4121 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4122 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4123
4124 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4125 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4126 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4127 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4128 JUMPTO(SLJIT_JUMP, mainloop);
4129
4130 JUMPHERE(jump);
4131 jump = JUMP(SLJIT_SIG_LESS);
4132 /* End of dropping frames. */
4133 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4134
4135 JUMPHERE(jump);
4136 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4137 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4138 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4139 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4140 JUMPTO(SLJIT_JUMP, mainloop);
4141 }
4142
4143 static void check_wordboundary(compiler_common *common)
4144 {
4145 DEFINE_COMPILER;
4146 struct sljit_jump *skipread;
4147 jump_list *skipread_list = NULL;
4148 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4149 struct sljit_jump *jump;
4150 #endif
4151
4152 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4153
4154 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4155 /* Get type of the previous char, and put it to LOCALS1. */
4156 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4157 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4159 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4160 skip_char_back(common);
4161 check_start_used_ptr(common);
4162 read_char(common);
4163
4164 /* Testing char type. */
4165 #ifdef SUPPORT_UCP
4166 if (common->use_ucp)
4167 {
4168 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4169 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4170 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4171 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4172 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4173 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4174 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4175 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4176 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4177 JUMPHERE(jump);
4178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4179 }
4180 else
4181 #endif
4182 {
4183 #ifndef COMPILE_PCRE8
4184 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4185 #elif defined SUPPORT_UTF
4186 /* Here LOCALS1 has already been zeroed. */
4187 jump = NULL;
4188 if (common->utf)
4189 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4190 #endif /* COMPILE_PCRE8 */
4191 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4192 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4193 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4195 #ifndef COMPILE_PCRE8
4196 JUMPHERE(jump);
4197 #elif defined SUPPORT_UTF
4198 if (jump != NULL)
4199 JUMPHERE(jump);
4200 #endif /* COMPILE_PCRE8 */
4201 }
4202 JUMPHERE(skipread);
4203
4204 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4205 check_str_end(common, &skipread_list);
4206 peek_char(common, READ_CHAR_MAX);
4207
4208 /* Testing char type. This is a code duplication. */
4209 #ifdef SUPPORT_UCP
4210 if (common->use_ucp)
4211 {
4212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4213 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4214 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4215 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4216 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4217 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4218 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4219 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4220 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4221 JUMPHERE(jump);
4222 }
4223 else
4224 #endif
4225 {
4226 #ifndef COMPILE_PCRE8
4227 /* TMP2 may be destroyed by peek_char. */
4228 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4229 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4230 #elif defined SUPPORT_UTF
4231 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4232 jump = NULL;
4233 if (common->utf)
4234 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4235 #endif
4236 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4237 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4238 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4239 #ifndef COMPILE_PCRE8
4240 JUMPHERE(jump);
4241 #elif defined SUPPORT_UTF
4242 if (jump != NULL)
4243 JUMPHERE(jump);
4244 #endif /* COMPILE_PCRE8 */
4245 }
4246 set_jumps(skipread_list, LABEL());
4247
4248 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4249 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4250 }
4251
/* Tries to replace a bitmap test of the character in TMP1 by a few compare
instructions. The bitmap `bits` is scanned for bit positions 0..255 and every
position where the bit value changes is recorded in `ranges`.

  bits        bitmap indexed by character value (bit i is bits[i >> 3] bit (i & 7))
  nclass      selects whether a closing boundary at 256 is appended after the
              scan (see below); it appears to be the bit value assumed for
              characters >= 256 - confirm against the callers
  invert      when TRUE the polarity of the test is flipped
  backtracks  list that receives the emitted jumps

Returns TRUE when the compare sequence was emitted, FALSE when the bitmap has
too many transitions (nothing is emitted in that case and the caller has to
test the bitmap itself). The emitted jumps are taken when TMP1 lies in a region
whose (optionally inverted) bit value is 0. TMP1 may be modified by the
emitted code. */
4252 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4253 {
4254 DEFINE_COMPILER;
4255 int ranges[MAX_RANGE_SIZE]; /* Positions where the bit value flips. */
4256 pcre_uint8 bit, cbit, all;
4257 int i, byte, length = 0;
4258
4259 bit = bits[0] & 0x1;
4260 /* All bits will be zero or one (since bit is zero or one). */
4261 all = -bit;
4262
/* Collect the transitions. A whole byte is skipped at once when we are on a
byte boundary and all of its bits equal the current bit value. */
4263 for (i = 0; i < 256; )
4264 {
4265 byte = i >> 3;
4266 if ((i & 0x7) == 0 && bits[byte] == all)
4267 i += 8;
4268 else
4269 {
4270 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4271 if (cbit != bit)
4272 {
/* Too many transitions: give up before overflowing ranges[]. */
4273 if (length >= MAX_RANGE_SIZE)
4274 return FALSE;
4275 ranges[length] = i;
4276 length++;
4277 bit = cbit;
4278 all = -cbit;
4279 }
4280 i++;
4281 }
4282 }
4283
/* Here `bit` is the value of the last scanned bit (bit 255). A boundary at 256
is appended when that value is 0 and nclass is set, or 1 and nclass is clear. */
4284 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4285 {
4286 if (length >= MAX_RANGE_SIZE)
4287 return FALSE;
4288 ranges[length] = 256;
4289 length++;
4290 }
4291
/* The switch below only knows how to handle up to four boundaries. */
4292 if (length < 0 || length > 4)
4293 return FALSE;
4294
/* From here on `bit` is the (optionally inverted) value of the first region,
i.e. of the characters below ranges[0]. */
4295 bit = bits[0] & 0x1;
4296 if (invert) bit ^= 0x1;
4297
4298 /* No character is accepted. */
4299 if (length == 0 && bit == 0)
4300 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4301
4302 switch(length)
4303 {
4304 case 0:
4305 /* When bit != 0, all characters are accepted. */
4306 return TRUE;
4307
/* One boundary: a single less-than / greater-or-equal test. */
4308 case 1:
4309 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4310 return TRUE;
4311
/* Two boundaries: one interval [ranges[0], ranges[1]). It is tested by
subtracting the lower bound and comparing with the interval size, or by
a plain equality test when the interval holds a single character. */
4312 case 2:
4313 if (ranges[0] + 1 != ranges[1])
4314 {
4315 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4316 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4317 }
4318 else
4319 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4320 return TRUE;
4321
/* Three boundaries: an open-ended region plus one interval. */
4322 case 3:
4323 if (bit != 0)
4324 {
/* The jump regions are [ranges[0], ranges[1]) and everything from ranges[2]. */
4325 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4326 if (ranges[0] + 1 != ranges[1])
4327 {
4328 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4329 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4330 }
4331 else
4332 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4333 return TRUE;
4334 }
4335
/* bit == 0: the jump regions are everything below ranges[0] and
[ranges[1], ranges[2]). */
4336 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4337 if (ranges[1] + 1 != ranges[2])
4338 {
4339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4340 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4341 }
4342 else
4343 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4344 return TRUE;
4345
/* Four boundaries: two intervals, [ranges[0], ranges[1]) and
[ranges[2], ranges[3]). */
4346 case 4:
/* Special case: both intervals have the same size and their start positions
differ in exactly one bit. Setting that bit in TMP1 maps the first interval
onto the second, so a single interval test is enough. */
4347 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4348 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4349 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4350 && is_powerof2(ranges[2] - ranges[0]))
4351 {
4352 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4353 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4354 if (ranges[2] + 1 != ranges[3])
4355 {
4356 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4357 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4358 }
4359 else
4360 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4361 return TRUE;
4362 }
4363
4364 if (bit != 0)
4365 {
/* Jump when TMP1 is inside either interval. `i` remembers how much has
already been subtracted from TMP1 by the first test so that the second
test can compensate for it. */
4366 i = 0;
4367 if (ranges[0] + 1 != ranges[1])
4368 {
4369 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4370 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4371 i = ranges[0];
4372 }
4373 else
4374 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4375
4376 if (ranges[2] + 1 != ranges[3])
4377 {
4378 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4379 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4380 }
4381 else
4382 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4383 return TRUE;
4384 }
4385
/* bit == 0: jump when TMP1 is outside [ranges[0], ranges[3]) or inside the
gap [ranges[1], ranges[2]) between the two intervals. TMP1 is rebased to
ranges[0] first, then to ranges[1] if the gap is wider than one character. */
4386 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4387 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4388 if (ranges[1] + 1 != ranges[2])
4389 {
4390 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4391 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4392 }
4393 else
4394 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4395 return TRUE;
4396
4397 default:
4398 SLJIT_ASSERT_STOP();
4399 return FALSE;
4400 }
4401 }
4402
/* Emits the fast-call helper that tests TMP1 against the "any newline" set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when wide characters are possible
(16/32 bit library, or the 8 bit library in UTF mode). The partial results are
ORed together in TMP2 and the last OP_FLAGS also updates the status flags. */
4403 static void check_anynewline(compiler_common *common)
4404 {
4405 /* Check whether TMP1 contains a newline character. TMP1 is modified, TMP2 destroyed. */
4406 DEFINE_COMPILER;
4407
4408 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4409
/* Rebase TMP1 to 0x0a; all later constants are relative to that base. */
4410 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
/* Range test for 0x0a..0x0d. */
4411 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4412 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
/* Equality test for 0x85; its flag is consumed by the next OP_FLAGS. */
4413 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4414 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4415 #ifdef COMPILE_PCRE8
4416 if (common->utf)
4417 {
4418 #endif
4419 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit folds 0x2028 onto 0x2029 (relative to the 0x0a
base), so one compare covers both characters. */
4420 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4421 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4422 #ifdef COMPILE_PCRE8
4423 }
4424 #endif
4425 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare and leave the flags describing TMP2. */
4426 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4427 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4428 }
4429
/* Emits the fast-call helper that tests TMP1 against the horizontal white
space set: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000..0x200a, 0x202f,
0x205f and 0x3000 when wide characters are possible (16/32 bit library, or the
8 bit library in UTF mode). The partial results are ORed together in TMP2 and
the last OP_FLAGS also updates the status flags. */
4430 static void check_hspace(compiler_common *common)
4431 {
4432 /* Check whether TMP1 contains a horizontal white space character. TMP1 may be modified, TMP2 destroyed. */
4433 DEFINE_COMPILER;
4434
4435 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4436
/* Each compare sets the equal flag, which the following OP_FLAGS stores in
(first test) or ORs into (later tests) TMP2. */
4437 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4438 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4439 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4440 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4441 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4442 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4443 #ifdef COMPILE_PCRE8
4444 if (common->utf)
4445 {
4446 #endif
4447 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4448 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4449 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4451 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Rebase TMP1 to 0x2000; the remaining constants are relative to that base. */
4452 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
/* Range test for 0x2000..0x200a. */
4453 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4454 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4455 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4456 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4457 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4458 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4459 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4460 #ifdef COMPILE_PCRE8
4461 }
4462 #endif
4463 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare and leave the flags describing TMP2. */
4464 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4465
4466 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4467 }
4468
/* Emits the fast-call helper that tests TMP1 against the vertical white space
set: 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when wide characters are
possible (16/32 bit library, or the 8 bit library in UTF mode). The partial
results are ORed together in TMP2 and the last OP_FLAGS also updates the
status flags. */
4469 static void check_vspace(compiler_common *common)
4470 {
4471 /* Check whether TMP1 contains a vertical white space character. TMP1 is modified, TMP2 destroyed. */
4472 DEFINE_COMPILER;
4473
4474 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4475
/* Rebase TMP1 to 0x0a; all later constants are relative to that base. */
4476 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
/* Range test for 0x0a..0x0d. */
4477 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4478 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
/* Equality test for 0x85; its flag is consumed by the next OP_FLAGS. */
4479 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4480 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4481 #ifdef COMPILE_PCRE8
4482 if (common->utf)
4483 {
4484 #endif
4485 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit folds 0x2028 onto 0x2029 (relative to the 0x0a
base), so one compare covers both characters. */
4486 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4487 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4488 #ifdef COMPILE_PCRE8
4489 }
4490 #endif
4491 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare and leave the flags describing TMP2. */
4492 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4493
4494 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4495 }
4496
/* The compare helpers below borrow STR_END and STACK_TOP to hold the two
characters being compared. Each helper saves the original values on entry and
restores them before returning. */
4497 #define CHAR1 STR_END
4498 #define CHAR2 STACK_TOP
4499
/* Emits the fast-call helper that compares two code unit sequences for exact
equality. As used by the emitted code: TMP1 addresses one sequence, STR_PTR
minus TMP2 addresses the other, and TMP2 is the remaining length, counted down
in steps of IN_UCHARS(1). The loop is left either on the first mismatch (TMP2
has not reached zero) or when TMP2 reaches zero; presumably the caller tests
TMP2 / the zero flag afterwards - confirm at the call site. */
4500 static void do_casefulcmp(compiler_common *common)
4501 {
4502 DEFINE_COMPILER;
4503 struct sljit_jump *jump;
4504 struct sljit_label *label;
4505
4506 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Step STR_PTR back by the length held in TMP2. */
4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers that are borrowed as CHAR1 / CHAR2. */
4508 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved one code unit back because the loads in the loop
address [pointer + IN_UCHARS(1)] (MOVU_UCHAR is presumably a load that also
updates the base register - see the sljit documentation). */
4510 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4511 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4512
/* Compare loop: one code unit from each sequence per iteration. */
4513 label = LABEL();
4514 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4515 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4516 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4517 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4518 JUMPTO(SLJIT_NOT_ZERO, label);
4519
/* Common exit: undo the one code unit bias of STR_PTR and restore the
borrowed registers. */
4520 JUMPHERE(jump);
4521 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4522 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4523 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4524 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4525 }
4526
/* do_caselesscmp borrows STACK_LIMIT to hold the address of the lower case
table (common->lcc); the original value is saved and restored there. */
4527 #define LCC_TABLE STACK_LIMIT
4528
/* Emits the fast-call helper that compares two code unit sequences after
mapping each code unit through the lower case table common->lcc. The register
usage is the same as in do_casefulcmp: TMP1 addresses one sequence, STR_PTR
minus TMP2 the other, and TMP2 is the remaining length counted down in steps
of IN_UCHARS(1). In the 16 and 32 bit libraries code units above 255 are
compared unchanged because the table is only indexed with values up to 255. */
4529 static void do_caselesscmp(compiler_common *common)
4530 {
4531 DEFINE_COMPILER;
4532 struct sljit_jump *jump;
4533 struct sljit_label *label;
4534
4535 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Step STR_PTR back by the length held in TMP2. */
4536 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4537
/* Save the three borrowed registers, then load the table address. */
4538 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4539 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4541 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* One code unit bias, matching the [pointer + IN_UCHARS(1)] loads below. */
4542 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4543 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4544
/* Compare loop: load one code unit from each sequence, lower-case both,
compare. */
4545 label = LABEL();
4546 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4547 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4548 #ifndef COMPILE_PCRE8
/* Skip the table lookup for code units that do not fit the table. */
4549 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4550 #endif
4551 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4552 #ifndef COMPILE_PCRE8
4553 JUMPHERE(jump);
4554 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4555 #endif
4556 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4557 #ifndef COMPILE_PCRE8
4558 JUMPHERE(jump);
4559 #endif
4560 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4561 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4562 JUMPTO(SLJIT_NOT_ZERO, label);
4563
/* Common exit: undo the one code unit bias of STR_PTR and restore the
borrowed registers. */
4564 JUMPHERE(jump);
4565 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4566 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4567 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4568 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4569 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4570 }
4571
/* The register aliases are only meant for the two compare helpers above. */
4572 #undef LCC_TABLE
4573 #undef CHAR1
4574 #undef CHAR2
4575
4576 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4577
/* Caseless comparison of two character sequences using the Unicode tables.

  src1, end1  start and end of the first sequence
  args        supplies the second sequence: it starts at args->uchar_ptr and
              must not be read at or beyond args->end

Returns
  NULL              when a character pair does not match
  (pcre_uchar*)1    when the second sequence ends before the first is consumed
  otherwise         the position in the second sequence after the compared part

Two characters match when they are equal, when the first is the other case of
the second, or when the first is a member of the caseless set of the second. */
4578 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4579 {
4580 /* This is a plain C helper: generating it at JIT level would not be worthwhile. */
4581 pcre_uint32 c1, c2;
4582 const pcre_uchar *src2 = args->uchar_ptr;
4583 const pcre_uchar *end2 = args->end;
4584 const ucd_record *ur;
4585 const pcre_uint32 *pp;
4586
4587 while (src1 < end1)
4588 {
/* The second sequence is exhausted: report it with the special value 1. */
4589 if (src2 >= end2)
4590 return (pcre_uchar*)1;
/* Fetch one character from each sequence and advance both pointers. */
4591 GETCHARINC(c1, src1);
4592 GETCHARINC(c2, src2);
4593 ur = GET_UCD(c2);
4594 if (c1 != c2 && c1 != c2 + ur->other_case)
4595 {
/* Search the caseless set of c2 for c1. The early exit on c1 < *pp relies on
the set being stored in ascending order (presumably ending with a large
terminator value - confirm against the table definition). */
4596 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4597 for (;;)
4598 {
4599 if (c1 < *pp) return NULL;
4600 if (c1 == *pp++) break;
4601 }
4602 }
4603 }
4604 return src2;
4605 }
4606
4607 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4608
/* Emits the compare code for one pattern character starting at cc (all of its
code units when it is a multi-unit UTF character) against the subject text
located context->length bytes before STR_PTR.

  caseless    when TRUE and the character has an other case, the differing
              bit is ORed into the subject data before the compare
  cc          the pattern character
  context     state shared by consecutive calls: remaining length, the register
              that receives the next load, and (when unaligned reads are used)
              the code units collected for the current multi-unit compare
  backtracks  list that receives the jumps taken on mismatch

Returns the position in the pattern after the processed character. TMP1 and
TMP2 are used alternately as load targets. */
4609 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4610 compare_context *context, jump_list **backtracks)
4611 {
4612 DEFINE_COMPILER;
4613 unsigned int othercasebit = 0;
4614 pcre_uchar *othercasechar = NULL;
4615 #ifdef SUPPORT_UTF
4616 int utflength;
4617 #endif
4618
4619 if (caseless && char_has_othercase(common, cc))
4620 {
4621 othercasebit = char_get_othercase_bit(common, cc);
4622 SLJIT_ASSERT(othercasebit);
4623 /* Extracting bit difference info. */
4624 #if defined COMPILE_PCRE8
/* The upper part selects the code unit that carries the differing bit, the
low 8 bits are the bit mask itself. */
4625 othercasechar = cc + (othercasebit >> 8);
4626 othercasebit &= 0xff;
4627 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4628 /* Note that this code only handles characters in the BMP. If there
4629 ever are characters outside the BMP whose othercase differs in only one
4630 bit from itself (there currently are none), this code will need to be
4631 revised for COMPILE_PCRE32. */
/* Bits 9 and up select the code unit; bit 0x100 tells that the mask in the
low 8 bits belongs to the upper byte of that code unit. */
4632 othercasechar = cc + (othercasebit >> 9);
4633 if ((othercasebit & 0x100) != 0)
4634 othercasebit = (othercasebit & 0xff) << 8;
4635 else
4636 othercasebit &= 0xff;
4637 #endif /* COMPILE_PCRE[8|16|32] */
4638 }
4639
/* First call for this context: load the first chunk of subject data into
TMP1. The load is as wide as the remaining length and unaligned access
support allow. */
4640 if (context->sourcereg == -1)
4641 {
4642 #if defined COMPILE_PCRE8
4643 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4644 if (context->length >= 4)
4645 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4646 else if (context->length >= 2)
4647 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4648 else
4649 #endif
4650 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4651 #elif defined COMPILE_PCRE16
4652 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4653 if (context->length >= 4)
4654 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4655 else
4656 #endif
4657 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4658 #elif defined COMPILE_PCRE32
4659 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4660 #endif /* COMPILE_PCRE[8|16|32] */
/* The next load goes to TMP2 while TMP1 is being compared. */
4661 context->sourcereg = TMP2;
4662 }
4663
4664 #ifdef SUPPORT_UTF
/* Number of code units that make up the character at cc. */
4665 utflength = 1;
4666 if (common->utf && HAS_EXTRALEN(*cc))
4667 utflength += GET_EXTRALEN(*cc);
4668
4669 do
4670 {
4671 #endif
4672
/* One code unit is consumed per iteration. */
4673 context->length -= IN_UCHARS(1);
4674 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4675
4676 /* Unaligned read is supported. */
/* Collect the expected value (c) and the case mask (oc) of this code unit;
the compare itself is emitted once a whole chunk has been collected. */
4677 if (othercasebit != 0 && othercasechar == cc)
4678 {
4679 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4680 context->oc.asuchars[context->ucharptr] = othercasebit;
4681 }
4682 else
4683 {
4684 context->c.asuchars[context->ucharptr] = *cc;
4685 context->oc.asuchars[context->ucharptr] = 0;
4686 }
4687 context->ucharptr++;
4688
/* A chunk is complete when it fills a 32 bit load, when nothing remains, or
(8 bit only) when two units are collected and a single one remains. */
4689 #if defined COMPILE_PCRE8
4690 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4691 #else
4692 if (context->ucharptr >= 2 || context->length == 0)
4693 #endif
4694 {
/* Load the next chunk (if any) into the free register before the current
one is compared. */
4695 if (context->length >= 4)
4696 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4697 else if (context->length >= 2)
4698 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4699 #if defined COMPILE_PCRE8
4700 else if (context->length >= 1)
4701 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4702 #endif /* COMPILE_PCRE8 */
/* Swap: sourcereg now names the register holding the chunk to compare. */
4703 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4704
/* Compare the chunk as a 32, 16 or 8 bit value depending on the number of
collected code units. The case mask is ORed into the subject data first. */
4705 switch(context->ucharptr)
4706 {
4707 case 4 / sizeof(pcre_uchar):
4708 if (context->oc.asint != 0)
4709 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4710 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4711 break;
4712
4713 case 2 / sizeof(pcre_uchar):
4714 if (context->oc.asushort != 0)
4715 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4716 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4717 break;
4718
4719 #ifdef COMPILE_PCRE8
4720 case 1:
4721 if (context->oc.asbyte != 0)
4722 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4723 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4724 break;
4725 #endif
4726
4727 default:
4728 SLJIT_ASSERT_STOP();
4729 break;
4730 }
/* Start collecting the next chunk. */
4731 context->ucharptr = 0;
4732 }
4733
4734 #else
4735
4736 /* Unaligned read is unsupported or in 32 bit mode. */
/* Load the next code unit (if any) into the free register, then compare the
previously loaded one. */
4737 if (context->length >= 1)
4738 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4739
4740 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4741
4742 if (othercasebit != 0 && othercasechar == cc)
4743 {
4744 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4745 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4746 }
4747 else
4748 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4749
4750 #endif
4751
4752 cc++;
4753 #ifdef SUPPORT_UTF
4754 utflength--;
4755 }
4756 while (utflength > 0);
4757 #endif
4758
4759 return cc;
4760 }
4761
4762 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4763
/* Rebases the register named by the local variable `typereg` so that it holds
its original value minus `value`, emitting an ADD or SUB for the difference to
the current base `typeoffset`, and records `value` as the new base. Nothing is
emitted when the base does not change.

Usage notes: the macro expands to more than one statement (it is not wrapped
in do { } while (0)), so it must be used as a complete statement and never as
the unbraced body of an if / else / loop. `value` is evaluated several times.
It relies on `typereg`, `typeoffset` and whatever OP2 needs being in scope. */
4764 #define SET_TYPE_OFFSET(value) \
4765 if ((value) != typeoffset) \
4766 { \
4767 if ((value) < typeoffset) \
4768 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4769 else \
4770 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4771 } \
4772 typeoffset = (value);
4773
/* Same as SET_TYPE_OFFSET, but for the character in TMP1 and the base kept in
the local variable `charoffset`. The same usage notes apply: it is a
multi-statement macro and `value` is evaluated several times. */
4774 #define SET_CHAR_OFFSET(value) \
4775 if ((value) != charoffset) \
4776 { \
4777 if ((value) < charoffset) \
4778 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4779 else \
4780 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4781 } \
4782 charoffset = (value);
4783
4784 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4785 {
4786 DEFINE_COMPILER;
4787 jump_list *found = NULL;
4788 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4789 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4790 struct sljit_jump *jump = NULL;
4791 pcre_uchar *ccbegin;
4792 int compares, invertcmp, numberofcmps;
4793 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4794 BOOL utf = common->utf;
4795 #endif
4796
4797 #ifdef SUPPORT_UCP
4798 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4799 BOOL charsaved = FALSE;
4800 int typereg = TMP1, scriptreg = TMP1;
4801 const pcre_uint32 *other_cases;
4802 sljit_uw typeoffset;
4803 #endif
4804
4805 /* Scanning the necessary info. */
4806 cc++;
4807 ccbegin = cc;
4808 compares = 0;
4809 if (cc[-1] & XCL_MAP)
4810 {
4811 min = 0;
4812 cc += 32 / sizeof(pcre_uchar);
4813 }
4814
4815 while (*cc != XCL_END)
4816 {
4817 compares++;
4818 if (*cc == XCL_SINGLE)
4819 {
4820 cc ++;
4821 GETCHARINCTEST(c, cc);
4822 if (c > max) max = c;
4823 if (c < min) min = c;
4824 #ifdef SUPPORT_UCP
4825 needschar = TRUE;
4826 #endif
4827 }
4828 else if (*cc == XCL_RANGE)
4829 {
4830 cc ++;
4831 GETCHARINCTEST(c, cc);
4832 if (c < min) min = c;
4833 GETCHARINCTEST(c, cc);
4834 if (c > max) max = c;
4835 #ifdef SUPPORT_UCP
4836 needschar = TRUE;
4837 #endif
4838 }
4839 #ifdef SUPPORT_UCP
4840 else
4841 {
4842 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4843 cc++;
4844 if (*cc == PT_CLIST)
4845 {
4846 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4847 while (*other_cases != NOTACHAR)
4848 {
4849 if (*other_cases > max) max = *other_cases;
4850 if (*other_cases < min) min = *other_cases;
4851 other_cases++;
4852 }
4853 }
4854 else
4855 {
4856 max = READ_CHAR_MAX;
4857 min = 0;
4858 }
4859
4860 switch(*cc)
4861 {
4862 case PT_ANY:
4863 break;
4864
4865 case PT_LAMP:
4866 case PT_GC:
4867 case PT_PC:
4868 case PT_ALNUM:
4869 needstype = TRUE;
4870 break;
4871
4872 case PT_SC:
4873 needsscript = TRUE;
4874 break;
4875
4876 case PT_SPACE:
4877 case PT_PXSPACE:
4878 case PT_WORD:
4879 case PT_PXGRAPH:
4880 case PT_PXPRINT:
4881 case PT_PXPUNCT:
4882 needstype = TRUE;
4883 needschar = TRUE;
4884 break;
4885
4886 case PT_CLIST:
4887 case PT_UCNC:
4888 needschar = TRUE;
4889 break;
4890
4891 default:
4892 SLJIT_ASSERT_STOP();
4893 break;
4894 }
4895 cc += 2;
4896 }
4897 #endif
4898 }
4899
4900 /* We are not necessarily in UTF mode, even in 8 bit mode. */
4901 cc = ccbegin;
4902 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4903
4904 if ((cc[-1] & XCL_HASPROP) == 0)
4905 {
4906 if ((cc[-1] & XCL_MAP) != 0)
4907 {
4908 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4909 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
4910 {
4911 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4912 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4913 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4914 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4915 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4916 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4917 }
4918
4919 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4920 JUMPHERE(jump);
4921
4922 cc += 32 / sizeof(pcre_uchar);
4923 }
4924 else
4925 {
4926 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4927 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4928 }
4929 }
4930 else if ((cc[-1] & XCL_MAP) != 0)
4931 {
4932 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4933 #ifdef SUPPORT_UCP
4934 charsaved = TRUE;
4935 #endif
4936 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4937 {
4938 #ifdef COMPILE_PCRE8
4939 jump = NULL;
4940 if (common->utf)
4941 #endif
4942 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4943
4944 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4945 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4946 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4947 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4948 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4949 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4950
4951 #ifdef COMPILE_PCRE8
4952 if (common->utf)
4953 #endif
4954 JUMPHERE(jump);
4955 }
4956
4957 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4958 cc += 32 / sizeof(pcre_uchar);
4959 }
4960
4961 #ifdef SUPPORT_UCP
4962 /* Simple register allocation. TMP1 is preferred if possible. */
4963 if (needstype || needsscript)
4964 {
4965 if (needschar && !charsaved)
4966 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4967 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4968 if (needschar)
4969 {
4970 if (needstype)
4971 {
4972 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4973 typereg = RETURN_ADDR;
4974 }
4975
4976 if (needsscript)
4977 scriptreg = TMP3;
4978 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4979 }
4980 else if (needstype && needsscript)
4981 scriptreg = TMP3;
4982 /* In all other cases only one of them was specified, and that can go to TMP1. */
4983
4984 if (needsscript)
4985 {
4986 if (scriptreg == TMP1)
4987 {
4988 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4989 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4990 }
4991 else
4992 {
4993 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4994 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4995 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4996 }
4997 }
4998 }
4999 #endif
5000
5001 /* Generating code. */
5002 charoffset = 0;
5003 numberofcmps = 0;
5004 #ifdef SUPPORT_UCP
5005 typeoffset = 0;
5006 #endif
5007
5008 while (*cc != XCL_END)
5009 {
5010 compares--;
5011 invertcmp = (compares == 0 && list != backtracks);
5012 jump = NULL;
5013
5014 if (*cc == XCL_SINGLE)
5015 {
5016 cc ++;
5017 GETCHARINCTEST(c, cc);
5018
5019 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5020 {
5021 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5022 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5023 numberofcmps++;
5024 }
5025 else if (numberofcmps > 0)
5026 {
5027 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5028 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5029 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5030 numberofcmps = 0;
5031 }
5032 else
5033 {
5034 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5035 numberofcmps = 0;
5036 }
5037 }
5038 else if (*cc == XCL_RANGE)
5039 {
5040 cc ++;
5041 GETCHARINCTEST(c, cc);
5042 SET_CHAR_OFFSET(c);
5043 GETCHARINCTEST(c, cc);
5044
5045 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5046 {
5047 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5048 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5049 numberofcmps++;
5050 }
5051 else if (numberofcmps > 0)
5052 {
5053 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5054 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5055 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5056 numberofcmps = 0;
5057 }
5058 else
5059 {
5060 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5061 numberofcmps = 0;
5062 }
5063 }
5064 #ifdef SUPPORT_UCP
5065 else
5066 {
5067 if (*cc == XCL_NOTPROP)
5068 invertcmp ^= 0x1;
5069 cc++;
5070 switch(*cc)
5071 {
5072 case PT_ANY:
5073 if (list != backtracks)
5074 {
5075 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5076 continue;
5077 }
5078 else if (cc[-1] == XCL_NOTPROP)
5079 continue;
5080 jump = JUMP(SLJIT_JUMP);
5081 break;
5082
5083 case PT_LAMP:
5084 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5085 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5086 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5087 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5088 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5089 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5090 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5091 break;
5092
5093 case PT_GC:
5094 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5095 SET_TYPE_OFFSET(c);
5096 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5097 break;
5098
5099 case PT_PC:
5100 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5101 break;
5102
5103 case PT_SC:
5104 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5105 break;
5106
5107 case PT_SPACE:
5108 case PT_PXSPACE:
5109 SET_CHAR_OFFSET(9);
5110 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5112
5113 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5114 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5115
5116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5117 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5118
5119 SET_TYPE_OFFSET(ucp_Zl);
5120 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5121 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5122 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5123 break;
5124
5125 case PT_WORD:
5126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5127 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5128 /* Fall through. */
5129
5130 case PT_ALNUM:
5131 SET_TYPE_OFFSET(ucp_Ll);
5132 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5133 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5134 SET_TYPE_OFFSET(ucp_Nd);
5135 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5136 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5137 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5138 break;
5139
5140 case PT_CLIST:
5141 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5142
5143 /* At least three characters are required.
5144 Otherwise this case would be handled by the normal code path. */
5145 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5146 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5147
5148 /* Optimizing character pairs, if their difference is power of 2. */
5149 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5150 {
5151 if (charoffset == 0)
5152 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5153 else
5154 {
5155 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5156 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5157 }
5158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5159 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5160 other_cases += 2;
5161 }
5162 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5163 {
5164 if (charoffset == 0)
5165 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5166 else
5167 {
5168 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5169 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5170 }
5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5172 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5173
5174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5175 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5176
5177 other_cases += 3;
5178 }
5179 else
5180 {
5181 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5182 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5183 }
5184
5185 while (*other_cases != NOTACHAR)
5186 {
5187 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5188 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5189 }
5190 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5191 break;
5192
5193 case PT_UCNC:
5194 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5195 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5196 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5197 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5198 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5199 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5200
5201 SET_CHAR_OFFSET(0xa0);
5202 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5203 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5204 SET_CHAR_OFFSET(0);
5205 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5206 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5207 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5208 break;
5209
5210 case PT_PXGRAPH:
5211 /* C and Z groups are the farthest two groups. */
5212 SET_TYPE_OFFSET(ucp_Ll);
5213 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5214 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5215
5216 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5217
5218 /* In case of ucp_Cf, we overwrite the result. */
5219 SET_CHAR_OFFSET(0x2066);
5220 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5221 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5222
5223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5224 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5225
5226 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5227 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5228
5229 JUMPHERE(jump);
5230 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5231 break;
5232
5233 case PT_PXPRINT:
5234 /* C and Z groups are the farthest two groups. */
5235 SET_TYPE_OFFSET(ucp_Ll);
5236 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5237 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5238
5239 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5240 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5241
5242 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5243
5244 /* In case of ucp_Cf, we overwrite the result. */
5245 SET_CHAR_OFFSET(0x2066);
5246 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5247 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5248
5249 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5250 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5251
5252 JUMPHERE(jump);
5253 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5254 break;
5255
5256 case PT_PXPUNCT:
5257 SET_TYPE_OFFSET(ucp_Sc);
5258 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5259 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5260
5261 SET_CHAR_OFFSET(0);
5262 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5263 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5264
5265 SET_TYPE_OFFSET(ucp_Pc);
5266 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5267 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5268 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5269 break;
5270 }
5271 cc += 2;
5272 }
5273 #endif
5274
5275 if (jump != NULL)
5276 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5277 }
5278
5279 if (found != NULL)
5280 set_jumps(found, LABEL());
5281 }
5282
5283 #undef SET_TYPE_OFFSET
5284 #undef SET_CHAR_OFFSET
5285
5286 #endif
5287
/* Emits the matching-path code for one zero-width opcode: OP_SOD, OP_SOM,
   OP_[NOT_]WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC,
   OP_CIRCM or OP_REVERSE.

   common      JIT compiler state (newline settings, mode, shared routines).
   type        the opcode to compile.
   cc          pattern pointer; only OP_REVERSE reads from it (GET(cc, 0)).
   backtracks  list that receives every jump taken when the check fails.

   Returns cc unchanged, except for OP_REVERSE which returns cc + LINK_SIZE.
   An opcode not listed above reaches SLJIT_ASSERT_STOP(). */
5288 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5289 {
5290 DEFINE_COMPILER;
5291 int length;
5292 struct sljit_jump *jump[4];
5293 #ifdef SUPPORT_UTF
5294 struct sljit_label *label;
5295 #endif /* SUPPORT_UTF */
5296
5297 switch(type)
5298 {
5299 case OP_SOD:
/* Fail unless STR_PTR equals jit_arguments.begin. */
5300 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5302 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5303 return cc;
5304
5305 case OP_SOM:
/* Fail unless STR_PTR equals jit_arguments.str. */
5306 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5307 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5308 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5309 return cc;
5310
5311 case OP_NOT_WORD_BOUNDARY:
5312 case OP_WORD_BOUNDARY:
/* Fast-call the shared wordboundary routine, then backtrack on the resulting
   flag: non-zero fails OP_NOT_WORD_BOUNDARY, zero fails OP_WORD_BOUNDARY. */
5313 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5314 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5315 return cc;
5316
5317 case OP_EODN:
5318 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; it lands
   just before the final check_partial() and skips all newline tests. */
5319 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5320 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5321 {
/* Fixed newline above 255: matched as two code units, (newline >> 8) & 0xff
   followed by newline & 0xff. TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
5322 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5323 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5324 if (common->mode == JIT_COMPILE)
5325 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5326 else
5327 {
/* Partial-matching modes: jump[1] when exactly two units remain. Otherwise
   backtrack if TMP2 < STR_END or the first unit differs from the newline's
   first unit; what is left calls check_partial(common, TRUE) and then
   backtracks unconditionally. */
5328 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5329 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5330 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5331 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5332 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5333 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5334 check_partial(common, TRUE);
5335 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5336 JUMPHERE(jump[1]);
5337 }
5338 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5339 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5340 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5341 }
5342 else if (common->nltype == NLTYPE_FIXED)
5343 {
/* Single-unit fixed newline: it must be the last code unit of the subject. */
5344 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5345 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5346 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5347 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5348 }
5349 else
5350 {
/* Non-fixed newline types. The current unit is tested for CR first:
     jump[1]  current unit is not CR;
     jump[2]  CR with STR_PTR + 2 units beyond STR_END (CR is the last unit);
     jump[3]  CR followed by NL exactly at the end.
   CR with more than two units remaining, or CR followed by a non-NL unit at
   the end, backtracks. */
5351 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5352 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5353 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5354 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5355 jump[2] = JUMP(SLJIT_GREATER);
5356 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5357 /* Equal. */
5358 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5359 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5360 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5361
5362 JUMPHERE(jump[1]);
5363 if (common->nltype == NLTYPE_ANYCRLF)
5364 {
/* Not CR: the unit must be NL and must be the last unit of the subject. */
5365 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5366 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5367 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5368 }
5369 else
5370 {
/* Other newline types: STR_PTR is saved in LOCALS1, one character is read,
   it must end exactly at STR_END and pass the shared anynewline routine;
   STR_PTR is then restored because the assertion consumes nothing. */
5371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5372 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5373 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5374 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5375 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5376 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5377 }
5378 JUMPHERE(jump[2]);
5379 JUMPHERE(jump[3]);
5380 }
5381 JUMPHERE(jump[0]);
5382 check_partial(common, FALSE);
5383 return cc;
5384
5385 case OP_EOD:
/* Fail while input remains (STR_PTR < STR_END). */
5386 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5387 check_partial(common, FALSE);
5388 return cc;
5389
5390 case OP_DOLL:
/* Fail when jit_arguments.noteol is non-zero. Otherwise behave like OP_EODN
   (emitted by the recursive call), or like OP_EOD when common->endonly. */
5391 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5392 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5393 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5394
5395 if (!common->endonly)
5396 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
5397 else
5398 {
5399 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5400 check_partial(common, FALSE);
5401 }
5402 return cc;
5403
5404 case OP_DOLLM:
/* At STR_END: only noteol is tested, then jump[0] skips the newline test.
   Before STR_END (jump[1]): the input at STR_PTR must start a newline. */
5405 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5406 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5407 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5408 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5409 check_partial(common, FALSE);
5410 jump[0] = JUMP(SLJIT_JUMP);
5411 JUMPHERE(jump[1]);
5412
5413 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5414 {
/* Two-unit fixed newline: both units must be available and must match. */
5415 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5416 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5417 if (common->mode == JIT_COMPILE)
5418 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5419 else
5420 {
/* Partial-matching modes: when only one unit is left and it equals the
   newline's first unit, check_partial(common, TRUE) is emitted before the
   unconditional backtrack. */
5421 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5422 /* STR_PTR = STR_END - IN_UCHARS(1) */
5423 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5424 check_partial(common, TRUE);
5425 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5426 JUMPHERE(jump[1]);
5427 }
5428
5429 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5430 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5431 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5432 }
5433 else
5434 {
5435 peek_char(common, common->nlmax);
5436 check_newlinechar(common, common->nltype, backtracks, FALSE);
5437 }
5438 JUMPHERE(jump[0]);
5439 return cc;
5440
5441 case OP_CIRC:
/* Fail when STR_PTR is beyond jit_arguments.begin or notbol is non-zero. */
5442 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5444 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5445 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5446 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5447 return cc;
5448
5449 case OP_CIRCM:
/* Not beyond begin: only notbol is tested, then jump[0] skips the rest.
   Beyond begin (jump[1]): STR_PTR must be before STR_END and the input just
   before STR_PTR must be a newline. TMP1 keeps holding `begin` for the
   bounds check in the two-unit case. */
5450 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5451 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5452 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5453 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5454 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5455 jump[0] = JUMP(SLJIT_JUMP);
5456 JUMPHERE(jump[1]);
5457
5458 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5459 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5460 {
5461 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5462 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5463 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5464 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5465 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5466 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5467 }
5468 else
5469 {
/* Step back one character, then read it forward again and test it. */
5470 skip_char_back(common);
5471 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5472 check_newlinechar(common, common->nltype, backtracks, FALSE);
5473 }
5474 JUMPHERE(jump[0]);
5475 return cc;
5476
5477 case OP_REVERSE:
/* Move STR_PTR back by `length` characters, backtracking if that would
   cross jit_arguments.begin. A zero length emits nothing. */
5478 length = GET(cc, 0);
5479 if (length == 0)
5480 return cc + LINK_SIZE;
5481 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5482 #ifdef SUPPORT_UTF
5483 if (common->utf)
5484 {
/* UTF: characters vary in width, so loop `length` times (counter in TMP2),
   failing whenever STR_PTR <= begin before a step. */
5485 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5486 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5487 label = LABEL();
5488 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5489 skip_char_back(common);
5490 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5491 JUMPTO(SLJIT_NOT_ZERO, label);
5492 }
5493 else
5494 #endif
5495 {
/* Fixed-width units: subtract once, then compare against begin. */
5496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5497 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5498 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5499 }
5500 check_start_used_ptr(common);
5501 return cc + LINK_SIZE;
5502 }
/* Reached only for an opcode that has no case above. */
5503 SLJIT_ASSERT_STOP();
5504 return cc;
5505 }
5506
/* Emits the matching-path code for one single-character opcode: character
   types (\d \s \w and negations), OP_ANY, OP_ALLANY, OP_ANYBYTE, OP_PROP /
   OP_NOTPROP, OP_ANYNL, \h, \v, OP_EXTUNI, literal characters (OP_CHAR[I]),
   negated characters (OP_NOT[I]) and classes (OP_CLASS, OP_NCLASS,
   OP_XCLASS).

   common         JIT compiler state.
   type           the opcode to compile.
   cc             pointer to the opcode's argument data.
   backtracks     list that receives every jump taken on a mismatch.
   check_str_ptr  when set, detect_partial_match() is emitted before the
                  character is read. The OP_CHAR / OP_CHARI fast path used in
                  JIT_COMPILE mode ignores this flag and emits its own
                  STR_PTR > STR_END check.

   Returns cc advanced past the arguments this opcode consumed (unchanged for
   opcodes without arguments). An unhandled opcode reaches
   SLJIT_ASSERT_STOP(). */
5507 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
5508 {
5509 DEFINE_COMPILER;
5510 int length;
5511 unsigned int c, oc, bit;
5512 compare_context context;
5513 struct sljit_jump *jump[3];
5514 jump_list *end_list;
5515 #ifdef SUPPORT_UTF
5516 struct sljit_label *label;
5517 #ifdef SUPPORT_UCP
5518 pcre_uchar propdata[5];
5519 #endif
5520 #endif /* SUPPORT_UTF */
5521
5522 switch(type)
5523 {
5524 case OP_NOT_DIGIT:
5525 case OP_DIGIT:
5526 /* Digits are usually 0-9, so it is worth to optimize them. */
5527 if (check_str_ptr)
5528 detect_partial_match(common, backtracks);
5529 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* UTF-8 build: use read_char7_type() when is_char7_bitset() accepts the
   digit bitmap, otherwise fall back to read_char8_type(). */
5530 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5531 read_char7_type(common, type == OP_NOT_DIGIT);
5532 else
5533 #endif
5534 read_char8_type(common, type == OP_NOT_DIGIT);
5535 /* Flip the starting bit in the negative case. */
5536 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5537 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5538 return cc;
5539
5540 case OP_NOT_WHITESPACE:
5541 case OP_WHITESPACE:
/* Same scheme as the digit case, testing the ctype_space bit. */
5542 if (check_str_ptr)
5543 detect_partial_match(common, backtracks);
5544 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5545 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5546 read_char7_type(common, type == OP_NOT_WHITESPACE);
5547 else
5548 #endif
5549 read_char8_type(common, type == OP_NOT_WHITESPACE);
5550 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5551 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5552 return cc;
5553
5554 case OP_NOT_WORDCHAR:
5555 case OP_WORDCHAR:
/* Same scheme as the digit case, testing the ctype_word bit. */
5556 if (check_str_ptr)
5557 detect_partial_match(common, backtracks);
5558 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5559 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5560 read_char7_type(common, type == OP_NOT_WORDCHAR);
5561 else
5562 #endif
5563 read_char8_type(common, type == OP_NOT_WORDCHAR);
5564 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5565 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5566 return cc;
5567
5568 case OP_ANY:
/* Read one character and fail if it is a newline. */
5569 if (check_str_ptr)
5570 detect_partial_match(common, backtracks);
5571 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5572 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5573 {
/* Two-unit fixed newline: fail only when the unit just read equals
   (newline >> 8) & 0xff AND the next unit equals newline & 0xff. jump[0]
   skips the second test when the first unit differs; end_list skips it when
   no further input is available. */
5574 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5575 end_list = NULL;
5576 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5577 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5578 else
5579 check_str_end(common, &end_list);
5580
5581 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5582 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5583 set_jumps(end_list, LABEL());
5584 JUMPHERE(jump[0]);
5585 }
5586 else
5587 check_newlinechar(common, common->nltype, backtracks, TRUE);
5588 return cc;
5589
5590 case OP_ALLANY:
/* Skip one character without inspecting its value. */
5591 if (check_str_ptr)
5592 detect_partial_match(common, backtracks);
5593 #ifdef SUPPORT_UTF
5594 if (common->utf)
5595 {
5596 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5597 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5598 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5599 #if defined COMPILE_PCRE8
/* 8-bit: for a lead unit >= 0xc0 add the extra length looked up in
   utf8_table4 (indexed by unit - 0xc0). */
5600 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5601 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5603 #elif defined COMPILE_PCRE16
/* 16-bit: when (unit & 0xfc00) == 0xd800 skip one more unit; the 0/1 flag is
   shifted left by one to turn it into a byte offset. */
5604 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5605 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5606 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5607 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5608 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5609 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5610 #endif
5611 JUMPHERE(jump[0]);
5612 #endif /* COMPILE_PCRE[8|16] */
5613 return cc;
5614 }
5615 #endif
5616 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5617 return cc;
5618
5619 case OP_ANYBYTE:
/* Always advances by exactly one code unit. */
5620 if (check_str_ptr)
5621 detect_partial_match(common, backtracks);
5622 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5623 return cc;
5624
5625 #ifdef SUPPORT_UTF
5626 #ifdef SUPPORT_UCP
5627 case OP_NOTPROP:
5628 case OP_PROP:
/* Wrap the two property bytes at cc in a temporary extended-class
   description and let compile_xclass_matchingpath() generate the test. */
5629 propdata[0] = XCL_HASPROP;
5630 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5631 propdata[2] = cc[0];
5632 propdata[3] = cc[1];
5633 propdata[4] = XCL_END;
5634 if (check_str_ptr)
5635 detect_partial_match(common, backtracks);
5636 compile_xclass_matchingpath(common, propdata, backtracks);
5637 return cc + 2;
5638 #endif
5639 #endif
5640
5641 case OP_ANYNL:
/* jump[0]: the character read is not CR, so it is tested by
   check_newlinechar(). After a CR, a following NL is consumed as well; a
   different next unit (jump[1]) or no further input (end_list) leaves the
   CR alone as the match. */
5642 if (check_str_ptr)
5643 detect_partial_match(common, backtracks);
5644 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5645 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5646 /* We don't need to handle soft partial matching case. */
5647 end_list = NULL;
5648 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5649 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5650 else
5651 check_str_end(common, &end_list);
5652 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5653 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5654 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5655 jump[2] = JUMP(SLJIT_JUMP);
5656 JUMPHERE(jump[0]);
5657 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5658 set_jumps(end_list, LABEL());
5659 JUMPHERE(jump[1]);
5660 JUMPHERE(jump[2]);
5661 return cc;
5662
5663 case OP_NOT_HSPACE:
5664 case OP_HSPACE:
/* Read a character (range hint 0x9..0x3000), fast-call the shared hspace
   routine and backtrack on its zero / non-zero result. */
5665 if (check_str_ptr)
5666 detect_partial_match(common, backtracks);
5667 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5668 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5669 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5670 return cc;
5671
5672 case OP_NOT_VSPACE:
5673 case OP_VSPACE:
/* As above, with range hint 0xa..0x2029 and the shared vspace routine. */
5674 if (check_str_ptr)
5675 detect_partial_match(common, backtracks);
5676 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5677 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5678 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5679 return cc;
5680
5681 #ifdef SUPPORT_UCP
5682 case OP_EXTUNI:
/* Read the first character and load its gbprop field from ucd_records.
   STACK_TOP is saved in LOCALS0 and reused as a scratch register holding
   the gbprop value of the previous character. */
5683 if (check_str_ptr)
5684 detect_partial_match(common, backtracks);
5685 read_char(common);
5686 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5687 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5688 /* Optimize register allocation: use a real register. */
5689 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5690 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5691
/* Loop: stop at STR_END (jump[0]); otherwise remember STR_PTR in TMP3, read
   the next character and keep going while bit (1 << its gbprop) is set in
   ucp_gbtable[previous gbprop]. */
5692 label = LABEL();
5693 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5694 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5695 read_char(common);
5696 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5697 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5698 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5699
5700 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5701 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5702 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5703 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5704 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5705 JUMPTO(SLJIT_NOT_ZERO, label);
5706
/* The last character read does not extend the sequence: rewind to the
   position saved in TMP3, then restore STACK_TOP. */
5707 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5708 JUMPHERE(jump[0]);
5709 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5710
5711 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5712 {
5713 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5714 /* Since we successfully read a char above, partial matching must occur. */
5715 check_partial(common, TRUE);
5716 JUMPHERE(jump[0]);
5717 }
5718 return cc;
5719 #endif
5720
5721 case OP_CHAR:
5722 case OP_CHARI:
/* length = number of code units of the literal at cc. */
5723 length = 1;
5724 #ifdef SUPPORT_UTF
5725 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5726 #endif
/* Fast path (JIT_COMPILE mode only): the literal is caseful, has no other
   case, or char_get_othercase_bit() returned non-zero for it. Advance
   STR_PTR first, check the length once, then compare through
   byte_sequence_compare(). check_str_ptr is not consulted here. */
5727 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5728 {
5729 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5730 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5731
5732 context.length = IN_UCHARS(length);
5733 context.sourcereg = -1;
5734 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5735 context.ucharptr = 0;
5736 #endif
5737 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5738 }
5739
/* Generic path: decode the literal into c, read one input character and
   compare the values. */
5740 if (check_str_ptr)
5741 detect_partial_match(common, backtracks);
5742 #ifdef SUPPORT_UTF
5743 if (common->utf)
5744 {
5745 GETCHAR(c, cc);
5746 }
5747 else
5748 #endif
5749 c = *cc;
5750
5751 if (type == OP_CHAR || !char_has_othercase(common, cc))
5752 {
5753 read_char_range(common, c, c, FALSE);
5754 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5755 return cc + length;
5756 }
5757 oc = char_othercase(common, c);
5758 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5759 bit = c ^ oc;
5760 if (is_powerof2(bit))
5761 {
/* The two cases differ in a single bit: force that bit on and compare once. */
5762 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5763 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5764 return cc + length;
5765 }
/* Otherwise accept either c or oc with two separate comparisons. */
5766 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5767 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5768 JUMPHERE(jump[0]);
5769 return cc + length;
5770
5771 case OP_NOT:
5772 case OP_NOTI:
/* Match any single character except the literal at cc (or, for OP_NOTI,
   except either of its cases). */
5773 if (check_str_ptr)
5774 detect_partial_match(common, backtracks);
5775 length = 1;
5776 #ifdef SUPPORT_UTF
5777 if (common->utf)
5778 {
5779 #ifdef COMPILE_PCRE8
5780 c = *cc;
5781 if (c < 128)
5782 {
/* UTF-8 with a literal below 128: compare only the first input unit, then
   skip the rest of the input character using utf8_table4. */
5783 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5784 if (type == OP_NOT || !char_has_othercase(common, cc))
5785 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5786 else
5787 {
5788 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5789 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5790 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5791 }
5792 /* Skip the variable-length character. */
5793 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5794 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5795 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5796 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5797 JUMPHERE(jump[0]);
5798 return cc + 1;
5799 }
5800 else
5801 #endif /* COMPILE_PCRE8 */
5802 {
5803 GETCHARLEN(c, cc, length);
5804 }
5805 }
5806 else
5807 #endif /* SUPPORT_UTF */
5808 c = *cc;
5809
5810 if (type == OP_NOT || !char_has_othercase(common, cc))
5811 {
5812 read_char_range(common, c, c, TRUE);
5813 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5814 }
5815 else
5816 {
5817 oc = char_othercase(common, c);
5818 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5819 bit = c ^ oc;
5820 if (is_powerof2(bit))
5821 {
5822 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5823 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5824 }
5825 else
5826 {
5827 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5828 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5829 }
5830 }
5831 return cc + length;
5832
5833 case OP_CLASS:
5834 case OP_NCLASS:
/* Bitmap character class: cc points at a 32-byte bitmap (the return value
   skips 32 / sizeof(pcre_uchar) units). */
5835 if (check_str_ptr)
5836 detect_partial_match(common, backtracks);
5837
5838 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* `bit` is the highest character value looked up in the bitmap: 127 when
   is_char7_bitset() accepts the map in UTF mode, otherwise 255. */
5839 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
5840 read_char_range(common, 0, bit, type == OP_NCLASS);
5841 #else
5842 read_char_range(common, 0, 255, type == OP_NCLASS);
5843 #endif
5844
/* If check_class_ranges() reports success, no bitmap lookup is emitted. */
5845 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
5846 return cc + 32 / sizeof(pcre_uchar);
5847
/* Characters above the bitmap limit: OP_CLASS backtracks on them, OP_NCLASS
   keeps the jump in jump[0] so that they bypass the bitmap test below. */
5848 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5849 jump[0] = NULL;
5850 if (common->utf)
5851 {
5852 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5853 if (type == OP_CLASS)
5854 {
5855 add_jump(compiler, backtracks, jump[0]);
5856 jump[0] = NULL;
5857 }
5858 }
5859 #elif !defined COMPILE_PCRE8
5860 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5861 if (type == OP_CLASS)
5862 {
5863 add_jump(compiler, backtracks, jump[0]);
5864 jump[0] = NULL;
5865 }
5866 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5867
/* Bitmap test: load byte cc[TMP1 >> 3] and AND it with 1 << (TMP1 & 7);
   a zero result backtracks. */
5868 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5869 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5870 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5871 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5872 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5873 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5874
5875 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5876 if (jump[0] != NULL)
5877 JUMPHERE(jump[0]);
5878 #endif
5879 return cc + 32 / sizeof(pcre_uchar);
5880
5881 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5882 case OP_XCLASS:
/* Extended class: the class data starts after the LINK_SIZE field, and
   GET(cc, 0) supplies the length used to compute the return pointer. */
5883 if (check_str_ptr)
5884 detect_partial_match(common, backtracks);
5885 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5886 return cc + GET(cc, 0) - 1;
5887 #endif
5888 }
/* Reached only for an opcode that has no case above. */
5889 SLJIT_ASSERT_STOP();
5890 return cc;
5891 }
5892
5893 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5894 {
5895 /* This function consumes at least one input character. */
5896 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5897 DEFINE_COMPILER;
5898 pcre_uchar *ccbegin = cc;
5899 compare_context context;
5900 int size;
5901
5902 context.length = 0;
5903 do
5904 {
5905 if (cc >= ccend)
5906 break;
5907
5908 if (*cc == OP_CHAR)
5909 {
5910 size = 1;
5911 #ifdef SUPPORT_UTF
5912 if (common->utf && HAS_EXTRALEN(cc[1]))
5913 size += GET_EXTRALEN(cc[1]);
5914 #endif
5915 }
5916 else if (*cc == OP_CHARI)
5917 {
5918 size = 1;
5919 #ifdef SUPPORT_UTF
5920 if (common->utf)
5921 {
5922 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5923 size = 0;
5924 else if (HAS_EXTRALEN(cc[1]))
5925 size += GET_EXTRALEN(cc[1]);
5926 }
5927 else
5928 #endif
5929 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5930 size = 0;
5931 }
5932 else
5933 size = 0;
5934
5935 cc += 1 + size;
5936 context.length += IN_UCHARS(size);
5937 }
5938 while (size > 0 && context.length <= 128);
5939
5940 cc = ccbegin;
5941 if (context.length > 0)
5942 {
5943 /* We have a fixed-length byte sequence. */
5944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5945 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5946
5947 context.sourcereg = -1;
5948 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5949 context.ucharptr = 0;
5950 #endif
5951 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5952 return cc;
5953 }
5954
5955 /* A non-fixed length character will be checked if length == 0. */
5956 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
5957 }
5958
5959 /* Forward declarations of functions that are used before their definition. */
5960 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5961 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5962
/* PUSH_BACKTRACK(size, ccstart, error)
   Allocates `size` bytes with sljit_alloc_memory(), zero-fills them, stores
   `ccstart` in the new record's cc field and links the record on top of
   parent->top (the previous top is kept in ->prev).

   The macro relies on three names being in scope at the expansion site:
   `compiler`, `backtrack` (receives the new record) and `parent`. If the
   compiler reports an error after the allocation, the macro executes
   `return error;` in the ENCLOSING function. Note that `size` is expanded
   twice (allocation and memset), so it should be free of side effects. */
5963 #define PUSH_BACKTRACK(size, ccstart, error) \
5964 do \
5965 { \
5966 backtrack = sljit_alloc_memory(compiler, (size)); \
5967 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5968 return error; \
5969 memset(backtrack, 0, size); \
5970 backtrack->prev = parent->top; \
5971 backtrack->cc = (ccstart); \
5972 parent->top = backtrack; \
5973 } \
5974 while (0)
5975
/* PUSH_BACKTRACK_NOVALUE(size, ccstart)
   Same as PUSH_BACKTRACK, for use inside functions returning void: on a
   compiler error it executes a plain `return;`. */
5976 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5977 do \
5978 { \
5979 backtrack = sljit_alloc_memory(compiler, (size)); \
5980 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5981 return; \
5982 memset(backtrack, 0, size); \
5983 backtrack->prev = parent->top; \
5984 backtrack->cc = (ccstart); \
5985 parent->top = backtrack; \
5986 } \
5987 while (0)
5988
/* Casts the in-scope `backtrack` pointer to a pointer to `type`. */
5989 #define BACKTRACK_AS(type) ((type *)backtrack)
5990
5991 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5992 {
5993 /* The OVECTOR offset goes to TMP2. */
5994 DEFINE_COMPILER;
5995 int count = GET2(cc, 1 + IMM2_SIZE);
5996 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5997 unsigned int offset;
5998 jump_list *found = NULL;
5999
6000 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6001
6002 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6003
6004 count--;
6005 while (count-- > 0)
6006 {
6007 offset = GET2(slot, 0) << 1;
6008 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6009 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6010 slot += common->name_entry_size;
6011 }
6012
6013 offset = GET2(slot, 0) << 1;
6014 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6015 if (backtracks != NULL && !common->jscript_compat)
6016 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6017
6018 set_jumps(found, LABEL());
6019 }
6020
/* Emits the code that matches one occurrence of a back reference at STR_PTR.

   common     - compiler state
   cc         - points to the reference opcode; OP_REF / OP_REFI read the
                group offset from the opcode, any other opcode expects the
                caller to have loaded TMP2 with the address of the
                start/end pointer pair
   backtracks - list that receives every failure jump
   withchecks - when TRUE, extra checks are emitted: for OP_REF / OP_REFI a
                comparison of the start value with OVECTOR(1) (skipped in
                JavaScript compatibility mode), and a test for a zero length
                reference
   emptyfail  - only meaningful with withchecks: when TRUE a zero length
                reference is added to backtracks, otherwise it simply
                continues after the generated code */
6021 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6022 {
6023 DEFINE_COMPILER;
6024 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6025 int offset = 0;
/* Jump taken when the referenced string is empty (only set with withchecks). */
6026 struct sljit_jump *jump = NULL;
6027 struct sljit_jump *partial;
6028 struct sljit_jump *nopartial;
6029
/* Load the start pointer of the referenced string into TMP1. */
6030 if (ref)
6031 {
6032 offset = GET2(cc, 1) << 1;
6033 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6034 /* OVECTOR(1) contains the "string begin - 1" constant. */
6035 if (withchecks && !common->jscript_compat)
6036 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6037 }
6038 else
6039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6040
6041 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is delegated to the
   do_utf_caselesscmp() helper instead of the inline compare routines. */
6042 if (common->utf && *cc == OP_REFI)
6043 {
6044 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
/* TMP2 receives the end pointer of the referenced string. */
6045 if (ref)
6046 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6047 else
6048 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6049
/* start == end means an empty reference. */
6050 if (withchecks)
6051 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6052
6053 /* Needed to save important temporary registers. */
6054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6055 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
/* The current subject position is passed through jit_arguments.uchar_ptr. */
6056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6057 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6058 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return values 0 and 1 are treated as failures here; 1 additionally runs
   check_partial() outside JIT_COMPILE mode. Any other value becomes the new
   STR_PTR. NOTE(review): exact meaning of 0 / 1 is defined by
   do_utf_caselesscmp, which is not visible here - confirm. */
6059 if (common->mode == JIT_COMPILE)
6060 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6061 else
6062 {
6063 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6064 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6065 check_partial(common, FALSE);
6066 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6067 JUMPHERE(nopartial);
6068 }
6069 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6070 }
6071 else
6072 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6073 {
/* TMP2 = end - start, i.e. the length of the referenced string; the flags
   of the subtraction are used by the zero test below. */
6074 if (ref)
6075 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6076 else
6077 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6078
6079 if (withchecks)
6080 jump = JUMP(SLJIT_ZERO);
6081
/* Advance STR_PTR by the whole length first, then check whether that runs
   past the end of the subject. */
6082 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6083 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6084 if (common->mode == JIT_COMPILE)
6085 add_jump(compiler, backtracks, partial);
6086
/* Call the shared compare routine; a non-zero TMP2 afterwards is a mismatch. */
6087 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6088 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6089
/* Partial matching modes: when the reference does not fit into the rest of
   the subject, compare only the available part and report a partial match. */
6090 if (common->mode != JIT_COMPILE)
6091 {
6092 nopartial = JUMP(SLJIT_JUMP);
6093 JUMPHERE(partial);
6094 /* TMP2 -= STR_END - STR_PTR */
6095 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6096 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: skip the compare call. */
6097 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6098 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6099 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6100 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6101 JUMPHERE(partial);
6102 check_partial(common, FALSE);
6103 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6104 JUMPHERE(nopartial);
6105 }
6106 }
6107
/* Resolve the "empty reference" jump: either a failure or a fall through. */
6108 if (jump != NULL)
6109 {
6110 if (emptyfail)
6111 add_jump(compiler, backtracks, jump);
6112 else
6113 JUMPHERE(jump);
6114 }
6115 }
6116
/* Emits the matching path of a repeated back reference, i.e. a reference
   opcode immediately followed by one of the OP_CR* repeat opcodes.

   common - compiler state
   cc     - points to the reference opcode
   parent - backtrack node that receives the newly pushed iterator_backtrack

   Returns the address of the first opcode after the repeat opcode (and its
   range arguments, if any). The PUSH_BACKTRACK macro is only partly visible
   here; NOTE(review): it presumably returns early on allocation failure -
   confirm against its definition.

   Inside this function max == 0 means "no upper limit". */
6117 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6118 {
6119 DEFINE_COMPILER;
6120 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6121 backtrack_common *backtrack;
6122 pcre_uchar type;
6123 int offset = 0;
6124 struct sljit_label *label;
6125 struct sljit_jump *zerolength;
6126 struct sljit_jump *jump = NULL;
6127 pcre_uchar *ccbegin = cc;
6128 int min = 0, max = 0;
6129 BOOL minimize;
6130
6131 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6132
/* Non OP_REF / OP_REFI opcodes carry one more IMM2_SIZE sized argument, so
   cc is shifted to make the offsets below valid for both forms. */
6133 if (ref)
6134 offset = GET2(cc, 1) << 1;
6135 else
6136 cc += IMM2_SIZE;
6137 type = cc[1 + IMM2_SIZE];
6138
/* Odd repeat opcodes are the minimizing variants. */
6139 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6140 minimize = (type & 0x1) != 0;
6141 switch(type)
6142 {
6143 case OP_CRSTAR:
6144 case OP_CRMINSTAR:
6145 min = 0;
6146 max = 0;
6147 cc += 1 + IMM2_SIZE + 1;
6148 break;
6149 case OP_CRPLUS:
6150 case OP_CRMINPLUS:
6151 min = 1;
6152 max = 0;
6153 cc += 1 + IMM2_SIZE + 1;
6154 break;
6155 case OP_CRQUERY:
6156 case OP_CRMINQUERY:
6157 min = 0;
6158 max = 1;
6159 cc += 1 + IMM2_SIZE + 1;
6160 break;
6161 case OP_CRRANGE:
6162 case OP_CRMINRANGE:
6163 min = GET2(cc, 1 + IMM2_SIZE + 1);
6164 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6165 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6166 break;
6167 default:
6168 SLJIT_ASSERT_STOP();
6169 break;
6170 }
6171
/* Maximizing (greedy) repeat: the whole loop is generated here and the
   function returns at the end of this branch. */
6172 if (!minimize)
6173 {
6174 if (min == 0)
6175 {
6176 allocate_stack(common, 2);
6177 if (ref)
6178 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6179 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6181 /* Temporary release of STR_PTR. */
6182 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6183 /* Handles both invalid and empty cases. Since the minimum repeat,
6184 is zero the invalid case is basically the same as an empty case. */
6185 if (ref)
6186 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6187 else
6188 {
6189 compile_dnref_search(common, ccbegin, NULL);
6190 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
/* TMP2 (address of the selected pointer pair) is kept in POSSESSIVE1 so it
   can be reloaded at the start of every iteration. */
6191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6192 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6193 }
6194 /* Restore if not zero length. */
6195 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6196 }
6197 else
6198 {
6199 allocate_stack(common, 1);
6200 if (ref)
6201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6203 if (ref)
6204 {
/* At least one repeat is required, so a start value equal to OVECTOR(1)
   is a failure here. */
6205 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6206 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6207 }
6208 else
6209 {
6210 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6213 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6214 }
6215 }
6216
/* POSSESSIVE0 is used as the iteration counter when a bound must be checked. */
6217 if (min > 1 || max > 1)
6218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6219
/* Start of the repeat loop. */
6220 label = LABEL();
6221 if (!ref)
6222 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6223 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6224
6225 if (min > 1 || max > 1)
6226 {
6227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6228 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
/* Below the minimum: iterate again without saving a position. */
6230 if (min > 1)
6231 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6232 if (max > 1)
6233 {
/* Below the maximum: push the current position and iterate again. */
6234 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6235 allocate_stack(common, 1);
6236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6237 JUMPTO(SLJIT_JUMP, label);
6238 JUMPHERE(jump);
6239 }
6240 }
6241
6242 if (max == 0)
6243 {
6244 /* Includes min > 1 case as well. */
6245 allocate_stack(common, 1);
6246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6247 JUMPTO(SLJIT_JUMP, label);
6248 }
6249
6250 JUMPHERE(zerolength);
6251 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6252
6253 count_match(common);
6254 return cc;
6255 }
6256
/* Minimizing (lazy) repeat. Stack layout: STACK(0) holds a position (or 0),
   STACK(1) the repeat counter, and for non OP_REF / OP_REFI opcodes STACK(2)
   the address of the selected pointer pair. */
6257 allocate_stack(common, ref ? 2 : 3);
6258 if (ref)
6259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6260 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* OP_CRMINSTAR has no bound to check, so its counter is left uninitialized. */
6261 if (type != OP_CRMINSTAR)
6262 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6263
6264 if (min == 0)
6265 {
6266 /* Handles both invalid and empty cases. Since the minimum repeat,
6267 is zero the invalid case is basically the same as an empty case. */
6268 if (ref)
6269 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6270 else
6271 {
6272 compile_dnref_search(common, ccbegin, NULL);
6273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6274 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6275 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6276 }
6277 /* Length is non-zero, we can match real repeats. */
6278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* Zero repeats are tried first: skip the reference match generated below. */
6279 jump = JUMP(SLJIT_JUMP);
6280 }
6281 else
6282 {
6283 if (ref)
6284 {
6285 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6286 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6287 }
6288 else
6289 {
6290 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6291 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6292 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6293 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6294 }
6295 }
6296
/* Entry point used for matching one more repeat. */
6297 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6298 if (max > 0)
6299 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6300
6301 if (!ref)
6302 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6303 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6305
6306 if (min > 1)
6307 {
/* Keep matching until the minimum count is reached. */
6308 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6309 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6311 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6312 }
6313 else if (max > 0)
6314 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6315
6316 if (jump != NULL)
6317 JUMPHERE(jump);
6318 JUMPHERE(zerolength);
6319
6320 count_match(common);
6321 return cc;
6322 }
6323
6324 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6325 {
6326 DEFINE_COMPILER;
6327 backtrack_common *backtrack;
6328 recurse_entry *entry = common->entries;
6329 recurse_entry *prev = NULL;
6330 sljit_sw start = GET(cc, 1);
6331 pcre_uchar *start_cc;
6332 BOOL needs_control_head;
6333
6334 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6335
6336 /* Inlining simple patterns. */
6337 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6338 {
6339 start_cc = common->start + start;
6340 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6341 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6342 return cc + 1 + LINK_SIZE;
6343 }
6344
6345 while (entry != NULL)
6346 {
6347 if (entry->start == start)
6348 break;
6349 prev = entry;
6350 entry = entry->next;
6351 }
6352
6353 if (entry == NULL)
6354 {
6355 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6356 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6357 return NULL;
6358 entry->next = NULL;
6359 entry->entry = NULL;
6360 entry->calls = NULL;
6361 entry->start = start;
6362
6363 if (prev != NULL)
6364 prev->next = entry;
6365 else
6366 common->entries = entry;
6367 }
6368
6369 if (common->has_set_som && common->mark_ptr != 0)
6370 {
6371 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6372 allocate_stack(common, 2);
6373 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6374 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6376 }
6377 else if (common->has_set_som || common->mark_ptr != 0)
6378 {
6379 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6380 allocate_stack(common, 1);
6381 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6382 }
6383
6384 if (entry->entry == NULL)
6385 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6386 else
6387 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6388 /* Leave if the match is failed. */
6389 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6390 return cc + 1 + LINK_SIZE;
6391 }
6392
6393 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6394 {
6395 const pcre_uchar *begin = arguments->begin;
6396 int *offset_vector = arguments->offsets;
6397 int offset_count = arguments->offset_count;
6398 int i;
6399
6400 if (PUBL(callout) == NULL)
6401 return 0;
6402
6403 callout_block->version = 2;
6404 callout_block->callout_data = arguments->callout_data;
6405
6406 /* Offsets in subject. */
6407 callout_block->subject_length = arguments->end - arguments->begin;
6408 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6409 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6410 #if defined COMPILE_PCRE8
6411 callout_block->subject = (PCRE_SPTR)begin;
6412 #elif defined COMPILE_PCRE16
6413 callout_block->subject = (PCRE_SPTR16)begin;
6414 #elif defined COMPILE_PCRE32
6415 callout_block->subject = (PCRE_SPTR32)begin;
6416 #endif
6417
6418 /* Convert and copy the JIT offset vector to the offset_vector array. */
6419 callout_block->capture_top = 0;
6420 callout_block->offset_vector = offset_vector;
6421 for (i = 2; i < offset_count; i += 2)
6422 {
6423 offset_vector[i] = jit_ovector[i] - begin;
6424 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6425 if (jit_ovector[i] >= begin)
6426 callout_block->capture_top = i;
6427 }
6428
6429 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6430 if (offset_count > 0)
6431 offset_vector[0] = -1;
6432 if (offset_count > 1)
6433 offset_vector[1] = -1;
6434 return (*PUBL(callout))(callout_block);
6435 }
6436
6437 /* Aligning to 8 byte. */
/* Size of the callout block rounded up to a multiple of 8 bytes. */
6438 #define CALLOUT_ARG_SIZE \
6439 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6440
/* Offset of a callout block field relative to STACK_TOP: the block occupies
   the CALLOUT_ARG_SIZE bytes directly below STACK_TOP. */
6441 #define CALLOUT_ARG_OFFSET(arg) \
6442 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6443
/* Emits the matching path of a callout item.

   common - compiler state
   cc     - points to the callout opcode; cc[1] is the callout number, and
            it is followed by two LINK_SIZE sized values (pattern position
            and next item length)
   parent - backtrack node that receives the newly pushed backtrack_common

   The generated code builds a callout block on the JIT stack, calls
   do_callout() and then tests the result: a positive value fails the match
   at this point, a negative value jumps to the forced quit handler, and zero
   continues matching. Returns the address of the next opcode. */
6444 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6445 {
6446 DEFINE_COMPILER;
6447 backtrack_common *backtrack;
6448
6449 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6450
/* Reserve room for the callout block on the JIT stack. */
6451 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6452
6453 SLJIT_ASSERT(common->capture_last_ptr != 0);
6454 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6455 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6456 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6457 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6458
6459 /* These pointer sized fields temporarily store internal variables. */
/* offset_vector carries STR_PTR and subject carries OVECTOR(0); do_callout()
   converts them into current_position and start_match. */
6460 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6463
/* The mark field is copied from jit_arguments.mark_ptr when the pattern uses
   marks, otherwise it is set to 0. */
6464 if (common->mark_ptr != 0)
6465 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6466 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6467 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6469
6470 /* Needed to save important temporary registers. */
6471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* R1 = address of the callout block, R2 = base address of the JIT offset
   vector. NOTE(review): the first argument is presumably the ARGUMENTS value
   loaded into TMP1 above (TMP1 being SLJIT_R0) - confirm. */
6472 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6473 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6474 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int: convert it to a full machine word. */
6475 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6476 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6477 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6478
6479 /* Check return value. */
6480 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6481 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
/* Negative result: jump to the forced quit label if it already exists,
   otherwise queue the jump in common->forced_quit. */
6482 if (common->forced_quit_label == NULL)
6483 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6484 else
6485 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6486 return cc + 2 + 2 * LINK_SIZE;
6487 }
6488
6489 #undef CALLOUT_ARG_SIZE
6490 #undef CALLOUT_ARG_OFFSET
6491
6492 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
6493 {
6494 while (TRUE)
6495 {
6496 switch (*cc)
6497 {
6498 case OP_NOT_WORD_BOUNDARY:
6499 case OP_WORD_BOUNDARY:
6500 case OP_CIRC:
6501 case OP_CIRCM:
6502 case OP_DOLL:
6503 case OP_DOLLM:
6504 case OP_CALLOUT:
6505 case OP_ALT:
6506 cc += PRIV(OP_lengths)[*cc];
6507 break;
6508
6509 case OP_KET:
6510 return FALSE;
6511
6512 default:
6513 return TRUE;
6514 }
6515 }
6516 }
6517
6518 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6519 {
6520 DEFINE_COMPILER;
6521 int framesize;
6522 int extrasize;
6523 BOOL needs_control_head;
6524 int private_data_ptr;
6525 backtrack_common altbacktrack;
6526 pcre_uchar *ccbegin;
6527 pcre_uchar opcode;
6528 pcre_uchar bra = OP_BRA;
6529 jump_list *tmp = NULL;
6530 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6531 jump_list **found;
6532 /* Saving previous accept variables. */
6533 BOOL save_local_exit = common->local_exit;
6534 BOOL save_positive_assert = common->positive_assert;
6535 then_trap_backtrack *save_then_trap = common->then_trap;
6536 struct sljit_label *save_quit_label = common->quit_label;
6537 struct sljit_label *save_accept_label = common->accept_label;
6538 jump_list *save_quit = common->quit;
6539 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6540 jump_list *save_accept = common->accept;
6541 struct sljit_jump *jump;
6542 struct sljit_jump *brajump = NULL;
6543
6544 /* Assert captures then. */
6545 common->then_trap = NULL;
6546
6547 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6548 {
6549 SLJIT_ASSERT(!conditional);
6550 bra = *cc;
6551 cc++;
6552 }
6553 private_data_ptr = PRIVATE_DATA(cc);
6554 SLJIT_ASSERT(private_data_ptr != 0);
6555 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6556 backtrack->framesize = framesize;
6557 backtrack->private_data_ptr = private_data_ptr;
6558 opcode = *cc;
6559 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6560 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6561 ccbegin = cc;
6562 cc += GET(cc, 1);
6563
6564 if (bra == OP_BRAMINZERO)
6565 {
6566 /* This is a braminzero backtrack path. */
6567 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6568 free_stack(common, 1);
6569 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6570 }
6571
6572 if (framesize < 0)
6573 {
6574 extrasize = 1;
6575 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
6576 extrasize = 0;
6577
6578 if (needs_control_head)
6579 extrasize++;
6580
6581 if (framesize == no_frame)
6582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6583
6584 if (extrasize > 0)
6585 allocate_stack(common, extrasize);
6586
6587 if (needs_control_head)
6588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6589
6590 if (extrasize > 0)
6591 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6592
6593 if (needs_control_head)
6594 {
6595 SLJIT_ASSERT(extrasize == 2);
6596 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6597 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6598 }
6599 }
6600 else
6601 {
6602 extrasize = needs_control_head ? 3 : 2;
6603 allocate_stack(common, framesize + extrasize);
6604
6605 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6606 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6608 if (needs_control_head)
6609 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6611 if (needs_control_head)
6612 {
6613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6614 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6616 }
6617 else
6618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6619
6620 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6621 }
6622
6623 memset(&altbacktrack, 0, sizeof(backtrack_common));
6624 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6625 {
6626 /* Negative assert is stronger than positive assert. */
6627 common->local_exit = TRUE;
6628 common->quit_label = NULL;
6629 common->quit = NULL;
6630 common->positive_assert = FALSE;
6631 }
6632 else
6633 common->positive_assert = TRUE;
6634 common->positive_assert_quit = NULL;
6635
6636 while (1)
6637 {
6638 common->accept_label = NULL;
6639 common->accept = NULL;
6640 altbacktrack.top = NULL;
6641 altbacktrack.topbacktracks = NULL;
6642
6643 if (*ccbegin == OP_ALT && extrasize > 0)
6644 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6645
6646 altbacktrack.cc = ccbegin;
6647 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6648 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6649 {
6650 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6651 {
6652 common->local_exit = save_local_exit;
6653 common->quit_label = save_quit_label;
6654 common->quit = save_quit;
6655 }
6656 common->positive_assert = save_positive_assert;
6657 common->then_trap = save_then_trap;
6658 common->accept_label = save_accept_label;
6659 common->positive_assert_quit = save_positive_assert_quit;
6660 common->accept = save_accept;
6661 return NULL;
6662 }
6663 common->accept_label = LABEL();
6664 if (common->accept != NULL)
6665 set_jumps(common->accept, common->accept_label);
6666
6667 /* Reset stack. */
6668 if (framesize < 0)
6669 {
6670 if (framesize == no_frame)
6671 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6672 else if (extrasize > 0)
6673 free_stack(common, extrasize);
6674
6675 if (needs_control_head)
6676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6677 }
6678 else
6679 {
6680 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6681 {
6682 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6683 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6684 if (needs_control_head)
6685 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6686 }
6687 else
6688 {
6689 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6690 if (needs_control_head)
6691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6692 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6693 }
6694 }
6695
6696 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6697 {
6698 /* We know that STR_PTR was stored on the top of the stack. */
6699 if (conditional)
6700 {
6701 if (extrasize > 0)
6702 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6703 }
6704 else if (bra == OP_BRAZERO)
6705 {
6706 if (framesize < 0)
6707 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6708 else
6709 {
6710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6711 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6712 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6713 }
6714 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6716 }
6717 else if (framesize >= 0)
6718 {
6719 /* For OP_BRA and OP_BRAMINZERO. */
6720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6721 }
6722 }
6723 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6724
6725 compile_backtrackingpath(common, altbacktrack.top);
6726 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6727 {
6728 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6729 {
6730 common->local_exit = save_local_exit;
6731 common->quit_label = save_quit_label;
6732 common->quit = save_quit;
6733 }
6734 common->positive_assert = save_positive_assert;
6735 common->then_trap = save_then_trap;
6736 common->accept_label = save_accept_label;
6737 common->positive_assert_quit = save_positive_assert_quit;
6738 common->accept = save_accept;
6739 return NULL;
6740 }
6741 set_jumps(altbacktrack.topbacktracks, LABEL());
6742
6743 if (*cc != OP_ALT)
6744 break;
6745
6746 ccbegin = cc;
6747 cc += GET(cc, 1);
6748 }
6749
6750 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6751 {
6752 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6753 /* Makes the check less complicated below. */
6754 common->positive_assert_quit = common->quit;
6755 }
6756
6757 /* None of them matched. */
6758 if (common->positive_assert_quit != NULL)
6759 {
6760 jump = JUMP(SLJIT_JUMP);
6761 set_jumps(common->positive_assert_quit, LABEL());
6762 SLJIT_ASSERT(framesize != no_stack);
6763 if (framesize < 0)
6764 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6765 else
6766 {
6767 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6768 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6769 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6770 }
6771 JUMPHERE(jump);
6772 }
6773
6774 if (needs_control_head)
6775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6776
6777 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6778 {
6779 /* Assert is failed. */
6780 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
6781 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6782
6783 if (framesize < 0)
6784 {
6785 /* The topmost item should be 0. */
6786 if (bra == OP_BRAZERO)
6787