/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1491 - (show annotations)
Mon Jul 7 07:11:16 2014 UTC (5 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 332033 byte(s)
Error occurred while calculating annotation data.
Major JIT compiler update.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Debugging switches. Both are disabled (commented out) by default. */

/* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
   1 - use unoptimized capturing brackets;
   2 - additionally enable capture_last_ptr (implies option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* DEBUG_FORCE_CONTROL_HEAD:
   1 - always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Size of the regex stack that is allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* With sljit debugging enabled, DESTROY_REGISTERS is defined so that it
   can be checked whether an allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
/*
Short summary of the backtracking mechanism employed by the JIT code generator:

The code generator follows the recursive nature of the Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (see struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
145
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chain list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node, if any. */
} jump_list;
194
/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;   /* Jump associated with this stub. */
  struct sljit_label *quit;   /* Label associated with this stub. */
  struct stub_list *next;     /* Following node, if any. */
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative marker values for frame sizes. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
318 typedef struct compiler_common {
319 /* The sljit ceneric compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
359
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* TRUE, when minlength is greater than 0. */
366 BOOL might_be_empty;
367 /* \K is found in the pattern. */
368 BOOL has_set_som;
369 /* (*SKIP:arg) is found in the pattern. */
370 BOOL has_skip_arg;
371 /* (*THEN) is found in the pattern. */
372 BOOL has_then;
373 /* Needs to know the start position anytime. */
374 BOOL needs_start_ptr;
375 /* Currently in recurse or negative assert. */
376 BOOL local_exit;
377 /* Currently in a positive assert. */
378 BOOL positive_assert;
379 /* Newline control. */
380 int nltype;
381 pcre_uint32 nlmax;
382 pcre_uint32 nlmin;
383 int newline;
384 int bsr_nltype;
385 pcre_uint32 bsr_nlmax;
386 pcre_uint32 bsr_nlmin;
387 /* Dollar endonly. */
388 int endonly;
389 /* Tables. */
390 sljit_sw ctypes;
391 /* Named capturing brackets. */
392 pcre_uchar *name_table;
393 sljit_sw name_count;
394 sljit_sw name_entry_size;
395
396 /* Labels and jump lists. */
397 struct sljit_label *partialmatchlabel;
398 struct sljit_label *quit_label;
399 struct sljit_label *forced_quit_label;
400 struct sljit_label *accept_label;
401 struct sljit_label *ff_newline_shortcut;
402 stub_list *stubs;
403 label_addr_list *label_addrs;
404 recurse_entry *entries;
405 recurse_entry *currententry;
406 jump_list *partialmatch;
407 jump_list *quit;
408 jump_list *positive_assert_quit;
409 jump_list *forced_quit;
410 jump_list *accept;
411 jump_list *calllimit;
412 jump_list *stackalloc;
413 jump_list *revertframes;
414 jump_list *wordboundary;
415 jump_list *anynewline;
416 jump_list *hspace;
417 jump_list *vspace;
418 jump_list *casefulcmp;
419 jump_list *caselesscmp;
420 jump_list *reset_match;
421 BOOL jscript_compat;
422 #ifdef SUPPORT_UTF
423 BOOL utf;
424 #ifdef SUPPORT_UCP
425 BOOL use_ucp;
426 #endif
427 #ifdef COMPILE_PCRE8
428 jump_list *utfreadchar;
429 jump_list *utfreadchar16;
430 jump_list *utfreadtype8;
431 #endif
432 #endif /* SUPPORT_UTF */
433 #ifdef SUPPORT_UCP
434 jump_list *getucd;
435 #endif
436 } compiler_common;
437
/* Context used by byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  /* The members below exist only when unaligned access is available. */
  int ucharptr;
  /* The same storage viewed as int, as unsigned short, or as an array
     of character units whose width depends on the compile mode. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  /* Second value with the same layout as 'c'. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
471
472 /* Undefine sljit macros. */
473 #undef CMP
474
475 /* Used for accessing the elements of the stack. */
476 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
477
478 #define TMP1 SLJIT_R0
479 #define TMP2 SLJIT_R2
480 #define TMP3 SLJIT_R3
481 #define STR_PTR SLJIT_S0
482 #define STR_END SLJIT_S1
483 #define STACK_TOP SLJIT_R1
484 #define STACK_LIMIT SLJIT_S2
485 #define COUNT_MATCH SLJIT_S3
486 #define ARGUMENTS SLJIT_S4
487 #define RETURN_ADDR SLJIT_R4
488
489 /* Local space layout. */
490 /* These two locals can be used by the current opcode. */
491 #define LOCALS0 (0 * sizeof(sljit_sw))
492 #define LOCALS1 (1 * sizeof(sljit_sw))
493 /* Two local variables for possessive quantifiers (char1 cannot use them). */
494 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
495 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
496 /* Max limit of recursions. */
497 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
498 /* The output vector is stored on the stack, and contains pointers
499 to characters. The vector data is divided into two groups: the first
500 group contains the start / end character pointers, and the second is
501 the start pointers when the end of the capturing group has not yet reached. */
502 #define OVECTOR_START (common->ovector_start)
503 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
504 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
505 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
506
507 #if defined COMPILE_PCRE8
508 #define MOV_UCHAR SLJIT_MOV_UB
509 #define MOVU_UCHAR SLJIT_MOVU_UB
510 #elif defined COMPILE_PCRE16
511 #define MOV_UCHAR SLJIT_MOV_UH
512 #define MOVU_UCHAR SLJIT_MOVU_UH
513 #elif defined COMPILE_PCRE32
514 #define MOV_UCHAR SLJIT_MOV_UI
515 #define MOVU_UCHAR SLJIT_MOVU_UI
516 #else
517 #error Unsupported compiling mode
518 #endif
519
520 /* Shortcuts. */
521 #define DEFINE_COMPILER \
522 struct sljit_compiler *compiler = common->compiler
523 #define OP1(op, dst, dstw, src, srcw) \
524 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
525 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
526 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
527 #define LABEL() \
528 sljit_emit_label(compiler)
529 #define JUMP(type) \
530 sljit_emit_jump(compiler, (type))
531 #define JUMPTO(type, label) \
532 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
533 #define JUMPHERE(jump) \
534 sljit_set_label((jump), sljit_emit_label(compiler))
535 #define SET_LABEL(jump, label) \
536 sljit_set_label((jump), (label))
537 #define CMP(type, src1, src1w, src2, src2w) \
538 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
539 #define CMPTO(type, src1, src1w, src2, src2w, label) \
540 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
541 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
542 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
543 #define GET_LOCAL_BASE(dst, dstw, offset) \
544 sljit_get_local_base(compiler, (dst), (dstw), (offset))
545
546 #define READ_CHAR_MAX 0x7fffffff
547
548 static pcre_uchar* bracketend(pcre_uchar* cc)
549 {
550 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
551 do cc += GET(cc, 1); while (*cc == OP_ALT);
552 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
553 cc += 1 + LINK_SIZE;
554 return cc;
555 }
556
557 static int no_alternatives(pcre_uchar* cc)
558 {
559 int count = 0;
560 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
561 do
562 {
563 cc += GET(cc, 1);
564 count++;
565 }
566 while (*cc == OP_ALT);
567 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
568 return count;
569 }
570
/* Lookup table: entry v holds the number of set bits in the 4-bit value v
(0..15). The table is constant data, so it is declared const to prevent
accidental modification. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
575
/* Functions which might need modification for every newly supported opcode:
 next_opcode
 check_opcode_types
 set_private_data_ptrs
 get_framesize
 init_frame
 get_private_data_copy_length
 copy_private_data
 compile_matchingpath
 compile_backtrackingpath
*/
587
588 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
589 {
590 SLJIT_UNUSED_ARG(common);
591 switch(*cc)
592 {
593 case OP_SOD:
594 case OP_SOM:
595 case OP_SET_SOM:
596 case OP_NOT_WORD_BOUNDARY:
597 case OP_WORD_BOUNDARY:
598 case OP_NOT_DIGIT:
599 case OP_DIGIT:
600 case OP_NOT_WHITESPACE:
601 case OP_WHITESPACE:
602 case OP_NOT_WORDCHAR:
603 case OP_WORDCHAR:
604 case OP_ANY:
605 case OP_ALLANY:
606 case OP_NOTPROP:
607 case OP_PROP:
608 case OP_ANYNL:
609 case OP_NOT_HSPACE:
610 case OP_HSPACE:
611 case OP_NOT_VSPACE:
612 case OP_VSPACE:
613 case OP_EXTUNI:
614 case OP_EODN:
615 case OP_EOD:
616 case OP_CIRC:
617 case OP_CIRCM:
618 case OP_DOLL:
619 case OP_DOLLM:
620 case OP_CRSTAR:
621 case OP_CRMINSTAR:
622 case OP_CRPLUS:
623 case OP_CRMINPLUS:
624 case OP_CRQUERY:
625 case OP_CRMINQUERY:
626 case OP_CRRANGE:
627 case OP_CRMINRANGE:
628 case OP_CRPOSSTAR:
629 case OP_CRPOSPLUS:
630 case OP_CRPOSQUERY:
631 case OP_CRPOSRANGE:
632 case OP_CLASS:
633 case OP_NCLASS:
634 case OP_REF:
635 case OP_REFI:
636 case OP_DNREF:
637 case OP_DNREFI:
638 case OP_RECURSE:
639 case OP_CALLOUT:
640 case OP_ALT:
641 case OP_KET:
642 case OP_KETRMAX:
643 case OP_KETRMIN:
644 case OP_KETRPOS:
645 case OP_REVERSE:
646 case OP_ASSERT:
647 case OP_ASSERT_NOT:
648 case OP_ASSERTBACK:
649 case OP_ASSERTBACK_NOT:
650 case OP_ONCE:
651 case OP_ONCE_NC:
652 case OP_BRA:
653 case OP_BRAPOS:
654 case OP_CBRA:
655 case OP_CBRAPOS:
656 case OP_COND:
657 case OP_SBRA:
658 case OP_SBRAPOS:
659 case OP_SCBRA:
660 case OP_SCBRAPOS:
661 case OP_SCOND:
662 case OP_CREF:
663 case OP_DNCREF:
664 case OP_RREF:
665 case OP_DNRREF:
666 case OP_DEF:
667 case OP_BRAZERO:
668 case OP_BRAMINZERO:
669 case OP_BRAPOSZERO:
670 case OP_PRUNE:
671 case OP_SKIP:
672 case OP_THEN:
673 case OP_COMMIT:
674 case OP_FAIL:
675 case OP_ACCEPT:
676 case OP_ASSERT_ACCEPT:
677 case OP_CLOSE:
678 case OP_SKIPZERO:
679 return cc + PRIV(OP_lengths)[*cc];
680
681 case OP_CHAR:
682 case OP_CHARI:
683 case OP_NOT:
684 case OP_NOTI:
685 case OP_STAR:
686 case OP_MINSTAR:
687 case OP_PLUS:
688 case OP_MINPLUS:
689 case OP_QUERY:
690 case OP_MINQUERY:
691 case OP_UPTO:
692 case OP_MINUPTO:
693 case OP_EXACT:
694 case OP_POSSTAR:
695 case OP_POSPLUS:
696 case OP_POSQUERY:
697 case OP_POSUPTO:
698 case OP_STARI:
699 case OP_MINSTARI:
700 case OP_PLUSI:
701 case OP_MINPLUSI:
702 case OP_QUERYI:
703 case OP_MINQUERYI:
704 case OP_UPTOI:
705 case OP_MINUPTOI:
706 case OP_EXACTI:
707 case OP_POSSTARI:
708 case OP_POSPLUSI:
709 case OP_POSQUERYI:
710 case OP_POSUPTOI:
711 case OP_NOTSTAR:
712 case OP_NOTMINSTAR:
713 case OP_NOTPLUS:
714 case OP_NOTMINPLUS:
715 case OP_NOTQUERY:
716 case OP_NOTMINQUERY:
717 case OP_NOTUPTO:
718 case OP_NOTMINUPTO:
719 case OP_NOTEXACT:
720 case OP_NOTPOSSTAR:
721 case OP_NOTPOSPLUS:
722 case OP_NOTPOSQUERY:
723 case OP_NOTPOSUPTO:
724 case OP_NOTSTARI:
725 case OP_NOTMINSTARI:
726 case OP_NOTPLUSI:
727 case OP_NOTMINPLUSI:
728 case OP_NOTQUERYI:
729 case OP_NOTMINQUERYI:
730 case OP_NOTUPTOI:
731 case OP_NOTMINUPTOI:
732 case OP_NOTEXACTI:
733 case OP_NOTPOSSTARI:
734 case OP_NOTPOSPLUSI:
735 case OP_NOTPOSQUERYI:
736 case OP_NOTPOSUPTOI:
737 cc += PRIV(OP_lengths)[*cc];
738 #ifdef SUPPORT_UTF
739 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
740 #endif
741 return cc;
742
743 /* Special cases. */
744 case OP_TYPESTAR:
745 case OP_TYPEMINSTAR:
746 case OP_TYPEPLUS:
747 case OP_TYPEMINPLUS:
748 case OP_TYPEQUERY:
749 case OP_TYPEMINQUERY:
750 case OP_TYPEUPTO:
751 case OP_TYPEMINUPTO:
752 case OP_TYPEEXACT:
753 case OP_TYPEPOSSTAR:
754 case OP_TYPEPOSPLUS:
755 case OP_TYPEPOSQUERY:
756 case OP_TYPEPOSUPTO:
757 return cc + PRIV(OP_lengths)[*cc] - 1;
758
759 case OP_ANYBYTE:
760 #ifdef SUPPORT_UTF
761 if (common->utf) return NULL;
762 #endif
763 return cc + 1;
764
765 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
766 case OP_XCLASS:
767 return cc + GET(cc, 1);
768 #endif
769
770 case OP_MARK:
771 case OP_PRUNE_ARG:
772 case OP_SKIP_ARG:
773 case OP_THEN_ARG:
774 return cc + 1 + 2 + cc[1];
775
776 default:
777 /* All opcodes are supported now! */
778 SLJIT_ASSERT_STOP();
779 return NULL;
780 }
781 }
782
783 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 {
785 int count;
786 pcre_uchar *slot;
787
788 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
789 while (cc < ccend)
790 {
791 switch(*cc)
792 {
793 case OP_SET_SOM:
794 common->has_set_som = TRUE;
795 common->might_be_empty = TRUE;
796 cc += 1;
797 break;
798
799 case OP_REF:
800 case OP_REFI:
801 common->optimized_cbracket[GET2(cc, 1)] = 0;
802 cc += 1 + IMM2_SIZE;
803 break;
804
805 case OP_BRA:
806 case OP_CBRA:
807 case OP_SBRA:
808 case OP_SCBRA:
809 count = no_alternatives(cc);
810 if (count > 4)
811 common->read_only_data_size += count * sizeof(sljit_uw);
812 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
813 break;
814
815 case OP_CBRAPOS:
816 case OP_SCBRAPOS:
817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818 cc += 1 + LINK_SIZE + IMM2_SIZE;
819 break;
820
821 case OP_COND:
822 case OP_SCOND:
823 /* Only AUTO_CALLOUT can insert this opcode. We do
824 not intend to support this case. */
825 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826 return FALSE;
827 cc += 1 + LINK_SIZE;
828 break;
829
830 case OP_CREF:
831 common->optimized_cbracket[GET2(cc, 1)] = 0;
832 cc += 1 + IMM2_SIZE;
833 break;
834
835 case OP_DNREF:
836 case OP_DNREFI:
837 case OP_DNCREF:
838 count = GET2(cc, 1 + IMM2_SIZE);
839 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840 while (count-- > 0)
841 {
842 common->optimized_cbracket[GET2(slot, 0)] = 0;
843 slot += common->name_entry_size;
844 }
845 cc += 1 + 2 * IMM2_SIZE;
846 break;
847
848 case OP_RECURSE:
849 /* Set its value only once. */
850 if (common->recursive_head_ptr == 0)
851 {
852 common->recursive_head_ptr = common->ovector_start;
853 common->ovector_start += sizeof(sljit_sw);
854 }
855 cc += 1 + LINK_SIZE;
856 break;
857
858 case OP_CALLOUT:
859 if (common->capture_last_ptr == 0)
860 {
861 common->capture_last_ptr = common->ovector_start;
862 common->ovector_start += sizeof(sljit_sw);
863 }
864 cc += 2 + 2 * LINK_SIZE;
865 break;
866
867 case OP_THEN_ARG:
868 common->has_then = TRUE;
869 common->control_head_ptr = 1;
870 /* Fall through. */
871
872 case OP_PRUNE_ARG:
873 common->needs_start_ptr = TRUE;
874 /* Fall through. */
875
876 case OP_MARK:
877 if (common->mark_ptr == 0)
878 {
879 common->mark_ptr = common->ovector_start;
880 common->ovector_start += sizeof(sljit_sw);
881 }
882 cc += 1 + 2 + cc[1];
883 break;
884
885 case OP_THEN:
886 common->has_then = TRUE;
887 common->control_head_ptr = 1;
888 /* Fall through. */
889
890 case OP_PRUNE:
891 case OP_SKIP:
892 common->needs_start_ptr = TRUE;
893 cc += 1;
894 break;
895
896 case OP_SKIP_ARG:
897 common->control_head_ptr = 1;
898 common->has_skip_arg = TRUE;
899 cc += 1 + 2 + cc[1];
900 break;
901
902 default:
903 cc = next_opcode(common, cc);
904 if (cc == NULL)
905 return FALSE;
906 break;
907 }
908 }
909 return TRUE;
910 }
911
912 static int get_class_iterator_size(pcre_uchar *cc)
913 {
914 switch(*cc)
915 {
916 case OP_CRSTAR:
917 case OP_CRPLUS:
918 return 2;
919
920 case OP_CRMINSTAR:
921 case OP_CRMINPLUS:
922 case OP_CRQUERY:
923 case OP_CRMINQUERY:
924 return 1;
925
926 case OP_CRRANGE:
927 case OP_CRMINRANGE:
928 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
929 return 0;
930 return 2;
931
932 default:
933 return 0;
934 }
935 }
936
937 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
938 {
939 pcre_uchar *end = bracketend(begin);
940 pcre_uchar *next;
941 pcre_uchar *next_end;
942 pcre_uchar *max_end;
943 pcre_uchar type;
944 sljit_sw length = end - begin;
945 int min, max, i;
946
947 /* Detect fixed iterations first. */
948 if (end[-(1 + LINK_SIZE)] != OP_KET)
949 return FALSE;
950
951 /* Already detected repeat. */
952 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
953 return TRUE;
954
955 next = end;
956 min = 1;
957 while (1)
958 {
959 if (*next != *begin)
960 break;
961 next_end = bracketend(next);
962 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
963 break;
964 next = next_end;
965 min++;
966 }
967
968 if (min == 2)
969 return FALSE;
970
971 max = 0;
972 max_end = next;
973 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
974 {
975 type = *next;
976 while (1)
977 {
978 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
979 break;
980 next_end = bracketend(next + 2 + LINK_SIZE);
981 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
982 break;
983 next = next_end;
984 max++;
985 }
986
987 if (next[0] == type && next[1] == *begin && max >= 1)
988 {
989 next_end = bracketend(next + 1);
990 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
991 {
992 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
993 if (*next_end != OP_KET)
994 break;
995
996 if (i == max)
997 {
998 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
999 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1000 /* +2 the original and the last. */
1001 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1002 if (min == 1)
1003 return TRUE;
1004 min--;
1005 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1006 }
1007 }
1008 }
1009 }
1010
1011 if (min >= 3)
1012 {
1013 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1015 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1016 return TRUE;
1017 }
1018
1019 return FALSE;
1020 }
1021
/* Groups of case labels for switch statements over iterator opcodes.
Each macro expands to the case labels only; the user supplies the body. */

/* Character iterators, group 1. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators, group 2A. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators, group 2B. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators, group 1. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators, group 2A. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators, group 2B. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:

1073
1074 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1075 {
1076 pcre_uchar *cc = common->start;
1077 pcre_uchar *alternative;
1078 pcre_uchar *end = NULL;
1079 int private_data_ptr = *private_data_start;
1080 int space, size, bracketlen;
1081
1082 while (cc < ccend)
1083 {
1084 space = 0;
1085 size = 0;
1086 bracketlen = 0;
1087 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1088 return;
1089
1090 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1091 if (detect_repeat(common, cc))
1092 {
1093 /* These brackets are converted to repeats, so no global
1094 based single character repeat is allowed. */
1095 if (cc >= end)
1096 end = bracketend(cc);
1097 }
1098
1099 switch(*cc)
1100 {
1101 case OP_KET:
1102 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1103 {
1104 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1105 private_data_ptr += sizeof(sljit_sw);
1106 cc += common->private_data_ptrs[cc + 1 - common->start];
1107 }
1108 cc += 1 + LINK_SIZE;
1109 break;
1110
1111 case OP_ASSERT:
1112 case OP_ASSERT_NOT:
1113 case OP_ASSERTBACK:
1114 case OP_ASSERTBACK_NOT:
1115 case OP_ONCE:
1116 case OP_ONCE_NC:
1117 case OP_BRAPOS:
1118 case OP_SBRA:
1119 case OP_SBRAPOS:
1120 case OP_SCOND:
1121 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1122 private_data_ptr += sizeof(sljit_sw);
1123 bracketlen = 1 + LINK_SIZE;
1124 break;
1125
1126 case OP_CBRAPOS:
1127 case OP_SCBRAPOS:
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1131 break;
1132
1133 case OP_COND:
1134 /* Might be a hidden SCOND. */
1135 alternative = cc + GET(cc, 1);
1136 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1137 {
1138 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1139 private_data_ptr += sizeof(sljit_sw);
1140 }
1141 bracketlen = 1 + LINK_SIZE;
1142 break;
1143
1144 case OP_BRA:
1145 bracketlen = 1 + LINK_SIZE;
1146 break;
1147
1148 case OP_CBRA:
1149 case OP_SCBRA:
1150 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1151 break;
1152
1153 CASE_ITERATOR_PRIVATE_DATA_1
1154 space = 1;
1155 size = -2;
1156 break;
1157
1158 CASE_ITERATOR_PRIVATE_DATA_2A
1159 space = 2;
1160 size = -2;
1161 break;
1162
1163 CASE_ITERATOR_PRIVATE_DATA_2B
1164 space = 2;
1165 size = -(2 + IMM2_SIZE);
1166 break;
1167
1168 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1169 space = 1;
1170 size = 1;
1171 break;
1172
1173 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1174 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1175 space = 2;
1176 size = 1;
1177 break;
1178
1179 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1180 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1181 space = 2;
1182 size = 1 + IMM2_SIZE;
1183 break;
1184
1185 case OP_CLASS:
1186 case OP_NCLASS:
1187 size += 1 + 32 / sizeof(pcre_uchar);
1188 space = get_class_iterator_size(cc + size);
1189 break;
1190
1191 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1192 case OP_XCLASS:
1193 size = GET(cc, 1);
1194 space = get_class_iterator_size(cc + size);
1195 break;
1196 #endif
1197
1198 default:
1199 cc = next_opcode(common, cc);
1200 SLJIT_ASSERT(cc != NULL);
1201 break;
1202 }
1203
1204 /* Character iterators, which are not inside a repeated bracket,
1205 gets a private slot instead of allocating it on the stack. */
1206 if (space > 0 && cc >= end)
1207 {
1208 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1209 private_data_ptr += sizeof(sljit_sw) * space;
1210 }
1211
1212 if (size != 0)
1213 {
1214 if (size < 0)
1215 {
1216 cc += -size;
1217 #ifdef SUPPORT_UTF
1218 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1219 #endif
1220 }
1221 else
1222 cc += size;
1223 }
1224
1225 if (bracketlen > 0)
1226 {
1227 if (cc >= end)
1228 {
1229 end = bracketend(cc);
1230 if (end[-1 - LINK_SIZE] == OP_KET)
1231 end = NULL;
1232 }
1233 cc += bracketlen;
1234 }
1235 }
1236 *private_data_start = private_data_ptr;
1237 }
1238
1239 /* Returns with a frame_types (always < 0) if no need for frame. */
1240 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1241 {
1242 int length = 0;
1243 int possessive = 0;
1244 BOOL stack_restore = FALSE;
1245 BOOL setsom_found = recursive;
1246 BOOL setmark_found = recursive;
1247 /* The last capture is a local variable even for recursions. */
1248 BOOL capture_last_found = FALSE;
1249
1250 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1251 SLJIT_ASSERT(common->control_head_ptr != 0);
1252 *needs_control_head = TRUE;
1253 #else
1254 *needs_control_head = FALSE;
1255 #endif
1256
1257 if (ccend == NULL)
1258 {
1259 ccend = bracketend(cc) - (1 + LINK_SIZE);
1260 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1261 {
1262 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1263 /* This is correct regardless of common->capture_last_ptr. */
1264 capture_last_found = TRUE;
1265 }
1266 cc = next_opcode(common, cc);
1267 }
1268
1269 SLJIT_ASSERT(cc != NULL);
1270 while (cc < ccend)
1271 switch(*cc)
1272 {
1273 case OP_SET_SOM:
1274 SLJIT_ASSERT(common->has_set_som);
1275 stack_restore = TRUE;
1276 if (!setsom_found)
1277 {
1278 length += 2;
1279 setsom_found = TRUE;
1280 }
1281 cc += 1;
1282 break;
1283
1284 case OP_MARK:
1285 case OP_PRUNE_ARG:
1286 case OP_THEN_ARG:
1287 SLJIT_ASSERT(common->mark_ptr != 0);
1288 stack_restore = TRUE;
1289 if (!setmark_found)
1290 {
1291 length += 2;
1292 setmark_found = TRUE;
1293 }
1294 if (common->control_head_ptr != 0)
1295 *needs_control_head = TRUE;
1296 cc += 1 + 2 + cc[1];
1297 break;
1298
1299 case OP_RECURSE:
1300 stack_restore = TRUE;
1301 if (common->has_set_som && !setsom_found)
1302 {
1303 length += 2;
1304 setsom_found = TRUE;
1305 }
1306 if (common->mark_ptr != 0 && !setmark_found)
1307 {
1308 length += 2;
1309 setmark_found = TRUE;
1310 }
1311 if (common->capture_last_ptr != 0 && !capture_last_found)
1312 {
1313 length += 2;
1314 capture_last_found = TRUE;
1315 }
1316 cc += 1 + LINK_SIZE;
1317 break;
1318
1319 case OP_CBRA:
1320 case OP_CBRAPOS:
1321 case OP_SCBRA:
1322 case OP_SCBRAPOS:
1323 stack_restore = TRUE;
1324 if (common->capture_last_ptr != 0 && !capture_last_found)
1325 {
1326 length += 2;
1327 capture_last_found = TRUE;
1328 }
1329 length += 3;
1330 cc += 1 + LINK_SIZE + IMM2_SIZE;
1331 break;
1332
1333 default:
1334 stack_restore = TRUE;
1335 /* Fall through. */
1336
1337 case OP_NOT_WORD_BOUNDARY:
1338 case OP_WORD_BOUNDARY:
1339 case OP_NOT_DIGIT:
1340 case OP_DIGIT:
1341 case OP_NOT_WHITESPACE:
1342 case OP_WHITESPACE:
1343 case OP_NOT_WORDCHAR:
1344 case OP_WORDCHAR:
1345 case OP_ANY:
1346 case OP_ALLANY:
1347 case OP_ANYBYTE:
1348 case OP_NOTPROP:
1349 case OP_PROP:
1350 case OP_ANYNL:
1351 case OP_NOT_HSPACE:
1352 case OP_HSPACE:
1353 case OP_NOT_VSPACE:
1354 case OP_VSPACE:
1355 case OP_EXTUNI:
1356 case OP_EODN:
1357 case OP_EOD:
1358 case OP_CIRC:
1359 case OP_CIRCM:
1360 case OP_DOLL:
1361 case OP_DOLLM:
1362 case OP_CHAR:
1363 case OP_CHARI:
1364 case OP_NOT:
1365 case OP_NOTI:
1366
1367 case OP_EXACT:
1368 case OP_POSSTAR:
1369 case OP_POSPLUS:
1370 case OP_POSQUERY:
1371 case OP_POSUPTO:
1372
1373 case OP_EXACTI:
1374 case OP_POSSTARI:
1375 case OP_POSPLUSI:
1376 case OP_POSQUERYI:
1377 case OP_POSUPTOI:
1378
1379 case OP_NOTEXACT:
1380 case OP_NOTPOSSTAR:
1381 case OP_NOTPOSPLUS:
1382 case OP_NOTPOSQUERY:
1383 case OP_NOTPOSUPTO:
1384
1385 case OP_NOTEXACTI:
1386 case OP_NOTPOSSTARI:
1387 case OP_NOTPOSPLUSI:
1388 case OP_NOTPOSQUERYI:
1389 case OP_NOTPOSUPTOI:
1390
1391 case OP_TYPEEXACT:
1392 case OP_TYPEPOSSTAR:
1393 case OP_TYPEPOSPLUS:
1394 case OP_TYPEPOSQUERY:
1395 case OP_TYPEPOSUPTO:
1396
1397 case OP_CLASS:
1398 case OP_NCLASS:
1399 case OP_XCLASS:
1400
1401 cc = next_opcode(common, cc);
1402 SLJIT_ASSERT(cc != NULL);
1403 break;
1404 }
1405
1406 /* Possessive quantifiers can use a special case. */
1407 if (SLJIT_UNLIKELY(possessive == length))
1408 return stack_restore ? no_frame : no_stack;
1409
1410 if (length > 0)
1411 return length + 1;
1412 return stack_restore ? no_frame : no_stack;
1413 }
1414
1415 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1416 {
1417 DEFINE_COMPILER;
1418 BOOL setsom_found = recursive;
1419 BOOL setmark_found = recursive;
1420 /* The last capture is a local variable even for recursions. */
1421 BOOL capture_last_found = FALSE;
1422 int offset;
1423
1424 /* >= 1 + shortest item size (2) */
1425 SLJIT_UNUSED_ARG(stacktop);
1426 SLJIT_ASSERT(stackpos >= stacktop + 2);
1427
1428 stackpos = STACK(stackpos);
1429 if (ccend == NULL)
1430 {
1431 ccend = bracketend(cc) - (1 + LINK_SIZE);
1432 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1433 cc = next_opcode(common, cc);
1434 }
1435
1436 SLJIT_ASSERT(cc != NULL);
1437 while (cc < ccend)
1438 switch(*cc)
1439 {
1440 case OP_SET_SOM:
1441 SLJIT_ASSERT(common->has_set_som);
1442 if (!setsom_found)
1443 {
1444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1446 stackpos += (int)sizeof(sljit_sw);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448 stackpos += (int)sizeof(sljit_sw);
1449 setsom_found = TRUE;
1450 }
1451 cc += 1;
1452 break;
1453
1454 case OP_MARK:
1455 case OP_PRUNE_ARG:
1456 case OP_THEN_ARG:
1457 SLJIT_ASSERT(common->mark_ptr != 0);
1458 if (!setmark_found)
1459 {
1460 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1462 stackpos += (int)sizeof(sljit_sw);
1463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464 stackpos += (int)sizeof(sljit_sw);
1465 setmark_found = TRUE;
1466 }
1467 cc += 1 + 2 + cc[1];
1468 break;
1469
1470 case OP_RECURSE:
1471 if (common->has_set_som && !setsom_found)
1472 {
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1475 stackpos += (int)sizeof(sljit_sw);
1476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1477 stackpos += (int)sizeof(sljit_sw);
1478 setsom_found = TRUE;
1479 }
1480 if (common->mark_ptr != 0 && !setmark_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setmark_found = TRUE;
1488 }
1489 if (common->capture_last_ptr != 0 && !capture_last_found)
1490 {
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 capture_last_found = TRUE;
1497 }
1498 cc += 1 + LINK_SIZE;
1499 break;
1500
1501 case OP_CBRA:
1502 case OP_CBRAPOS:
1503 case OP_SCBRA:
1504 case OP_SCBRAPOS:
1505 if (common->capture_last_ptr != 0 && !capture_last_found)
1506 {
1507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1509 stackpos += (int)sizeof(sljit_sw);
1510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1511 stackpos += (int)sizeof(sljit_sw);
1512 capture_last_found = TRUE;
1513 }
1514 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1516 stackpos += (int)sizeof(sljit_sw);
1517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1522 stackpos += (int)sizeof(sljit_sw);
1523
1524 cc += 1 + LINK_SIZE + IMM2_SIZE;
1525 break;
1526
1527 default:
1528 cc = next_opcode(common, cc);
1529 SLJIT_ASSERT(cc != NULL);
1530 break;
1531 }
1532
1533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1534 SLJIT_ASSERT(stackpos == STACK(stacktop));
1535 }
1536
1537 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1538 {
1539 int private_data_length = needs_control_head ? 3 : 2;
1540 int size;
1541 pcre_uchar *alternative;
1542 /* Calculate the sum of the private machine words. */
1543 while (cc < ccend)
1544 {
1545 size = 0;
1546 switch(*cc)
1547 {
1548 case OP_KET:
1549 if (PRIVATE_DATA(cc) != 0)
1550 private_data_length++;
1551 cc += 1 + LINK_SIZE;
1552 break;
1553
1554 case OP_ASSERT:
1555 case OP_ASSERT_NOT:
1556 case OP_ASSERTBACK:
1557 case OP_ASSERTBACK_NOT:
1558 case OP_ONCE:
1559 case OP_ONCE_NC:
1560 case OP_BRAPOS:
1561 case OP_SBRA:
1562 case OP_SBRAPOS:
1563 case OP_SCOND:
1564 private_data_length++;
1565 cc += 1 + LINK_SIZE;
1566 break;
1567
1568 case OP_CBRA:
1569 case OP_SCBRA:
1570 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1571 private_data_length++;
1572 cc += 1 + LINK_SIZE + IMM2_SIZE;
1573 break;
1574
1575 case OP_CBRAPOS:
1576 case OP_SCBRAPOS:
1577 private_data_length += 2;
1578 cc += 1 + LINK_SIZE + IMM2_SIZE;
1579 break;
1580
1581 case OP_COND:
1582 /* Might be a hidden SCOND. */
1583 alternative = cc + GET(cc, 1);
1584 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE;
1587 break;
1588
1589 CASE_ITERATOR_PRIVATE_DATA_1
1590 if (PRIVATE_DATA(cc))
1591 private_data_length++;
1592 cc += 2;
1593 #ifdef SUPPORT_UTF
1594 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596 break;
1597
1598 CASE_ITERATOR_PRIVATE_DATA_2A
1599 if (PRIVATE_DATA(cc))
1600 private_data_length += 2;
1601 cc += 2;
1602 #ifdef SUPPORT_UTF
1603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605 break;
1606
1607 CASE_ITERATOR_PRIVATE_DATA_2B
1608 if (PRIVATE_DATA(cc))
1609 private_data_length += 2;
1610 cc += 2 + IMM2_SIZE;
1611 #ifdef SUPPORT_UTF
1612 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1613 #endif
1614 break;
1615
1616 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1617 if (PRIVATE_DATA(cc))
1618 private_data_length++;
1619 cc += 1;
1620 break;
1621
1622 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1623 if (PRIVATE_DATA(cc))
1624 private_data_length += 2;
1625 cc += 1;
1626 break;
1627
1628 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1629 if (PRIVATE_DATA(cc))
1630 private_data_length += 2;
1631 cc += 1 + IMM2_SIZE;
1632 break;
1633
1634 case OP_CLASS:
1635 case OP_NCLASS:
1636 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1637 case OP_XCLASS:
1638 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1639 #else
1640 size = 1 + 32 / (int)sizeof(pcre_uchar);
1641 #endif
1642 if (PRIVATE_DATA(cc))
1643 private_data_length += get_class_iterator_size(cc + size);
1644 cc += size;
1645 break;
1646
1647 default:
1648 cc = next_opcode(common, cc);
1649 SLJIT_ASSERT(cc != NULL);
1650 break;
1651 }
1652 }
1653 SLJIT_ASSERT(cc == ccend);
1654 return private_data_length;
1655 }
1656
/* Emits the code which copies the private data words of the byte code
between cc and ccend to the stack (save != FALSE) or back from the stack
(save == FALSE). The stack is addressed relative to STACK_TOP; stackptr and
stacktop are stack positions which are converted by STACK() below.

The words are moved through TMP1 and TMP2 alternately; tmp1next tells which
register is used next, tmp1empty / tmp2empty tell whether the register
currently holds a value. In save mode a register is written to the stack
only right before it is reloaded, so the last values are flushed after the
main loop. In restore mode up to two values are preloaded before the loop.

The function is a small state machine: "start" (save mode only) queues
common->recursive_head_ptr and, when needs_control_head is set,
common->control_head_ptr; "loop" queues the locals of one opcode per
iteration (the same opcodes as in get_private_data_copy_length); "end"
terminates the do-while loop. */
1657 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1658 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1659 {
1660 DEFINE_COMPILER;
/* Offsets of the locals queued by the current iteration (at most two). */
1661 int srcw[2];
1662 int count, size;
1663 BOOL tmp1next = TRUE;
1664 BOOL tmp1empty = TRUE;
1665 BOOL tmp2empty = TRUE;
1666 pcre_uchar *alternative;
1667 enum {
1668 start,
1669 loop,
1670 end
1671 } status;
1672
1673 status = save ? start : loop;
1674 stackptr = STACK(stackptr - 2);
1675 stacktop = STACK(stacktop - 1);
1676
1677 if (!save)
1678 {
/* Skip as many words as the "start" state queues in save mode plus one;
these are not restored by this function. */
1679 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
/* Preload the first (up to two) values. */
1680 if (stackptr < stacktop)
1681 {
1682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1683 stackptr += sizeof(sljit_sw);
1684 tmp1empty = FALSE;
1685 }
1686 if (stackptr < stacktop)
1687 {
1688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1689 stackptr += sizeof(sljit_sw);
1690 tmp2empty = FALSE;
1691 }
1692 /* The tmp1next must be TRUE in either way. */
1693 }
1694
1695 do
1696 {
1697 count = 0;
1698 switch(status)
1699 {
1700 case start:
1701 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1702 count = 1;
1703 srcw[0] = common->recursive_head_ptr;
1704 if (needs_control_head)
1705 {
1706 SLJIT_ASSERT(common->control_head_ptr != 0);
1707 count = 2;
1708 srcw[1] = common->control_head_ptr;
1709 }
1710 status = loop;
1711 break;
1712
1713 case loop:
1714 if (cc >= ccend)
1715 {
1716 status = end;
1717 break;
1718 }
1719
/* Queue the private data offsets of the current opcode in srcw and step
over the opcode. */
1720 switch(*cc)
1721 {
1722 case OP_KET:
1723 if (PRIVATE_DATA(cc) != 0)
1724 {
1725 count = 1;
1726 srcw[0] = PRIVATE_DATA(cc);
1727 }
1728 cc += 1 + LINK_SIZE;
1729 break;
1730
1731 case OP_ASSERT:
1732 case OP_ASSERT_NOT:
1733 case OP_ASSERTBACK:
1734 case OP_ASSERTBACK_NOT:
1735 case OP_ONCE:
1736 case OP_ONCE_NC:
1737 case OP_BRAPOS:
1738 case OP_SBRA:
1739 case OP_SBRAPOS:
1740 case OP_SCOND:
1741 count = 1;
1742 srcw[0] = PRIVATE_DATA(cc);
1743 SLJIT_ASSERT(srcw[0] != 0);
1744 cc += 1 + LINK_SIZE;
1745 break;
1746
1747 case OP_CBRA:
1748 case OP_SCBRA:
1749 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1750 {
1751 count = 1;
1752 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1753 }
1754 cc += 1 + LINK_SIZE + IMM2_SIZE;
1755 break;
1756
1757 case OP_CBRAPOS:
1758 case OP_SCBRAPOS:
1759 count = 2;
1760 srcw[0] = PRIVATE_DATA(cc);
1761 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1762 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1763 cc += 1 + LINK_SIZE + IMM2_SIZE;
1764 break;
1765
1766 case OP_COND:
1767 /* Might be a hidden SCOND. */
1768 alternative = cc + GET(cc, 1);
1769 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1770 {
1771 count = 1;
1772 srcw[0] = PRIVATE_DATA(cc);
1773 SLJIT_ASSERT(srcw[0] != 0);
1774 }
1775 cc += 1 + LINK_SIZE;
1776 break;
1777
1778 CASE_ITERATOR_PRIVATE_DATA_1
1779 if (PRIVATE_DATA(cc))
1780 {
1781 count = 1;
1782 srcw[0] = PRIVATE_DATA(cc);
1783 }
1784 cc += 2;
1785 #ifdef SUPPORT_UTF
1786 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1787 #endif
1788 break;
1789
1790 CASE_ITERATOR_PRIVATE_DATA_2A
1791 if (PRIVATE_DATA(cc))
1792 {
1793 count = 2;
1794 srcw[0] = PRIVATE_DATA(cc);
1795 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1796 }
1797 cc += 2;
1798 #ifdef SUPPORT_UTF
1799 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1800 #endif
1801 break;
1802
1803 CASE_ITERATOR_PRIVATE_DATA_2B
1804 if (PRIVATE_DATA(cc))
1805 {
1806 count = 2;
1807 srcw[0] = PRIVATE_DATA(cc);
1808 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1809 }
1810 cc += 2 + IMM2_SIZE;
1811 #ifdef SUPPORT_UTF
1812 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1813 #endif
1814 break;
1815
1816 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1817 if (PRIVATE_DATA(cc))
1818 {
1819 count = 1;
1820 srcw[0] = PRIVATE_DATA(cc);
1821 }
1822 cc += 1;
1823 break;
1824
1825 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1826 if (PRIVATE_DATA(cc))
1827 {
1828 count = 2;
1829 srcw[0] = PRIVATE_DATA(cc);
1830 srcw[1] = srcw[0] + sizeof(sljit_sw);
1831 }
1832 cc += 1;
1833 break;
1834
1835 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1836 if (PRIVATE_DATA(cc))
1837 {
1838 count = 2;
1839 srcw[0] = PRIVATE_DATA(cc);
1840 srcw[1] = srcw[0] + sizeof(sljit_sw);
1841 }
1842 cc += 1 + IMM2_SIZE;
1843 break;
1844
1845 case OP_CLASS:
1846 case OP_NCLASS:
1847 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1848 case OP_XCLASS:
1849 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1850 #else
1851 size = 1 + 32 / (int)sizeof(pcre_uchar);
1852 #endif
/* The iterator which follows the class decides the number of words. */
1853 if (PRIVATE_DATA(cc))
1854 switch(get_class_iterator_size(cc + size))
1855 {
1856 case 1:
1857 count = 1;
1858 srcw[0] = PRIVATE_DATA(cc);
1859 break;
1860
1861 case 2:
1862 count = 2;
1863 srcw[0] = PRIVATE_DATA(cc);
1864 srcw[1] = srcw[0] + sizeof(sljit_sw);
1865 break;
1866
1867 default:
1868 SLJIT_ASSERT_STOP();
1869 break;
1870 }
1871 cc += size;
1872 break;
1873
1874 default:
1875 cc = next_opcode(common, cc);
1876 SLJIT_ASSERT(cc != NULL);
1877 break;
1878 }
1879 break;
1880
1881 case end:
1882 SLJIT_ASSERT_STOP();
1883 break;
1884 }
1885
/* Move the queued locals. The entries of srcw are consumed from the
highest index down to zero. */
1886 while (count > 0)
1887 {
1888 count--;
1889 if (save)
1890 {
/* Write out the value loaded earlier into the register (if any), then
load the next local into the same register. */
1891 if (tmp1next)
1892 {
1893 if (!tmp1empty)
1894 {
1895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1896 stackptr += sizeof(sljit_sw);
1897 }
1898 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1899 tmp1empty = FALSE;
1900 tmp1next = FALSE;
1901 }
1902 else
1903 {
1904 if (!tmp2empty)
1905 {
1906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1907 stackptr += sizeof(sljit_sw);
1908 }
1909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1910 tmp2empty = FALSE;
1911 tmp1next = TRUE;
1912 }
1913 }
1914 else
1915 {
/* Store the preloaded register into the local, then refill the register
from the stack unless the end of the saved area is reached. */
1916 if (tmp1next)
1917 {
1918 SLJIT_ASSERT(!tmp1empty);
1919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1920 tmp1empty = stackptr >= stacktop;
1921 if (!tmp1empty)
1922 {
1923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1924 stackptr += sizeof(sljit_sw);
1925 }
1926 tmp1next = FALSE;
1927 }
1928 else
1929 {
1930 SLJIT_ASSERT(!tmp2empty);
1931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1932 tmp2empty = stackptr >= stacktop;
1933 if (!tmp2empty)
1934 {
1935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936 stackptr += sizeof(sljit_sw);
1937 }
1938 tmp1next = TRUE;
1939 }
1940 }
1941 }
1942 }
1943 while (status != end);
1944
/* Flush the values still held in the registers. The register which would
be used next holds the older value, so it is written first. */
1945 if (save)
1946 {
1947 if (tmp1next)
1948 {
1949 if (!tmp1empty)
1950 {
1951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1952 stackptr += sizeof(sljit_sw);
1953 }
1954 if (!tmp2empty)
1955 {
1956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1957 stackptr += sizeof(sljit_sw);
1958 }
1959 }
1960 else
1961 {
1962 if (!tmp2empty)
1963 {
1964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1965 stackptr += sizeof(sljit_sw);
1966 }
1967 if (!tmp1empty)
1968 {
1969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1970 stackptr += sizeof(sljit_sw);
1971 }
1972 }
1973 }
/* The whole range must be processed and the stack area fully used; after a
restore both registers must have been consumed. */
1974 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1975 }
1976
1977 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1978 {
1979 pcre_uchar *end = bracketend(cc);
1980 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1981
1982 /* Assert captures then. */
1983 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1984 current_offset = NULL;
1985 /* Conditional block does not. */
1986 if (*cc == OP_COND || *cc == OP_SCOND)
1987 has_alternatives = FALSE;
1988
1989 cc = next_opcode(common, cc);
1990 if (has_alternatives)
1991 current_offset = common->then_offsets + (cc - common->start);
1992
1993 while (cc < end)
1994 {
1995 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1996 cc = set_then_offsets(common, cc, current_offset);
1997 else
1998 {
1999 if (*cc == OP_ALT && has_alternatives)
2000 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2001 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2002 *current_offset = 1;
2003 cc = next_opcode(common, cc);
2004 }
2005 }
2006
2007 return end;
2008 }
2009
/* The case label groups are not needed after this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2016
2017 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2018 {
2019 return (value & (value - 1)) == 0;
2020 }
2021
2022 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2023 {
2024 while (list)
2025 {
2026 /* sljit_set_label is clever enough to do nothing
2027 if either the jump or the label is NULL. */
2028 SET_LABEL(list->jump, label);
2029 list = list->next;
2030 }
2031 }
2032
2033 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2034 {
2035 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2036 if (list_item)
2037 {
2038 list_item->next = *list;
2039 list_item->jump = jump;
2040 *list = list_item;
2041 }
2042 }
2043
2044 static void add_stub(compiler_common *common, struct sljit_jump *start)
2045 {
2046 DEFINE_COMPILER;
2047 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2048
2049 if (list_item)
2050 {
2051 list_item->start = start;
2052 list_item->quit = LABEL();
2053 list_item->next = common->stubs;
2054 common->stubs = list_item;
2055 }
2056 }
2057
2058 static void flush_stubs(compiler_common *common)
2059 {
2060 DEFINE_COMPILER;
2061 stub_list* list_item = common->stubs;
2062
2063 while (list_item)
2064 {
2065 JUMPHERE(list_item->start);
2066 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2067 JUMPTO(SLJIT_JUMP, list_item->quit);
2068 list_item = list_item->next;
2069 }
2070 common->stubs = NULL;
2071 }
2072
2073 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2074 {
2075 DEFINE_COMPILER;
2076 label_addr_list *label_addr;
2077
2078 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2079 if (label_addr == NULL)
2080 return;
2081 label_addr->label = LABEL();
2082 label_addr->update_addr = update_addr;
2083 label_addr->next = common->label_addrs;
2084 common->label_addrs = label_addr;
2085 }
2086
2087 static SLJIT_INLINE void count_match(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090
2091 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093 }
2094
2095 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2096 {
2097 /* May destroy all locals and registers except TMP2. */
2098 DEFINE_COMPILER;
2099
2100 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2101 #ifdef DESTROY_REGISTERS
2102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2103 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2104 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2107 #endif
2108 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2109 }
2110
2111 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2112 {
2113 DEFINE_COMPILER;
2114 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2115 }
2116
2117 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2118 {
2119 DEFINE_COMPILER;
2120 struct sljit_label *loop;
2121 int i;
2122
2123 /* At this point we can freely use all temporary registers. */
2124 SLJIT_ASSERT(length > 1);
2125 /* TMP1 returns with begin - 1. */
2126 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2127 if (length < 8)
2128 {
2129 for (i = 1; i < length; i++)
2130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2131 }
2132 else
2133 {
2134 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2135 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2136 loop = LABEL();
2137 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2139 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2140 }
2141 }
2142
2143 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2144 {
2145 DEFINE_COMPILER;
2146 struct sljit_label *loop;
2147 int i;
2148
2149 SLJIT_ASSERT(length > 1);
2150 /* OVECTOR(1) contains the "string begin - 1" constant. */
2151 if (length > 2)
2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2153 if (length < 8)
2154 {
2155 for (i = 2; i < length; i++)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2157 }
2158 else
2159 {
2160 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2161 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2162 loop = LABEL();
2163 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2164 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2165 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2166 }
2167
2168 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2169 if (common->mark_ptr != 0)
2170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2171 if (common->control_head_ptr != 0)
2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2176 }
2177
2178 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2179 {
2180 while (current != NULL)
2181 {
2182 switch (current[-2])
2183 {
2184 case type_then_trap:
2185 break;
2186
2187 case type_mark:
2188 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2189 return current[-4];
2190 break;
2191
2192 default:
2193 SLJIT_ASSERT_STOP();
2194 break;
2195 }
2196 current = (sljit_sw*)current[-1];
2197 }
2198 return -1;
2199 }
2200
2201 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2202 {
2203 DEFINE_COMPILER;
2204 struct sljit_label *loop;
2205 struct sljit_jump *early_quit;
2206
2207 /* At this point we can freely use all registers. */
2208 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2210
2211 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2212 if (common->mark_ptr != 0)
2213 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2214 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2215 if (common->mark_ptr != 0)
2216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2217 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2218 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2219 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2220 /* Unlikely, but possible */
2221 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2222 loop = LABEL();
2223 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2224 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2225 /* Copy the integer value to the output buffer */
2226 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2227 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2228 #endif
2229 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2231 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2232 JUMPHERE(early_quit);
2233
2234 /* Calculate the return value, which is the maximum ovector value. */
2235 if (topbracket > 1)
2236 {
2237 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2238 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2239
2240 /* OVECTOR(0) is never equal to SLJIT_S2. */
2241 loop = LABEL();
2242 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2243 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2244 CMPTO(SLJIT_C_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2245 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2246 }
2247 else
2248 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2249 }
2250
/* Emits the exit sequence used when only a partial match was found: the
return register is set to PCRE_ERROR_PARTIAL, up to three entries of the
offsets array in the arguments are filled in (as far as real_offset_count
allows), and control is transferred to the quit label. */
2251 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2252 {
2253 DEFINE_COMPILER;
2254 struct sljit_jump *jump;
2255
/* STR_END is overwritten below (as SLJIT_S1), so it must be that register. */
2256 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2257 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2258 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2259
2260 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2262 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* With fewer than two offset slots nothing can be stored: quit immediately. */
2263 CMPTO(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2264
2265 /* Store match begin and end. */
2266 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2267 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2268
/* offsets[2] is written only when at least three slots are available. Its
value comes from start_ptr in hard partial mode, otherwise from the word
which follows hit_start; it is stored relative to begin. */
2269 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2270 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2271 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte distance into a count of character units. */
2272 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2273 #endif
2274 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2275 JUMPHERE(jump);
2276
/* offsets[1] = STR_END - begin. The start value for offsets[0] is loaded
first (start_used_ptr in hard partial mode, hit_start otherwise). */
2277 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2278 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2279 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2280 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2281 #endif
2282 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2283
/* offsets[0] = loaded start value - begin. */
2284 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2285 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2286 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2287 #endif
2288 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2289
2290 JUMPTO(SLJIT_JUMP, quit);
2291 }
2292
2293 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2294 {
2295 /* May destroy TMP1. */
2296 DEFINE_COMPILER;
2297 struct sljit_jump *jump;
2298
2299 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2300 {
2301 /* The value of -1 must be kept for start_used_ptr! */
2302 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2303 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2304 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2305 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2307 JUMPHERE(jump);
2308 }
2309 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2310 {
2311 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2313 JUMPHERE(jump);
2314 }
2315 }
2316
2317 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2318 {
2319 /* Detects if the character has an othercase. */
2320 unsigned int c;
2321
2322 #ifdef SUPPORT_UTF
2323 if (common->utf)
2324 {
2325 GETCHAR(c, cc);
2326 if (c > 127)
2327 {
2328 #ifdef SUPPORT_UCP
2329 return c != UCD_OTHERCASE(c);
2330 #else
2331 return FALSE;
2332 #endif
2333 }
2334 #ifndef COMPILE_PCRE8
2335 return common->fcc[c] != c;
2336 #endif
2337 }
2338 else
2339 #endif
2340 c = *cc;
2341 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2342 }
2343
2344 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2345 {
2346 /* Returns with the othercase. */
2347 #ifdef SUPPORT_UTF
2348 if (common->utf && c > 127)
2349 {
2350 #ifdef SUPPORT_UCP
2351 return UCD_OTHERCASE(c);
2352 #else
2353 return c;
2354 #endif
2355 }
2356 #endif
2357 return TABLE_GET(c, common->fcc, c);
2358 }
2359
2360 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2361 {
2362 /* Detects if the character and its othercase has only 1 bit difference. */
2363 unsigned int c, oc, bit;
2364 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2365 int n;
2366 #endif
2367
2368 #ifdef SUPPORT_UTF
2369 if (common->utf)
2370 {
2371 GETCHAR(c, cc);
2372 if (c <= 127)
2373 oc = common->fcc[c];
2374 else
2375 {
2376 #ifdef SUPPORT_UCP
2377 oc = UCD_OTHERCASE(c);
2378 #else
2379 oc = c;
2380 #endif
2381 }
2382 }
2383 else
2384 {
2385 c = *cc;
2386 oc = TABLE_GET(c, common->fcc, c);
2387 }
2388 #else
2389 c = *cc;
2390 oc = TABLE_GET(c, common->fcc, c);
2391 #endif
2392
2393 SLJIT_ASSERT(c != oc);
2394
2395 bit = c ^ oc;
2396 /* Optimized for English alphabet. */
2397 if (c <= 127 && bit == 0x20)
2398 return (0 << 8) | 0x20;
2399
2400 /* Since c != oc, they must have at least 1 bit difference. */
2401 if (!is_powerof2(bit))
2402 return 0;
2403
2404 #if defined COMPILE_PCRE8
2405
2406 #ifdef SUPPORT_UTF
2407 if (common->utf && c > 127)
2408 {
2409 n = GET_EXTRALEN(*cc);
2410 while ((bit & 0x3f) == 0)
2411 {
2412 n--;
2413 bit >>= 6;
2414 }
2415 return (n << 8) | bit;
2416 }
2417 #endif /* SUPPORT_UTF */
2418 return (0 << 8) | bit;
2419
2420 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2421
2422 #ifdef SUPPORT_UTF
2423 if (common->utf && c > 65535)
2424 {
2425 if (bit >= (1 << 10))
2426 bit >>= 10;
2427 else
2428 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2429 }
2430 #endif /* SUPPORT_UTF */
2431 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2432
2433 #endif /* COMPILE_PCRE[8|16|32] */
2434 }
2435
2436 static void check_partial(compiler_common *common, BOOL force)
2437 {
2438 /* Checks whether a partial matching is occurred. Does not modify registers. */
2439 DEFINE_COMPILER;
2440 struct sljit_jump *jump = NULL;
2441
2442 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2443
2444 if (common->mode == JIT_COMPILE)
2445 return;
2446
2447 if (!force)
2448 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2449 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2451
2452 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2454 else
2455 {
2456 if (common->partialmatchlabel != NULL)
2457 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458 else
2459 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460 }
2461
2462 if (jump != NULL)
2463 JUMPHERE(jump);
2464 }
2465
2466 static void check_str_end(compiler_common *common, jump_list **end_reached)
2467 {
2468 /* Does not affect registers. Usually used in a tight spot. */
2469 DEFINE_COMPILER;
2470 struct sljit_jump *jump;
2471
2472 if (common->mode == JIT_COMPILE)
2473 {
2474 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2475 return;
2476 }
2477
2478 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2479 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2480 {
2481 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2483 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2484 }
2485 else
2486 {
2487 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2488 if (common->partialmatchlabel != NULL)
2489 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2490 else
2491 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2492 }
2493 JUMPHERE(jump);
2494 }
2495
2496 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2497 {
2498 DEFINE_COMPILER;
2499 struct sljit_jump *jump;
2500
2501 if (common->mode == JIT_COMPILE)
2502 {
2503 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2504 return;
2505 }
2506
2507 /* Partial matching mode. */
2508 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2509 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2510 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2511 {
2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2513 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2514 }
2515 else
2516 {
2517 if (common->partialmatchlabel != NULL)
2518 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2519 else
2520 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2521 }
2522 JUMPHERE(jump);
2523 }
2524
2525 static void peek_char(compiler_common *common, pcre_uint32 max)
2526 {
2527 /* Reads the character into TMP1, keeps STR_PTR.
2528 Does not check STR_END. TMP2 Destroyed. */
2529 DEFINE_COMPILER;
2530 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2531 struct sljit_jump *jump;
2532 #endif
2533
2534 SLJIT_UNUSED_ARG(max);
2535
2536 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2537 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2538 if (common->utf)
2539 {
2540 if (max < 128) return;
2541
2542 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2544 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2545 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546 JUMPHERE(jump);
2547 }
2548 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2549
2550 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2551 if (common->utf)
2552 {
2553 if (max < 0xd800) return;
2554
2555 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2556 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2557 /* TMP2 contains the high surrogate. */
2558 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2559 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2560 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2563 JUMPHERE(jump);
2564 }
2565 #endif
2566 }
2567
2568 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569
2570 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2571 {
2572 /* Tells whether the character codes below 128 are enough
2573 to determine a match. */
2574 const pcre_uint8 value = nclass ? 0xff : 0;
2575 const pcre_uint8* end = bitset + 32;
2576
2577 bitset += 16;
2578 do
2579 {
2580 if (*bitset++ != value)
2581 return FALSE;
2582 }
2583 while (bitset < end);
2584 return TRUE;
2585 }
2586
2587 static void read_char7_type(compiler_common *common, BOOL full_read)
2588 {
2589 /* Reads the precise character type of a character into TMP1, if the character
2590 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2591 full_read argument tells whether characters above max are accepted or not.
When full_read is set, a first byte >= 0xc0 also advances STR_PTR by the
value found in utf8_table4 for that byte. TMP2 is destroyed.
NOTE(review): the emitted code loads ctypes[first byte] unconditionally; the
"returns with zero" case presumably relies on the table contents - confirm. */
2592 DEFINE_COMPILER;
2593 struct sljit_jump *jump;
2594
2595 SLJIT_ASSERT(common->utf);
2596
/* Load the first byte and step over it. */
2597 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599
/* TMP1 = ctypes[first byte]. */
2600 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601
2602 if (full_read)
2603 {
/* Bytes below 0xc0 need no further skipping. */
2604 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2605 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2607 JUMPHERE(jump);
2608 }
2609 }
2610
2611 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612
2613 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2614 {
2615 /* Reads the precise value of a character into TMP1, if the character is
2616 between min and max (c >= min && c <= max). Otherwise it returns with a value
2617 outside the range. Does not check STR_END.
The first character unit is always consumed. The min / max pair is used at
compile time to select the cheapest decoder which is still precise inside
the range. TMP2 is used as scratch in the UTF paths, and RETURN_ADDR in some
of the UTF-8 paths when update_str_ptr is set. */
2618 DEFINE_COMPILER;
2619 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2620 struct sljit_jump *jump;
2621 #endif
2622 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623 struct sljit_jump *jump2;
2624 #endif
2625
2626 SLJIT_UNUSED_ARG(update_str_ptr);
2627 SLJIT_UNUSED_ARG(min);
2628 SLJIT_UNUSED_ARG(max);
2629 SLJIT_ASSERT(min <= max);
2630
/* Load the first unit and step over it. */
2631 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633
2634 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2635 if (common->utf)
2636 {
/* Only values below 128 matter and STR_PTR need not be exact: done. */
2637 if (max < 128 && !update_str_ptr) return;
2638
/* First bytes below 0xc0 skip the whole multi-byte handling. */
2639 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2640 if (min >= 0x10000)
2641 {
/* Only first bytes 0xf0..0xf7 are decoded (three more bytes are merged);
other first bytes leave through jump2 without a precise value. */
2642 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2643 if (update_str_ptr)
2644 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2646 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2652 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2655 if (!update_str_ptr)
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2657 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660 JUMPHERE(jump2);
/* With update_str_ptr the length taken from utf8_table4 is added on both
paths, so STR_PTR is advanced even when the value was not decoded. */
2661 if (update_str_ptr)
2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2663 }
2664 else if (min >= 0x800 && max <= 0xffff)
2665 {
/* Only first bytes 0xe0..0xef are decoded (two more bytes are merged). */
2666 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2667 if (update_str_ptr)
2668 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2670 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2675 if (!update_str_ptr)
2676 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 JUMPHERE(jump2);
2681 if (update_str_ptr)
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683 }
/* General case: call a shared decoder; the utfreadchar16 list is used when
max is below 0x10000. */
2684 else if (max >= 0x800)
2685 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2686 else if (max < 128)
2687 {
/* Reached only with update_str_ptr set (see the early return above): the
value is not decoded, the remaining bytes are just skipped. */
2688 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2690 }
2691 else
2692 {
/* 128 <= max < 0x800: merge exactly one more byte. */
2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694 if (!update_str_ptr)
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696 else
2697 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2700 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2701 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2702 if (update_str_ptr)
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2704 }
2705 JUMPHERE(jump);
2706 }
2707 #endif
2708
2709 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2710 if (common->utf)
2711 {
2712 if (max >= 0x10000)
2713 {
/* Full decode: a unit in 0xd800..0xdbff is combined with the next unit. */
2714 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2715 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2716 /* TMP2 contains the high surrogate. */
2717 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2718 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2719 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2722 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723 JUMPHERE(jump);
2724 return;
2725 }
2726
2727 if (max < 0xd800 && !update_str_ptr) return;
2728
2729 /* Skip low surrogate if necessary. */
2730 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2732 if (update_str_ptr)
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* A surrogate pair is never inside the range here (max < 0x10000), so it
is replaced by the out-of-range value 0x10000. */
2734 if (max >= 0xd800)
2735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2736 JUMPHERE(jump);
2737 }
2738 #endif
2739 }
2740
/* Reads a character with no range restriction: forwards to
read_char_range() with min 0, max READ_CHAR_MAX and update_str_ptr TRUE. */
2741 static SLJIT_INLINE void read_char(compiler_common *common)
2742 {
2743 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744 }
2745
2746 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2747 {
2748 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type comes from the ctypes table, which has entries only for values
below 256; the paths below load zero into TMP1 for larger values. TMP2 is
destroyed. */
2749 DEFINE_COMPILER;
2750 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2751 struct sljit_jump *jump;
2752 #endif
2753 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2754 struct sljit_jump *jump2;
2755 #endif
2756
2757 SLJIT_UNUSED_ARG(update_str_ptr);
2758
/* Load the first unit into TMP2 and step over it. */
2759 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761
2762 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2763 if (common->utf)
2764 {
2765 /* This can be an extra read in some situations, but hopefully
2766 it is needed in most cases. */
2767 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* First bytes below 0xc0 are complete: the type is already in TMP1. */
2768 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2769 if (!update_str_ptr)
2770 {
/* Inline path: merge one following byte only, and look the type up when
the merged value is at most 255 (otherwise TMP1 stays zero). */
2771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2774 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2775 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2776 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2778 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2780 JUMPHERE(jump2);
2781 }
2782 else
/* Otherwise the shared utfreadtype8 helper is called. */
2783 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2784 JUMPHERE(jump);
2785 return;
2786 }
2787 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2788
2789 #if !defined COMPILE_PCRE8
2790 /* The ctypes array contains only 256 values. */
2791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2792 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2793 #endif
2794 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795 #if !defined COMPILE_PCRE8
2796 JUMPHERE(jump);
2797 #endif
2798
2799 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2800 if (common->utf && update_str_ptr)
2801 {
2802 /* Skip low surrogate if necessary. */
/* The extra unit is skipped when the first unit is in 0xd800..0xdbff. */
2803 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2804 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806 JUMPHERE(jump);
2807 }
2808 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2809 }
2810
2811 static void skip_char_back(compiler_common *common)
2812 {
2813 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.
Outside of the UTF-8 / UTF-16 paths this is a single one-unit decrement of
STR_PTR and TMP1 is left untouched. */
2814 DEFINE_COMPILER;
2815 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2816 #if defined COMPILE_PCRE8
2817 struct sljit_label *label;
2818
2819 if (common->utf)
2820 {
/* Step back one byte at a time for as long as the byte stepped over has
the form 10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
2821 label = LABEL();
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2823 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2825 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2826 return;
2827 }
2828 #elif defined COMPILE_PCRE16
2829 if (common->utf)
2830 {
2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2832 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2833 /* Skip low surrogate if necessary. */
/* Branch free: TMP1 becomes the result of the equality test
(unit & 0xfc00) == 0xdc00, is shifted left by one, and is then
subtracted from STR_PTR. */
2834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2836 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2837 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2838 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2839 return;
2840 }
2841 #endif /* COMPILE_PCRE[8|16] */
2842 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2843 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 }
2845
2846 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2847 {
2848 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2849 DEFINE_COMPILER;
2850 struct sljit_jump *jump;
2851
2852 if (nltype == NLTYPE_ANY)
2853 {
2854 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2855 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2856 }
2857 else if (nltype == NLTYPE_ANYCRLF)
2858 {
2859 if (jumpifmatch)
2860 {
2861 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2863 }
2864 else
2865 {
2866 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2867 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2868 JUMPHERE(jump);
2869 }
2870 }
2871 else
2872 {
2873 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2874 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2875 }
2876 }
2877
2878 #ifdef SUPPORT_UTF
2879
2880 #if defined COMPILE_PCRE8
2881 static void do_utfreadchar(compiler_common *common)
2882 {
2883 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2884 of the character (>= 0xc0). Return char value in TMP1, length in TMP2.
On entry STR_PTR points after the first byte; on return it points after the
last byte of the character. Sequences of two, three and four bytes are
handled. */
2885 DEFINE_COMPILER;
2886 struct sljit_jump *jump;
2887
2888 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2889 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2891 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2892 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2893 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894
2895 /* Searching for the first zero. */
/* Bit 0x800 of the merged value is bit 0x20 of the first byte. */
2896 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2897 jump = JUMP(SLJIT_C_NOT_ZERO);
2898 /* Two byte sequence. */
2899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2901 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2902
2903 JUMPHERE(jump);
/* Clear the marker bit and merge the third byte. */
2904 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2905 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2906 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2907 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2908 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2909
/* After the two shifts, bit 0x10000 is bit 0x10 of the first byte. */
2910 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2911 jump = JUMP(SLJIT_C_NOT_ZERO);
2912 /* Three byte sequence. */
2913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2915 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2916
2917 /* Four byte sequence. */
2918 JUMPHERE(jump);
2919 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2921 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2922 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2923 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2924 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927 }
2928
2929 static void do_utfreadchar16(compiler_common *common)
2930 {
2931 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2932 of the character (>= 0xc0). Return value in TMP1.
Only two and three byte sequences are decoded. For a longer sequence one
extra byte is skipped before the three byte decoding, so STR_PTR is still
advanced past four bytes in total; TMP1 is then not the character value.
TMP2 is destroyed. */
2933 DEFINE_COMPILER;
2934 struct sljit_jump *jump;
2935
2936 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2937 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942
2943 /* Searching for the first zero. */
2944 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945 jump = JUMP(SLJIT_C_NOT_ZERO);
2946 /* Two byte sequence. */
2947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949
2950 JUMPHERE(jump);
/* Bit 0x400 of the merged value is bit 0x10 of the first byte. TMP2 becomes
the result of the "bit is set" test and is added to STR_PTR. */
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953 /* This code runs only in 8 bit mode. No need to shift the value. */
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960 /* Three byte sequence. */
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963 }
2964
2965 static void do_utfreadtype8(compiler_common *common)
2966 {
2967 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2968 of the character (>= 0xc0). Return value in TMP1.
The value is a ctypes table entry when the decoded character is below 256
and zero otherwise. STR_PTR is moved past the remaining bytes of the
character. TMP2 is destroyed. */
2969 DEFINE_COMPILER;
2970 struct sljit_jump *jump;
2971 struct sljit_jump *compare;
2972
2973 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2974
/* Bit 0x20 of the first byte is clear only for two byte sequences. */
2975 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2976 jump = JUMP(SLJIT_C_NOT_ZERO);
2977 /* Two byte sequence. */
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981 /* The upper 5 bits are known at this point. */
/* More than 3 in the upper bits means the character is 256 or above. */
2982 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2983 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2986 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988
2989 JUMPHERE(compare);
2990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2992
2993 /* We only have types for characters less than 256. */
/* Longer sequences: skip the number of bytes given by utf8_table4 for the
first byte and return zero. */
2994 JUMPHERE(jump);
2995 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999 }
3000
3001 #endif /* COMPILE_PCRE8 */
3002
3003 #endif /* SUPPORT_UTF */
3004
3005 #ifdef SUPPORT_UCP
3006
3007 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3008 #define UCD_BLOCK_MASK 127
3009 #define UCD_BLOCK_SHIFT 7
3010
3011 static void do_getucd(compiler_common *common)
3012 {
3013 /* Search the UCD record for the character comes in TMP1.
3014 Returns chartype in TMP1 and UCD offset in TMP2.
The lookup has two stages: ucd_stage1 is indexed by the character divided
by the block size, and the byte found there selects a block of ucd_stage2,
which is indexed by the character's position inside its block. */
3015 DEFINE_COMPILER;
3016
/* The shifts and scaled loads below depend on these sizes. */
3017 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3018
3019 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] */
3020 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3021 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP2 << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK) */
3022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3023 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3024 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], loaded as an unsigned 16 bit value.
NOTE(review): the trailing 1 of SLJIT_MEM2 is presumably the index shift
for 2 byte entries - confirm against the sljit documentation. */
3025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3026 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of record TMP2; the 3 presumably scales the index
by the asserted record size of 8 bytes. */
3027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3028 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3030 }
3031 #endif
3032
/* Emits the entry code of the main matching loop and returns the label of
the code which advances STR_PTR to the next start position. The first entry
jumps over the advancing code (see the start jump). When firstline is set, a
scan which stores the end of the first line in first_line_end is emitted
before the entry. */
3033 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3034 {
3035 DEFINE_COMPILER;
3036 struct sljit_label *mainloop;
3037 struct sljit_label *newlinelabel = NULL;
3038 struct sljit_jump *start;
3039 struct sljit_jump *end = NULL;
3040 struct sljit_jump *nl = NULL;
3041 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3042 struct sljit_jump *singlechar;
3043 #endif
3044 jump_list *newline = NULL;
3045 BOOL newlinecheck = FALSE;
3046 BOOL readuchar = FALSE;
3047
/* The extra newline handling in the advancing code is emitted only when
neither hascrorlf nor firstline is set, and the newline is ANY, ANYCRLF or
a two unit sequence (newline > 255). */
3048 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3049 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3050 newlinecheck = TRUE;
3051
3052 if (firstline)
3053 {
3054 /* Search for the end of the first line. */
3055 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 during the scan and restored afterwards. */
3056 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3057
3058 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3059 {
/* Two unit newline: the units before and at STR_PTR are compared with the
high and low byte of common->newline. first_line_end is set to STR_PTR
minus one unit when the loop is left. */
3060 mainloop = LABEL();
3061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3062 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3064 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3065 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3067 JUMPHERE(end);
3068 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3069 }
3070 else
3071 {
/* Other newline types: first_line_end is stored before each character is
read, so it holds the position of the newline when one is found. */
3072 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3073 mainloop = LABEL();
3074 /* Continual stores does not cause data dependency. */
3075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3076 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077 check_newlinechar(common, common->nltype, &newline, TRUE);
3078 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079 JUMPHERE(end);
/* No newline was found: the first line ends where the scan stopped. */
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3081 set_jumps(newline, LABEL());
3082 }
3083
3084 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3085 }
3086
/* The first entry must not advance STR_PTR: jump to the end of this code. */
3087 start = JUMP(SLJIT_JUMP);
3088
3089 if (newlinecheck)
3090 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step over it, and step over the next unit as well when it
equals the low byte. */
3091 newlinelabel = LABEL();
3092 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3093 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3094 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3096 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3097 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3098 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3099 #endif
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3101 nl = JUMP(SLJIT_JUMP);
3102 }
3103
/* This label is the return value: the code which moves to the next start
position begins here. */
3104 mainloop = LABEL();
3105
3106 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3107 #ifdef SUPPORT_UTF
3108 if (common->utf) readuchar = TRUE;
3109 #endif
3110 if (newlinecheck) readuchar = TRUE;
3111
/* The current unit is loaded only when one of the checks below needs it. */
3112 if (readuchar)
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3114
3115 if (newlinecheck)
3116 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3117
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3120 #if defined COMPILE_PCRE8
3121 if (common->utf)
3122 {
/* UTF-8: for a first byte >= 0xc0 also skip the number of bytes given by
utf8_table4. */
3123 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3124 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3126 JUMPHERE(singlechar);
3127 }
3128 #elif defined COMPILE_PCRE16
3129 if (common->utf)
3130 {
/* UTF-16: skip one more unit when (unit & 0xfc00) == 0xd800. The result of
the equality test is shifted left by one before it is added to STR_PTR. */
3131 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3132 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3134 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3135 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3136 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3137 JUMPHERE(singlechar);
3138 }
3139 #endif /* COMPILE_PCRE[8|16] */
3140 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The first entry and both exits of the newline code continue from here. */
3141 JUMPHERE(start);
3142
3143 if (newlinecheck)
3144 {
3145 JUMPHERE(end);
3146 JUMPHERE(nl);
3147 }
3148
3149 return mainloop;
3150 }
3151
/* Maximum number of leading character positions collected by scan_prefix()
and examined by fast_forward_first_n_chars(). */
3152 #define MAX_N_CHARS 16
/* Number of pcre_uint8 slots reserved for each character position in the
"bytes" array. Slot 0 holds the number of recorded byte values (255 means
that the set is not tracked anymore), the following slots hold the values
themselves (see add_prefix_byte()). */
3153 #define MAX_N_BYTES 8
3154
3155 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3156 {
3157 pcre_uint8 len = bytes[0];
3158 int i;
3159
3160 if (len == 255)
3161 return;
3162
3163 if (len == 0)
3164 {
3165 bytes[0] = 1;
3166 bytes[1] = byte;
3167 return;
3168 }
3169
3170 for (i = len; i > 0; i--)
3171 if (bytes[i] == byte)
3172 return;
3173
3174 if (len >= MAX_N_BYTES - 1)
3175 {
3176 bytes[0] = 255;
3177 return;
3178 }
3179
3180 len++;
3181 bytes[len] = byte;
3182 bytes[0] = len;
3183 }
3184
/* Scans the compiled pattern starting at cc, and collects information about
the characters which can appear at the first positions of a match.

For each position recorded by this invocation:
  chars[0] receives the merged character value, chars[1] the mask of those
    bits which may differ (all bits are set for an unknown character);
  bytes[0..MAX_N_BYTES-1] receives the set of possible (pcre_uint8)
    truncated code unit values, maintained by add_prefix_byte(); bytes[0] is
    set to 255 for an unknown character.
After a position is recorded, chars is advanced by 2 and bytes is advanced by
MAX_N_BYTES.

Arguments:
  common     the compiler_common structure (utf, fcc and ctypes are read)
  cc         the first opcode to scan
  chars      character / mask pairs, merged with their previous content
  bytes      byte sets, merged with their previous content
  max_chars  the maximum number of positions to record

Returns the number of positions recorded by this invocation. */
3185 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3186 {
3187 /* Recursive function, which scans prefix literals. */
3188 BOOL last, any, caseless;
3189 int len, repeat, len_save, consumed = 0;
3190 pcre_uint32 chr, mask;
3191 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the code units of the other case of a character. */
3192 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3193 pcre_uchar othercase[8];
3194 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3195 pcre_uchar othercase[2];
3196 #else
3197 pcre_uchar othercase[1];
3198 #endif
3199
3200 repeat = 1;
3201 while (TRUE)
3202 {
/* last: scanning stops after this item; any: the item matches a character
which is not tracked; caseless: the other case must be recorded as well. */
3203 last = TRUE;
3204 any = FALSE;
3205 caseless = FALSE;
3206 switch (*cc)
3207 {
3208 case OP_CHARI:
3209 caseless = TRUE;
/* Fall through. */
3210 case OP_CHAR:
3211 last = FALSE;
3212 cc++;
3213 break;
3214
3215 case OP_SOD:
3216 case OP_SOM:
3217 case OP_SET_SOM:
3218 case OP_NOT_WORD_BOUNDARY:
3219 case OP_WORD_BOUNDARY:
3220 case OP_EODN:
3221 case OP_EOD:
3222 case OP_CIRC:
3223 case OP_CIRCM:
3224 case OP_DOLL:
3225 case OP_DOLLM:
3226 /* Zero width assertions. */
3227 cc++;
3228 continue;
3229
/* Assertions are skipped entirely, their content is not scanned. */
3230 case OP_ASSERT:
3231 case OP_ASSERT_NOT:
3232 case OP_ASSERTBACK:
3233 case OP_ASSERTBACK_NOT:
3234 cc = bracketend(cc);
3235 continue;
3236
/* At least one occurrence: the character is recorded once, and scanning
stops after it (last remains TRUE). */
3237 case OP_PLUSI:
3238 case OP_MINPLUSI:
3239 case OP_POSPLUSI:
3240 caseless = TRUE;
/* Fall through. */
3241 case OP_PLUS:
3242 case OP_MINPLUS:
3243 case OP_POSPLUS:
3244 cc++;
3245 break;
3246
3247 case OP_EXACTI:
3248 caseless = TRUE;
/* Fall through. */
3249 case OP_EXACT:
3250 repeat = GET2(cc, 1);
3251 last = FALSE;
3252 cc += 1 + IMM2_SIZE;
3253 break;
3254
/* Optional character: the path which skips the character is scanned first
by a recursive call. Its return value becomes the new limit for the path
which contains the character. */
3255 case OP_QUERYI:
3256 case OP_MINQUERYI:
3257 case OP_POSQUERYI:
3258 caseless = TRUE;
/* Fall through. */
3259 case OP_QUERY:
3260 case OP_MINQUERY:
3261 case OP_POSQUERY:
3262 len = 1;
3263 cc++;
3264 #ifdef SUPPORT_UTF
3265 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3266 #endif
3267 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3268 if (max_chars == 0)
3269 return consumed;
3270 last = FALSE;
3271 break;
3272
3273 case OP_KET:
3274 cc += 1 + LINK_SIZE;
3275 continue;
3276
/* End of the current alternative: jump over the remaining ones. */
3277 case OP_ALT:
3278 cc += GET(cc, 1);
3279 continue;
3280
/* Brackets: every alternative after the first one is scanned by a recursive
call (which also limits max_chars), then scanning continues with the first
alternative. */
3281 case OP_ONCE:
3282 case OP_ONCE_NC:
3283 case OP_BRA:
3284 case OP_BRAPOS:
3285 case OP_CBRA:
3286 case OP_CBRAPOS:
3287 alternative = cc + GET(cc, 1);
3288 while (*alternative == OP_ALT)
3289 {
3290 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3291 if (max_chars == 0)
3292 return consumed;
3293 alternative += GET(alternative, 1);
3294 }
3295
3296 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3297 cc += IMM2_SIZE;
3298 cc += 1 + LINK_SIZE;
3299 continue;
3300
/* Character classes and character types are recorded as unknown characters.
In UTF mode scanning is stopped when the item may match a character which
is longer than one code unit. */
3301 case OP_CLASS:
3302 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3303 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3304 #endif
3305 any = TRUE;
3306 cc += 1 + 32 / sizeof(pcre_uchar);
3307 break;
3308
3309 case OP_NCLASS:
3310 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3311 if (common->utf) return consumed;
3312 #endif
3313 any = TRUE;
3314 cc += 1 + 32 / sizeof(pcre_uchar);
3315 break;
3316
3317 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3318 case OP_XCLASS:
3319 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3320 if (common->utf) return consumed;
3321 #endif
3322 any = TRUE;
3323 cc += GET(cc, 1);
3324 break;
3325 #endif
3326
3327 case OP_DIGIT:
3328 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3329 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3330 return consumed;
3331 #endif
3332 any = TRUE;
3333 cc++;
3334 break;
3335
3336 case OP_WHITESPACE:
3337 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3338 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3339 return consumed;
3340 #endif
3341 any = TRUE;
3342 cc++;
3343 break;
3344
3345 case OP_WORDCHAR:
3346 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3347 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3348 return consumed;
3349 #endif
3350 any = TRUE;
3351 cc++;
3352 break;
3353
/* The extra increment skips the character argument of OP_NOT / OP_NOTI. */
3354 case OP_NOT:
3355 case OP_NOTI:
3356 cc++;
3357 /* Fall through. */
3358 case OP_NOT_DIGIT:
3359 case OP_NOT_WHITESPACE:
3360 case OP_NOT_WORDCHAR:
3361 case OP_ANY:
3362 case OP_ALLANY:
3363 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3364 if (common->utf) return consumed;
3365 #endif
3366 any = TRUE;
3367 cc++;
3368 break;
3369
3370 #ifdef SUPPORT_UCP
3371 case OP_NOTPROP:
3372 case OP_PROP:
3373 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3374 if (common->utf) return consumed;
3375 #endif
3376 any = TRUE;
3377 cc += 1 + 2;
3378 break;
3379 #endif
3380
/* Only the repeat count is stored here, the character type which follows
is processed by the next iteration. */
3381 case OP_TYPEEXACT:
3382 repeat = GET2(cc, 1);
3383 cc += 1 + IMM2_SIZE;
3384 continue;
3385
3386 case OP_NOTEXACT:
3387 case OP_NOTEXACTI:
3388 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3389 if (common->utf) return consumed;
3390 #endif
3391 any = TRUE;
3392 repeat = GET2(cc, 1);
3393 cc += 1 + IMM2_SIZE + 1;
3394 break;
3395
/* Any other opcode terminates the scanning. */
3396 default:
3397 return consumed;
3398 }
3399
3400 if (any)
3401 {
/* Unknown character: every bit of the code unit is marked as variable, and
the byte set is marked as not tracked. This is repeated "repeat" times. */
3402 #if defined COMPILE_PCRE8
3403 mask = 0xff;
3404 #elif defined COMPILE_PCRE16
3405 mask = 0xffff;
3406 #elif defined COMPILE_PCRE32
3407 mask = 0xffffffff;
3408 #else
3409 SLJIT_ASSERT_STOP();
3410 #endif
3411
3412 do
3413 {
3414 chars[0] = mask;
3415 chars[1] = mask;
3416 bytes[0] = 255;
3417
3418 consumed++;
3419 if (--max_chars == 0)
3420 return consumed;
3421 chars += 2;
3422 bytes += MAX_N_BYTES;
3423 }
3424 while (--repeat > 0);
3425
3426 repeat = 1;
3427 continue;
3428 }
3429
/* Literal character: cc points to its first code unit, len is the number
of its code units. */
3430 len = 1;
3431 #ifdef SUPPORT_UTF
3432 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3433 #endif
3434
3435 if (caseless && char_has_othercase(common, cc))
3436 {
3437 #ifdef SUPPORT_UTF
3438 if (common->utf)
3439 {
/* Scanning is stopped when the other case is encoded with a different
number of code units. */
3440 GETCHAR(chr, cc);
3441 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3442 return consumed;
3443 }
3444 else
3445 #endif
3446 {
3447 chr = *cc;
3448 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3449 }
3450 }
3451 else
3452 caseless = FALSE;
3453
/* The character is recorded "repeat" times, one position for each of its
code units. */
3454 len_save = len;
3455 cc_save = cc;
3456 while (TRUE)
3457 {
3458 oc = othercase;
3459 do
3460 {
3461 chr = *cc;
3462 #ifdef COMPILE_PCRE32
3463 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3464 return consumed;
3465 #endif
3466 add_prefix_byte((pcre_uint8)chr, bytes);
3467
3468 mask = 0;
3469 if (caseless)
3470 {
/* The bits which differ between the two cases are set both in the mask and
in the character value. */
3471 add_prefix_byte((pcre_uint8)*oc, bytes);
3472 mask = *cc ^ *oc;
3473 chr |= mask;
3474 }
3475
/* NOTACHAR (with a zero mask in 32 bit mode) marks a position which has
not been set yet. */
3476 #ifdef COMPILE_PCRE32
3477 if (chars[0] == NOTACHAR && chars[1] == 0)
3478 #else
3479 if (chars[0] == NOTACHAR)
3480 #endif
3481 {
3482 chars[0] = chr;
3483 chars[1] = mask;
3484 }
3485 else
3486 {
/* The position was already set by another path: the differing bits are
added to the mask. */
3487 mask |= chars[0] ^ chr;
3488 chr |= mask;
3489 chars[0] = chr;
3490 chars[1] |= mask;
3491 }
3492
3493 len--;
3494 consumed++;
3495 if (--max_chars == 0)
3496 return consumed;
3497 chars += 2;
3498 bytes += MAX_N_BYTES;
3499 cc++;
3500 oc++;
3501 }
3502 while (len > 0);
3503
3504 if (--repeat == 0)
3505 break;
3506
3507 len = len_save;
3508 cc = cc_save;
3509 }
3510
3511 repeat = 1;
3512 if (last)
3513 return consumed;
3514 }
3515 }
3516
/* Generates a fast forward loop, which moves STR_PTR to the next position
where the first few characters of the pattern (collected by scan_prefix())
may match. Up to three character positions are compared, and when a suitable
range of positions is available, a 256 byte long table is used for skipping
more than one character at once.

Arguments:
  common     the compiler_common structure
  firstline  when TRUE, the search is limited by the value stored at
               common->first_line_end

Returns FALSE if no code was generated (the prefix is not suitable).
Returns TRUE otherwise. NOTE(review): TRUE is also returned when the
allocation of the table fails; presumably the caller detects the NULL
common->read_only_data - confirm. */
3517 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3518 {
3519 DEFINE_COMPILER;
3520 struct sljit_label *start;
3521 struct sljit_jump *quit;
3522 pcre_uint32 chars[MAX_N_CHARS * 2];
3523 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3524 pcre_uint8 ones[MAX_N_CHARS];
3525 int offsets[3];
3526 pcre_uint32 mask;
3527 pcre_uint8 *byte_set, *byte_set_end;
3528 int i, max, from;
3529 int range_right = -1, range_len = 3 - 1;
3530 sljit_ub *update_table = NULL;
3531 BOOL in_range;
3532
3533 /* This is even TRUE, if both are NULL. */
3534 SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3535
/* Every position starts as "not set": NOTACHAR with a zero mask, and an
empty byte set. */
3536 for (i = 0; i < MAX_N_CHARS; i++)
3537 {
3538 chars[i << 1] = NOTACHAR;
3539 chars[(i << 1) + 1] = 0;
3540 bytes[i * MAX_N_BYTES] = 0;
3541 }
3542
3543 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3544
3545 if (max <= 1)
3546 return FALSE;
3547
/* ones[i] is computed from the mask of position i, four bits at a time.
Presumably ones_in_half_byte is a bit count table, so ones[i] is the
number of variable bits - confirm. */
3548 for (i = 0; i < max; i++)
3549 {
3550 mask = chars[(i << 1) + 1];
3551 ones[i] = ones_in_half_byte[mask & 0xf];
3552 mask >>= 4;
3553 while (mask != 0)
3554 {
3555 ones[i] += ones_in_half_byte[mask & 0xf];
3556 mask >>= 4;
3557 }
3558 }
3559
/* Searches for the longest run of consecutive positions which have a tracked
byte set (bytes[..] < 255). A run is accepted only if it is longer than
range_len (initially 2), and the byte set of its last position has at most
4 members. range_right is the index of the last position of the run. */
3560 in_range = FALSE;
3561 from = 0; /* Prevent compiler "uninitialized" warning */
3562 for (i = 0; i <= max; i++)
3563 {
3564 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3565 {
3566 range_len = i - from;
3567 range_right = i - 1;
3568 }
3569
3570 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3571 {
3572 if (!in_range)
3573 {
3574 in_range = TRUE;
3575 from = i;
3576 }
3577 }
3578 else if (in_range)
3579 in_range = FALSE;
3580 }
3581
3582 if (range_right >= 0)
3583 {
3584 /* Since no data is consumed (see the assert in the beginning
3585 of this function), this space can be reallocated. */
3586 if (common->read_only_data)
3587 SLJIT_FREE(common->read_only_data);
3588
3589 common->read_only_data_size += 256;
3590 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3591 if (common->read_only_data == NULL)
3592 return TRUE;
3593
/* The table occupies the first 256 bytes of the read only data. Each entry
is the number of bytes STR_PTR can be advanced by, when the given byte is
found at offset range_right: the default is the whole range, and a byte
which may appear i positions before range_right reduces it to i
characters. */
3594 update_table = (sljit_ub *)common->read_only_data;
3595 common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3596 memset(update_table, IN_UCHARS(range_len), 256);
3597
3598 for (i = 0; i < range_len; i++)
3599 {
3600 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3601 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3602 byte_set_end = byte_set + byte_set[0];
3603 byte_set++;
3604 while (byte_set <= byte_set_end)
3605 {
3606 if (update_table[*byte_set] > IN_UCHARS(i))
3607 update_table[*byte_set] = IN_UCHARS(i);
3608 byte_set++;
3609 }
3610 }
3611 }
3612
/* offsets[0] is the first position whose mask has at most two bits set. */
3613 offsets[0] = -1;
3614 /* Scan forward. */
3615 for (i = 0; i < max; i++)
3616 if (ones[i] <= 2) {
3617 offsets[0] = i;
3618 break;
3619 }
3620
3621 if (offsets[0] < 0 && range_right < 0)
3622 return FALSE;
3623
3624 if (offsets[0] >= 0)
3625 {
/* offsets[1] is the last suitable position after offsets[0], except the
position which is already checked by the table. */
3626 /* Scan backward. */
3627 offsets[1] = -1;
3628 for (i = max - 1; i > offsets[0]; i--)
3629 if (ones[i] <= 2 && i != range_right)
3630 {
3631 offsets[1] = i;
3632 break;
3633 }
3634
3635 /* This case is handled better by fast_forward_first_char. */
3636 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3637 return FALSE;
3638
3639 offsets[2] = -1;
3640 /* We only search for a middle character if there is no range check. */
3641 if (offsets[1] >= 0 && range_right == -1)
3642 {
3643 /* Scan from middle. */
3644 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3645 if (ones[i] <= 2)
3646 {
3647 offsets[2] = i;
3648 break;
3649 }
3650
3651 if (offsets[2] == -1)
3652 {
3653 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3654 if (ones[i] <= 2)
3655 {
3656 offsets[2] = i;
3657 break;
3658 }
3659 }
3660 }
3661
3662 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3663 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3664
/* The selected character / mask pairs are moved to the beginning of the
chars array: [0],[1] first position, [2],[3] middle position, [4],[5] last
position. */
3665 chars[0] = chars[offsets[0] << 1];
3666 chars[1] = chars[(offsets[0] << 1) + 1];
3667 if (offsets[2] >= 0)
3668 {
3669 chars[2] = chars[offsets[2] << 1];
3670 chars[3] = chars[(offsets[2] << 1) + 1];
3671 }
3672 if (offsets[1] >= 0)
3673 {
3674 chars[4] = chars[offsets[1] << 1];
3675 chars[5] = chars[(offsets[1] << 1) + 1];
3676 }
3677 }
3678
/* From here max is the largest offset which may be read. STR_END is
decreased by this amount while the loop is running. */
3679 max -= 1;
3680 if (firstline)
3681 {
/* The original STR_END is saved in TMP3. The new STR_END is the smaller of
the decreased STR_END and the first line end. */
3682 SLJIT_ASSERT(common->first_line_end != 0);
3683 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3684 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3685 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3686 quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3687 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3688 JUMPHERE(quit);
3689 }
3690 else
3691 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3692
/* Except on x86-32, the address of the table is kept in RETURN_ADDR. */
3693 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3694 if (range_right >= 0)
3695 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3696 #endif
3697
3698 start = LABEL();
3699 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3700
3701 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3702
3703 if (range_right >= 0)
3704 {
/* Loads one byte of the code unit at offset range_right (the address
depends on the byte order for code units wider than 8 bits), and advances
STR_PTR by the table entry of that byte. The loop is restarted until the
entry is zero. */
3705 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3706 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3707 #else
3708 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3709 #endif
3710
3711 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3712 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3713 #else
3714 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3715 #endif
3716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3717 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3718 }
3719
3720 if (offsets[0] >= 0)
3721 {
/* The characters are loaded before STR_PTR is increased. Each comparison
ORs the mask into the loaded character first, and restarts the loop (with
STR_PTR already advanced by one character) on mismatch. */
3722 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3723 if (offsets[1] >= 0)
3724 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3725 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3726
3727 if (chars[1] != 0)
3728 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3729 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has been increased already, hence the offset is one less. */
3730 if (offsets[2] >= 0)
3731 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3732
3733 if (offsets[1] >= 0)
3734 {
3735 if (chars[5] != 0)
3736 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3737 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3738 }
3739
3740 if (offsets[2] >= 0)
3741 {
3742 if (chars[3] != 0)
3743 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3744 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3745 }
/* Every comparison succeeded: the increment of STR_PTR is undone. */
3746 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3747 }
3748
3749 JUMPHERE(quit);
3750
3751 if (firstline)
3752 {
/* STR_END is restored from TMP3. When the table was used, STR_PTR is
limited to the first line end. */
3753 if (range_right >= 0)
3754 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3755 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3756 if (range_right >= 0)
3757 {
3758 quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3759 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3760 JUMPHERE(quit);
3761 }
3762 }
3763 else
3764 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3765 return TRUE;
3766 }
3767
3768 #undef MAX_N_CHARS
3769 #undef MAX_N_BYTES
3770
3771 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3772 {
3773 DEFINE_COMPILER;
3774 struct sljit_label *start;
3775 struct sljit_jump *quit;
3776 struct sljit_jump *found;
3777 pcre_uchar oc, bit;
3778
3779 if (firstline)
3780 {
3781 SLJIT_ASSERT(common->first_line_end != 0);
3782 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3783 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3784 }
3785
3786 start = LABEL();
3787 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3788 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3789
3790 oc = first_char;
3791 if (caseless)
3792 {
3793 oc = TABLE_GET(first_char, common->fcc, first_char);
3794 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3795 if (first_char > 127 && common->utf)
3796 oc = UCD_OTHERCASE(first_char);
3797 #endif
3798 }
3799 if (first_char == oc)
3800 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3801 else
3802 {
3803 bit = first_char ^ oc;
3804 if (is_powerof2(bit))
3805 {
3806 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3807 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3808 }
3809 else
3810 {
3811 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3812 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3813 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3814 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3815 found = JUMP(SLJIT_C_NOT_ZERO);
3816 }
3817 }
3818
3819 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3820 JUMPTO(SLJIT_JUMP, start);
3821 JUMPHERE(found);
3822 JUMPHERE(quit);
3823
3824 if (firstline)
3825 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3826 }
3827
/* Generates code which moves STR_PTR forward until it is preceded by a
newline, or until STR_END is reached. Nothing is skipped if STR_PTR is not
above the "str" member of jit_arguments. When firstline is set, STR_END is
replaced by the value stored at common->first_line_end while the search is
running, and it is restored from TMP3 afterwards.
NOTE(review): presumably this is the start optimization for multiline
patterns which start with a line start assertion - confirm against the
caller. */
3828 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3829 {
3830 DEFINE_COMPILER;
3831 struct sljit_label *loop;
3832 struct sljit_jump *lastchar;
3833 struct sljit_jump *firstchar;
3834 struct sljit_jump *quit;
3835 struct sljit_jump *foundcr = NULL;
3836 struct sljit_jump *notfoundnl;
3837 jump_list *newline = NULL;
3838
3839 if (firstline)
3840 {
3841 SLJIT_ASSERT(common->first_line_end != 0);
3842 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3843 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3844 }
3845
/* Fixed newline which consists of two characters (common->newline > 255). */
3846 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3847 {
3848 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3849 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3850 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3851 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3852 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3853
/* STR_PTR is decreased by one character if it is at least two characters
above "begin" (TMP2 is 1 or 0, scaled to the size of a code unit). */
3854 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3855 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3856 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3857 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3858 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3859 #endif
3860 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3861
/* The loop ends when the two characters before STR_PTR are the two newline
characters, or when STR_END is reached. */
3862 loop = LABEL();
3863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3864 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3865 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3866 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3867 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3868 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3869
3870 JUMPHERE(quit);
3871 JUMPHERE(firstchar);
3872 JUMPHERE(lastchar);
3873
3874 if (firstline)
3875 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3876 return;
3877 }
3878
/* Other newline types: step back one character, then read characters until
a newline is found. */
3879 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3880 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3881 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3882 skip_char_back(common);
3883
/* The label of the loop is also saved in common->ff_newline_shortcut. */
3884 loop = LABEL();
3885 common->ff_newline_shortcut = loop;
3886
3887 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3888 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3889 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3890 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in the newline list go back to the loop.
NOTE(review): presumably check_newlinechar() adds these jumps when the
character is not a newline - confirm. */
3891 check_newlinechar(common, common->nltype, &newline, FALSE);
3892 set_jumps(newline, loop);
3893
3894 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3895 {
/* A CR was found: if the next character is NL, it is skipped as well
(TMP1 is 1 or 0, scaled to the size of a code unit). */
3896 quit = JUMP(SLJIT_JUMP);
3897 JUMPHERE(foundcr);
3898 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3899 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3900 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3901 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3902 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3903 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3904 #endif
3905 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3906 JUMPHERE(notfoundnl);
3907 JUMPHERE(quit);
3908 }
3909 JUMPHERE(lastchar);
3910 JUMPHERE(firstchar);
3911
3912 if (firstline)
3913 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3914 }
3915
3916 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3917
/* Generates a loop which moves STR_PTR forward until the current character
is accepted by start_bits (a bit map of 256 characters, one bit for each
character), or until STR_END is reached.

Arguments:
  common      the compiler_common structure
  start_bits  the bit map; its address is embedded into the generated code
  firstline   when TRUE, STR_END is replaced by the value stored at
                common->first_line_end while the loop is running */
3918 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3919 {
3920 DEFINE_COMPILER;
3921 struct sljit_label *start;
3922 struct sljit_jump *quit;
3923 struct sljit_jump *found = NULL;
3924 jump_list *matches = NULL;
3925 #ifndef COMPILE_PCRE8
3926 struct sljit_jump *jump;
3927 #endif
3928
3929 if (firstline)
3930 {
/* STR_END is saved in RETURN_ADDR here, since TMP3 is used by the loop in
UTF mode. */
3931 SLJIT_ASSERT(common->first_line_end != 0);
3932 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3933 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3934 }
3935
3936 start = LABEL();
3937 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3938 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The code unit is preserved in TMP3, because TMP1 is modified by the
check below. */
3939 #ifdef SUPPORT_UTF
3940 if (common->utf)
3941 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3942 #endif
3943
/* A range based check is tried first. NOTE(review): presumably the jumps
added to the matches list are taken when the character is accepted -
confirm in check_class_ranges(). If no range check can be generated, the
bit of the character is tested in the bit map. */
3944 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3945 {
/* Characters above 255 use the bit of character 255. */
3946 #ifndef COMPILE_PCRE8
3947 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3948 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3949 JUMPHERE(jump);
3950 #endif
/* TMP1 = start_bits[chr >> 3], TMP2 = 1 << (chr & 0x7). */
3951 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3952 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3953 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3954 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3955 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3956 found = JUMP(SLJIT_C_NOT_ZERO);
3957 }
3958
/* The character is not accepted: move to the next character. */
3959 #ifdef SUPPORT_UTF
3960 if (common->utf)
3961 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3962 #endif
3963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3964 #ifdef SUPPORT_UTF
3965 #if defined COMPILE_PCRE8
3966 if (common->utf)
3967 {
/* For code units from 0xc0 the value read from utf8_table4 is added to
STR_PTR (presumably the number of the additional bytes of the character). */
3968 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3969 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3970 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3971 }
3972 #elif defined COMPILE_PCRE16
3973 if (common->utf)
3974 {
/* One more code unit (two bytes) is skipped, when the code unit is in the
0xd800 - 0xdbff range. */
3975 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3976 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3977 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3978 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3979 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3981 }
3982 #endif /* COMPILE_PCRE[8|16] */
3983 #endif /* SUPPORT_UTF */
3984 JUMPTO(SLJIT_JUMP, start);
/* Exit point of the loop. */
3985 if (found != NULL)
3986 JUMPHERE(found);
3987 if (matches != NULL)
3988 set_jumps(matches, LABEL());
3989 JUMPHERE(quit);
3990
3991 if (firstline)
3992 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3993 }
3994
3995 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3996 {
3997 DEFINE_COMPILER;
3998 struct sljit_label *loop;
3999 struct sljit_jump *toolong;
4000 struct sljit_jump *alreadyfound;
4001 struct sljit_jump *found;
4002 struct sljit_jump *foundoc = NULL;
4003 struct sljit_jump *notfound;
4004 pcre_uint32 oc, bit;
4005
4006 SLJIT_ASSERT(common->req_char_ptr != 0);
4007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4008 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4009 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
4010 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
4011
4012 if (has_firstchar)
4013 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4014 else
4015 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4016
4017 loop = LABEL();
4018 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4019
4020 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4021 oc = req_char;
4022 if (caseless)
4023 {
4024 oc = TABLE_GET(req_char, common->fcc, req_char);
4025 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4026 if (req_char > 127 && common->utf)
4027 oc = UCD_OTHERCASE(req_char);
4028 #endif
4029 }
4030 if (req_char == oc)
4031 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4032 else
4033 {
4034 bit = req_char ^ oc;
4035 if (is_powerof2(bit))
4036 {
4037 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4038 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4039 }
4040 else
4041 {
4042 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4043 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4044 }
4045 }
4046 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4047 JUMPTO(SLJIT_JUMP, loop);
4048
4049 JUMPHERE(found);
4050 if (foundoc)
4051 JUMPHERE(foundoc);
4052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4053 JUMPHERE(alreadyfound);
4054 JUMPHERE(toolong);
4055 return notfound;
4056 }
4057
4058 static void do_revertframes(compiler_common *common)
4059 {
4060 DEFINE_COMPILER;
4061 struct sljit_jump *jump;
4062 struct sljit_label *mainloop;
4063
4064 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4065 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4066 GET_LOCAL_BASE(TMP3, 0, 0);
4067
4068 /* Drop frames until we reach STACK_TOP. */
4069 mainloop = LABEL();
4070 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4071 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4072 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
4073
4074 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4075 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4076 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4077 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4078 JUMPTO(SLJIT_JUMP, mainloop);
4079
4080 JUMPHERE(jump);
4081 jump = JUMP(SLJIT_C_SIG_LESS);
4082 /* End of dropping frames. */
4083 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4084
4085 JUMPHERE(jump);
4086 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4087 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4088 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4089 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4090 JUMPTO(SLJIT_JUMP, mainloop);
4091 }
4092
/* Generates a fast-call helper which checks whether STR_PTR is at a word
boundary. The return address is kept in LOCALS0. The "word character"
status (0 or 1) of the previous character is stored in LOCALS1, the status
of the current character is computed in TMP2. The status is 0 if there is
no previous character (STR_PTR is not above "begin"), or if the current
character cannot be read. The helper returns with the flags set by the XOR
of the two values: a non-zero result means a word boundary. */
4093 static void check_wordboundary(compiler_common *common)
4094 {
4095 DEFINE_COMPILER;
4096 struct sljit_jump *skipread;
4097 jump_list *skipread_list = NULL;
4098 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4099 struct sljit_jump *jump;
4100 #endif
4101
/* The generated code tests the ctype_word bit by shifting right by 4. */
4102 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4103
4104 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4105 /* Get type of the previous char, and put it to LOCALS1. */
4106 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4109 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and read the previous character again; presumably read_char()
leaves STR_PTR at its original position - confirm. */
4110 skip_char_back(common);
4111 check_start_used_ptr(common);
4112 read_char(common);
4113
4114 /* Testing char type. */
4115 #ifdef SUPPORT_UCP
4116 if (common->use_ucp)
4117 {
/* The result is 1 for the underscore. Otherwise the value returned in TMP1
by the getucd helper (presumably the character type - confirm) is tested
against the ucp_Ll - ucp_Lu and ucp_Nd - ucp_No ranges with unsigned
comparisons. */
4118 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4119 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4120 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4121 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4122 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4123 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4124 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4125 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4126 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4127 JUMPHERE(jump);
4128 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4129 }
4130 else
4131 #endif
4132 {
/* Table based check: characters above 255 are not looked up, so LOCALS1
remains 0 for them. */
4133 #ifndef COMPILE_PCRE8
4134 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4135 #elif defined SUPPORT_UTF
4136 /* Here LOCALS1 has already been zeroed. */
4137 jump = NULL;
4138 if (common->utf)
4139 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4140 #endif /* COMPILE_PCRE8 */
4141 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4142 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4143 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4145 #ifndef COMPILE_PCRE8
4146 JUMPHERE(jump);
4147 #elif defined SUPPORT_UTF
4148 if (jump != NULL)
4149 JUMPHERE(jump);
4150 #endif /* COMPILE_PCRE8 */
4151 }
4152 JUMPHERE(skipread);
4153
/* Get the type of the current character into TMP2. The jumps added to
skipread_list by check_str_end() skip the test, so TMP2 remains 0. */
4154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4155 check_str_end(common, &skipread_list);
4156 peek_char(common, READ_CHAR_MAX);
4157
4158 /* Testing char type. This is a code duplication. */
4159 #ifdef SUPPORT_UCP
4160 if (common->use_ucp)
4161 {
4162 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4163 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4164 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4165 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4166 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4168 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4169 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4170 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4171 JUMPHERE(jump);
4172 }
4173 else
4174 #endif
4175 {
4176 #ifndef COMPILE_PCRE8
4177 /* TMP2 may be destroyed by peek_char. */
4178 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4179 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4180 #elif defined SUPPORT_UTF
4181 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4182 jump = NULL;
4183 if (common->utf)
4184 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4185 #endif
4186 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4187 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4188 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4189 #ifndef COMPILE_PCRE8
4190 JUMPHERE(jump);
4191 #elif defined SUPPORT_UTF
4192 if (jump != NULL)
4193 JUMPHERE(jump);
4194 #endif /* COMPILE_PCRE8 */
4195 }
4196 set_jumps(skipread_list, LABEL());
4197
/* The flags are set from the difference of the two types: the result is
non-zero if exactly one of the two characters is a word character. */
4198 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4199 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4200 }
4201
4202 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4203 {
4204 DEFINE_COMPILER;
4205 int ranges[MAX_RANGE_SIZE];
4206 pcre_uint8 bit, cbit, all;
4207 int i, byte, length = 0;
4208
4209 bit = bits[0] & 0x1;
4210 /* All bits will be zero or one (since bit is zero or one). */
4211 all = -bit;
4212
4213 for (i = 0; i < 256; )
4214 {
4215 byte = i >> 3;
4216 if ((i & 0x7) == 0 && bits[byte] == all)
4217 i += 8;
4218 else
4219 {
4220 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4221 if (cbit != bit)
4222 {
4223 if (length >= MAX_RANGE_SIZE)
4224 return FALSE;
4225 ranges[length] = i;
4226 length++;
4227 bit = cbit;
4228 all = -cbit;
4229 }
4230 i++;
4231 }
4232 }
4233
4234 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4235 {
4236 if (length >= MAX_RANGE_SIZE)
4237 return FALSE;
4238 ranges[length] = 256;
4239 length++;
4240 }
4241
4242 if (length < 0 || length > 4)
4243 return FALSE;
4244
4245 bit = bits[0] & 0x1;
4246 if (invert) bit ^= 0x1;
4247
4248 /* No character is accepted. */
4249 if (length == 0 && bit == 0)
4250 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4251
4252 switch(length)
4253 {
4254 case 0:
4255 /* When bit != 0, all characters are accepted. */
4256 return TRUE;
4257
4258 case 1:
4259 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4260 return TRUE;
4261
4262 case 2:
4263 if (ranges[0] + 1 != ranges[1])
4264 {
4265 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4266 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4267 }
4268 else
4269 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4270 return TRUE;
4271
4272 case 3:
4273 if (bit != 0)
4274 {
4275 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4276 if (ranges[0] + 1 != ranges[1])
4277 {
4278 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4279 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4280 }
4281 else
4282 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4283 return TRUE;
4284 }
4285
4286 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4287 if (ranges[1] + 1 != ranges[2])
4288 {
4289 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4290 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4291 }
4292 else
4293 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4294 return TRUE;
4295
4296 case 4:
4297 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4298 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4299 && is_powerof2(ranges[2] - ranges[0]))
4300 {
4301 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4302 if (ranges[2] + 1 != ranges[3])
4303 {
4304 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4305 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4306 }
4307 else
4308 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4309 return TRUE;
4310 }
4311
4312 if (bit != 0)
4313 {
4314 i = 0;
4315 if (ranges[0] + 1 != ranges[1])
4316 {
4317 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4318 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4319 i = ranges[0];
4320 }
4321 else
4322 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4323
4324 if (ranges[2] + 1 != ranges[3])
4325 {
4326 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4327 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4328 }
4329 else
4330 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4331 return TRUE;
4332 }
4333
4334 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4335 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4336 if (ranges[1] + 1 != ranges[2])
4337 {
4338 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4339 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4340 }
4341 else
4342 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4343 return TRUE;
4344
4345 default:
4346 SLJIT_ASSERT_STOP();
4347 return FALSE;
4348 }
4349 }
4350
4351 static void check_anynewline(compiler_common *common)
4352 {
4353 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4354 DEFINE_COMPILER;
4355
4356 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4357
4358 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4359 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4360 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4361 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4362 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4363 #ifdef COMPILE_PCRE8
4364 if (common->utf)
4365 {
4366 #endif
4367 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4368 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4370 #ifdef COMPILE_PCRE8
4371 }
4372 #endif
4373 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4374 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4375 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4376 }
4377
4378 static void check_hspace(compiler_common *common)
4379 {
4380 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4381 DEFINE_COMPILER;
4382
4383 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4384
4385 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4386 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4387 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4388 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4389 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4390 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4391 #ifdef COMPILE_PCRE8
4392 if (common->utf)
4393 {
4394 #endif
4395 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4396 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4397 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4398 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4399 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4400 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4401 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4402 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4403 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4404 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4405 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4406 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4407 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4408 #ifdef COMPILE_PCRE8
4409 }
4410 #endif
4411 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4412 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4413
4414 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4415 }
4416
4417 static void check_vspace(compiler_common *common)
4418 {
4419 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4420 DEFINE_COMPILER;
4421
4422 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4423
4424 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4425 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4426 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4427 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4428 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4429 #ifdef COMPILE_PCRE8
4430 if (common->utf)
4431 {
4432 #endif
4433 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4434 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4436 #ifdef COMPILE_PCRE8
4437 }
4438 #endif
4439 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4440 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4441
4442 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4443 }
4444
4445 #define CHAR1 STR_END
4446 #define CHAR2 STACK_TOP
4447
4448 static void do_casefulcmp(compiler_common *common)
4449 {
4450 DEFINE_COMPILER;
4451 struct sljit_jump *jump;
4452 struct sljit_label *label;
4453
4454 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4455 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4456 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4458 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4459 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4460
4461 label = LABEL();
4462 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4463 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4464 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4465 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4466 JUMPTO(SLJIT_C_NOT_ZERO, label);
4467
4468 JUMPHERE(jump);
4469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4470 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4471 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4472 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4473 }
4474
4475 #define LCC_TABLE STACK_LIMIT
4476
4477 static void do_caselesscmp(compiler_common *common)
4478 {
4479 DEFINE_COMPILER;
4480 struct sljit_jump *jump;
4481 struct sljit_label *label;
4482
4483 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4484 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4485
4486 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4487 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4488 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4489 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4490 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4491 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4492
4493 label = LABEL();
4494 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4495 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4496 #ifndef COMPILE_PCRE8
4497 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4498 #endif
4499 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4500 #ifndef COMPILE_PCRE8
4501 JUMPHERE(jump);
4502 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4503 #endif
4504 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4505 #ifndef COMPILE_PCRE8
4506 JUMPHERE(jump);
4507 #endif
4508 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4509 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4510 JUMPTO(SLJIT_C_NOT_ZERO, label);
4511
4512 JUMPHERE(jump);
4513 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4514 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4515 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4516 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4518 }
4519
4520 #undef LCC_TABLE
4521 #undef CHAR1
4522 #undef CHAR2
4523
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode tables.
It is done in C because emitting it at JIT level would be ineffective.

Arguments:
  src1   start of the first sequence
  args   supplies the second sequence: it starts at args->uchar_ptr and must
         not be read at or beyond args->end
  end1   end of the first sequence

Returns: the position reached in the second sequence when every character
         of the first one has a caseless match; NULL on the first character
         that does not match; (pcre_uchar*)1 when the second sequence runs
         out before the first one
*/
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
  const pcre_uchar *subject = args->uchar_ptr;
  const pcre_uchar *subject_end = args->end;
  const ucd_record *record;
  const pcre_uint32 *set;
  pcre_uint32 first, second;

  for (; src1 < end1; )
  {
    if (subject >= subject_end)
      return (pcre_uchar*)1;

    GETCHARINC(first, src1);
    GETCHARINC(second, subject);
    record = GET_UCD(second);

    /* Same character, or the single other case of the second one. */
    if (first == second || first == second + record->other_case)
      continue;

    /* Otherwise search the caseless set of the second character. The walk
    stops at the first entry that is not below the wanted character. */
    set = PRIV(ucd_caseless_sets) + record->caseset;
    while (first > *set)
      set++;
    if (first < *set)
      return NULL;
  }

  return subject;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4556
4557 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4558 compare_context* context, jump_list **backtracks)
4559 {
4560 DEFINE_COMPILER;
4561 unsigned int othercasebit = 0;
4562 pcre_uchar *othercasechar = NULL;
4563 #ifdef SUPPORT_UTF
4564 int utflength;
4565 #endif
4566
4567 if (caseless && char_has_othercase(common, cc))
4568 {
4569 othercasebit = char_get_othercase_bit(common, cc);
4570 SLJIT_ASSERT(othercasebit);
4571 /* Extracting bit difference info. */
4572 #if defined COMPILE_PCRE8
4573 othercasechar = cc + (othercasebit >> 8);
4574 othercasebit &= 0xff;
4575 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4576 /* Note that this code only handles characters in the BMP. If there
4577 ever are characters outside the BMP whose othercase differs in only one
4578 bit from itself (there currently are none), this code will need to be
4579 revised for COMPILE_PCRE32. */
4580 othercasechar = cc + (othercasebit >> 9);
4581 if ((othercasebit & 0x100) != 0)
4582 othercasebit = (othercasebit & 0xff) << 8;
4583 else
4584 othercasebit &= 0xff;
4585 #endif /* COMPILE_PCRE[8|16|32] */
4586 }
4587
4588 if (context->sourcereg == -1)
4589 {
4590 #if defined COMPILE_PCRE8
4591 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4592 if (context->length >= 4)
4593 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4594 else if (context->length >= 2)
4595 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4596 else
4597 #endif
4598 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4599 #elif defined COMPILE_PCRE16
4600 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4601 if (context->length >= 4)
4602 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4603 else
4604 #endif
4605 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4606 #elif defined COMPILE_PCRE32
4607 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4608 #endif /* COMPILE_PCRE[8|16|32] */
4609 context->sourcereg = TMP2;
4610 }
4611
4612 #ifdef SUPPORT_UTF
4613 utflength = 1;
4614 if (common->utf && HAS_EXTRALEN(*cc))
4615 utflength += GET_EXTRALEN(*cc);
4616
4617 do
4618 {
4619 #endif
4620
4621 context->length -= IN_UCHARS(1);
4622 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4623
4624 /* Unaligned read is supported. */
4625 if (othercasebit != 0 && othercasechar == cc)
4626 {
4627 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4628 context->oc.asuchars[context->ucharptr] = othercasebit;
4629 }
4630 else
4631 {
4632 context->c.asuchars[context->ucharptr] = *cc;
4633 context->oc.asuchars[context->ucharptr] = 0;
4634 }
4635 context->ucharptr++;
4636
4637 #if defined COMPILE_PCRE8
4638 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4639 #else
4640 if (context->ucharptr >= 2 || context->length == 0)
4641 #endif
4642 {
4643 if (context->length >= 4)
4644 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4645 else if (context->length >= 2)
4646 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4647 #if defined COMPILE_PCRE8
4648 else if (context->length >= 1)
4649 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4650 #endif /* COMPILE_PCRE8 */
4651 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4652
4653 switch(context->ucharptr)
4654 {
4655 case 4 / sizeof(pcre_uchar):
4656 if (context->oc.asint != 0)
4657 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4659 break;
4660
4661 case 2 / sizeof(pcre_uchar):
4662 if (context->oc.asushort != 0)
4663 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4664 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4665 break;
4666
4667 #ifdef COMPILE_PCRE8
4668 case 1:
4669 if (context->oc.asbyte != 0)
4670 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4671 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4672 break;
4673 #endif
4674
4675 default:
4676 SLJIT_ASSERT_STOP();
4677 break;
4678 }
4679 context->ucharptr = 0;
4680 }
4681
4682 #else
4683
4684 /* Unaligned read is unsupported or in 32 bit mode. */
4685 if (context->length >= 1)
4686 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4687
4688 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4689
4690 if (othercasebit != 0 && othercasechar == cc)
4691 {
4692 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4693 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4694 }
4695 else
4696 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4697
4698 #endif
4699
4700 cc++;
4701 #ifdef SUPPORT_UTF
4702 utflength--;
4703 }
4704 while (utflength > 0);
4705 #endif
4706
4707 return cc;
4708 }
4709
4710 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4711
/* Emits the adjustment that changes the bias of typereg from typeoffset to
(value), then records (value) as the new typeoffset. Expects typereg and
typeoffset to be in scope at the point of use. The argument is evaluated more
than once, so it must be free of side effects. The do / while (0) wrapper
makes the expansion a single statement, so the macro is also safe as the body
of an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4721
/* Emits the adjustment that changes the bias of the character in TMP1 from
charoffset to (value), then records (value) as the new charoffset. Expects
charoffset to be in scope at the point of use. The argument is evaluated more
than once, so it must be free of side effects. The do / while (0) wrapper
makes the expansion a single statement, so the macro is also safe as the body
of an unbraced if / else. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4731
4732 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4733 {
4734 DEFINE_COMPILER;
4735 jump_list *found = NULL;
4736 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4737 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4738 struct sljit_jump *jump = NULL;
4739 pcre_uchar *ccbegin;
4740 int compares, invertcmp, numberofcmps;
4741 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4742 BOOL utf = common->utf;
4743 #endif
4744
4745 #ifdef SUPPORT_UCP
4746 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4747 BOOL charsaved = FALSE;
4748 int typereg = TMP1, scriptreg = TMP1;
4749 const pcre_uint32 *other_cases;
4750 sljit_uw typeoffset;
4751 #endif
4752
4753 /* Scanning the necessary info. */
4754 cc++;
4755 ccbegin = cc;
4756 compares = 0;
4757 if (cc[-1] & XCL_MAP)
4758 {
4759 min = 0;
4760 cc += 32 / sizeof(pcre_uchar);
4761 }
4762
4763 while (*cc != XCL_END)
4764 {
4765 compares++;
4766 if (*cc == XCL_SINGLE)
4767 {
4768 cc ++;
4769 GETCHARINCTEST(c, cc);
4770 if (c > max) max = c;
4771 if (c < min) min = c;
4772 #ifdef SUPPORT_UCP
4773 needschar = TRUE;
4774 #endif
4775 }
4776 else if (*cc == XCL_RANGE)
4777 {
4778 cc ++;
4779 GETCHARINCTEST(c, cc);
4780 if (c < min) min = c;
4781 GETCHARINCTEST(c, cc);
4782 if (c > max) max = c;
4783 #ifdef SUPPORT_UCP
4784 needschar = TRUE;
4785 #endif
4786 }
4787 #ifdef SUPPORT_UCP
4788 else
4789 {
4790 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4791 cc++;
4792 if (*cc == PT_CLIST)
4793 {
4794 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4795 while (*other_cases != NOTACHAR)
4796 {
4797 if (*other_cases > max) max = *other_cases;
4798 if (*other_cases < min) min = *other_cases;
4799 other_cases++;
4800 }
4801 }
4802 else
4803 {
4804 max = READ_CHAR_MAX;
4805 min = 0;
4806 }
4807
4808 switch(*cc)
4809 {
4810 case PT_ANY:
4811 break;
4812
4813 case PT_LAMP:
4814 case PT_GC:
4815 case PT_PC:
4816 case PT_ALNUM:
4817 needstype = TRUE;
4818 break;
4819
4820 case PT_SC:
4821 needsscript = TRUE;
4822 break;
4823
4824 case PT_SPACE:
4825 case PT_PXSPACE:
4826 case PT_WORD:
4827 case PT_PXGRAPH:
4828 case PT_PXPRINT:
4829 case PT_PXPUNCT:
4830 needstype = TRUE;
4831 needschar = TRUE;
4832 break;
4833
4834 case PT_CLIST:
4835 case PT_UCNC:
4836 needschar = TRUE;
4837 break;
4838
4839 default:
4840 SLJIT_ASSERT_STOP();
4841 break;
4842 }
4843 cc += 2;
4844 }
4845 #endif
4846 }
4847
4848 /* We are not necessary in utf mode even in 8 bit mode. */
4849 cc = ccbegin;
4850 detect_partial_match(common, backtracks);
4851 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4852
4853 if ((cc[-1] & XCL_HASPROP) == 0)
4854 {
4855 if ((cc[-1] & XCL_MAP) != 0)
4856 {
4857 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4858 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4859 {
4860 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4861 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4862 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4863 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4864 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4865 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4866 }
4867
4868 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4869 JUMPHERE(jump);
4870
4871 cc += 32 / sizeof(pcre_uchar);
4872 }
4873 else
4874 {
4875 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4876 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4877 }
4878 }
4879 else if ((cc[-1] & XCL_MAP) != 0)
4880 {
4881 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4882 #ifdef SUPPORT_UCP
4883 charsaved = TRUE;
4884 #endif
4885 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4886 {
4887 #ifdef COMPILE_PCRE8
4888 SLJIT_ASSERT(common->utf);
4889 #endif
4890 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4891
4892 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4893 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4894 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4895 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4896 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4897 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4898
4899 JUMPHERE(jump);
4900 }
4901
4902 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4903 cc += 32 / sizeof(pcre_uchar);
4904 }
4905
4906 #ifdef SUPPORT_UCP
4907 /* Simple register allocation. TMP1 is preferred if possible. */
4908 if (needstype || needsscript)
4909 {
4910 if (needschar && !charsaved)
4911 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4912 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4913 if (needschar)
4914 {
4915 if (needstype)
4916 {
4917 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4918 typereg = RETURN_ADDR;
4919 }
4920
4921 if (needsscript)
4922 scriptreg = TMP3;
4923 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4924 }
4925 else if (needstype && needsscript)
4926 scriptreg = TMP3;
4927 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4928
4929 if (needsscript)
4930 {
4931 if (scriptreg == TMP1)
4932 {
4933 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4934 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4935 }
4936 else
4937 {
4938 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4939 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4940 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4941 }
4942 }
4943 }
4944 #endif
4945
4946 /* Generating code. */
4947 charoffset = 0;
4948 numberofcmps = 0;
4949 #ifdef SUPPORT_UCP
4950 typeoffset = 0;
4951 #endif
4952
4953 while (*cc != XCL_END)
4954 {
4955 compares--;
4956 invertcmp = (compares == 0 && list != backtracks);
4957 jump = NULL;
4958
4959 if (*cc == XCL_SINGLE)
4960 {
4961 cc ++;
4962 GETCHARINCTEST(c, cc);
4963
4964 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4965 {
4966 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4967 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4968 numberofcmps++;
4969 }
4970 else if (numberofcmps > 0)
4971 {
4972 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4973 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4974 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4975 numberofcmps = 0;
4976 }
4977 else
4978 {
4979 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4980 numberofcmps = 0;
4981 }
4982 }
4983 else if (*cc == XCL_RANGE)
4984 {
4985 cc ++;
4986 GETCHARINCTEST(c, cc);
4987 SET_CHAR_OFFSET(c);
4988 GETCHARINCTEST(c, cc);
4989
4990 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4991 {
4992 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4993 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4994 numberofcmps++;
4995 }
4996 else if (numberofcmps > 0)
4997 {
4998 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4999 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5000 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5001 numberofcmps = 0;
5002 }
5003 else
5004 {
5005 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5006 numberofcmps = 0;
5007 }
5008 }
5009 #ifdef SUPPORT_UCP
5010 else
5011 {
5012 if (*cc == XCL_NOTPROP)
5013 invertcmp ^= 0x1;
5014 cc++;
5015 switch(*cc)
5016 {
5017 case PT_ANY:
5018 if (list != backtracks)
5019 {
5020 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5021 continue;
5022 }
5023 else if (cc[-1] == XCL_NOTPROP)
5024 continue;
5025 jump = JUMP(SLJIT_JUMP);
5026 break;
5027
5028 case PT_LAMP:
5029 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5030 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5031 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5032 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5033 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5034 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5035 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5036 break;
5037
5038 case PT_GC:
5039 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5040 SET_TYPE_OFFSET(c);
5041 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5042 break;
5043
5044 case PT_PC:
5045 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5046 break;
5047
5048 case PT_SC:
5049 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5050 break;
5051
5052 case PT_SPACE:
5053 case PT_PXSPACE:
5054 SET_CHAR_OFFSET(9);
5055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5056 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5057
5058 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5059 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5060
5061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5062 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5063
5064 SET_TYPE_OFFSET(ucp_Zl);
5065 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5066 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5067 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5068 break;
5069
5070 case PT_WORD:
5071 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5072 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5073 /* Fall through. */
5074
5075 case PT_ALNUM:
5076 SET_TYPE_OFFSET(ucp_Ll);
5077 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5078 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
5079 SET_TYPE_OFFSET(ucp_Nd);
5080 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5082 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5083 break;
5084
5085 case PT_CLIST:
5086 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5087
5088 /* At least three characters are required.
5089 Otherwise this case would be handled by the normal code path. */
5090 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5091 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5092
5093 /* Optimizing character pairs, if their difference is power of 2. */
5094 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5095 {
5096 if (charoffset == 0)
5097 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5098 else
5099 {
5100 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5101 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5102 }
5103 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5104 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5105 other_cases += 2;
5106 }
5107 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5108 {
5109 if (charoffset == 0)
5110 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5111 else
5112 {
5113 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5114 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5115 }
5116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5117 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5118
5119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5120 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5121
5122 other_cases += 3;
5123 }
5124 else
5125 {
5126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5127 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5128 }
5129
5130 while (*other_cases != NOTACHAR)
5131 {
5132 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5133 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5134 }
5135 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5136 break;
5137
5138 case PT_UCNC:
5139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5140 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5141 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5142 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5143 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5144 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5145
5146 SET_CHAR_OFFSET(0xa0);
5147 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5148 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5149 SET_CHAR_OFFSET(0);
5150 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5151 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5152 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5153 break;
5154
5155 case PT_PXGRAPH:
5156 /* C and Z groups are the farthest two groups. */
5157 SET_TYPE_OFFSET(ucp_Ll);
5158 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5159 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5160
5161 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5162
5163 /* In case of ucp_Cf, we overwrite the result. */
5164 SET_CHAR_OFFSET(0x2066);
5165 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5166 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5167
5168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5169 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5170
5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5172 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5173
5174 JUMPHERE(jump);
5175 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5176 break;
5177
5178 case PT_PXPRINT:
5179 /* C and Z groups are the farthest two groups. */
5180 SET_TYPE_OFFSET(ucp_Ll);
5181 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5182 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5183
5184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5185 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5186
5187 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5188
5189 /* In case of ucp_Cf, we overwrite the result. */
5190 SET_CHAR_OFFSET(0x2066);
5191 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5192 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5193
5194 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5195 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5196
5197 JUMPHERE(jump);
5198 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5199 break;
5200
5201 case PT_PXPUNCT:
5202 SET_TYPE_OFFSET(ucp_Sc);
5203 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5204 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5205
5206 SET_CHAR_OFFSET(0);
5207 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5208 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5209
5210 SET_TYPE_OFFSET(ucp_Pc);
5211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5212 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5213 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5214 break;
5215 }
5216 cc += 2;
5217 }
5218 #endif
5219
5220 if (jump != NULL)
5221 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5222 }
5223
5224 if (found != NULL)
5225 set_jumps(found, LABEL());
5226 }
5227
5228 #undef SET_TYPE_OFFSET
5229 #undef SET_CHAR_OFFSET
5230
5231 #endif
5232
/* Emits the matching-path code for one single-item opcode.

   common      the shared compiler state
   type        the opcode to compile
   cc          points just after the opcode (i.e. at the opcode's inline data,
               if it has any)
   backtracks  jump list that collects every jump taken when the item fails
               to match

   Returns the position right after the data consumed by this opcode: cc
   itself for opcodes without inline data, otherwise cc advanced by the size
   of that data (character length, property pair, class bitmap, link, ...).
   Opcodes that are not handled by the switch reach SLJIT_ASSERT_STOP(). */
5233 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5234 {
5235 DEFINE_COMPILER;
5236 int length;
5237 unsigned int c, oc, bit;
5238 compare_context context;
5239 struct sljit_jump *jump[4];
5240 jump_list *end_list;
5241 #ifdef SUPPORT_UTF
5242 struct sljit_label *label;
5243 #ifdef SUPPORT_UCP
5244 pcre_uchar propdata[5];
5245 #endif
5246 #endif /* SUPPORT_UTF */
5247
5248 switch(type)
5249 {
/* Fails unless STR_PTR equals the "begin" field of jit_arguments. */
5250 case OP_SOD:
5251 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5253 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5254 return cc;
5255
/* Fails unless STR_PTR equals the "str" field of jit_arguments. */
5256 case OP_SOM:
5257 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5258 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5259 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5260 return cc;
5261
/* The shared wordboundary routine is invoked by a fast call; the zero /
   non-zero condition tested afterwards selects the backtrack. */
5262 case OP_NOT_WORD_BOUNDARY:
5263 case OP_WORD_BOUNDARY:
5264 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5265 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5266 return cc;
5267
5268 case OP_NOT_DIGIT:
5269 case OP_DIGIT:
5270 /* Digits are usually 0-9, so it is worth to optimize them. */
5271 detect_partial_match(common, backtracks);
5272 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5273 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5274 read_char7_type(common, type == OP_NOT_DIGIT);
5275 else
5276 #endif
5277 read_char8_type(common, type == OP_NOT_DIGIT);
5278 /* Flip the starting bit in the negative case. */
5279 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5280 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5281 return cc;
5282
/* Same scheme as the digit case, testing the ctype_space bit. */
5283 case OP_NOT_WHITESPACE:
5284 case OP_WHITESPACE:
5285 detect_partial_match(common, backtracks);
5286 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5287 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5288 read_char7_type(common, type == OP_NOT_WHITESPACE);
5289 else
5290 #endif
5291 read_char8_type(common, type == OP_NOT_WHITESPACE);
5292 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5293 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5294 return cc;
5295
/* Same scheme as the digit case, testing the ctype_word bit. */
5296 case OP_NOT_WORDCHAR:
5297 case OP_WORDCHAR:
5298 detect_partial_match(common, backtracks);
5299 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5300 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5301 read_char7_type(common, type == OP_NOT_WORDCHAR);
5302 else
5303 #endif
5304 read_char8_type(common, type == OP_NOT_WORDCHAR);
5305 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5306 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5307 return cc;
5308
/* Reads one character and fails if a newline starts there. A fixed newline
   with common->newline > 255 is compared in two steps: the character just
   read against bits 8..15, then the unit at STR_PTR against bits 0..7. When
   the subject ends after the first unit, the item matches. */
5309 case OP_ANY:
5310 detect_partial_match(common, backtracks);
5311 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5312 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5313 {
5314 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5315 end_list = NULL;
5316 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5317 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5318 else
5319 check_str_end(common, &end_list);
5320
5321 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5322 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5323 set_jumps(end_list, LABEL());
5324 JUMPHERE(jump[0]);
5325 }
5326 else
5327 check_newlinechar(common, common->nltype, backtracks, TRUE);
5328 return cc;
5329
/* Skips one character without inspecting its value. In UTF mode the
   trailing code units are skipped too: the 8-bit build adds the
   utf8_table4 entry of a lead unit >= 0xc0, the 16-bit build adds one more
   unit when (unit & 0xfc00) == 0xd800. */
5330 case OP_ALLANY:
5331 detect_partial_match(common, backtracks);
5332 #ifdef SUPPORT_UTF
5333 if (common->utf)
5334 {
5335 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5336 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5337 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5338 #if defined COMPILE_PCRE8
5339 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5340 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5341 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5342 #elif defined COMPILE_PCRE16
5343 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5344 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5345 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5346 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5347 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5349 #endif
5350 JUMPHERE(jump[0]);
5351 #endif /* COMPILE_PCRE[8|16] */
5352 return cc;
5353 }
5354 #endif
5355 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5356 return cc;
5357
/* Always advances by exactly one code unit. */
5358 case OP_ANYBYTE:
5359 detect_partial_match(common, backtracks);
5360 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5361 return cc;
5362
5363 #ifdef SUPPORT_UTF
5364 #ifdef SUPPORT_UCP
/* A single property test is compiled by building a small XCLASS-style item
   list in propdata (the two data units at cc[0] and cc[1] are copied into
   it) and handing that list to compile_xclass_matchingpath. */
5365 case OP_NOTPROP:
5366 case OP_PROP:
5367 propdata[0] = XCL_HASPROP;
5368 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5369 propdata[2] = cc[0];
5370 propdata[3] = cc[1];
5371 propdata[4] = XCL_END;
5372 compile_xclass_matchingpath(common, propdata, backtracks);
5373 return cc + 2;
5374 #endif
5375 #endif
5376
/* A CR is accepted on its own, and a directly following NL is consumed with
   it; any other character is checked by check_newlinechar using the
   bsr_nltype setting. */
5377 case OP_ANYNL:
5378 detect_partial_match(common, backtracks);
5379 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5380 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5381 /* We don't need to handle soft partial matching case. */
5382 end_list = NULL;
5383 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5384 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5385 else
5386 check_str_end(common, &end_list);
5387 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5388 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5390 jump[2] = JUMP(SLJIT_JUMP);
5391 JUMPHERE(jump[0]);
5392 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5393 set_jumps(end_list, LABEL());
5394 JUMPHERE(jump[1]);
5395 JUMPHERE(jump[2]);
5396 return cc;
5397
/* The character is read and classified by the shared hspace routine. */
5398 case OP_NOT_HSPACE:
5399 case OP_HSPACE:
5400 detect_partial_match(common, backtracks);
5401 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5402 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5403 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5404 return cc;
5405
/* The character is read and classified by the shared vspace routine. */
5406 case OP_NOT_VSPACE:
5407 case OP_VSPACE:
5408 detect_partial_match(common, backtracks);
5409 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5410 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5411 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5412 return cc;
5413
5414 #ifdef SUPPORT_UCP
/* Consumes one character, then keeps consuming characters for as long as
   the ucp_gbtable entry selected by the previous character's gbprop has the
   bit numbered by the next character's gbprop set. STACK_TOP is saved in
   LOCALS0 and temporarily holds the previous gbprop; TMP3 remembers the
   position before the last read so STR_PTR can be restored when the loop
   stops on a character that does not continue the sequence. */
5415 case OP_EXTUNI:
5416 detect_partial_match(common, backtracks);
5417 read_char(common);
5418 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5419 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5420 /* Optimize register allocation: use a real register. */
5421 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5422 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5423
5424 label = LABEL();
5425 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5426 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5427 read_char(common);
5428 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5430 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5431
5432 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5433 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5434 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5435 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5436 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5437 JUMPTO(SLJIT_C_NOT_ZERO, label);
5438
5439 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5440 JUMPHERE(jump[0]);
5441 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5442
5443 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5444 {
5445 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5446 /* Since we successfully read a char above, partial matching must occur. */
5447 check_partial(common, TRUE);
5448 JUMPHERE(jump[0]);
5449 }
5450 return cc;
5451 #endif
5452
/* Succeeds when STR_PTR is at STR_END, or when exactly one newline sequence
   lies between STR_PTR and STR_END. Three variants are emitted depending on
   the newline convention: two-unit fixed newline, one-unit fixed newline,
   and the remaining types (CR, CR NL and, unless the type is
   NLTYPE_ANYCRLF, whatever the shared anynewline routine accepts). */
5453 case OP_EODN:
5454 /* Requires rather complex checks. */
5455 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5456 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5457 {
5458 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5459 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5460 if (common->mode == JIT_COMPILE)
5461 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5462 else
5463 {
5464 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5465 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5466 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5468 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5469 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5470 check_partial(common, TRUE);
5471 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5472 JUMPHERE(jump[1]);
5473 }
5474 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5475 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5476 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5477 }
5478 else if (common->nltype == NLTYPE_FIXED)
5479 {
5480 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5481 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5482 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5483 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5484 }
5485 else
5486 {
5487 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5488 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5489 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5490 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5491 jump[2] = JUMP(SLJIT_C_GREATER);
5492 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5493 /* Equal. */
5494 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5495 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5496 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5497
5498 JUMPHERE(jump[1]);
5499 if (common->nltype == NLTYPE_ANYCRLF)
5500 {
5501 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5502 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5503 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5504 }
5505 else
5506 {
/* STR_PTR is parked in LOCALS1 while the character is read, and restored
   once the newline test has passed. */
5507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5508 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5509 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5510 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5511 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5512 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5513 }
5514 JUMPHERE(jump[2]);
5515 JUMPHERE(jump[3]);
5516 }
5517 JUMPHERE(jump[0]);
5518 check_partial(common, FALSE);
5519 return cc;
5520
/* Fails while any subject data remains after STR_PTR. */
5521 case OP_EOD:
5522 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5523 check_partial(common, FALSE);
5524 return cc;
5525
/* Fails when STR_PTR is past jit_arguments->begin, or when the notbol field
   of jit_arguments is non-zero. */
5526 case OP_CIRC:
5527 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5528 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5529 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5530 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5531 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5532 return cc;
5533
/* At jit_arguments->begin only the notbol field is checked. Elsewhere the
   item fails at STR_END, and otherwise requires the data just before
   STR_PTR to be a newline. */
5534 case OP_CIRCM:
5535 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5537 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5538 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5539 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5540 jump[0] = JUMP(SLJIT_JUMP);
5541 JUMPHERE(jump[1]);
5542
5543 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5544 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5545 {
5546 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5547 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5548 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5549 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5550 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5551 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5552 }
5553 else
5554 {
5555 skip_char_back(common);
5556 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5557 check_newlinechar(common, common->nltype, backtracks, FALSE);
5558 }
5559 JUMPHERE(jump[0]);
5560 return cc;
5561
/* Fails when the noteol field of jit_arguments is non-zero. Afterwards the
   OP_EODN code is emitted, unless common->endonly is set, in which case the
   same check as OP_EOD is emitted. */
5562 case OP_DOLL:
5563 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5564 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5565 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5566
5567 if (!common->endonly)
5568 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5569 else
5570 {
5571 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5572 check_partial(common, FALSE);
5573 }
5574 return cc;
5575
/* At STR_END only the noteol field is checked; before STR_END the data at
   STR_PTR must be a newline (it is examined but not consumed here). */
5576 case OP_DOLLM:
5577 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5578 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5579 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5580 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5581 check_partial(common, FALSE);
5582 jump[0] = JUMP(SLJIT_JUMP);
5583 JUMPHERE(jump[1]);
5584
5585 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5586 {
5587 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5588 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5589 if (common->mode == JIT_COMPILE)
5590 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5591 else
5592 {
5593 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5594 /* STR_PTR = STR_END - IN_UCHARS(1) */
5595 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5596 check_partial(common, TRUE);
5597 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5598 JUMPHERE(jump[1]);
5599 }
5600
5601 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5602 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5603 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5604 }
5605 else
5606 {
5607 peek_char(common, common->nlmax);
5608 check_newlinechar(common, common->nltype, backtracks, FALSE);
5609 }
5610 JUMPHERE(jump[0]);
5611 return cc;
5612
/* Literal character, caseful or caseless. In JIT_COMPILE mode, when the
   character is caseful, has no other case, or char_get_othercase_bit returns
   non-zero for it, STR_PTR is advanced first, bounds-checked once, and the
   comparison is delegated to byte_sequence_compare. Otherwise the character
   is read and compared against c (and its other case oc). */
5613 case OP_CHAR:
5614 case OP_CHARI:
5615 length = 1;
5616 #ifdef SUPPORT_UTF
5617 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5618 #endif
5619 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5620 {
5621 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5622 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5623
5624 context.length = IN_UCHARS(length);
5625 context.sourcereg = -1;
5626 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5627 context.ucharptr = 0;
5628 #endif
5629 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5630 }
5631
5632 detect_partial_match(common, backtracks);
5633 #ifdef SUPPORT_UTF
5634 if (common->utf)
5635 {
5636 GETCHAR(c, cc);
5637 }
5638 else
5639 #endif
5640 c = *cc;
5641
5642 if (type == OP_CHAR || !char_has_othercase(common, cc))
5643 {
5644 read_char_range(common, c, c, FALSE);
5645 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5646 return cc + length;
5647 }
5648 oc = char_othercase(common, c);
5649 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5650 bit = c ^ oc;
/* When both cases differ in a single bit, forcing that bit on reduces the
   test to one comparison. */
5651 if (is_powerof2(bit))
5652 {
5653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5654 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5655 return cc + length;
5656 }
5657 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5658 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5659 JUMPHERE(jump[0]);
5660 return cc + length;
5661
/* Negated literal: any character except c (and, for the caseless form, its
   other case) is accepted and consumed. */
5662 case OP_NOT:
5663 case OP_NOTI:
5664 detect_partial_match(common, backtracks);
5665 length = 1;
5666 #ifdef SUPPORT_UTF
5667 if (common->utf)
5668 {
5669 #ifdef COMPILE_PCRE8
5670 c = *cc;
5671 if (c < 128)
5672 {
5673 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5674 if (type == OP_NOT || !char_has_othercase(common, cc))
5675 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5676 else
5677 {
5678 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5679 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5680 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5681 }
5682 /* Skip the variable-length character. */
5683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5684 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5687 JUMPHERE(jump[0]);
5688 return cc + 1;
5689 }
5690 else
5691 #endif /* COMPILE_PCRE8 */
5692 {
5693 GETCHARLEN(c, cc, length);
5694 }
5695 }
5696 else
5697 #endif /* SUPPORT_UTF */
5698 c = *cc;
5699
5700 if (type == OP_NOT || !char_has_othercase(common, cc))
5701 {
5702 read_char_range(common, c, c, TRUE);
5703 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5704 }
5705 else
5706 {
5707 oc = char_othercase(common, c);
5708 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5709 bit = c ^ oc;
5710 if (is_powerof2(bit))
5711 {
5712 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5713 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5714 }
5715 else
5716 {
5717 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5718 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5719 }
5720 }
5721 return cc + length;
5722
/* Character class stored as a bitmap at cc. If check_class_ranges returns
   non-zero nothing more is emitted here. Otherwise the bitmap is tested
   directly: byte (c >> 3), bit (c & 7). Characters above the bitmap range
   fail for OP_CLASS and skip the bitmap test (i.e. match) for OP_NCLASS. */
5723 case OP_CLASS:
5724 case OP_NCLASS:
5725 detect_partial_match(common, backtracks);
5726
5727 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5728 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5729 read_char_range(common, 0, bit, type == OP_NCLASS);
5730 #else
5731 read_char_range(common, 0, 255, type == OP_NCLASS);
5732 #endif
5733
5734 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5735 return cc + 32 / sizeof(pcre_uchar);
5736
5737 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5738 jump[0] = NULL;
5739 if (common->utf)
5740 {
5741 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5742 if (type == OP_CLASS)
5743 {
5744 add_jump(compiler, backtracks, jump[0]);
5745 jump[0] = NULL;
5746 }
5747 }
5748 #elif !defined COMPILE_PCRE8
5749 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5750 if (type == OP_CLASS)
5751 {
5752 add_jump(compiler, backtracks, jump[0]);
5753 jump[0] = NULL;
5754 }
5755 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5756
5757 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5758 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5759 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5760 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5761 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5762 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5763
5764 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5765 if (jump[0] != NULL)
5766 JUMPHERE(jump[0]);
5767 #endif
5768
5769 return cc + 32 / sizeof(pcre_uchar);
5770
5771 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: delegated to compile_xclass_matchingpath; the value read
   by GET(cc, 0) determines how far cc is advanced. */
5772 case OP_XCLASS:
5773 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5774 return cc + GET(cc, 0) - 1;
5775 #endif
5776
/* Moves STR_PTR backwards by the count stored at cc and fails if that would
   pass jit_arguments->begin. In UTF mode the move is done one character at
   a time with skip_char_back; otherwise it is a single subtraction of
   IN_UCHARS(length). */
5777 case OP_REVERSE:
5778 length = GET(cc, 0);
5779 if (length == 0)
5780 return cc + LINK_SIZE;
5781 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5782 #ifdef SUPPORT_UTF
5783 if (common->utf)
5784 {
5785 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5786 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5787 label = LABEL();
5788 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5789 skip_char_back(common);
5790 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5791 JUMPTO(SLJIT_C_NOT_ZERO, label);
5792 }
5793 else
5794 #endif
5795 {
5796 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5797 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5798 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5799 }
5800 check_start_used_ptr(common);
5801 return cc + LINK_SIZE;
5802 }
/* Only reached for an opcode type that none of the cases above handles. */
5803 SLJIT_ASSERT_STOP();
5804 return cc;
5805 }
5806
5807 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5808 {
5809 /* This function consumes at least one input character. */
5810 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5811 DEFINE_COMPILER;
5812 pcre_uchar *ccbegin = cc;
5813 compare_context context;
5814 int size;
5815
5816 context.length = 0;
5817 do
5818 {
5819 if (cc >= ccend)
5820 break;
5821
5822 if (*cc == OP_CHAR)
5823 {
5824 size = 1;
5825 #ifdef SUPPORT_UTF
5826 if (common->utf && HAS_EXTRALEN(cc[1]))
5827 size += GET_EXTRALEN(cc[1]);
5828 #endif
5829 }
5830 else if (*cc == OP_CHARI)
5831 {
5832 size = 1;
5833 #ifdef SUPPORT_UTF
5834 if (common->utf)
5835 {
5836 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5837 size = 0;
5838 else if (HAS_EXTRALEN(cc[1]))
5839 size += GET_EXTRALEN(cc[1]);
5840 }
5841 else
5842 #endif
5843 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5844 size = 0;
5845 }
5846 else
5847 size = 0;
5848
5849 cc += 1 + size;
5850 context.length += IN_UCHARS(size);
5851 }
5852 while (size > 0 && context.length <= 128);
5853
5854 cc = ccbegin;
5855 if (context.length > 0)
5856 {
5857 /* We have a fixed-length byte sequence. */
5858 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5859 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5860
5861 context.sourcereg = -1;
5862 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5863 context.ucharptr = 0;
5864 #endif
5865 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5866 return cc;
5867 }
5868
5869 /* A non-fixed length character will be checked if length == 0. */
5870 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5871 }
5872
5873 /* Forward declarations. */
5874 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5875 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5876
/* Allocates a zero-filled backtrack record of (size) bytes from the compiler's
   memory and pushes it onto the parent's list: the new record's prev field
   takes the old parent->top, its cc field is set to (ccstart), and parent->top
   is updated to point to it.

   The macro relies on three names being in scope at the point of use:
   compiler, parent and backtrack (the latter receives the new record). If the
   compiler reports an error after the allocation, the ENCLOSING FUNCTION
   returns (error) immediately. The size argument is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5889
/* Variant of the backtrack-record push for functions returning void.

   Allocates a zero-filled backtrack record of (size) bytes from the compiler's
   memory, sets its prev field to the old parent->top and its cc field to
   (ccstart), then makes it the new parent->top. The names compiler, parent and
   backtrack must be in scope at the point of use. If the compiler reports an
   error after the allocation, the ENCLOSING FUNCTION returns immediately
   without a value. The size argument is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5902
/* Casts the variable named backtrack (which must be in scope where the macro
   is used) to a pointer to the given record type. */
5903 #define BACKTRACK_AS(type) ((type *)backtrack)
5904
5905 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5906 {
5907 /* The OVECTOR offset goes to TMP2. */
5908 DEFINE_COMPILER;
5909 int count = GET2(cc, 1 + IMM2_SIZE);
5910 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5911 unsigned int offset;
5912 jump_list *found = NULL;
5913
5914 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5915
5916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5917
5918 count--;
5919 while (count-- > 0)
5920 {
5921 offset = GET2(slot, 0) << 1;
5922 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5923 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5924 slot += common->name_entry_size;
5925 }
5926
5927 offset = GET2(slot, 0) << 1;
5928 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5929 if (backtracks != NULL && !common->jscript_compat)
5930 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5931
5932 set_jumps(found, LABEL());
5933 }
5934
/* Emits the code that matches a back reference at the current position.

   common      the shared compiler state
   cc          points at the reference opcode. For OP_REF / OP_REFI the group
               number is read from cc; for any other opcode the code expects
               TMP2 to already hold the address of the group's OVECTOR pair
               (start at offset 0, end at offset sizeof(sljit_sw)).
   backtracks  jump list that collects the jumps taken on a failed match
   withchecks  when TRUE, code is emitted that detects an empty referenced
               string; for OP_REF / OP_REFI (and jscript_compat off) it also
               backtracks when the group's start equals OVECTOR(1)
   emptyfail   only relevant with withchecks: when TRUE an empty referenced
               string backtracks, otherwise it is accepted with the
               comparison skipped

   In UTF mode with SUPPORT_UCP a caseless OP_REFI is compared by calling
   do_utf_caselesscmp; every other combination uses the shared casefulcmp /
   caselesscmp routines. In the partial matching modes check_partial is
   invoked when the subject ends inside the referenced string. */
5935 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5936 {
5937 DEFINE_COMPILER;
5938 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5939 int offset = 0;
5940 struct sljit_jump *jump = NULL;
5941 struct sljit_jump *partial;
5942 struct sljit_jump *nopartial;
5943
/* TMP1 receives the start of the referenced string. */
5944 if (ref)
5945 {
5946 offset = GET2(cc, 1) << 1;
5947 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5948 /* OVECTOR(1) contains the "string begin - 1" constant. */
5949 if (withchecks && !common->jscript_compat)
5950 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
5951 }
5952 else
5953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5954
5955 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5956 if (common->utf && *cc == OP_REFI)
5957 {
5958 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
/* TMP2 receives the end of the referenced string. */
5959 if (ref)
5960 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
5961 else
5962 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5963
/* start == end: the referenced string is empty. */
5964 if (withchecks)
5965 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5966
5967 /* Needed to save important temporary registers. */
5968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5969 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
5970 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5971 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5972 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return value of the call as used below: 0 backtracks, 1 is treated as a
   partial match in the partial modes (and backtracks otherwise), any other
   value becomes the new STR_PTR. */
5973 if (common->mode == JIT_COMPILE)
5974 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5975 else
5976 {
5977 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5978 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5979 check_partial(common, FALSE);
5980 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5981 JUMPHERE(nopartial);
5982 }
5983 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5984 }
5985 else
5986 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5987 {
/* TMP2 = end - start, i.e. the length of the referenced string; the flags
   of this subtraction feed the empty-string test below. */
5988 if (ref)
5989 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
5990 else
5991 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5992
5993 if (withchecks)
5994 jump = JUMP(SLJIT_C_ZERO);
5995
/* Advance STR_PTR by the full length first; running past STR_END either
   backtracks (JIT_COMPILE) or is handled by the partial-match code below. */
5996 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5997 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5998 if (common->mode == JIT_COMPILE)
5999 add_jump(compiler, backtracks, partial);
6000
/* The shared compare routine is entered by a fast call; a non-zero TMP2
   afterwards is treated as a mismatch. */
6001 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6002 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6003
6004 if (common->mode != JIT_COMPILE)
6005 {
6006 nopartial = JUMP(SLJIT_JUMP);
6007 JUMPHERE(partial);
6008 /* TMP2 -= STR_END - STR_PTR */
6009 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6010 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6011 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6012 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6013 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6014 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6015 JUMPHERE(partial);
6016 check_partial(common, FALSE);
6017 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6018 JUMPHERE(nopartial);
6019 }
6020 }
6021
/* Resolve the empty-string jump: fail, or continue right here. */
6022 if (jump != NULL)
6023 {
6024 if (emptyfail)
6025 add_jump(compiler, backtracks, jump);
6026 else
6027 JUMPHERE(jump);
6028 }
6029 }
6030
/* Generates the matching path of a repeated back reference: the reference
opcode at cc is followed by one of the OP_CR* repeat opcodes handled by the
switch below. When *cc is OP_REF or OP_REFI the group is addressed directly
through OVECTOR(offset) / OVECTOR(offset + 1); for any other opcode the group
is looked up by the code emitted by compile_dnref_search. One occurrence of
the reference is matched by the code emitted by compile_ref_matchingpath.

Arguments:
  common   the compiler state
  cc       points to the reference opcode
  parent   not referenced directly in this function (presumably consumed by
           the PUSH_BACKTRACK macro - confirm against its definition)

Returns:   the address of the first opcode after the repeat opcode */
6031 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6032 {
6033 DEFINE_COMPILER;
6034 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6035 backtrack_common *backtrack;
6036 pcre_uchar type;
6037 int offset = 0;
6038 struct sljit_label *label;
6039 struct sljit_jump *zerolength;
6040 struct sljit_jump *jump = NULL;
6041 pcre_uchar *ccbegin = cc;
6042 int min = 0, max = 0;
6043 BOOL minimize;
6044
6045 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6046
/* For the direct form, offset is the OVECTOR index of the start of the
referenced group and offset + 1 is the index of its end. For the other form
cc is advanced by IMM2_SIZE so that the fixed operand offsets used below are
valid for both forms. */
6047 if (ref)
6048 offset = GET2(cc, 1) << 1;
6049 else
6050 cc += IMM2_SIZE;
6051 type = cc[1 + IMM2_SIZE];
6052
/* The low bit of the repeat opcode selects the minimizing variant. This
relies on OP_CRSTAR being even (asserted here) and presumably on each OP_CRMIN*
opcode directly following its greedy counterpart. */
6053 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6054 minimize = (type & 0x1) != 0;
/* Decode the repeat limits. In the code below max == 0 means that there is
no upper limit. */
6055 switch(type)
6056 {
6057 case OP_CRSTAR:
6058 case OP_CRMINSTAR:
6059 min = 0;
6060 max = 0;
6061 cc += 1 + IMM2_SIZE + 1;
6062 break;
6063 case OP_CRPLUS:
6064 case OP_CRMINPLUS:
6065 min = 1;
6066 max = 0;
6067 cc += 1 + IMM2_SIZE + 1;
6068 break;
6069 case OP_CRQUERY:
6070 case OP_CRMINQUERY:
6071 min = 0;
6072 max = 1;
6073 cc += 1 + IMM2_SIZE + 1;
6074 break;
6075 case OP_CRRANGE:
6076 case OP_CRMINRANGE:
6077 min = GET2(cc, 1 + IMM2_SIZE + 1);
6078 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6079 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6080 break;
6081 default:
6082 SLJIT_ASSERT_STOP();
6083 break;
6084 }
6085
/* Greedy (maximizing) repeat. */
6086 if (!minimize)
6087 {
6088 if (min == 0)
6089 {
/* Two stack slots: STACK(0) receives the current STR_PTR, STACK(1) is
cleared. */
6090 allocate_stack(common, 2);
6091 if (ref)
6092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6095 /* Temporary release of STR_PTR. */
6096 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6097 /* Handles both invalid and empty cases. Since the minimum repeat
6098 is zero, the invalid case is basically the same as an empty case. */
6099 if (ref)
6100 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6101 else
6102 {
/* No failure list is passed to the search here (NULL). Its result in TMP2 is
kept in the POSSESSIVE1 local and reloaded at the start of every iteration. */
6103 compile_dnref_search(common, ccbegin, NULL);
6104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6106 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6107 }
6108 /* Restore if not zero length. */
6109 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6110 }
6111 else
6112 {
/* At least one occurrence is required: a single slot is allocated and
cleared. */
6113 allocate_stack(common, 1);
6114 if (ref)
6115 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6116 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6117 if (ref)
6118 {
/* Fail when the start of the group equals the value stored in OVECTOR(1).
NOTE(review): the similar check in compile_ref_matchingpath is additionally
gated on !common->jscript_compat, this one is not - confirm this is intended. */
6119 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6120 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6121 }
6122 else
6123 {
/* Here the search receives the failure list of this backtrack. */
6124 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6125 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6127 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6128 }
6129 }
6130
/* An iteration counter is only maintained when min or max is greater than
one; it is kept in the POSSESSIVE0 local. */
6131 if (min > 1 || max > 1)
6132 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6133
/* Start of one greedy iteration. */
6134 label = LABEL();
6135 if (!ref)
6136 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
/* Match one occurrence. Failures are collected in backtrack->topbacktracks;
both trailing BOOL arguments are FALSE on this path. */
6137 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6138
6139 if (min > 1 || max > 1)
6140 {
6141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6142 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6143 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
/* While fewer than min occurrences were matched, iterate again without
pushing a string position. */
6144 if (min > 1)
6145 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
/* Leave the loop when max is reached, otherwise push STR_PTR and iterate
again. */
6146 if (max > 1)
6147 {
6148 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6149 allocate_stack(common, 1);
6150 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6151 JUMPTO(SLJIT_JUMP, label);
6152 JUMPHERE(jump);
6153 }
6154 }
6155
/* No upper limit: push STR_PTR and iterate again unconditionally. This loop
is only left through the jumps added by compile_ref_matchingpath. */
6156 if (max == 0)
6157 {
6158 /* Includes min > 1 case as well. */
6159 allocate_stack(common, 1);
6160 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6161 JUMPTO(SLJIT_JUMP, label);
6162 }
6163
/* The zero length case continues here, and this position is recorded as the
matchingpath label of the backtrack. */
6164 JUMPHERE(zerolength);
6165 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6166
6167 count_match(common);
6168 return cc;
6169 }
6170
/* Minimizing (lazy) repeat. Stack slots used below:
  STACK(0)  string position (0 until a position is stored)
  STACK(1)  iteration counter (not initialized for OP_CRMINSTAR)
  STACK(2)  TMP2 as left by compile_dnref_search (non OP_REF/OP_REFI form only) */
6171 allocate_stack(common, ref ? 2 : 3);
6172 if (ref)
6173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6175 if (type != OP_CRMINSTAR)
6176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6177
6178 if (min == 0)
6179 {
6180 /* Handles both invalid and empty cases. Since the minimum repeat
6181 is zero, the invalid case is basically the same as an empty case. */
6182 if (ref)
6183 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6184 else
6185 {
6186 compile_dnref_search(common, ccbegin, NULL);
6187 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6189 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6190 }
6191 /* Length is non-zero, we can match real repeats. */
/* Store the current position and jump over the first iteration: with min == 0
the lazy repeat initially matches nothing. */
6192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6193 jump = JUMP(SLJIT_JUMP);
6194 }
6195 else
6196 {
6197 if (ref)
6198 {
/* Same OVECTOR(1) comparison as on the greedy path (also not gated on
common->jscript_compat). */
6199 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6200 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6201 }
6202 else
6203 {
6204 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6207 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6208 }
6209 }
6210
/* Start of one lazy iteration. The min > 1 loop below jumps back to this
label (presumably the backtracking path re-enters here as well - confirm). */
6211 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Fail when the counter in STACK(1) has already reached max. */
6212 if (max > 0)
6213 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6214
6215 if (!ref)
6216 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
/* Match one occurrence; both trailing BOOL arguments are TRUE on this path.
The new position is then saved in STACK(0). */
6217 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6219
/* Update the counter: with min > 1 the increment is followed by a loop back
until min occurrences were matched; otherwise the counter is only incremented
when an upper limit exists. */
6220 if (min > 1)
6221 {
6222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6223 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6225 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6226 }
6227 else if (max > 0)
6228 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6229
/* Both the "skip the first iteration" jump and the zero length jump continue
here. */
6230 if (jump != NULL)
6231 JUMPHERE(jump);
6232 JUMPHERE(zerolength);
6233
6234 count_match(common);
6235 return cc;
6236 }
6237
/* Generates the matching path of the recursion opcode at cc. Its operand,
GET(cc, 1), is the offset of the called group relative to common->start.
Groups for which get_framesize reports no_stack are compiled inline; every
other group is invoked with a fast call, and one recurse_entry per distinct
start offset is kept in the common->entries list.

Arguments:
  common   the compiler state
  cc       points to the recursion opcode
  parent   not referenced directly in this function (presumably consumed by
           the PUSH_BACKTRACK macro - confirm against its definition)

Returns:   cc + 1 + LINK_SIZE, or NULL when the compiler reports an error
           after allocating a new recurse_entry */
6238 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6239 {
6240 DEFINE_COMPILER;
6241 backtrack_common *backtrack;
6242 recurse_entry *entry = common->entries;
6243 recurse_entry *prev = NULL;
6244 sljit_sw start = GET(cc, 1);
6245 pcre_uchar *start_cc;
6246 BOOL needs_control_head;
6247
6248 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6249
6250 /* Inlining simple patterns. */
/* The body of the group (from the opcode following its start up to, but not
including, the last 1 + LINK_SIZE units before bracketend) is compiled in
place, and the backtrack is flagged as inlined. */
6251 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6252 {
6253 start_cc = common->start + start;
6254 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6255 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6256 return cc + 1 + LINK_SIZE;
6257 }
6258
/* Look for an existing entry of this group; prev ends up as the last list
element when none is found. */
6259 while (entry != NULL)
6260 {
6261 if (entry->start == start)
6262 break;
6263 prev = entry;
6264 entry = entry->next;
6265 }
6266
/* First call of this group: create a new entry and append it to the list. */
6267 if (entry == NULL)
6268 {
6269 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6270 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6271 return NULL;
6272 entry->next = NULL;
6273 entry->entry = NULL;
6274 entry->calls = NULL;
6275 entry->start = start;
6276
6277 if (prev != NULL)
6278 prev->next = entry;
6279 else
6280 common->entries = entry;
6281 }
6282
/* Push the values that are saved across the call: OVECTOR(0) when
common->has_set_som is set and the local at common->mark_ptr when that is
non-zero. Two slots are used when both apply, one slot when only one does. */
6283 if (common->has_set_som && common->mark_ptr != 0)
6284 {
6285 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6286 allocate_stack(common, 2);
6287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6290 }
6291 else if (common->has_set_som || common->mark_ptr != 0)
6292 {
6293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6294 allocate_stack(common, 1);
6295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6296 }
6297
/* Call the group. When its entry label is not known yet, the call is queued
in entry->calls (presumably to be resolved once the group is generated). */
6298 if (entry->entry == NULL)
6299 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6300 else
6301 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6302 /* Leave if the match is failed. */
/* A zero value in TMP1 after the call is treated as failure. */
6303 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6304 return cc + 1 + LINK_SIZE;
6305 }
6306
/* Run-time helper that invokes the user supplied callout function. The code
emitted by compile_callout_matchingpath calls it with a partially filled
callout block.

Arguments:
  arguments      the jit_arguments of the running match
  callout_block  the block to complete; on entry its subject and
                 offset_vector fields hold pointers into the subject, which
                 are converted to start_match and current_position here
                 before both fields receive their final values
  jit_ovector    capture vector holding pointers into the subject

Returns:         0 when no callout function is set, otherwise the value
                 returned by the callout function */
6307 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6308 {
6309 const pcre_uchar *begin = arguments->begin;
6310 int *offset_vector = arguments->offsets;
6311 int offset_count = arguments->offset_count;
6312 int i;
6313
6314 if (PUBL(callout) == NULL)
6315 return 0;
6316
6317 callout_block->version = 2;
6318 callout_block->callout_data = arguments->callout_data;
6319
6320 /* Offsets in subject. */
/* The two pointer fields must be read before they are overwritten below. */
6321 callout_block->subject_length = arguments->end - arguments->begin;
6322 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6323 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6324 #if defined COMPILE_PCRE8
6325 callout_block->subject = (PCRE_SPTR)begin;
6326 #elif defined COMPILE_PCRE16
6327 callout_block->subject = (PCRE_SPTR16)begin;
6328 #elif defined COMPILE_PCRE32
6329 callout_block->subject = (PCRE_SPTR32)begin;
6330 #endif
6331
6332 /* Convert and copy the JIT offset vector to the offset_vector array. */
/* Pair 0 is skipped here and set to -1 after the loop. capture_top keeps the
index of the last pair whose start pointer is not below begin.
NOTE(review): the loop writes index i + 1, so it presumably relies on
offset_count being even - confirm against the caller. */
6333 callout_block->capture_top = 0;
6334 callout_block->offset_vector = offset_vector;
6335 for (i = 2; i < offset_count; i += 2)
6336 {
6337 offset_vector[i] = jit_ovector[i] - begin;
6338 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6339 if (jit_ovector[i] >= begin)
6340 callout_block->capture_top = i;
6341 }
6342
/* Turn the vector index into a pair number plus one. */
6343 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6344 if (offset_count > 0)
6345 offset_vector[0] = -1;
6346 if (offset_count > 1)
6347 offset_vector[1] = -1;
6348 return (*PUBL(callout))(callout_block);
6349 }
6350
6351 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
6352 #define CALLOUT_ARG_SIZE \
6353 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6354
/* Offset of a callout block field relative to STACK_TOP, for a block that
starts CALLOUT_ARG_SIZE bytes below STACK_TOP. */
6355 #define CALLOUT_ARG_OFFSET(arg) \
6356 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6357
/* Generates the matching path of the callout opcode at cc: a callout block is
built on the stack, do_callout is called, and its return value decides how
matching continues (positive: backtrack, negative: forced quit, zero: go on).

Arguments:
  common   the compiler state
  cc       points to the callout opcode; cc[1] is the callout number,
           GET(cc, 2) the pattern position and GET(cc, 2 + LINK_SIZE) the
           length of the next item
  parent   not referenced directly in this function (presumably consumed by
           the PUSH_BACKTRACK macro - confirm against its definition)

Returns:   cc + 2 + 2 * LINK_SIZE */
6358 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6359 {
6360 DEFINE_COMPILER;
6361 backtrack_common *backtrack;
6362
6363 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6364
/* Reserve room for the callout block on the stack. */
6365 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6366
/* TMP1 holds ARGUMENTS from here on; it is used below to read mark_ptr. */
6367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6368 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6369 SLJIT_ASSERT(common->capture_last_ptr != 0);
6370 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6371 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6372
6373 /* These pointer sized fields temporarily store internal variables:
offset_vector receives STR_PTR and subject receives OVECTOR(0). do_callout
converts them before it sets the final field values. */
6374 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6377
/* The mark field is copied from jit_arguments when common->mark_ptr is
non-zero, otherwise it is set to 0. */
6378 if (common->mark_ptr != 0)
6379 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6380 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6381 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6383
6384 /* Needed to save important temporary registers. */
/* STACK_TOP is kept in LOCALS0 across the call. SLJIT_R1 is set to the start
of the callout block and SLJIT_R2 to the address of the OVECTOR_START local.
The first argument is presumably the ARGUMENTS value loaded into TMP1 above
(this assumes TMP1 is SLJIT_R0 - confirm). */
6385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6386 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6387 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6388 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout returns an int: move it with SLJIT_MOV_SI before it is compared,
then restore STACK_TOP and release the callout block. */
6389 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6390 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6391 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6392
6393 /* Check return value. */
/* Greater than zero: backtrack. Less than zero: jump to the forced quit
code, through the forced_quit list when its label does not exist yet. Zero:
fall through and continue matching. */
6394 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6395 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6396 if (common->forced_quit_label == NULL)
6397 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6398 else
6399 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6400 return cc + 2 + 2 * LINK_SIZE;
6401 }
6402
6403 #undef CALLOUT_ARG_SIZE
6404 #undef CALLOUT_ARG_OFFSET
6405
6406 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6407 {
6408 DEFINE_COMPILER;
6409 int framesize;
6410 int extrasize;
6411 BOOL needs_control_head;
6412 int private_data_ptr;
6413 backtrack_common altbacktrack;
6414 pcre_uchar *ccbegin;
6415 pcre_uchar opcode;
6416 pcre_uchar bra = OP_BRA;
6417 jump_list *tmp = NULL;
6418 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6419 jump_list **found;
6420 /* Saving previous accept variables. */
6421 BOOL save_local_exit = common->local_exit;
6422 BOOL save_positive_assert = common->positive_assert;
6423 then_trap_backtrack *save_then_trap = common->then_trap;
6424 struct sljit_label *save_quit_label = common->quit_label;
6425 struct sljit_label *save_accept_label = common->accept_label;
6426 jump_list *save_quit = common->quit;
6427 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6428 jump_list *save_accept = common->accept;
6429 struct sljit_jump *jump;
6430 struct sljit_jump *brajump = NULL;
6431
6432 /* Assert captures then. */
6433 common->then_trap = NULL;
6434
6435 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6436 {
6437 SLJIT_ASSERT(!conditional);
6438 bra = *cc;
6439 cc++;
6440 }
6441 private_data_ptr = PRIVATE_DATA(cc);
6442 SLJIT_ASSERT(private_data_ptr != 0);
6443 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6444 backtrack->framesize = framesize;
6445 backtrack->private_data_ptr = private_data_ptr;
6446 opcode = *cc;
6447 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6448 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6449 ccbegin = cc;
6450 cc += GET(cc, 1);
6451
6452 if (bra == OP_BRAMINZERO)
6453 {
6454 /* This is a braminzero backtrack path. */
6455 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6456 free_stack(common, 1);
6457 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6458 }
6459
6460 if (framesize < 0)
6461 {
6462 extrasize = needs_control_head ? 2 : 1;
6463 if (framesize == no_frame)
6464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6465 allocate_stack(common, extrasize);
6466 if (needs_control_head)
6467 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6469 if (needs_control_head)
6470 {
6471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6473 }
6474 }
6475 else
6476 {
6477 extrasize = needs_control_head ? 3 : 2;
6478 allocate_stack(common, framesize + extrasize);
6479 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6480 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6482 if (needs_control_head)
6483 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6485 if (needs_control_head)
6486 {
6487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6490 }
6491 else
6492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6493 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6494 }
6495
6496 memset(&altbacktrack, 0, sizeof(backtrack_common));
6497 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6498 {
6499 /* Negative assert is stronger than positive assert. */
6500 common->local_exit = TRUE;
6501 common->quit_label = NULL;
6502 common->quit = NULL;
6503 common->positive_assert = FALSE;
6504 }
6505 else
6506 common->positive_assert = TRUE;
6507 common->positive_assert_quit = NULL;
6508
6509 while (1)
6510 {
6511 common->accept_label = NULL;
6512 common->accept = NULL;
6513 altbacktrack.top = NULL;
6514 altbacktrack.topbacktracks = NULL;
6515
6516 if (*ccbegin == OP_ALT)
6517 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6518
6519 altbacktrack.cc = ccbegin;
6520 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6521 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6522 {
6523 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6524 {
6525 common->local_exit = save_local_exit;
6526 common->quit_label = save_quit_label;
6527 common->quit = save_quit;
6528 }
6529 common->positive_assert = save_positive_assert;
6530 common->then_trap = save_then_trap;
6531 common->accept_label = save_accept_label;
6532 common->positive_assert_quit = save_positive_assert_quit;
6533 common->accept = save_accept;
6534 return NULL;
6535 }
6536 common->accept_label = LABEL();
6537 if (common->accept != NULL)
6538 set_jumps(common->accept, common->accept_label);
6539
6540 /* Reset stack. */
6541 if (framesize < 0)
6542 {
6543 if (framesize == no_frame)
6544 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6545 else
6546 free_stack(common, extrasize);
6547 if (needs_control_head)
6548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6549 }
6550 else
6551 {
6552 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6553 {
6554 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6555 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6556 if (needs_control_head)
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6558 }
6559 else
6560 {
6561 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6562 if (needs_control_head)
6563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6564 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6565 }
6566 }
6567
6568 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6569 {
6570 /* We know that STR_PTR was stored on the top of the stack. */
6571 if (conditional)
6572 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6573 else if (bra == OP_BRAZERO)
6574 {
6575 if (framesize < 0)
6576 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6577 else
6578 {
6579 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6580 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6582 }
6583 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6585 }
6586 else if (framesize >= 0)
6587 {
6588 /* For OP_BRA and OP_BRAMINZERO. */
6589 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6590 }
6591 }
6592 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6593
6594 compile_backtrackingpath(common, altbacktrack.top);
6595 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6596 {
6597 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6598 {
6599 common->local_exit = save_local_exit;
6600 common->quit_label = save_quit_label;
6601 common->quit = save_quit;
6602 }
6603 common->positive_assert = save_positive_assert;
6604 common->then_trap = save_then_trap;
6605 common->accept_label = save_accept_label;
6606 common->positive_assert_quit = save_positive_assert_quit;
6607 common->accept = save_accept;
6608 return NULL;
6609 }
6610 set_jumps(altbacktrack.topbacktracks, LABEL());
6611
6612 if (*cc != OP_ALT)
6613 break;
6614
6615 ccbegin = cc;
6616 cc += GET(cc, 1);
6617 }
6618
6619 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6620 {
6621 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6622 /* Makes the check less complicated below. */
6623 common->positive_assert_quit = common->quit;
6624 }
6625
6626 /* None of them matched. */
6627 if (common->positive_assert_quit != NULL)
6628 {
6629 jump = JUMP(SLJIT_JUMP);
6630 set_jumps(common->positive_assert_quit, LABEL());
6631 SLJIT_ASSERT(framesize != no_stack);
6632 if (framesize < 0)
6633 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6634 else
6635 {
6636 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6637 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6638 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6639 }
6640 JUMPHERE(jump);
6641 }
6642
6643 if (needs_control_head)
6644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6645
6646 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6647 {
6648 /* Assert is failed. */
6649 if (conditional || bra == OP_BRAZERO)
6650 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6651
6652 if (framesize < 0)
6653 {
6654 /* The topmost item should be 0. */
6655 if (bra == OP_BRAZERO)
6656 {
6657 if (extrasize == 2)
6658 free_stack(common, 1);
6659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6660 }
6661 else
6662 free_stack(common, extrasize);
6663 }
6664 else
6665 {
6666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6667 /* The topmost item should be 0. */
6668 if (bra == OP_BRAZERO)
6669 {
6670 free_stack(common, framesize + extrasize - 1);
6671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6672 }
6673 else
6674 free_stack(common, framesize + extrasize);
6675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6676 }
6677 jump = JUMP(SLJIT_JUMP);
6678 if (bra != OP_BRAZERO)
6679 add_jump(compiler, target, jump);
6680
6681 /* Assert is successful. */
6682 set_jumps(tmp, LABEL());
6683 if (framesize < 0)
6684 {
6685 /* We know that STR_PTR was stored on the top of the stack. */
6686 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6687 /* Keep the STR_PTR on the top of the stack. */
6688 if (bra == OP_BRAZERO)
6689 {
6690 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6691 if (extrasize == 2)
6692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6693 }
6694 else if (bra == OP_BRAMINZERO)
6695 {
6696 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6697 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6698 }
6699 }
6700 else
6701 {
6702 if (bra == OP_BRA)
6703 {
6704 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6705 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6707 }
6708 else
6709 {
6710 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6711 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6712 if (extrasize == 2)
6713 {
6714 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6715 if (bra == OP_BRAMINZERO)
6716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6717 }
6718 else
6719 {
6720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6722 }
6723 }
6724 }
6725
6726 if (bra == OP_BRAZERO)
6727 {
6728 backtrack->matchingpath = LABEL();
6729 SET_LABEL(jump, backtrack->matchingpath);
6730 }
6731 else if (bra == OP_BRAMINZERO)
6732 {
6733 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6734 JUMPHERE(brajump);
6735 if (framesize >= 0)
6736 {
6737 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6738 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6739 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6740 }
6741 set_jumps(backtrack->common.topbacktracks, LABEL());
6742 }
6743 }
6744 else
6745 {
6746 /* AssertNot is successful. */
6747 if (framesize < 0)
6748 {
6749 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6750 if (bra != OP_BRA)
6751 {
6752 if (extrasize == 2)
6753 free_stack(common, 1);
6754 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6755 }
6756 else
6757 free_stack(common, extrasize);
6758 }
6759 else
6760 {
6761 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6763 /* The topmost item should be 0. */
6764 if (bra != OP_BRA)
6765 {
6766 free_stack(common, framesize + extrasize - 1);
6767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6768 }
6769 else
6770 free_stack(common, framesize + extrasize);