/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1591 - (show annotations)
Tue Aug 11 05:41:09 2015 UTC (4 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 331580 byte(s)
Error occurred while calculating annotation data.
The JIT compiler should not check repeats after a {0,1} repeat byte code.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called as the matching path (expected path), and the other is called as
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice, that the order of backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block of the generated code: the jit_function type declared
later in this file takes a pointer to this structure. The members are
grouped so that all pointers come before the narrower integer members. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping for compiled code. The three arrays have one slot for each
of the JIT_NUMBER_OF_COMPILE_MODES compile modes; the remaining members
are shared by all modes. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list of stubs. Each stub records one jump
(start) and one label (quit). */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list which pairs a label with a pointer to an
sljit_uw. NOTE(review): update_addr presumably receives the address of
the label once the code is generated -- confirm at the use site. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably used where a frame
size is expected (several structures in this file document a framesize
that is "less than 0 if a frame is not needed") -- confirm against
get_framesize. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Type tags for mark and then-trap entries. NOTE(review): presumably
stored in the control verb chain (see control_head_ptr) -- confirm. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer to a function which takes a jit_arguments block and returns
an int. NOTE(review): presumably the type of the generated code entry
point -- confirm where it is called. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants: every *_backtrack structure in this file starts
with a "backtrack_common common" member. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack data for assertions. bracket_backtrack can also point to
one of these through its u.assert member. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack data for brackets. Which member of the union u is valid
depends on the kind of bracket, as noted on each member. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack data which, judging by its name, belongs to the possessive
bracket opcodes (OP_BRAPOS and friends) -- TODO confirm at the use site. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack data holding a single matching path label. NOTE(review):
presumably used for OP_BRAMINZERO -- confirm at the use site. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack data of an iterator: the common part plus the label where
the next iteration starts. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Node of a singly linked list of recursion entries (compiler_common
keeps the list in its entries member). */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack data with one flag. NOTE(review): presumably for OP_RECURSE,
with inlined_pattern telling whether the recursed pattern was compiled
in place -- confirm at the use site. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* OP_THEN_TRAP is an opcode value local to this file, defined as
OP_TABLE_LENGTH. NOTE(review): presumably chosen because it is one past
the last real opcode -- confirm against the opcode table. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack data of a then-trap (see the then_trap member of
compiler_common: "current position where a THEN must jump"). */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by the whole code generator. Most functions in this file
receive it as "common", and the macros OVECTOR, OVECTOR_PRIV and
PRIVATE_DATA read it through a variable of that name. The int members
whose names end in _ptr hold positions in the local space; several of
them are assigned from ovector_start in check_opcode_types. */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
361 /* TRUE, when minlength is greater than 0.
NOTE(review): the field name suggests the opposite condition (the match
might be empty when minlength is 0), and check_opcode_types also forces
this flag to TRUE when OP_SET_SOM is seen -- confirm against the place
where the flag is initialised. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
434 /* For byte_sequence_compare. */
435
/* Comparison state. The members after sourcereg exist only when
SLJIT_UNALIGNED is enabled: the unions c and oc overlay the same storage
as an int, a 16 bit value, or an array of code units whose element type
and count depend on the code unit width being compiled (4 x 8 bit,
2 x 16 bit or 1 x 32 bit). */
436 typedef struct compare_context {
437 int length;
438 int sourcereg;
439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
440 int ucharptr;
441 union {
442 sljit_si asint;
443 sljit_uh asushort;
444 #if defined COMPILE_PCRE8
445 sljit_ub asbyte;
446 sljit_ub asuchars[4];
447 #elif defined COMPILE_PCRE16
448 sljit_uh asuchars[2];
449 #elif defined COMPILE_PCRE32
450 sljit_ui asuchars[1];
451 #endif
452 } c;
453 union {
454 sljit_si asint;
455 sljit_uh asushort;
456 #if defined COMPILE_PCRE8
457 sljit_ub asbyte;
458 sljit_ub asuchars[4];
459 #elif defined COMPILE_PCRE16
460 sljit_uh asuchars[2];
461 #elif defined COMPILE_PCRE32
462 sljit_ui asuchars[1];
463 #endif
464 } oc;
465 #endif
466 } compare_context;
467
468 /* Undefine sljit macros. */
469 #undef CMP
470
471 /* Used for accessing the elements of the stack. STACK(i) is a negative
byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
473
/* Names used by the code generator for fixed sljit registers. Note that
TMP2 and TMP3 map to R2 and R3, while R1 is taken by STACK_TOP. */
474 #define TMP1 SLJIT_R0
475 #define TMP2 SLJIT_R2
476 #define TMP3 SLJIT_R3
477 #define STR_PTR SLJIT_S0
478 #define STR_END SLJIT_S1
479 #define STACK_TOP SLJIT_R1
480 #define STACK_LIMIT SLJIT_S2
481 #define COUNT_MATCH SLJIT_S3
482 #define ARGUMENTS SLJIT_S4
483 #define RETURN_ADDR SLJIT_R4
484
485 /* Local space layout. Every slot is one sljit_sw wide; the first five
slots (0..4) are fixed. OVECTOR_START, OVECTOR, OVECTOR_PRIV and
PRIVATE_DATA expand to expressions which read a variable named "common". */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0 (0 * sizeof(sljit_sw))
488 #define LOCALS1 (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet been reached. */
498 #define OVECTOR_START (common->ovector_start)
499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Looks up private_data_ptrs by the distance of cc from the first byte code. */
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
/* sljit move opcodes selected by the code unit width being compiled
(COMPILE_PCRE8 / COMPILE_PCRE16 / COMPILE_PCRE32). Exactly one of the
three must be defined, otherwise compilation stops with an #error. */
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
516 /* Shortcuts. Every macro except DEFINE_COMPILER expands to an sljit call
on a local variable named "compiler"; DEFINE_COMPILER declares that
variable from common->compiler. CMP replaces the sljit macro of the same
name, which is undefined earlier in this file. */
517 #define DEFINE_COMPILER \
518 struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
523 #define LABEL() \
524 sljit_emit_label(compiler)
525 #define JUMP(type) \
526 sljit_emit_jump(compiler, (type))
527 #define JUMPTO(type, label) \
528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
529 #define JUMPHERE(jump) \
530 sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532 sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540 sljit_get_local_base(compiler, (dst), (dstw), (offset))
541
542 #define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* ones_in_half_byte[v] is the number of 1 bits in the 4 bit value v
(0 <= v <= 15). The table is a pure lookup table, so it is declared
const: it can be placed in read-only storage and accidental writes are
rejected by the compiler. */
static const int ones_in_half_byte[16] = {
/* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
/* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
572 /* Functions which might need modification for all new supported opcodes:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
/* Returns the address of the item which follows the opcode at cc, or
NULL when the opcode cannot be handled (OP_ANYBYTE in UTF mode, or an
opcode which is not listed in the switch). The length rules are:
  - first group: the fixed length stored in PRIV(OP_lengths);
  - opcodes which carry a character: the fixed length, plus the extra
    code units reported by GET_EXTRALEN when common->utf is set and
    HAS_EXTRALEN is true for the last code unit skipped;
  - OP_TYPE* repeats: the fixed length minus one;
  - OP_XCLASS: the length stored in the opcode itself (GET(cc, 1));
  - OP_MARK and the *_ARG verbs: 1 + 2 + cc[1].
common is only read inside the SUPPORT_UTF sections. */
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
/* Fixed length opcodes. */
675 return cc + PRIV(OP_lengths)[*cc];
676
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
/* The fixed length covers one code unit of the character; in UTF mode
the remaining code units of the character are skipped as well. */
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
/* NOTE(review): stops one code unit early, presumably so that the
character type opcode stored at the end of the item is visited as an
opcode of its own -- confirm. */
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
/* cc[1] is added to the length, so it presumably holds the length of
the verb argument (TODO confirm against the opcode description). */
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
/* Walks the byte code in [cc, ccend) once and records in common which
features the pattern uses. Returns TRUE when the whole range was walked.
Returns FALSE when next_opcode returns NULL for an opcode, or when an
OP_COND / OP_SCOND is directly followed by OP_CALLOUT.

Side effects on common:
  - has_set_som, might_be_empty, has_then, has_skip_arg and
    needs_start_ptr are set to TRUE by the corresponding opcodes;
  - optimized_cbracket[n] is cleared for every capturing bracket n which
    is referenced by a back reference or a condition, or which is an
    OP_CBRAPOS / OP_SCBRAPOS;
  - recursive_head_ptr, capture_last_ptr and mark_ptr each receive the
    current ovector_start the first time they are needed, and
    ovector_start is advanced by one sljit_sw;
  - control_head_ptr is set to 1 (NOTE(review): presumably only a
    "needed" marker which is replaced by a real position later --
    confirm in the caller). */
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
/* The opcode holds an index into the name table and an entry count;
every bracket number found in those entries is cleared. */
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
/* OP_THEN_ARG, OP_PRUNE_ARG and OP_MARK share their tail through
fall-through: each case adds its own flags and continues below. */
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
/* Every other opcode is only stepped over. */
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
/* Checks whether the bracket at begin is followed by identical copies of
itself in the byte code, and if so records the repetition in
common->private_data_ptrs. Returns TRUE when a repeat is (or already
was) recorded for this bracket, FALSE otherwise. Only brackets closed by
a plain OP_KET are considered.

Two shapes are recognised:
  - "min" identical brackets in a row (the bracket itself included);
  - after those, "max" nested optional copies, each introduced by
    OP_BRAZERO or OP_BRAMINZERO.

A repeat is described by three consecutive private_data_ptrs entries
which start at the index of the code unit following an OP_KET opcode:
  [0] distance from the end of that bracket to the end of the repeated
      code,
  [1] OP_EXACT, OP_UPTO or OP_MINUPTO,
  [2] the repeat count. */
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
/* Count the identical copies which directly follow the bracket. */
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
/* Exactly two copies are never converted. */
954 if (min == 2)
955 return FALSE;
956
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
961 type = *next;
/* Count the nesting levels of the form: type, OP_BRA, copy of the bracket. */
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
/* The innermost optional copy follows the type opcode directly, without
a wrapping OP_BRA. */
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
/* Each of the max wrapping OP_BRA brackets must be closed by an OP_KET. */
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
/* The last fixed copy now belongs to the bounded repeat: take it out of
the fixed count and move max_end back to the start of that copy. */
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of case labels for the single character repeat opcodes, to be
used inside a switch on the opcode. In set_private_data_ptrs the
CASE_ITERATOR_PRIVATE_DATA_1 and CASE_ITERATOR_TYPE_PRIVATE_DATA_1 groups
select space = 1, while CASE_ITERATOR_PRIVATE_DATA_2A and
CASE_ITERATOR_PRIVATE_DATA_2B select space = 2. The 2B groups contain
the UPTO variants, for which set_private_data_ptrs accounts for an extra
IMM2_SIZE in the opcode size. */
1008 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1009 case OP_MINSTAR: \
1010 case OP_MINPLUS: \
1011 case OP_QUERY: \
1012 case OP_MINQUERY: \
1013 case OP_MINSTARI: \
1014 case OP_MINPLUSI: \
1015 case OP_QUERYI: \
1016 case OP_MINQUERYI: \
1017 case OP_NOTMINSTAR: \
1018 case OP_NOTMINPLUS: \
1019 case OP_NOTQUERY: \
1020 case OP_NOTMINQUERY: \
1021 case OP_NOTMINSTARI: \
1022 case OP_NOTMINPLUSI: \
1023 case OP_NOTQUERYI: \
1024 case OP_NOTMINQUERYI:
1025
1026 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1027 case OP_STAR: \
1028 case OP_PLUS: \
1029 case OP_STARI: \
1030 case OP_PLUSI: \
1031 case OP_NOTSTAR: \
1032 case OP_NOTPLUS: \
1033 case OP_NOTSTARI: \
1034 case OP_NOTPLUSI:
1035
1036 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1037 case OP_UPTO: \
1038 case OP_MINUPTO: \
1039 case OP_UPTOI: \
1040 case OP_MINUPTOI: \
1041 case OP_NOTUPTO: \
1042 case OP_NOTMINUPTO: \
1043 case OP_NOTUPTOI: \
1044 case OP_NOTMINUPTOI:
1045
1046 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1047 case OP_TYPEMINSTAR: \
1048 case OP_TYPEMINPLUS: \
1049 case OP_TYPEQUERY: \
1050 case OP_TYPEMINQUERY:
1051
1052 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1053 case OP_TYPESTAR: \
1054 case OP_TYPEPLUS:
1055
1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1057 case OP_TYPEUPTO: \
1058 case OP_TYPEMINUPTO:
1059
/* Walks the compiled pattern from common->start up to ccend and assigns
private data offsets to the opcodes that need them. Each assignment is stored
in common->private_data_ptrs, indexed by the opcode's distance from
common->start. The running offset starts at *private_data_start and its final
value is written back through that pointer. The walk stops early once the
running offset exceeds SLJIT_MAX_LOCAL_SIZE. */
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
/* While cc < end, single character iterators are not given a private slot
(see the "cc >= end" test after the switch). */
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
/* space: number of private words wanted by an iterator.
size: code units to skip; a negative value means the item ends with a
character, so its UTF extra length must be skipped as well.
bracketlen: length of a bracket opcode's header. */
1066 int space, size, bracketlen;
/* Cleared for exactly one iteration after an OP_BRAZERO-like opcode, so the
bracket that follows is not passed to detect_repeat(). */
1067 BOOL repeat_check = TRUE;
1068
1069 while (cc < ccend)
1070 {
1071 space = 0;
1072 size = 0;
1073 bracketlen = 0;
1074 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1075 break;
1076
1077 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1078 {
1079 if (detect_repeat(common, cc))
1080 {
1081 /* These brackets are converted to repeats, so no global
1082 based single character repeat is allowed. */
1083 if (cc >= end)
1084 end = bracketend(cc);
1085 }
1086 }
1087 repeat_check = TRUE;
1088
1089 switch(*cc)
1090 {
1091 case OP_KET:
/* A non-zero entry at cc + 1 gives this KET its own slot, and the same entry
is then used as an extra distance to skip.
NOTE(review): that entry is presumably filled in by detect_repeat() - confirm. */
1092 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1093 {
1094 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1095 private_data_ptr += sizeof(sljit_sw);
1096 cc += common->private_data_ptrs[cc + 1 - common->start];
1097 }
1098 cc += 1 + LINK_SIZE;
1099 break;
1100
/* These brackets always get one private word. */
1101 case OP_ASSERT:
1102 case OP_ASSERT_NOT:
1103 case OP_ASSERTBACK:
1104 case OP_ASSERTBACK_NOT:
1105 case OP_ONCE:
1106 case OP_ONCE_NC:
1107 case OP_BRAPOS:
1108 case OP_SBRA:
1109 case OP_SBRAPOS:
1110 case OP_SCOND:
1111 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1112 private_data_ptr += sizeof(sljit_sw);
1113 bracketlen = 1 + LINK_SIZE;
1114 break;
1115
/* Same as above, but the header also carries an IMM2_SIZE operand. */
1116 case OP_CBRAPOS:
1117 case OP_SCBRAPOS:
1118 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1119 private_data_ptr += sizeof(sljit_sw);
1120 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1121 break;
1122
1123 case OP_COND:
1124 /* Might be a hidden SCOND. */
/* Only a COND whose first link leads to OP_KETRMAX / OP_KETRMIN gets a slot. */
1125 alternative = cc + GET(cc, 1);
1126 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1127 {
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 }
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
/* The remaining brackets get no slot here; only their header is skipped. */
1134 case OP_BRA:
1135 bracketlen = 1 + LINK_SIZE;
1136 break;
1137
1138 case OP_CBRA:
1139 case OP_SCBRA:
1140 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1141 break;
1142
1143 case OP_BRAZERO:
1144 case OP_BRAMINZERO:
1145 case OP_BRAPOSZERO:
/* Suppress the detect_repeat() check for the opcode that follows. */
1146 repeat_check = FALSE;
1147 size = 1;
1148 break;
1149
1150 CASE_ITERATOR_PRIVATE_DATA_1
1151 space = 1;
1152 size = -2;
1153 break;
1154
1155 CASE_ITERATOR_PRIVATE_DATA_2A
1156 space = 2;
1157 size = -2;
1158 break;
1159
1160 CASE_ITERATOR_PRIVATE_DATA_2B
1161 space = 2;
1162 size = -(2 + IMM2_SIZE);
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1166 space = 1;
1167 size = 1;
1168 break;
1169
1170 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
/* No slot is reserved when the repeated type is OP_ANYNL or OP_EXTUNI. */
1171 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1172 space = 2;
1173 size = 1;
1174 break;
1175
1176 case OP_TYPEUPTO:
/* Same exception as above; the type follows the IMM2_SIZE count. */
1177 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1178 space = 2;
1179 size = 1 + IMM2_SIZE;
1180 break;
1181
1182 case OP_TYPEMINUPTO:
1183 space = 2;
1184 size = 1 + IMM2_SIZE;
1185 break;
1186
1187 case OP_CLASS:
1188 case OP_NCLASS:
/* size is 0 on entry, so this is the opcode plus a 32 byte bitmap; the
quantifier (if any) that follows decides the space needed. */
1189 size += 1 + 32 / sizeof(pcre_uchar);
1190 space = get_class_iterator_size(cc + size);
1191 break;
1192
1193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1194 case OP_XCLASS:
1195 size = GET(cc, 1);
1196 space = get_class_iterator_size(cc + size);
1197 break;
1198 #endif
1199
1200 default:
/* Everything else needs no private data: just move to the next opcode. */
1201 cc = next_opcode(common, cc);
1202 SLJIT_ASSERT(cc != NULL);
1203 break;
1204 }
1205
1206 /* Character iterators, which are not inside a repeated bracket,
1207 get a private slot instead of allocating it on the stack. */
1208 if (space > 0 && cc >= end)
1209 {
1210 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1211 private_data_ptr += sizeof(sljit_sw) * space;
1212 }
1213
1214 if (size != 0)
1215 {
1216 if (size < 0)
1217 {
/* Negative size: skip -size code units, then the extra code units of a
multi-unit UTF character whose first unit is now at cc[-1]. */
1218 cc += -size;
1219 #ifdef SUPPORT_UTF
1220 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1221 #endif
1222 }
1223 else
1224 cc += size;
1225 }
1226
1227 if (bracketlen > 0)
1228 {
/* Entering a bracket outside the current region: the region becomes this
bracket, unless the bracket is closed by a plain OP_KET, in which case the
region is cleared. */
1229 if (cc >= end)
1230 {
1231 end = bracketend(cc);
1232 if (end[-1 - LINK_SIZE] == OP_KET)
1233 end = NULL;
1234 }
1235 cc += bracketlen;
1236 }
1237 }
1238 *private_data_start = private_data_ptr;
1239 }
1240
1241 /* Returns with a frame_types (always < 0) if no need for frame. */
1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1243 {
1244 int length = 0;
1245 int possessive = 0;
1246 BOOL stack_restore = FALSE;
1247 BOOL setsom_found = recursive;
1248 BOOL setmark_found = recursive;
1249 /* The last capture is a local variable even for recursions. */
1250 BOOL capture_last_found = FALSE;
1251
1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1253 SLJIT_ASSERT(common->control_head_ptr != 0);
1254 *needs_control_head = TRUE;
1255 #else
1256 *needs_control_head = FALSE;
1257 #endif
1258
1259 if (ccend == NULL)
1260 {
1261 ccend = bracketend(cc) - (1 + LINK_SIZE);
1262 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1263 {
1264 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1265 /* This is correct regardless of common->capture_last_ptr. */
1266 capture_last_found = TRUE;
1267 }
1268 cc = next_opcode(common, cc);
1269 }
1270
1271 SLJIT_ASSERT(cc != NULL);
1272 while (cc < ccend)
1273 switch(*cc)
1274 {
1275 case OP_SET_SOM:
1276 SLJIT_ASSERT(common->has_set_som);
1277 stack_restore = TRUE;
1278 if (!setsom_found)
1279 {
1280 length += 2;
1281 setsom_found = TRUE;
1282 }
1283 cc += 1;
1284 break;
1285
1286 case OP_MARK:
1287 case OP_PRUNE_ARG:
1288 case OP_THEN_ARG:
1289 SLJIT_ASSERT(common->mark_ptr != 0);
1290 stack_restore = TRUE;
1291 if (!setmark_found)
1292 {
1293 length += 2;
1294 setmark_found = TRUE;
1295 }
1296 if (common->control_head_ptr != 0)
1297 *needs_control_head = TRUE;
1298 cc += 1 + 2 + cc[1];
1299 break;
1300
1301 case OP_RECURSE:
1302 stack_restore = TRUE;
1303 if (common->has_set_som && !setsom_found)
1304 {
1305 length += 2;
1306 setsom_found = TRUE;
1307 }
1308 if (common->mark_ptr != 0 && !setmark_found)
1309 {
1310 length += 2;
1311 setmark_found = TRUE;
1312 }
1313 if (common->capture_last_ptr != 0 && !capture_last_found)
1314 {
1315 length += 2;
1316 capture_last_found = TRUE;
1317 }
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 case OP_CBRA:
1322 case OP_CBRAPOS:
1323 case OP_SCBRA:
1324 case OP_SCBRAPOS:
1325 stack_restore = TRUE;
1326 if (common->capture_last_ptr != 0 && !capture_last_found)
1327 {
1328 length += 2;
1329 capture_last_found = TRUE;
1330 }
1331 length += 3;
1332 cc += 1 + LINK_SIZE + IMM2_SIZE;
1333 break;
1334
1335 case OP_THEN:
1336 stack_restore = TRUE;
1337 if (common->control_head_ptr != 0)
1338 *needs_control_head = TRUE;
1339 cc ++;
1340 break;
1341
1342 default:
1343 stack_restore = TRUE;
1344 /* Fall through. */
1345
1346 case OP_NOT_WORD_BOUNDARY:
1347 case OP_WORD_BOUNDARY:
1348 case OP_NOT_DIGIT:
1349 case OP_DIGIT:
1350 case OP_NOT_WHITESPACE:
1351 case OP_WHITESPACE:
1352 case OP_NOT_WORDCHAR:
1353 case OP_WORDCHAR:
1354 case OP_ANY:
1355 case OP_ALLANY:
1356 case OP_ANYBYTE:
1357 case OP_NOTPROP:
1358 case OP_PROP:
1359 case OP_ANYNL:
1360 case OP_NOT_HSPACE:
1361 case OP_HSPACE:
1362 case OP_NOT_VSPACE:
1363 case OP_VSPACE:
1364 case OP_EXTUNI:
1365 case OP_EODN:
1366 case OP_EOD:
1367 case OP_CIRC:
1368 case OP_CIRCM:
1369 case OP_DOLL:
1370 case OP_DOLLM:
1371 case OP_CHAR:
1372 case OP_CHARI:
1373 case OP_NOT:
1374 case OP_NOTI:
1375
1376 case OP_EXACT:
1377 case OP_POSSTAR:
1378 case OP_POSPLUS:
1379 case OP_POSQUERY:
1380 case OP_POSUPTO:
1381
1382 case OP_EXACTI:
1383 case OP_POSSTARI:
1384 case OP_POSPLUSI:
1385 case OP_POSQUERYI:
1386 case OP_POSUPTOI:
1387
1388 case OP_NOTEXACT:
1389 case OP_NOTPOSSTAR:
1390 case OP_NOTPOSPLUS:
1391 case OP_NOTPOSQUERY:
1392 case OP_NOTPOSUPTO:
1393
1394 case OP_NOTEXACTI:
1395 case OP_NOTPOSSTARI:
1396 case OP_NOTPOSPLUSI:
1397 case OP_NOTPOSQUERYI:
1398 case OP_NOTPOSUPTOI:
1399
1400 case OP_TYPEEXACT:
1401 case OP_TYPEPOSSTAR:
1402 case OP_TYPEPOSPLUS:
1403 case OP_TYPEPOSQUERY:
1404 case OP_TYPEPOSUPTO:
1405
1406 case OP_CLASS:
1407 case OP_NCLASS:
1408 case OP_XCLASS:
1409
1410 cc = next_opcode(common, cc);
1411 SLJIT_ASSERT(cc != NULL);
1412 break;
1413 }
1414
1415 /* Possessive quantifiers can use a special case. */
1416 if (SLJIT_UNLIKELY(possessive == length))
1417 return stack_restore ? no_frame : no_stack;
1418
1419 if (length > 0)
1420 return length + 1;
1421 return stack_restore ? no_frame : no_stack;
1422 }
1423
/* Emits the code that writes a frame to STACK_TOP relative memory for the
opcodes in [cc, ccend). When ccend is NULL the range is derived from the
bracket at cc. The opcode walk follows the same cases as get_framesize().
For each saved value a tag word is written first, followed by the value(s):
the tag is the negated local offset for OVECTOR(0), the mark pointer and the
last capture pointer, and the (positive) OVECTOR offset for capturing
brackets, which store two value words. A zero word terminates the frame.
stackpos is the first slot to write and stacktop the slot the terminator must
land on (checked by the final assertion). */
1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1425 {
1426 DEFINE_COMPILER;
/* The *_found flags make sure each of these values is saved only once per
frame; for recursions SOM and mark are treated as already saved. */
1427 BOOL setsom_found = recursive;
1428 BOOL setmark_found = recursive;
1429 /* The last capture is a local variable even for recursions. */
1430 BOOL capture_last_found = FALSE;
1431 int offset;
1432
1433 /* >= 1 + shortest item size (2) */
1434 SLJIT_UNUSED_ARG(stacktop);
1435 SLJIT_ASSERT(stackpos >= stacktop + 2);
1436
/* From here on stackpos is advanced in sizeof(sljit_sw) steps.
NOTE(review): STACK() presumably maps a slot index to a byte offset - confirm. */
1437 stackpos = STACK(stackpos);
1438 if (ccend == NULL)
1439 {
1440 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so the loop below
also saves that bracket's own capture. */
1441 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1442 cc = next_opcode(common, cc);
1443 }
1444
1445 SLJIT_ASSERT(cc != NULL);
1446 while (cc < ccend)
1447 switch(*cc)
1448 {
1449 case OP_SET_SOM:
1450 SLJIT_ASSERT(common->has_set_som);
1451 if (!setsom_found)
1452 {
/* Tag word -OVECTOR(0), then the current OVECTOR(0) value. */
1453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1455 stackpos += (int)sizeof(sljit_sw);
1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1457 stackpos += (int)sizeof(sljit_sw);
1458 setsom_found = TRUE;
1459 }
1460 cc += 1;
1461 break;
1462
1463 case OP_MARK:
1464 case OP_PRUNE_ARG:
1465 case OP_THEN_ARG:
1466 SLJIT_ASSERT(common->mark_ptr != 0);
1467 if (!setmark_found)
1468 {
/* Tag word -mark_ptr, then the current mark value. */
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 setmark_found = TRUE;
1475 }
1476 cc += 1 + 2 + cc[1];
1477 break;
1478
1479 case OP_RECURSE:
/* Save every one of SOM, mark and last capture that is in use and has not
been saved yet. */
1480 if (common->has_set_som && !setsom_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setsom_found = TRUE;
1488 }
1489 if (common->mark_ptr != 0 && !setmark_found)
1490 {
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 setmark_found = TRUE;
1497 }
1498 if (common->capture_last_ptr != 0 && !capture_last_found)
1499 {
1500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1504 stackpos += (int)sizeof(sljit_sw);
1505 capture_last_found = TRUE;
1506 }
1507 cc += 1 + LINK_SIZE;
1508 break;
1509
1510 case OP_CBRA:
1511 case OP_CBRAPOS:
1512 case OP_SCBRA:
1513 case OP_SCBRAPOS:
1514 if (common->capture_last_ptr != 0 && !capture_last_found)
1515 {
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1518 stackpos += (int)sizeof(sljit_sw);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 capture_last_found = TRUE;
1522 }
/* The bracket number is doubled to index its pair of ovector entries. Three
words are written: the positive OVECTOR(offset) tag and both entries. */
1523 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1525 stackpos += (int)sizeof(sljit_sw);
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1529 stackpos += (int)sizeof(sljit_sw);
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1531 stackpos += (int)sizeof(sljit_sw);
1532
1533 cc += 1 + LINK_SIZE + IMM2_SIZE;
1534 break;
1535
1536 default:
1537 cc = next_opcode(common, cc);
1538 SLJIT_ASSERT(cc != NULL);
1539 break;
1540 }
1541
/* Terminating zero word; it must land exactly on the stacktop slot. */
1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1543 SLJIT_ASSERT(stackpos == STACK(stacktop));
1544 }
1545
1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1547 {
1548 int private_data_length = needs_control_head ? 3 : 2;
1549 int size;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
1552 while (cc < ccend)
1553 {
1554 size = 0;
1555 switch(*cc)
1556 {
1557 case OP_KET:
1558 if (PRIVATE_DATA(cc) != 0)
1559 {
1560 private_data_length++;
1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562 cc += PRIVATE_DATA(cc + 1);
1563 }
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_ASSERT:
1568 case OP_ASSERT_NOT:
1569 case OP_ASSERTBACK:
1570 case OP_ASSERTBACK_NOT:
1571 case OP_ONCE:
1572 case OP_ONCE_NC:
1573 case OP_BRAPOS:
1574 case OP_SBRA:
1575 case OP_SBRAPOS:
1576 case OP_SCOND:
1577 private_data_length++;
1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579 cc += 1 + LINK_SIZE;
1580 break;
1581
1582 case OP_CBRA:
1583 case OP_SCBRA:
1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE + IMM2_SIZE;
1587 break;
1588
1589 case OP_CBRAPOS:
1590 case OP_SCBRAPOS:
1591 private_data_length += 2;
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 private_data_length++;
1600 cc += 1 + LINK_SIZE;
1601 break;
1602
1603 CASE_ITERATOR_PRIVATE_DATA_1
1604 if (PRIVATE_DATA(cc))
1605 private_data_length++;
1606 cc += 2;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_PRIVATE_DATA_2A
1613 if (PRIVATE_DATA(cc))
1614 private_data_length += 2;
1615 cc += 2;
1616 #ifdef SUPPORT_UTF
1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1618 #endif
1619 break;
1620
1621 CASE_ITERATOR_PRIVATE_DATA_2B
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 2 + IMM2_SIZE;
1625 #ifdef SUPPORT_UTF
1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1627 #endif
1628 break;
1629
1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631 if (PRIVATE_DATA(cc))
1632 private_data_length++;
1633 cc += 1;
1634 break;
1635
1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637 if (PRIVATE_DATA(cc))
1638 private_data_length += 2;
1639 cc += 1;
1640 break;
1641
1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643 if (PRIVATE_DATA(cc))
1644 private_data_length += 2;
1645 cc += 1 + IMM2_SIZE;
1646 break;
1647
1648 case OP_CLASS:
1649 case OP_NCLASS:
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1651 case OP_XCLASS:
1652 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1653 #else
1654 size = 1 + 32 / (int)sizeof(pcre_uchar);
1655 #endif
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += get_class_iterator_size(cc + size);
1658 cc += size;
1659 break;
1660
1661 default:
1662 cc = next_opcode(common, cc);
1663 SLJIT_ASSERT(cc != NULL);
1664 break;
1665 }
1666 }
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
1669 }
1670
/* Emits the code that copies the private data words of the opcodes in
[cc, ccend) between SLJIT_SP based locals and STACK_TOP based memory.
When save is TRUE the locals are read and written to the stack area; the
first words copied are common->recursive_head_ptr and, if needs_control_head
is set, common->control_head_ptr. When save is FALSE the direction is
reversed and those head words are skipped rather than restored.
TMP1 and TMP2 are used alternately as a two entry pipeline: a value is loaded
into one register while the other one is still waiting to be stored. The
opcode walk must visit the same words, in the same order, as
get_private_data_copy_length() counts. */
1671 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1672 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1673 {
1674 DEFINE_COMPILER;
/* Local offsets of the (at most two) words produced by one step. */
1675 int srcw[2];
1676 int count, size;
/* tmp1next selects the register used for the next word; the *empty flags
tell whether a register currently holds a word that is still pending. */
1677 BOOL tmp1next = TRUE;
1678 BOOL tmp1empty = TRUE;
1679 BOOL tmp2empty = TRUE;
1680 pcre_uchar *alternative;
1681 enum {
1682 start,
1683 loop,
1684 end
1685 } status;
1686
/* The "start" step (head pointers) is only taken when saving. */
1687 status = save ? start : loop;
1688 stackptr = STACK(stackptr - 2);
1689 stacktop = STACK(stacktop - 1);
1690
1691 if (!save)
1692 {
/* Skip the head word(s) and preload up to two words into TMP1 / TMP2. */
1693 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1694 if (stackptr < stacktop)
1695 {
1696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1697 stackptr += sizeof(sljit_sw);
1698 tmp1empty = FALSE;
1699 }
1700 if (stackptr < stacktop)
1701 {
1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1703 stackptr += sizeof(sljit_sw);
1704 tmp2empty = FALSE;
1705 }
1706 /* The tmp1next must be TRUE in either way. */
1707 }
1708
/* Each pass of this loop selects 0, 1 or 2 local offsets (count / srcw),
which the inner while loop below then transfers. */
1709 do
1710 {
1711 count = 0;
1712 switch(status)
1713 {
1714 case start:
1715 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1716 count = 1;
1717 srcw[0] = common->recursive_head_ptr;
1718 if (needs_control_head)
1719 {
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1721 count = 2;
1722 srcw[1] = common->control_head_ptr;
1723 }
1724 status = loop;
1725 break;
1726
1727 case loop:
1728 if (cc >= ccend)
1729 {
1730 status = end;
1731 break;
1732 }
1733
1734 switch(*cc)
1735 {
1736 case OP_KET:
1737 if (PRIVATE_DATA(cc) != 0)
1738 {
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
/* The entry after the KET is an extra distance to skip. */
1741 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1742 cc += PRIVATE_DATA(cc + 1);
1743 }
1744 cc += 1 + LINK_SIZE;
1745 break;
1746
1747 case OP_ASSERT:
1748 case OP_ASSERT_NOT:
1749 case OP_ASSERTBACK:
1750 case OP_ASSERTBACK_NOT:
1751 case OP_ONCE:
1752 case OP_ONCE_NC:
1753 case OP_BRAPOS:
1754 case OP_SBRA:
1755 case OP_SBRAPOS:
1756 case OP_SCOND:
1757 count = 1;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 SLJIT_ASSERT(srcw[0] != 0);
1760 cc += 1 + LINK_SIZE;
1761 break;
1762
1763 case OP_CBRA:
1764 case OP_SCBRA:
/* Copied only when the bracket's optimized_cbracket entry is zero. */
1765 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1766 {
1767 count = 1;
1768 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1769 }
1770 cc += 1 + LINK_SIZE + IMM2_SIZE;
1771 break;
1772
1773 case OP_CBRAPOS:
1774 case OP_SCBRAPOS:
1775 count = 2;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1778 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1779 cc += 1 + LINK_SIZE + IMM2_SIZE;
1780 break;
1781
1782 case OP_COND:
1783 /* Might be a hidden SCOND. */
1784 alternative = cc + GET(cc, 1);
1785 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1786 {
1787 count = 1;
1788 srcw[0] = PRIVATE_DATA(cc);
1789 SLJIT_ASSERT(srcw[0] != 0);
1790 }
1791 cc += 1 + LINK_SIZE;
1792 break;
1793
1794 CASE_ITERATOR_PRIVATE_DATA_1
1795 if (PRIVATE_DATA(cc))
1796 {
1797 count = 1;
1798 srcw[0] = PRIVATE_DATA(cc);
1799 }
1800 cc += 2;
1801 #ifdef SUPPORT_UTF
1802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1803 #endif
1804 break;
1805
1806 CASE_ITERATOR_PRIVATE_DATA_2A
1807 if (PRIVATE_DATA(cc))
1808 {
/* Two consecutive words starting at the opcode's private data offset. */
1809 count = 2;
1810 srcw[0] = PRIVATE_DATA(cc);
1811 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1812 }
1813 cc += 2;
1814 #ifdef SUPPORT_UTF
1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1816 #endif
1817 break;
1818
1819 CASE_ITERATOR_PRIVATE_DATA_2B
1820 if (PRIVATE_DATA(cc))
1821 {
1822 count = 2;
1823 srcw[0] = PRIVATE_DATA(cc);
1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1825 }
1826 cc += 2 + IMM2_SIZE;
1827 #ifdef SUPPORT_UTF
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1829 #endif
1830 break;
1831
1832 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1833 if (PRIVATE_DATA(cc))
1834 {
1835 count = 1;
1836 srcw[0] = PRIVATE_DATA(cc);
1837 }
1838 cc += 1;
1839 break;
1840
1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1842 if (PRIVATE_DATA(cc))
1843 {
1844 count = 2;
1845 srcw[0] = PRIVATE_DATA(cc);
1846 srcw[1] = srcw[0] + sizeof(sljit_sw);
1847 }
1848 cc += 1;
1849 break;
1850
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1852 if (PRIVATE_DATA(cc))
1853 {
1854 count = 2;
1855 srcw[0] = PRIVATE_DATA(cc);
1856 srcw[1] = srcw[0] + sizeof(sljit_sw);
1857 }
1858 cc += 1 + IMM2_SIZE;
1859 break;
1860
1861 case OP_CLASS:
1862 case OP_NCLASS:
1863 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1864 case OP_XCLASS:
1865 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1866 #else
1867 size = 1 + 32 / (int)sizeof(pcre_uchar);
1868 #endif
/* The item that follows the class decides whether one or two words are used. */
1869 if (PRIVATE_DATA(cc))
1870 switch(get_class_iterator_size(cc + size))
1871 {
1872 case 1:
1873 count = 1;
1874 srcw[0] = PRIVATE_DATA(cc);
1875 break;
1876
1877 case 2:
1878 count = 2;
1879 srcw[0] = PRIVATE_DATA(cc);
1880 srcw[1] = srcw[0] + sizeof(sljit_sw);
1881 break;
1882
1883 default:
1884 SLJIT_ASSERT_STOP();
1885 break;
1886 }
1887 cc += size;
1888 break;
1889
1890 default:
1891 cc = next_opcode(common, cc);
1892 SLJIT_ASSERT(cc != NULL);
1893 break;
1894 }
1895 break;
1896
1897 case end:
1898 SLJIT_ASSERT_STOP();
1899 break;
1900 }
1901
/* Transfer the selected words, last one first (srcw[count] after the
decrement). */
1902 while (count > 0)
1903 {
1904 count--;
1905 if (save)
1906 {
/* Saving: flush the pending word of the chosen register to the stack area,
then load the next local into that register. */
1907 if (tmp1next)
1908 {
1909 if (!tmp1empty)
1910 {
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1912 stackptr += sizeof(sljit_sw);
1913 }
1914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1915 tmp1empty = FALSE;
1916 tmp1next = FALSE;
1917 }
1918 else
1919 {
1920 if (!tmp2empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1926 tmp2empty = FALSE;
1927 tmp1next = TRUE;
1928 }
1929 }
1930 else
1931 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack area if any words are left. */
1932 if (tmp1next)
1933 {
1934 SLJIT_ASSERT(!tmp1empty);
1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1936 tmp1empty = stackptr >= stacktop;
1937 if (!tmp1empty)
1938 {
1939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1940 stackptr += sizeof(sljit_sw);
1941 }
1942 tmp1next = FALSE;
1943 }
1944 else
1945 {
1946 SLJIT_ASSERT(!tmp2empty);
1947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1948 tmp2empty = stackptr >= stacktop;
1949 if (!tmp2empty)
1950 {
1951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1952 stackptr += sizeof(sljit_sw);
1953 }
1954 tmp1next = TRUE;
1955 }
1956 }
1957 }
1958 }
1959 while (status != end);
1960
1961 if (save)
1962 {
/* Flush whatever is still pending; the register that would be used next
holds the older word, so it is written first. */
1963 if (tmp1next)
1964 {
1965 if (!tmp1empty)
1966 {
1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1968 stackptr += sizeof(sljit_sw);
1969 }
1970 if (!tmp2empty)
1971 {
1972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1973 stackptr += sizeof(sljit_sw);
1974 }
1975 }
1976 else
1977 {
1978 if (!tmp2empty)
1979 {
1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1981 stackptr += sizeof(sljit_sw);
1982 }
1983 if (!tmp1empty)
1984 {
1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1986 stackptr += sizeof(sljit_sw);
1987 }
1988 }
1989 }
/* The whole range was walked, the stack area was used up exactly, and when
restoring no preloaded word is left over. */
1990 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1991 }
1992
1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1994 {
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1997
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000 current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003 has_alternatives = FALSE;
2004
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007 current_offset = common->then_offsets + (cc - common->start);
2008
2009 while (cc < end)
2010 {
2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012 cc = set_then_offsets(common, cc, current_offset);
2013 else
2014 {
2015 if (*cc == OP_ALT && has_alternatives)
2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018 *current_offset = 1;
2019 cc = next_opcode(common, cc);
2020 }
2021 }
2022
2023 return end;
2024 }
2025
/* The case-list helper macros are not needed past this point. */
2026 #undef CASE_ITERATOR_PRIVATE_DATA_1
2027 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2028 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2029 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2030 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2031 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2032
2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2034 {
2035 return (value & (value - 1)) == 0;
2036 }
2037
2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2039 {
2040 while (list)
2041 {
2042 /* sljit_set_label is clever enough to do nothing
2043 if either the jump or the label is NULL. */
2044 SET_LABEL(list->jump, label);
2045 list = list->next;
2046 }
2047 }
2048
2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2050 {
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2052 if (list_item)
2053 {
2054 list_item->next = *list;
2055 list_item->jump = jump;
2056 *list = list_item;
2057 }
2058 }
2059
2060 static void add_stub(compiler_common *common, struct sljit_jump *start)
2061 {
2062 DEFINE_COMPILER;
2063 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2064
2065 if (list_item)
2066 {
2067 list_item->start = start;
2068 list_item->quit = LABEL();
2069 list_item->next = common->stubs;
2070 common->stubs = list_item;
2071 }
2072 }
2073
2074 static void flush_stubs(compiler_common *common)
2075 {
2076 DEFINE_COMPILER;
2077 stub_list *list_item = common->stubs;
2078
2079 while (list_item)
2080 {
2081 JUMPHERE(list_item->start);
2082 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2083 JUMPTO(SLJIT_JUMP, list_item->quit);
2084 list_item = list_item->next;
2085 }
2086 common->stubs = NULL;
2087 }
2088
2089 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2090 {
2091 DEFINE_COMPILER;
2092 label_addr_list *label_addr;
2093
2094 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2095 if (label_addr == NULL)
2096 return;
2097 label_addr->label = LABEL();
2098 label_addr->update_addr = update_addr;
2099 label_addr->next = common->label_addrs;
2100 common->label_addrs = label_addr;
2101 }
2102
2103 static SLJIT_INLINE void count_match(compiler_common *common)
2104 {
2105 DEFINE_COMPILER;
2106
2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2109 }
2110
2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2112 {
2113 /* May destroy all locals and registers except TMP2. */
2114 DEFINE_COMPILER;
2115
2116 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2117 #ifdef DESTROY_REGISTERS
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2119 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2120 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2123 #endif
2124 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2125 }
2126
2127 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2128 {
2129 DEFINE_COMPILER;
2130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2131 }
2132
2133 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2134 {
2135 DEFINE_COMPILER;
2136 sljit_uw *result;
2137
2138 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2139 return NULL;
2140
2141 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2142 if (SLJIT_UNLIKELY(result == NULL))
2143 {
2144 sljit_set_compiler_memory_error(compiler);
2145 return NULL;
2146 }
2147
2148 *(void**)result = common->read_only_data_head;
2149 common->read_only_data_head = (void *)result;
2150 return result + 1;
2151 }
2152
2153 static void free_read_only_data(void *current, void *allocator_data)
2154 {
2155 void *next;
2156
2157 SLJIT_UNUSED_ARG(allocator_data);
2158
2159 while (current != NULL)
2160 {
2161 next = *(void**)current;
2162 SLJIT_FREE(current, allocator_data);
2163 current = next;
2164 }
2165 }
2166
2167 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2168 {
2169 DEFINE_COMPILER;
2170 struct sljit_label *loop;
2171 int i;
2172
2173 /* At this point we can freely use all temporary registers. */
2174 SLJIT_ASSERT(length > 1);
2175 /* TMP1 returns with begin - 1. */
2176 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2177 if (length < 8)
2178 {
2179 for (i = 1; i < length; i++)
2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2181 }
2182 else
2183 {
2184 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2185 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2186 loop = LABEL();
2187 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2189 JUMPTO(SLJIT_NOT_ZERO, loop);
2190 }
2191 }
2192
2193 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2194 {
2195 DEFINE_COMPILER;
2196 struct sljit_label *loop;
2197 int i;
2198
2199 SLJIT_ASSERT(length > 1);
2200 /* OVECTOR(1) contains the "string begin - 1" constant. */
2201 if (length > 2)
2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2203 if (length < 8)
2204 {
2205 for (i = 2; i < length; i++)
2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2207 }
2208 else
2209 {
2210 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2212 loop = LABEL();
2213 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2214 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2215 JUMPTO(SLJIT_NOT_ZERO, loop);
2216 }
2217
2218 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2219 if (common->mark_ptr != 0)
2220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2221 if (common->control_head_ptr != 0)
2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2226 }
2227
2228 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2229 {
2230 while (current != NULL)
2231 {
2232 switch (current[-2])
2233 {
2234 case type_then_trap:
2235 break;
2236
2237 case type_mark:
2238 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2239 return current[-4];
2240 break;
2241
2242 default:
2243 SLJIT_ASSERT_STOP();
2244 break;
2245 }
2246 current = (sljit_sw*)current[-1];
2247 }
2248 return -1;
2249 }
2250
/* Emits the code that copies the internal ovector to the caller's offsets
array and computes the return value. Each internal entry is turned into an
offset by subtracting the subject start (begin); for 16 and 32 bit builds the
result is shifted right by UCHAR_SHIFT, then stored as an int.
offset_count entries are copied. The mark pointer, when in use, is stored in
the arguments structure as well. The return value is 1 when topbracket <= 1;
otherwise it is derived by scanning the ovector backwards from topbracket
until an entry differs from the original OVECTOR(1) value. */
2251 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2252 {
2253 DEFINE_COMPILER;
2254 struct sljit_label *loop;
2255 struct sljit_jump *early_quit;
2256
2257 /* At this point we can freely use all registers. */
/* Keep the old OVECTOR(1) value in SLJIT_S2 (used by the backward scan
below), then store the current STR_PTR in OVECTOR(1). */
2258 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2259 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2260
/* R0 = arguments, R1 = offset_count, R2 = offsets - sizeof(int) (the store
in the loop uses a pre-updating move), then R0 = begin, S0 = ovector base. */
2261 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2262 if (common->mark_ptr != 0)
2263 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2264 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2265 if (common->mark_ptr != 0)
2266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2267 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2268 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2269 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2270 /* Unlikely, but possible */
/* Skip the copy loop entirely when offset_count is zero. */
2271 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2272 loop = LABEL();
2273 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2274 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2275 /* Copy the integer value to the output buffer */
2276 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2277 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2278 #endif
2279 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2281 JUMPTO(SLJIT_NOT_ZERO, loop);
2282 JUMPHERE(early_quit);
2283
2284 /* Calculate the return value, which is the maximum ovector value. */
2285 if (topbracket > 1)
2286 {
/* R0 starts at the ovector pair of topbracket and steps back one pair
(two words) per iteration; R1 counts down from topbracket + 1. The loop
repeats while the loaded entry still equals the value saved in S2. */
2287 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2288 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2289
2290 /* OVECTOR(0) is never equal to SLJIT_S2. */
2291 loop = LABEL();
2292 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2293 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2294 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2295 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2296 }
2297 else
2298 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2299 }
2300
/* Emits the exit sequence used when a partial match is reported.

PCRE_ERROR_PARTIAL is loaded into SLJIT_RETURN_REG and, depending on
jit_arguments->real_offset_count, up to three ints are stored through
jit_arguments->offsets before control is transferred to quit:
  offsets[0]  start_used_ptr (hard partial mode) or hit_start (soft mode),
              relative to begin
  offsets[1]  STR_END relative to begin
  offsets[2]  start_ptr (hard mode) or the word following hit_start (soft
              mode), relative to begin; written only with three or more slots
All distances are shifted right by UCHAR_SHIFT in the 16 and 32 bit builds. */
2301 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2302 {
2303 DEFINE_COMPILER;
2304 struct sljit_jump *jump;
2305
/* STR_END lives in SLJIT_S1, which is used as scratch further down. */
2306 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2307 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2308 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2309
2310 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2311 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2312 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* With fewer than two output slots nothing is stored: leave right away. */
2313 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2314
2315 /* Store match begin and end. */
/* SLJIT_S0 = subject begin; SLJIT_R1 is reused for the offsets array. */
2316 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2317 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2318
/* offsets[2] is only written when at least three slots are available. */
2319 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2320 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2321 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2322 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2323 #endif
2324 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2325 JUMPHERE(jump);
2326
/* Fetch the start value for offsets[0] first, then compute offsets[1] in
SLJIT_S1. This overwrites STR_END, which is not read again in this sequence. */
2327 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2328 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2329 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2330 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2331 #endif
2332 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2333
/* offsets[0] = start value - begin. */
2334 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2335 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2336 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2337 #endif
2338 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2339
2340 JUMPTO(SLJIT_JUMP, quit);
2341 }
2342
2343 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2344 {
2345 /* May destroy TMP1. */
2346 DEFINE_COMPILER;
2347 struct sljit_jump *jump;
2348
2349 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2350 {
2351 /* The value of -1 must be kept for start_used_ptr! */
2352 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2353 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2354 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2355 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2357 JUMPHERE(jump);
2358 }
2359 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2360 {
2361 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 JUMPHERE(jump);
2364 }
2365 }
2366
2367 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2368 {
2369 /* Detects if the character has an othercase. */
2370 unsigned int c;
2371
2372 #ifdef SUPPORT_UTF
2373 if (common->utf)
2374 {
2375 GETCHAR(c, cc);
2376 if (c > 127)
2377 {
2378 #ifdef SUPPORT_UCP
2379 return c != UCD_OTHERCASE(c);
2380 #else
2381 return FALSE;
2382 #endif
2383 }
2384 #ifndef COMPILE_PCRE8
2385 return common->fcc[c] != c;
2386 #endif
2387 }
2388 else
2389 #endif
2390 c = *cc;
2391 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2392 }
2393
2394 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2395 {
2396 /* Returns with the othercase. */
2397 #ifdef SUPPORT_UTF
2398 if (common->utf && c > 127)
2399 {
2400 #ifdef SUPPORT_UCP
2401 return UCD_OTHERCASE(c);
2402 #else
2403 return c;
2404 #endif
2405 }
2406 #endif
2407 return TABLE_GET(c, common->fcc, c);
2408 }
2409
/* Describes the single bit in which the character at cc differs from its
other case.

Returns 0 when the two characters differ in more than one bit. Otherwise the
result has the form (n << 8) | mask, where mask is the differing bit reduced
to a value that fits a single byte and n is a small index computed per build
below. NOTE(review): n presumably selects the byte (or code unit half) of the
encoded character that contains the bit -- confirm at the call sites.

The character must have an other case that differs from it (asserted). */
2410 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2411 {
2412 /* Detects if the character and its othercase has only 1 bit difference. */
2413 unsigned int c, oc, bit;
2414 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2415 int n;
2416 #endif
2417
/* Fetch the character (c) and its other case (oc). */
2418 #ifdef SUPPORT_UTF
2419 if (common->utf)
2420 {
2421 GETCHAR(c, cc);
2422 if (c <= 127)
2423 oc = common->fcc[c];
2424 else
2425 {
2426 #ifdef SUPPORT_UCP
2427 oc = UCD_OTHERCASE(c);
2428 #else
2429 oc = c;
2430 #endif
2431 }
2432 }
2433 else
2434 {
2435 c = *cc;
2436 oc = TABLE_GET(c, common->fcc, c);
2437 }
2438 #else
2439 c = *cc;
2440 oc = TABLE_GET(c, common->fcc, c);
2441 #endif
2442
2443 SLJIT_ASSERT(c != oc);
2444
2445 bit = c ^ oc;
2446 /* Optimized for English alphabet. */
2447 if (c <= 127 && bit == 0x20)
2448 return (0 << 8) | 0x20;
2449
2450 /* Since c != oc, they must have at least 1 bit difference. */
2451 if (!is_powerof2(bit))
2452 return 0;
2453
2454 #if defined COMPILE_PCRE8
2455
2456 #ifdef SUPPORT_UTF
2457 if (common->utf && c > 127)
2458 {
/* n starts at GET_EXTRALEN of the first byte and is decremented once for
every group of 6 low bits that does not contain the differing bit, while the
bit itself is shifted down into the lowest 6 bits. */
2459 n = GET_EXTRALEN(*cc);
2460 while ((bit & 0x3f) == 0)
2461 {
2462 n--;
2463 bit >>= 6;
2464 }
2465 return (n << 8) | bit;
2466 }
2467 #endif /* SUPPORT_UTF */
2468 return (0 << 8) | bit;
2469
2470 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2471
2472 #ifdef SUPPORT_UTF
/* For c > 65535 a difference at bit 10 or above is shifted down by 10 and
then encoded by the common return below (n = 0 or 1); a difference in the
low 10 bits is encoded with n = 2 or 3.
NOTE(review): this branch is also compiled for COMPILE_PCRE32; it looks
written for surrogate pairs -- confirm it is intended for 32-bit code units. */
2473 if (common->utf && c > 65535)
2474 {
2475 if (bit >= (1 << 10))
2476 bit >>= 10;
2477 else
2478 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2479 }
2480 #endif /* SUPPORT_UTF */
/* n = 0 when the bit is in the low 8 bits, n = 1 when it is in the next 8. */
2481 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2482
2483 #endif /* COMPILE_PCRE[8|16|32] */
2484 }
2485
2486 static void check_partial(compiler_common *common, BOOL force)
2487 {
2488 /* Checks whether a partial matching is occurred. Does not modify registers. */
2489 DEFINE_COMPILER;
2490 struct sljit_jump *jump = NULL;
2491
2492 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2493
2494 if (common->mode == JIT_COMPILE)
2495 return;
2496
2497 if (!force)
2498 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2499 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2500 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2501
2502 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2504 else
2505 {
2506 if (common->partialmatchlabel != NULL)
2507 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2508 else
2509 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2510 }
2511
2512 if (jump != NULL)
2513 JUMPHERE(jump);
2514 }
2515
2516 static void check_str_end(compiler_common *common, jump_list **end_reached)
2517 {
2518 /* Does not affect registers. Usually used in a tight spot. */
2519 DEFINE_COMPILER;
2520 struct sljit_jump *jump;
2521
2522 if (common->mode == JIT_COMPILE)
2523 {
2524 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2525 return;
2526 }
2527
2528 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2529 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2530 {
2531 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2533 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2534 }
2535 else
2536 {
2537 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2538 if (common->partialmatchlabel != NULL)
2539 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2540 else
2541 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2542 }
2543 JUMPHERE(jump);
2544 }
2545
2546 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2547 {
2548 DEFINE_COMPILER;
2549 struct sljit_jump *jump;
2550
2551 if (common->mode == JIT_COMPILE)
2552 {
2553 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2554 return;
2555 }
2556
2557 /* Partial matching mode. */
2558 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2559 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2560 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2561 {
2562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2563 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2564 }
2565 else
2566 {
2567 if (common->partialmatchlabel != NULL)
2568 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2569 else
2570 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2571 }
2572 JUMPHERE(jump);
2573 }
2574
2575 static void peek_char(compiler_common *common, pcre_uint32 max)
2576 {
2577 /* Reads the character into TMP1, keeps STR_PTR.
2578 Does not check STR_END. TMP2 Destroyed. */
2579 DEFINE_COMPILER;
2580 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2581 struct sljit_jump *jump;
2582 #endif
2583
2584 SLJIT_UNUSED_ARG(max);
2585
2586 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2587 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2588 if (common->utf)
2589 {
2590 if (max < 128) return;
2591
2592 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2593 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2594 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2595 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2596 JUMPHERE(jump);
2597 }
2598 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2599
2600 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2601 if (common->utf)
2602 {
2603 if (max < 0xd800) return;
2604
2605 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2606 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2607 /* TMP2 contains the high surrogate. */
2608 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2609 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2610 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2611 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2612 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2613 JUMPHERE(jump);
2614 }
2615 #endif
2616 }
2617
2618 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2619
2620 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2621 {
2622 /* Tells whether the character codes below 128 are enough
2623 to determine a match. */
2624 const pcre_uint8 value = nclass ? 0xff : 0;
2625 const pcre_uint8 *end = bitset + 32;
2626
2627 bitset += 16;
2628 do
2629 {
2630 if (*bitset++ != value)
2631 return FALSE;
2632 }
2633 while (bitset < end);
2634 return TRUE;
2635 }
2636
2637 static void read_char7_type(compiler_common *common, BOOL full_read)
2638 {
2639 /* Reads the precise character type of a character into TMP1, if the character
2640 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2641 full_read argument tells whether characters above max are accepted or not. */
2642 DEFINE_COMPILER;
2643 struct sljit_jump *jump;
2644
2645 SLJIT_ASSERT(common->utf);
2646
2647 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2648 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649
2650 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2651
2652 if (full_read)
2653 {
2654 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2655 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2657 JUMPHERE(jump);
2658 }
2659 }
2660
2661 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2662
/* Emits code that reads the character at STR_PTR into TMP1 and advances
STR_PTR. The [min, max] range lets the generator pick the cheapest decoding
sequence in UTF mode: only characters inside the range need an exact value.
update_str_ptr tells whether STR_PTR has to end up after the whole character
even when the character is not decoded. TMP2 is used as scratch in the UTF
paths, and RETURN_ADDR as well in some UTF-8 paths. */
2663 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2664 {
2665 /* Reads the precise value of a character into TMP1, if the character is
2666 between min and max (c >= min && c <= max). Otherwise it returns with a value
2667 outside the range. Does not check STR_END. */
2668 DEFINE_COMPILER;
2669 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2670 struct sljit_jump *jump;
2671 #endif
2672 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2673 struct sljit_jump *jump2;
2674 #endif
2675
2676 SLJIT_UNUSED_ARG(update_str_ptr);
2677 SLJIT_UNUSED_ARG(min);
2678 SLJIT_UNUSED_ARG(max);
2679 SLJIT_ASSERT(min <= max);
2680
/* Load the first code unit and step over it in every configuration. From
here on offset 0 from STR_PTR addresses the second code unit. */
2681 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2683
2684 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2685 if (common->utf)
2686 {
/* Only values below 128 matter and STR_PTR need not skip the rest of the
character: the first byte is all that is needed. */
2687 if (max < 128 && !update_str_ptr) return;
2688
/* First bytes below 0xc0 need no further decoding. */
2689 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2690 if (min >= 0x10000)
2691 {
/* Only four byte sequences can be in range. TMP2 = first byte - 0xf0; when
that is above 7 (unsigned) the decoding is skipped. With update_str_ptr the
utf8_table4 entry of the first byte is parked in RETURN_ADDR and added to
STR_PTR at the end, otherwise STR_PTR is advanced by a fixed 3 units. */
2692 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2693 if (update_str_ptr)
2694 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2695 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2696 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2697 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2700 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2701 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2702 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2703 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2704 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2705 if (!update_str_ptr)
2706 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2707 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2708 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2710 JUMPHERE(jump2);
2711 if (update_str_ptr)
2712 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2713 }
2714 else if (min >= 0x800 && max <= 0xffff)
2715 {
/* Only three byte sequences can be in range. Same scheme as above with
TMP2 = first byte - 0xe0, a limit of 0xf and two continuation bytes. */
2716 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2717 if (update_str_ptr)
2718 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2719 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2720 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2721 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2722 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2723 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2724 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2725 if (!update_str_ptr)
2726 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2727 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2728 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2729 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2730 JUMPHERE(jump2);
2731 if (update_str_ptr)
2732 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2733 }
/* Wider ranges use one of the shared decoders: utfreadchar16 when max is
below 0x10000, the full utfreadchar otherwise. */
2734 else if (max >= 0x800)
2735 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2736 else if (max < 128)
2737 {
/* Reached only with update_str_ptr set (see the early return above): the
value is not needed, just skip by the utf8_table4 entry of the first byte. */
2738 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2739 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2740 }
2741 else
2742 {
/* 128 <= max < 0x800: decode as a two byte sequence. STR_PTR is advanced by
one more unit, or by the table entry when update_str_ptr is set. */
2743 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2744 if (!update_str_ptr)
2745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2746 else
2747 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2748 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2749 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2750 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2751 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2752 if (update_str_ptr)
2753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2754 }
2755 JUMPHERE(jump);
2756 }
2757 #endif
2758
2759 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2760 if (common->utf)
2761 {
2762 if (max >= 0x10000)
2763 {
/* Full decoding: when the first unit is in 0xd800..0xdbff, combine it with
the following unit (now at offset 0) and step over that unit as well. */
2764 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2765 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2766 /* TMP2 contains the high surrogate. */
2767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2768 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2769 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2771 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2772 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2773 JUMPHERE(jump);
2774 return;
2775 }
2776
/* No surrogate can be in range and STR_PTR need not skip one: done. */
2777 if (max < 0xd800 && !update_str_ptr) return;
2778
2779 /* Skip low surrogate if necessary. */
2780 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2781 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2782 if (update_str_ptr)
2783 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* The pair is not decoded; 0x10000 is above max on this path and serves as
the out-of-range result. */
2784 if (max >= 0xd800)
2785 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2786 JUMPHERE(jump);
2787 }
2788 #endif
2789 }
2790
2791 static SLJIT_INLINE void read_char(compiler_common *common)
2792 {
2793 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2794 }
2795
/* Emits code that loads the ctypes entry of the next character into TMP1 and
advances STR_PTR. Characters above 255 have no ctypes entry and yield zero.
TMP2 is used as scratch. In the UTF builds update_str_ptr selects how STR_PTR
is moved over the remaining code units of a multi-unit character (see the
individual paths below). */
2796 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2797 {
2798 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2799 DEFINE_COMPILER;
2800 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2801 struct sljit_jump *jump;
2802 #endif
2803 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2804 struct sljit_jump *jump2;
2805 #endif
2806
2807 SLJIT_UNUSED_ARG(update_str_ptr);
2808
/* TMP2 = first code unit; STR_PTR steps over it. */
2809 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2810 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2811
2812 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2813 if (common->utf)
2814 {
2815 /* This can be an extra read in some situations, but hopefully
2816 it is needed in most cases. */
2817 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* First bytes below 0xc0: the entry loaded above is the result. */
2818 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2819 if (!update_str_ptr)
2820 {
/* Inline path: combine the low 6 bits of the first two bytes, advance
STR_PTR by exactly one more byte, and look the value up only when it is at
most 255 (TMP1 stays zero otherwise). */
2821 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2822 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2823 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2824 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2825 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2826 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2827 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2828 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2829 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2830 JUMPHERE(jump2);
2831 }
2832 else
/* The shared utfreadtype8 routine also moves STR_PTR over longer sequences. */
2833 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2834 JUMPHERE(jump);
2835 return;
2836 }
2837 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2838
2839 #if !defined COMPILE_PCRE8
2840 /* The ctypes array contains only 256 values. */
2841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2842 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2843 #endif
2844 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2845 #if !defined COMPILE_PCRE8
2846 JUMPHERE(jump);
2847 #endif
2848
2849 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2850 if (common->utf && update_str_ptr)
2851 {
2852 /* Skip low surrogate if necessary. */
/* TMP2 still holds the first code unit; a value in 0xd800..0xdbff means a
second unit follows and STR_PTR steps over it. */
2853 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2854 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2855 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2856 JUMPHERE(jump);
2857 }
2858 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2859 }
2860
/* Emits code that moves STR_PTR back by one character. Outside UTF mode (and
in the 32 bit build) that is a single code unit; in UTF-8 and UTF-16 mode the
code units that belong to the same character are stepped over as well.
The UTF paths clobber TMP1. The start of the subject is not checked. */
2861 static void skip_char_back(compiler_common *common)
2862 {
2863 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2864 DEFINE_COMPILER;
2865 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2866 #if defined COMPILE_PCRE8
2867 struct sljit_label *label;
2868
2869 if (common->utf)
2870 {
/* Step back one byte at a time for as long as the byte just stepped over has
the form 10xxxxxx (a UTF-8 continuation byte). */
2871 label = LABEL();
2872 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2873 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2874 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2875 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2876 return;
2877 }
2878 #elif defined COMPILE_PCRE16
2879 if (common->utf)
2880 {
2881 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2882 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2883 /* Skip low surrogate if necessary. */
/* Branch free: TMP1 becomes 1 when the unit stepped over is in
0xdc00..0xdfff and 0 otherwise; shifted left by one it is the byte size of
one more 16 bit unit to go back. */
2884 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2885 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2886 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2887 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2888 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2889 return;
2890 }
2891 #endif /* COMPILE_PCRE[8|16] */
2892 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one code unit back. */
2893 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2894 }
2895
2896 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2897 {
2898 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2899 DEFINE_COMPILER;
2900 struct sljit_jump *jump;
2901
2902 if (nltype == NLTYPE_ANY)
2903 {
2904 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2905 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2906 }
2907 else if (nltype == NLTYPE_ANYCRLF)
2908 {
2909 if (jumpifmatch)
2910 {
2911 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2913 }
2914 else
2915 {
2916 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2917 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2918 JUMPHERE(jump);
2919 }
2920 }
2921 else
2922 {
2923 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2924 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2925 }
2926 }
2927
2928 #ifdef SUPPORT_UTF
2929
2930 #if defined COMPILE_PCRE8
/* Emits the shared routine that decodes a multi-byte UTF-8 character. It is
entered with a fast call: TMP1 holds the first byte (0xc0 or above) and
STR_PTR points at the second byte. On return TMP1 holds the character value,
TMP2 its length in code units (2, 3 or 4) and STR_PTR points after the
character. The return address is kept in RETURN_ADDR. */
2931 static void do_utfreadchar(compiler_common *common)
2932 {
2933 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2934 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2935 DEFINE_COMPILER;
2936 struct sljit_jump *jump;
2937
2938 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = (low 6 bits of the first byte) << 6 | (low 6 bits of the second).
Bit 5 of the first byte, set for sequences longer than two bytes, ends up as
0x800. */
2939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2940 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2942 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2943 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2944
2945 /* Searching for the first zero. */
2946 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2947 jump = JUMP(SLJIT_NOT_ZERO);
2948 /* Two byte sequence. */
2949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2950 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2951 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2952
2953 JUMPHERE(jump);
/* Clear the length marker bit and append the 6 payload bits of the third
byte; the next marker bit (bit 4 of the first byte) is now at 0x10000. */
2954 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2955 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2956 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2957 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2958 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2959
2960 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2961 jump = JUMP(SLJIT_NOT_ZERO);
2962 /* Three byte sequence. */
2963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2964 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2965 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2966
2967 /* Four byte sequence. */
2968 JUMPHERE(jump);
2969 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2970 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2971 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2972 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2973 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2974 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2975 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2976 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2977 }
2978
/* Emits the shared routine that decodes UTF-8 characters of at most three
bytes. It is entered with a fast call: TMP1 holds the first byte (0xc0 or
above) and STR_PTR points at the second byte. On return TMP1 holds the value
that was decoded and STR_PTR has been moved past the character; four byte
sequences are stepped over but their value is not fully decoded. Unlike
do_utfreadchar no length is returned; TMP2 is scratch. */
2979 static void do_utfreadchar16(compiler_common *common)
2980 {
2981 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2982 of the character (>= 0xc0). Return value in TMP1. */
2983 DEFINE_COMPILER;
2984 struct sljit_jump *jump;
2985
2986 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = (low 6 bits of the first byte) << 6 | (low 6 bits of the second). */
2987 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2988 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2989 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2990 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2991 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2992
2993 /* Searching for the first zero. */
2994 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2995 jump = JUMP(SLJIT_NOT_ZERO);
2996 /* Two byte sequence. */
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999
3000 JUMPHERE(jump);
/* 0x400 is bit 4 of the first byte, set for four byte sequences. TMP2
becomes 1 in that case and 0 otherwise, and is added to STR_PTR so that the
fixed advance below also covers the fourth byte. */
3001 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3002 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3003 /* This code runs only in 8 bit mode. No need to shift the value. */
3004 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3005 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3006 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3007 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3008 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3009 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3010 /* Three byte sequence. */
3011 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3012 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3013 }
3014
/* Emits the shared routine that yields the ctypes entry of a multi-byte
UTF-8 character. It is entered with a fast call: TMP2 holds the first byte
(0xc0 or above) and STR_PTR points at the second byte. On return TMP1 holds
the ctypes entry for characters up to 255 and zero for all others, and
STR_PTR has been moved over the remaining bytes of the character. */
3015 static void do_utfreadtype8(compiler_common *common)
3016 {
3017 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3018 of the character (>= 0xc0). Return value in TMP1. */
3019 DEFINE_COMPILER;
3020 struct sljit_jump *jump;
3021 struct sljit_jump *compare;
3022
3023 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3024
/* Bit 0x20 of the first byte is set for sequences of three or more bytes. */
3025 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3026 jump = JUMP(SLJIT_NOT_ZERO);
3027 /* Two byte sequence. */
3028 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3029 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3030 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3031 /* The upper 5 bits are known at this point. */
/* A value above 3 here means the character is above 255. */
3032 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3033 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3034 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3035 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3036 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3037 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3038
3039 JUMPHERE(compare);
3040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3041 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3042
3043 /* We only have types for characters less than 256. */
3044 JUMPHERE(jump);
/* Longer sequences: advance STR_PTR by the utf8_table4 entry of the first
byte and return type zero. */
3045 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3047 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3048 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3049 }
3050
3051 #endif /* COMPILE_PCRE8 */
3052
3053 #endif /* SUPPORT_UTF */
3054
3055 #ifdef SUPPORT_UCP
3056
3057 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3058 #define UCD_BLOCK_MASK 127
3059 #define UCD_BLOCK_SHIFT 7
3060
/* Emits the shared routine that looks up the Unicode character type of the
character in TMP1 through the ucd_stage1 / ucd_stage2 / ucd_records tables.
It is entered with a fast call; on return TMP1 holds the chartype field of
the record and TMP2 the value read from ucd_stage2. The return address is
kept in RETURN_ADDR. */
3061 static void do_getucd(compiler_common *common)
3062 {
3063 /* Search the UCD record for the character comes in TMP1.
3064 Returns chartype in TMP1 and UCD offset in TMP2. */
3065 DEFINE_COMPILER;
3066
/* The shift amounts used below (7 for the block, 3 for the record size)
depend on these sizes. */
3067 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3068
3069 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[c >> 7], a byte sized entry. */
3070 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3071 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (c & 127) + (TMP2 << 7): the index into ucd_stage2. */
3072 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3073 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3074 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], read as an unsigned 16 bit value. NOTE(review):
the last argument of an SLJIT_MEM2 operand is presumably the shift applied
to the index register (1 here, 3 below) -- confirm against sljit. */
3075 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3076 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = the chartype byte of record number TMP2. */
3077 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3078 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3079 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3080 }
3081 #endif
3082
/* Emits the entry of the main matching loop and returns the label at which
STR_PTR is advanced to the next start position.

With firstline set, code is emitted first that scans for the end of the first
line and stores it in the first_line_end local, restoring STR_PTR afterwards.
The code after the returned label advances STR_PTR by one character (all code
units of a UTF-8 / UTF-16 character in UTF mode). When neither hascrorlf nor
firstline is set and the newline convention is ANY, ANYCRLF or a two
character sequence, a two unit newline is stepped over as a whole. The first
pass jumps over the advance code, so matching starts at the initial STR_PTR. */
3083 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3084 {
3085 DEFINE_COMPILER;
3086 struct sljit_label *mainloop;
3087 struct sljit_label *newlinelabel = NULL;
3088 struct sljit_jump *start;
3089 struct sljit_jump *end = NULL;
3090 struct sljit_jump *nl = NULL;
3091 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3092 struct sljit_jump *singlechar;
3093 #endif
3094 jump_list *newline = NULL;
3095 BOOL newlinecheck = FALSE;
3096 BOOL readuchar = FALSE;
3097
3098 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3099 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3100 newlinecheck = TRUE;
3101
3102 if (firstline)
3103 {
3104 /* Search for the end of the first line. */
3105 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is used for the scan; its value is parked in TMP3. */
3106 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3107
3108 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3109 {
/* Two character newline: advance one unit at a time and compare the unit
just passed and the current one with the two halves of common->newline.
first_line_end is set to STR_PTR minus one unit when the loop ends, either
on a match or at STR_END. */
3110 mainloop = LABEL();
3111 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3112 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3114 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3115 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3116 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3117 JUMPHERE(end);
3118 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 }
3120 else
3121 {
/* Other conventions: first_line_end is rewritten with the position of every
character before that character is tested. A newline leaves the loop through
the newline list and keeps that position; running into STR_END stores the
end position instead. */
3122 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3123 mainloop = LABEL();
3124 /* Continual stores does not cause data dependency. */
3125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3126 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3127 check_newlinechar(common, common->nltype, &newline, TRUE);
3128 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3129 JUMPHERE(end);
3130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3131 set_jumps(newline, LABEL());
3132 }
3133
3134 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3135 }
3136
/* The first pass skips the advance code below; this jump lands at the end. */
3137 start = JUMP(SLJIT_JUMP);
3138
3139 if (newlinecheck)
3140 {
/* Out of line helper, reached from the main loop when the current unit
equals the first half of common->newline: step over it and, unless STR_END
is reached, over one more unit when that one equals the second half. */
3141 newlinelabel = LABEL();
3142 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3143 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3144 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3145 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3146 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3147 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3148 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3149 #endif
3150 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3151 nl = JUMP(SLJIT_JUMP);
3152 }
3153
/* This is the label handed back to the caller. */
3154 mainloop = LABEL();
3155
3156 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is loaded only when something below looks at it. */
3157 #ifdef SUPPORT_UTF
3158 if (common->utf) readuchar = TRUE;
3159 #endif
3160 if (newlinecheck) readuchar = TRUE;
3161
3162 if (readuchar)
3163 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3164
3165 if (newlinecheck)
3166 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3167
3168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3169 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3170 #if defined COMPILE_PCRE8
3171 if (common->utf)
3172 {
/* For first bytes of 0xc0 and above, add the utf8_table4 entry of that byte
to STR_PTR as well. */
3173 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3174 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3175 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3176 JUMPHERE(singlechar);
3177 }
3178 #elif defined COMPILE_PCRE16
3179 if (common->utf)
3180 {
/* For units in 0xd800..0xdbff, step over one more 16 bit unit: TMP1 becomes
1 or 0 and is shifted into a byte count. */
3181 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3182 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3183 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3184 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3185 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3186 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3187 JUMPHERE(singlechar);
3188 }
3189 #endif /* COMPILE_PCRE[8|16] */
3190 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Landing point of the initial jump and of both exits of the newline helper. */
3191 JUMPHERE(start);
3192
3193 if (newlinecheck)
3194 {
3195 JUMPHERE(end);
3196 JUMPHERE(nl);
3197 }
3198
3199 return mainloop;
3200 }
3201
3202 #define MAX_N_CHARS 16
3203 #define MAX_N_BYTES 8
3204
3205 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3206 {
3207 pcre_uint8 len = bytes[0];
3208 int i;
3209
3210 if (len == 255)
3211 return;
3212
3213 if (len == 0)
3214 {
3215 bytes[0] = 1;
3216 bytes[1] = byte;
3217 return;
3218 }
3219
3220 for (i = len; i > 0; i--)
3221 if (bytes[i] == byte)
3222 return;
3223
3224 if (len >= MAX_N_BYTES - 1)
3225 {
3226 bytes[0] = 255;
3227 return;
3228 }
3229
3230 len++;
3231 bytes[len] = byte;
3232 bytes[0] = len;
3233 }
3234
3235 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3236 {
3237 /* Recursive function, which scans prefix literals. */
3238 BOOL last, any, caseless;
3239 int len, repeat, len_save, consumed = 0;
3240 pcre_uint32 chr, mask;
3241 pcre_uchar *alternative, *cc_save, *oc;
3242 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3243 pcre_uchar othercase[8];
3244 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3245 pcre_uchar othercase[2];
3246 #else
3247 pcre_uchar othercase[1];
3248 #endif
3249
3250 repeat = 1;
3251 while (TRUE)
3252 {
3253 if (*rec_count == 0)
3254 return 0;
3255 (*rec_count)--;
3256
3257 last = TRUE;
3258 any = FALSE;
3259 caseless = FALSE;
3260
3261 switch (*cc)
3262 {
3263 case OP_CHARI:
3264 caseless = TRUE;
3265 case OP_CHAR:
3266 last = FALSE;
3267 cc++;
3268 break;
3269
3270 case OP_SOD:
3271 case OP_SOM:
3272 case OP_SET_SOM:
3273 case OP_NOT_WORD_BOUNDARY:
3274 case OP_WORD_BOUNDARY:
3275 case OP_EODN:
3276 case OP_EOD:
3277 case OP_CIRC:
3278 case OP_CIRCM:
3279 case OP_DOLL:
3280 case OP_DOLLM:
3281 /* Zero width assertions. */
3282 cc++;
3283 continue;
3284
3285 case OP_ASSERT:
3286 case OP_ASSERT_NOT:
3287 case OP_ASSERTBACK:
3288 case OP_ASSERTBACK_NOT:
3289 cc = bracketend(cc);
3290 continue;
3291
3292 case OP_PLUSI:
3293 case OP_MINPLUSI:
3294 case OP_POSPLUSI:
3295 caseless = TRUE;
3296 case OP_PLUS:
3297 case OP_MINPLUS:
3298 case OP_POSPLUS:
3299 cc++;
3300 break;
3301
3302 case OP_EXACTI:
3303 caseless = TRUE;
3304 case OP_EXACT:
3305 repeat = GET2(cc, 1);
3306 last = FALSE;
3307 cc += 1 + IMM2_SIZE;
3308 break;
3309
3310 case OP_QUERYI:
3311 case OP_MINQUERYI:
3312 case OP_POSQUERYI:
3313 caseless = TRUE;
3314 case OP_QUERY:
3315 case OP_MINQUERY:
3316 case OP_POSQUERY:
3317 len = 1;
3318 cc++;
3319 #ifdef SUPPORT_UTF
3320 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3321 #endif
3322 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3323 if (max_chars == 0)
3324 return consumed;
3325 last = FALSE;
3326 break;
3327
3328 case OP_KET:
3329 cc += 1 + LINK_SIZE;
3330 continue;
3331
3332 case OP_ALT:
3333 cc += GET(cc, 1);
3334 continue;
3335
3336 case OP_ONCE:
3337 case OP_ONCE_NC:
3338 case OP_BRA:
3339 case OP_BRAPOS:
3340 case OP_CBRA:
3341 case OP_CBRAPOS:
3342 alternative = cc + GET(cc, 1);
3343 while (*alternative == OP_ALT)
3344 {
3345 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3346 if (max_chars == 0)
3347 return consumed;
3348 alternative += GET(alternative, 1);
3349 }
3350
3351 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3352 cc += IMM2_SIZE;
3353 cc += 1 + LINK_SIZE;
3354 continue;
3355
3356 case OP_CLASS:
3357 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3358 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3359 #endif
3360 any = TRUE;
3361 cc += 1 + 32 / sizeof(pcre_uchar);
3362 break;
3363
3364 case OP_NCLASS:
3365 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3366 if (common->utf) return consumed;
3367 #endif
3368 any = TRUE;
3369 cc += 1 + 32 / sizeof(pcre_uchar);
3370 break;
3371
3372 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3373 case OP_XCLASS:
3374 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375 if (common->utf) return consumed;
3376 #endif
3377 any = TRUE;
3378 cc += GET(cc, 1);
3379 break;
3380 #endif
3381
3382 case OP_DIGIT:
3383 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3384 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3385 return consumed;
3386 #endif
3387 any = TRUE;
3388 cc++;
3389 break;
3390
3391 case OP_WHITESPACE:
3392 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3393 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3394 return consumed;
3395 #endif
3396 any = TRUE;
3397 cc++;
3398 break;
3399
3400 case OP_WORDCHAR:
3401 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3402 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3403 return consumed;
3404 #endif
3405 any = TRUE;
3406 cc++;
3407 break;
3408
3409 case OP_NOT:
3410 case OP_NOTI:
3411 cc++;
3412 /* Fall through. */
3413 case OP_NOT_DIGIT:
3414 case OP_NOT_WHITESPACE:
3415 case OP_NOT_WORDCHAR:
3416 case OP_ANY:
3417 case OP_ALLANY:
3418 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3419 if (common->utf) return consumed;
3420 #endif
3421 any = TRUE;
3422 cc++;
3423 break;
3424
3425 #ifdef SUPPORT_UCP
3426 case OP_NOTPROP:
3427 case OP_PROP:
3428 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3429 if (common->utf) return consumed;
3430 #endif
3431 any = TRUE;
3432 cc += 1 + 2;
3433 break;
3434 #endif
3435
3436 case OP_TYPEEXACT:
3437 repeat = GET2(cc, 1);
3438 cc += 1 + IMM2_SIZE;
3439 continue;
3440
3441 case OP_NOTEXACT:
3442 case OP_NOTEXACTI:
3443 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3444 if (common->utf) return consumed;
3445 #endif
3446 any = TRUE;
3447 repeat = GET2(cc, 1);
3448 cc += 1 + IMM2_SIZE + 1;
3449 break;
3450
3451 default:
3452 return consumed;
3453 }
3454
3455 if (any)
3456 {
3457 #if defined COMPILE_PCRE8
3458 mask = 0xff;
3459 #elif defined COMPILE_PCRE16
3460 mask = 0xffff;
3461 #elif defined COMPILE_PCRE32
3462 mask = 0xffffffff;
3463 #else
3464 SLJIT_ASSERT_STOP();
3465 #endif
3466
3467 do
3468 {
3469 chars[0] = mask;
3470 chars[1] = mask;
3471 bytes[0] = 255;
3472
3473 consumed++;
3474 if (--max_chars == 0)
3475 return consumed;
3476 chars += 2;
3477 bytes += MAX_N_BYTES;
3478 }
3479 while (--repeat > 0);
3480
3481 repeat = 1;
3482 continue;
3483 }
3484
3485 len = 1;
3486 #ifdef SUPPORT_UTF
3487 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3488 #endif
3489
3490 if (caseless && char_has_othercase(common, cc))
3491 {
3492 #ifdef SUPPORT_UTF
3493 if (common->utf)
3494 {
3495 GETCHAR(chr, cc);
3496 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3497 return consumed;
3498 }
3499 else
3500 #endif
3501 {
3502 chr = *cc;
3503 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3504 }
3505 }
3506 else
3507 caseless = FALSE;
3508
3509 len_save = len;
3510 cc_save = cc;
3511 while (TRUE)
3512 {
3513 oc = othercase;
3514 do
3515 {
3516 chr = *cc;
3517 #ifdef COMPILE_PCRE32
3518 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3519 return consumed;
3520 #endif
3521 add_prefix_byte((pcre_uint8)chr, bytes);
3522
3523 mask = 0;
3524 if (caseless)
3525 {
3526 add_prefix_byte((pcre_uint8)*oc, bytes);
3527 mask = *cc ^ *oc;
3528 chr |= mask;
3529 }
3530
3531 #ifdef COMPILE_PCRE32
3532 if (chars[0] == NOTACHAR && chars[1] == 0)
3533 #else
3534 if (chars[0] == NOTACHAR)
3535 #endif
3536 {
3537 chars[0] = chr;
3538 chars[1] = mask;
3539 }
3540 else
3541 {
3542 mask |= chars[0] ^ chr;
3543 chr |= mask;
3544 chars[0] = chr;
3545 chars[1] |= mask;
3546 }
3547
3548 len--;
3549 consumed++;
3550 if (--max_chars == 0)
3551 return consumed;
3552 chars += 2;
3553 bytes += MAX_N_BYTES;
3554 cc++;
3555 oc++;
3556 }
3557 while (len > 0);
3558
3559 if (--repeat == 0)
3560 break;
3561
3562 len = len_save;
3563 cc = cc_save;
3564 }
3565
3566 repeat = 1;
3567 if (last)
3568 return consumed;
3569 }
3570 }
3571
3572 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3573 {
3574 DEFINE_COMPILER;
3575 struct sljit_label *start;
3576 struct sljit_jump *quit;
3577 pcre_uint32 chars[MAX_N_CHARS * 2];
3578 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3579 pcre_uint8 ones[MAX_N_CHARS];
3580 int offsets[3];
3581 pcre_uint32 mask;
3582 pcre_uint8 *byte_set, *byte_set_end;
3583 int i, max, from;
3584 int range_right = -1, range_len = 3 - 1;
3585 sljit_ub *update_table = NULL;
3586 BOOL in_range;
3587 pcre_uint32 rec_count;
3588
3589 for (i = 0; i < MAX_N_CHARS; i++)
3590 {
3591 chars[i << 1] = NOTACHAR;
3592 chars[(i << 1) + 1] = 0;
3593 bytes[i * MAX_N_BYTES] = 0;
3594 }
3595
3596 rec_count = 10000;
3597 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3598
3599 if (max <= 1)
3600 return FALSE;
3601
3602 for (i = 0; i < max; i++)
3603 {
3604 mask = chars[(i << 1) + 1];
3605 ones[i] = ones_in_half_byte[mask & 0xf];
3606 mask >>= 4;
3607 while (mask != 0)
3608 {
3609 ones[i] += ones_in_half_byte[mask & 0xf];
3610 mask >>= 4;
3611 }
3612 }
3613
3614 in_range = FALSE;
3615 from = 0; /* Prevent compiler "uninitialized" warning */
3616 for (i = 0; i <= max; i++)
3617 {
3618 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3619 {
3620 range_len = i - from;
3621 range_right = i - 1;
3622 }
3623
3624 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3625 {
3626 if (!in_range)
3627 {
3628 in_range = TRUE;
3629 from = i;
3630 }
3631 }
3632 else if (in_range)
3633 in_range = FALSE;
3634 }
3635
3636 if (range_right >= 0)
3637 {
3638 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3639 if (update_table == NULL)
3640 return TRUE;
3641 memset(update_table, IN_UCHARS(range_len), 256);
3642
3643 for (i = 0; i < range_len; i++)
3644 {
3645 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3646 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3647 byte_set_end = byte_set + byte_set[0];
3648 byte_set++;
3649 while (byte_set <= byte_set_end)
3650 {
3651 if (update_table[*byte_set] > IN_UCHARS(i))
3652 update_table[*byte_set] = IN_UCHARS(i);
3653 byte_set++;
3654 }
3655 }
3656 }
3657
3658 offsets[0] = -1;
3659 /* Scan forward. */
3660 for (i = 0; i < max; i++)
3661 if (ones[i] <= 2) {
3662 offsets[0] = i;
3663 break;
3664 }
3665
3666 if (offsets[0] < 0 && range_right < 0)
3667 return FALSE;
3668
3669 if (offsets[0] >= 0)
3670 {
3671 /* Scan backward. */
3672 offsets[1] = -1;
3673 for (i = max - 1; i > offsets[0]; i--)
3674 if (ones[i] <= 2 && i != range_right)
3675 {
3676 offsets[1] = i;
3677 break;
3678 }
3679
3680 /* This case is handled better by fast_forward_first_char. */
3681 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3682 return FALSE;
3683
3684 offsets[2] = -1;
3685 /* We only search for a middle character if there is no range check. */
3686 if (offsets[1] >= 0 && range_right == -1)
3687 {
3688 /* Scan from middle. */
3689 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3690 if (ones[i] <= 2)
3691 {
3692 offsets[2] = i;
3693 break;
3694 }
3695
3696 if (offsets[2] == -1)
3697 {
3698 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3699 if (ones[i] <= 2)
3700 {
3701 offsets[2] = i;
3702 break;
3703 }
3704 }
3705 }
3706
3707 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3708 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3709
3710 chars[0] = chars[offsets[0] << 1];
3711 chars[1] = chars[(offsets[0] << 1) + 1];
3712 if (offsets[2] >= 0)
3713 {
3714 chars[2] = chars[offsets[2] << 1];
3715 chars[3] = chars[(offsets[2] << 1) + 1];
3716 }
3717 if (offsets[1] >= 0)
3718 {
3719 chars[4] = chars[offsets[1] << 1];
3720 chars[5] = chars[(offsets[1] << 1) + 1];
3721 }
3722 }
3723
3724 max -= 1;
3725 if (firstline)
3726 {
3727 SLJIT_ASSERT(common->first_line_end != 0);
3728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3729 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3730 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3731 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3732 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3733 JUMPHERE(quit);
3734 }
3735 else
3736 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3737
3738 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3739 if (range_right >= 0)
3740 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3741 #endif
3742
3743 start = LABEL();
3744 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3745
3746 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3747
3748 if (range_right >= 0)
3749 {
3750 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3751 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3752 #else
3753 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3754 #endif
3755
3756 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3757 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3758 #else
3759 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3760 #endif
3761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3762 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3763 }
3764
3765 if (offsets[0] >= 0)
3766 {
3767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3768 if (offsets[1] >= 0)
3769 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3771
3772 if (chars[1] != 0)
3773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3774 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3775 if (offsets[2] >= 0)
3776 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3777
3778 if (offsets[1] >= 0)
3779 {
3780 if (chars[5] != 0)
3781 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3782 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3783 }
3784
3785 if (offsets[2] >= 0)
3786 {
3787 if (chars[3] != 0)
3788 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3789 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3790 }
3791 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3792 }
3793
3794 JUMPHERE(quit);
3795
3796 if (firstline)
3797 {
3798 if (range_right >= 0)
3799 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3800 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3801 if (range_right >= 0)
3802 {
3803 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3804 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3805 JUMPHERE(quit);
3806 }
3807 }
3808 else
3809 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3810 return TRUE;
3811 }
3812
3813 #undef MAX_N_CHARS
3814 #undef MAX_N_BYTES
3815
3816 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3817 {
3818 DEFINE_COMPILER;
3819 struct sljit_label *start;
3820 struct sljit_jump *quit;
3821 struct sljit_jump *found;
3822 pcre_uchar oc, bit;
3823
3824 if (firstline)
3825 {
3826 SLJIT_ASSERT(common->first_line_end != 0);
3827 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3828 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3829 }
3830
3831 start = LABEL();
3832 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3833 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3834
3835 oc = first_char;
3836 if (caseless)
3837 {
3838 oc = TABLE_GET(first_char, common->fcc, first_char);
3839 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3840 if (first_char > 127 && common->utf)
3841 oc = UCD_OTHERCASE(first_char);
3842 #endif
3843 }
3844 if (first_char == oc)
3845 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3846 else
3847 {
3848 bit = first_char ^ oc;
3849 if (is_powerof2(bit))
3850 {
3851 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3852 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3853 }
3854 else
3855 {
3856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3857 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3858 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3859 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3860 found = JUMP(SLJIT_NOT_ZERO);
3861 }
3862 }
3863
3864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3865 JUMPTO(SLJIT_JUMP, start);
3866 JUMPHERE(found);
3867 JUMPHERE(quit);
3868
3869 if (firstline)
3870 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3871 }
3872
3873 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3874 {
3875 DEFINE_COMPILER;
3876 struct sljit_label *loop;
3877 struct sljit_jump *lastchar;
3878 struct sljit_jump *firstchar;
3879 struct sljit_jump *quit;
3880 struct sljit_jump *foundcr = NULL;
3881 struct sljit_jump *notfoundnl;
3882 jump_list *newline = NULL;
3883
3884 if (firstline)
3885 {
3886 SLJIT_ASSERT(common->first_line_end != 0);
3887 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3888 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3889 }
3890
3891 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3892 {
3893 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3897 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3898
3899 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3900 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3901 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3902 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3903 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3904 #endif
3905 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3906
3907 loop = LABEL();
3908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3909 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3910 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3911 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3912 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3913 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3914
3915 JUMPHERE(quit);
3916 JUMPHERE(firstchar);
3917 JUMPHERE(lastchar);
3918
3919 if (firstline)
3920 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3921 return;
3922 }
3923
3924 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3926 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3927 skip_char_back(common);
3928
3929 loop = LABEL();
3930 common->ff_newline_shortcut = loop;
3931
3932 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3933 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3934 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3935 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3936 check_newlinechar(common, common->nltype, &newline, FALSE);
3937 set_jumps(newline, loop);
3938
3939 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3940 {
3941 quit = JUMP(SLJIT_JUMP);
3942 JUMPHERE(foundcr);
3943 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3944 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3945 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3946 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3947 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3948 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3949 #endif
3950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3951 JUMPHERE(notfoundnl);
3952 JUMPHERE(quit);
3953 }
3954 JUMPHERE(lastchar);
3955 JUMPHERE(firstchar);
3956
3957 if (firstline)
3958 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3959 }
3960
3961 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3962
3963 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3964 {
3965 DEFINE_COMPILER;
3966 struct sljit_label *start;
3967 struct sljit_jump *quit;
3968 struct sljit_jump *found = NULL;
3969 jump_list *matches = NULL;
3970 #ifndef COMPILE_PCRE8
3971 struct sljit_jump *jump;
3972 #endif
3973
3974 if (firstline)
3975 {
3976 SLJIT_ASSERT(common->first_line_end != 0);
3977 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3978 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3979 }
3980
3981 start = LABEL();
3982 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3983 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3984 #ifdef SUPPORT_UTF
3985 if (common->utf)
3986 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3987 #endif
3988
3989 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3990 {
3991 #ifndef COMPILE_PCRE8
3992 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3993 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3994 JUMPHERE(jump);
3995 #endif
3996 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3997 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3998 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3999 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4000 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4001 found = JUMP(SLJIT_NOT_ZERO);
4002 }
4003
4004 #ifdef SUPPORT_UTF
4005 if (common->utf)
4006 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4007 #endif
4008 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4009 #ifdef SUPPORT_UTF
4010 #if defined COMPILE_PCRE8
4011 if (common->utf)
4012 {
4013 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4014 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4015 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4016 }
4017 #elif defined COMPILE_PCRE16
4018 if (common->utf)
4019 {
4020 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4021 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4023 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4024 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4026 }
4027 #endif /* COMPILE_PCRE[8|16] */
4028 #endif /* SUPPORT_UTF */
4029 JUMPTO(SLJIT_JUMP, start);
4030 if (found != NULL)
4031 JUMPHERE(found);
4032 if (matches != NULL)
4033 set_jumps(matches, LABEL());
4034 JUMPHERE(quit);
4035
4036 if (firstline)
4037 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4038 }
4039
4040 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4041 {
4042 DEFINE_COMPILER;
4043 struct sljit_label *loop;
4044 struct sljit_jump *toolong;
4045 struct sljit_jump *alreadyfound;
4046 struct sljit_jump *found;
4047 struct sljit_jump *foundoc = NULL;
4048 struct sljit_jump *notfound;
4049 pcre_uint32 oc, bit;
4050
4051 SLJIT_ASSERT(common->req_char_ptr != 0);
4052 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4053 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4054 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4055 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4056
4057 if (has_firstchar)
4058 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4059 else
4060 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4061
4062 loop = LABEL();
4063 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4064
4065 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4066 oc = req_char;
4067 if (caseless)
4068 {
4069 oc = TABLE_GET(req_char, common->fcc, req_char);
4070 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4071 if (req_char > 127 && common->utf)
4072 oc = UCD_OTHERCASE(req_char);
4073 #endif
4074 }
4075 if (req_char == oc)
4076 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4077 else
4078 {
4079 bit = req_char ^ oc;
4080 if (is_powerof2(bit))
4081 {
4082 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4083 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4084 }
4085 else
4086 {
4087 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4088 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4089 }
4090 }
4091 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4092 JUMPTO(SLJIT_JUMP, loop);
4093
4094 JUMPHERE(found);
4095 if (foundoc)
4096 JUMPHERE(foundoc);
4097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4098 JUMPHERE(alreadyfound);
4099 JUMPHERE(toolong);
4100 return notfound;
4101 }
4102
4103 static void do_revertframes(compiler_common *common)
4104 {
4105 DEFINE_COMPILER;
4106 struct sljit_jump *jump;
4107 struct sljit_label *mainloop;
4108
4109 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4110 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4111 GET_LOCAL_BASE(TMP3, 0, 0);
4112
4113 /* Drop frames until we reach STACK_TOP. */
4114 mainloop = LABEL();
4115 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4116 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4117 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4118
4119 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4120 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4121 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4122 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4123 JUMPTO(SLJIT_JUMP, mainloop);
4124
4125 JUMPHERE(jump);
4126 jump = JUMP(SLJIT_SIG_LESS);
4127 /* End of dropping frames. */
4128 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4129
4130 JUMPHERE(jump);
4131 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4132 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4133 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4134 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4135 JUMPTO(SLJIT_JUMP, mainloop);
4136 }
4137
4138 static void check_wordboundary(compiler_common *common)
4139 {
4140 DEFINE_COMPILER;
4141 struct sljit_jump *skipread;
4142 jump_list *skipread_list = NULL;
4143 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4144 struct sljit_jump *jump;
4145 #endif
4146
4147 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4148
4149 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4150 /* Get type of the previous char, and put it to LOCALS1. */
4151 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4154 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4155 skip_char_back(common);
4156 check_start_used_ptr(common);
4157 read_char(common);
4158
4159 /* Testing char type. */
4160 #ifdef SUPPORT_UCP
4161 if (common->use_ucp)
4162 {
4163 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4164 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4165 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4166 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4167 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4168 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4169 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4170 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4171 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4172 JUMPHERE(jump);
4173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4174 }
4175 else
4176 #endif
4177 {
4178 #ifndef COMPILE_PCRE8
4179 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4180 #elif defined SUPPORT_UTF
4181 /* Here LOCALS1 has already been zeroed. */
4182 jump = NULL;
4183 if (common->utf)
4184 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4185 #endif /* COMPILE_PCRE8 */
4186 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4187 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4188 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4190 #ifndef COMPILE_PCRE8
4191 JUMPHERE(jump);
4192 #elif defined SUPPORT_UTF
4193 if (jump != NULL)
4194 JUMPHERE(jump);
4195 #endif /* COMPILE_PCRE8 */
4196 }
4197 JUMPHERE(skipread);
4198
4199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4200 check_str_end(common, &skipread_list);
4201 peek_char(common, READ_CHAR_MAX);
4202
4203 /* Testing char type. This is a code duplication. */
4204 #ifdef SUPPORT_UCP
4205 if (common->use_ucp)
4206 {
4207 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4208 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4209 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4210 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4212 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4213 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4214 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4215 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4216 JUMPHERE(jump);
4217 }
4218 else
4219 #endif
4220 {
4221 #ifndef COMPILE_PCRE8
4222 /* TMP2 may be destroyed by peek_char. */
4223 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4224 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4225 #elif defined SUPPORT_UTF
4226 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4227 jump = NULL;
4228 if (common->utf)
4229 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4230 #endif
4231 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4232 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4233 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4234 #ifndef COMPILE_PCRE8
4235 JUMPHERE(jump);
4236 #elif defined SUPPORT_UTF
4237 if (jump != NULL)
4238 JUMPHERE(jump);
4239 #endif /* COMPILE_PCRE8 */
4240 }
4241 set_jumps(skipread_list, LABEL());
4242
4243 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4244 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4245 }
4246
4247 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4248 {
4249 DEFINE_COMPILER;
4250 int ranges[MAX_RANGE_SIZE];
4251 pcre_uint8 bit, cbit, all;
4252 int i, byte, length = 0;
4253
4254 bit = bits[0] & 0x1;
4255 /* All bits will be zero or one (since bit is zero or one). */
4256 all = -bit;
4257
4258 for (i = 0; i < 256; )
4259 {
4260 byte = i >> 3;
4261 if ((i & 0x7) == 0 && bits[byte] == all)
4262 i += 8;
4263 else
4264 {
4265 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4266 if (cbit != bit)
4267 {
4268 if (length >= MAX_RANGE_SIZE)
4269 return FALSE;
4270 ranges[length] = i;
4271 length++;
4272 bit = cbit;
4273 all = -cbit;
4274 }
4275 i++;
4276 }
4277 }
4278
4279 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4280 {
4281 if (length >= MAX_RANGE_SIZE)
4282 return FALSE;
4283 ranges[length] = 256;
4284 length++;
4285 }
4286
4287 if (length < 0 || length > 4)
4288 return FALSE;
4289
4290 bit = bits[0] & 0x1;
4291 if (invert) bit ^= 0x1;
4292
4293 /* No character is accepted. */
4294 if (length == 0 && bit == 0)
4295 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4296
4297 switch(length)
4298 {
4299 case 0:
4300 /* When bit != 0, all characters are accepted. */
4301 return TRUE;
4302
4303 case 1:
4304 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4305 return TRUE;
4306
4307 case 2:
4308 if (ranges[0] + 1 != ranges[1])
4309 {
4310 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4311 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4312 }
4313 else
4314 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4315 return TRUE;
4316
4317 case 3:
4318 if (bit != 0)
4319 {
4320 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4321 if (ranges[0] + 1 != ranges[1])
4322 {
4323 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4324 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4325 }
4326 else
4327 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4328 return TRUE;
4329 }
4330
4331 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4332 if (ranges[1] + 1 != ranges[2])
4333 {
4334 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4335 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4336 }
4337 else
4338 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4339 return TRUE;
4340
4341 case 4:
4342 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4343 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4344 && is_powerof2(ranges[2] - ranges[0]))
4345 {
4346 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4347 if (ranges[2] + 1 != ranges[3])
4348 {
4349 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4350 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4351 }
4352 else
4353 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4354 return TRUE;
4355 }
4356
4357 if (bit != 0)
4358 {
4359 i = 0;
4360 if (ranges[0] + 1 != ranges[1])
4361 {
4362 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4363 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4364 i = ranges[0];
4365 }
4366 else
4367 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4368
4369 if (ranges[2] + 1 != ranges[3])
4370 {
4371 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4372 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4373 }
4374 else
4375 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4376 return TRUE;
4377 }
4378
4379 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4380 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4381 if (ranges[1] + 1 != ranges[2])
4382 {
4383 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4384 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4385 }
4386 else
4387 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4388 return TRUE;
4389
4390 default:
4391 SLJIT_ASSERT_STOP();
4392 return FALSE;
4393 }
4394 }
4395
4396 static void check_anynewline(compiler_common *common)
4397 {
4398 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
The emitted helper tests TMP1 against 0x0a-0x0d and 0x85, and also against
0x2028 and 0x2029 when the wide character block below is emitted (always in
16 and 32 bit builds, only for common->utf in 8 bit builds). TMP1 is modified
as well: 0x0a is subtracted from it and its lowest bit may be set. The
partial results are collected in TMP2; the last OP_FLAGS also requests
SLJIT_SET_E, so presumably the caller tests the flags after the return. */
4399 DEFINE_COMPILER;
4400
4401 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4402
4403 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); /* Rebase: 0x0a becomes 0. */
4404 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); /* Range 0x0a-0x0d. */
4405 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4407 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4408 #ifdef COMPILE_PCRE8
4409 if (common->utf)
4410 {
4411 #endif
4412 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); /* Result of the 0x85 test. */
4413 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* The rebased 0x2028 and 0x2029 differ only in bit 0. */
4414 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4415 #ifdef COMPILE_PCRE8
4416 }
4417 #endif
4418 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4419 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); /* Merges the last compare emitted above. */
4420 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4421 }
4422
4423 static void check_hspace(compiler_common *common)
4424 {
4425 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
The emitted helper tests TMP1 against 0x09, 0x20 and 0xa0, and also against
0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 when the wide
character block below is emitted (always in 16 and 32 bit builds, only for
common->utf in 8 bit builds). In that block 0x2000 is subtracted from TMP1,
so TMP1 is modified too. The partial results are collected in TMP2; the last
OP_FLAGS also requests SLJIT_SET_E, so presumably the caller tests the flags
after the return. */
4426 DEFINE_COMPILER;
4427
4428 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4429
4430 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4431 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4432 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4433 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4434 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4435 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4436 #ifdef COMPILE_PCRE8
4437 if (common->utf)
4438 {
4439 #endif
4440 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); /* Result of the 0xa0 test. */
4441 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4442 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4444 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4445 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); /* Rebase: the remaining constants are relative to 0x2000. */
4446 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); /* Range 0x2000-0x200a. */
4447 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4448 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4449 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4451 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4452 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4453 #ifdef COMPILE_PCRE8
4454 }
4455 #endif
4456 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4457 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); /* Merges the last compare emitted above. */
4458
4459 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4460 }
4461
4462 static void check_vspace(compiler_common *common)
4463 {
4464 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The emitted helper tests TMP1 against 0x0a-0x0d and 0x85, and also against
0x2028 and 0x2029 when the wide character block below is emitted (always in
16 and 32 bit builds, only for common->utf in 8 bit builds). TMP1 is modified
as well: 0x0a is subtracted from it and its lowest bit may be set. The
partial results are collected in TMP2; the last OP_FLAGS also requests
SLJIT_SET_E, so presumably the caller tests the flags after the return. */
4465 DEFINE_COMPILER;
4466
4467 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4468
4469 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); /* Rebase: 0x0a becomes 0. */
4470 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); /* Range 0x0a-0x0d. */
4471 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4472 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4473 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4474 #ifdef COMPILE_PCRE8
4475 if (common->utf)
4476 {
4477 #endif
4478 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); /* NOTE(review): SLJIT_SET_E looks redundant here, the SUB two lines below sets the flags again before they are used. */
4479 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* The rebased 0x2028 and 0x2029 differ only in bit 0. */
4480 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4481 #ifdef COMPILE_PCRE8
4482 }
4483 #endif
4484 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4485 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); /* Merges the last compare emitted above. */
4486
4487 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4488 }
4489
/* Register aliases for the two code units compared by do_casefulcmp and
do_caselesscmp. Both helpers save the aliased registers on entry and restore
them before returning. */
4490 #define CHAR1 STR_END
4491 #define CHAR2 STACK_TOP
4492
/* Emits a fast-call helper that compares two code unit sequences exactly.
On entry of the emitted code TMP1 addresses the first sequence, TMP2 holds
the number of bytes to compare (it is decreased by IN_UCHARS(1) for each code
unit) and TMP2 is first subtracted from STR_PTR to get the start of the second
sequence. The loop is left either at the first mismatch or when TMP2 becomes
zero. The registers aliased by CHAR1 and CHAR2 are preserved.
NOTE(review): how the caller detects a mismatch (TMP2 left non-zero, or the
flags) is not visible in this function - confirm at the call site. */
4493 static void do_casefulcmp(compiler_common *common)
4494 {
4495 DEFINE_COMPILER;
4496 struct sljit_jump *jump;
4497 struct sljit_label *label;
4498
4499 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4500 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4501 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); /* Save the register used as CHAR1. */
4502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0); /* Save the register used as CHAR2. */
/* Both pointers are moved back by one code unit; presumably MOVU_UCHAR
advances the address before loading, which would undo this - TODO confirm. */
4503 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4504 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4505
4506 label = LABEL();
4507 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4508 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4509 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0); /* Leave the loop on the first difference. */
4510 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4511 JUMPTO(SLJIT_NOT_ZERO, label);
4512
4513 JUMPHERE(jump);
4514 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Undo the initial adjustment of STR_PTR. */
4515 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4516 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4518 }
4519
/* Register alias used by do_caselesscmp to hold the address stored in
common->lcc; the helper saves and restores the aliased register. */
4520 #define LCC_TABLE STACK_LIMIT
4521
/* Emits a fast-call helper that compares two code unit sequences after
translating each code unit through the byte table at common->lcc. In builds
other than COMPILE_PCRE8, code units above 255 skip the table lookup and are
compared unchanged. On entry of the emitted code TMP1 addresses the first
sequence, TMP2 holds the number of bytes to compare and is first subtracted
from STR_PTR to get the start of the second sequence. The loop is left either
at the first mismatch or when TMP2 becomes zero. The registers aliased by
LCC_TABLE, CHAR1 and CHAR2 are preserved.
NOTE(review): how the caller detects a mismatch (TMP2 left non-zero, or the
flags) is not visible in this function - confirm at the call site. */
4522 static void do_caselesscmp(compiler_common *common)
4523 {
4524 DEFINE_COMPILER;
4525 struct sljit_jump *jump;
4526 struct sljit_label *label;
4527
4528 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4529 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4530
4531 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); /* Save the register used as LCC_TABLE. */
4532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0); /* Save the register used as CHAR1. */
4533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0); /* Save the register used as CHAR2. */
4534 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one code unit; presumably MOVU_UCHAR
advances the address before loading, which would undo this - TODO confirm. */
4535 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4536 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4537
4538 label = LABEL();
4539 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4540 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4541 #ifndef COMPILE_PCRE8
4542 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255); /* Values above 255 are not looked up in the table. */
4543 #endif
4544 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4545 #ifndef COMPILE_PCRE8
4546 JUMPHERE(jump);
4547 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4548 #endif
4549 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4550 #ifndef COMPILE_PCRE8
4551 JUMPHERE(jump);
4552 #endif
4553 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0); /* Leave the loop on the first difference. */
4554 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4555 JUMPTO(SLJIT_NOT_ZERO, label);
4556
4557 JUMPHERE(jump);
4558 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Undo the initial adjustment of STR_PTR. */
4559 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4560 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4561 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4562 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4563 }
4564
/* The register aliases are private to the compare helpers defined above. */
4565 #undef LCC_TABLE
4566 #undef CHAR1
4567 #undef CHAR2
4568
4569 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4570
4571 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4572 {
4573 /* This function would be ineffective to do in JIT level. */
4574 pcre_uint32 c1, c2;
4575 const pcre_uchar *src2 = args->uchar_ptr;
4576 const pcre_uchar *end2 = args->end;
4577 const ucd_record *ur;
4578 const pcre_uint32 *pp;
4579
4580 while (src1 < end1)
4581 {
4582 if (src2 >= end2)
4583 return (pcre_uchar*)1;
4584 GETCHARINC(c1, src1);
4585 GETCHARINC(c2, src2);
4586 ur = GET_UCD(c2);
4587 if (c1 != c2 && c1 != c2 + ur->other_case)
4588 {
4589 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4590 for (;;)
4591 {
4592 if (c1 < *pp) return NULL;
4593 if (c1 == *pp++) break;
4594 }
4595 }
4596 }
4597 return src2;
4598 }
4599
4600 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4601
/* Emits the compare code for one character of a literal sequence.

Arguments:
  common      compiler state
  caseless    when TRUE and the character at cc has an other case, the
              differing bit is forced to one (OR) in the subject data before
              the compare, so both cases match
  cc          the character to compare; in UTF mode all of its code units
              are processed (1 + GET_EXTRALEN(*cc))
  context     state shared between consecutive calls: length is decreased by
              IN_UCHARS(1) for each code unit and is used as a negative
              offset from STR_PTR for the loads, sourcereg is the register
              that receives the next load (-1 before the first call), and
              ucharptr, c and oc collect code units that are compared
              together when unaligned reads are available
  backtracks  list that receives the jumps taken on a mismatch

Returns:      cc advanced past the processed code units

TMP1 and TMP2 are used alternately: the next piece of the subject is loaded
into one of them before the compare on the other one is emitted. */
4602 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4603 compare_context *context, jump_list **backtracks)
4604 {
4605 DEFINE_COMPILER;
4606 unsigned int othercasebit = 0;
4607 pcre_uchar *othercasechar = NULL;
4608 #ifdef SUPPORT_UTF
4609 int utflength;
4610 #endif
4611
4612 if (caseless && char_has_othercase(common, cc))
4613 {
4614 othercasebit = char_get_othercase_bit(common, cc);
4615 SLJIT_ASSERT(othercasebit);
4616 /* Extracting bit difference info. */
4617 #if defined COMPILE_PCRE8
/* The bits above the low eight select the code unit (relative to cc) that
contains the differing bit; the low eight bits are the bit itself. */
4618 othercasechar = cc + (othercasebit >> 8);
4619 othercasebit &= 0xff;
4620 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4621 /* Note that this code only handles characters in the BMP. If there
4622 ever are characters outside the BMP whose othercase differs in only one
4623 bit from itself (there currently are none), this code will need to be
4624 revised for COMPILE_PCRE32. */
4625 othercasechar = cc + (othercasebit >> 9);
4626 if ((othercasebit & 0x100) != 0)
4627 othercasebit = (othercasebit & 0xff) << 8;
4628 else
4629 othercasebit &= 0xff;
4630 #endif /* COMPILE_PCRE[8|16|32] */
4631 }
4632
/* First call for this sequence: load the first piece of the subject into
TMP1 and make TMP2 the target of the next load. */
4633 if (context->sourcereg == -1)
4634 {
4635 #if defined COMPILE_PCRE8
4636 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4637 if (context->length >= 4)
4638 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4639 else if (context->length >= 2)
4640 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4641 else
4642 #endif
4643 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4644 #elif defined COMPILE_PCRE16
4645 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4646 if (context->length >= 4)
4647 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4648 else
4649 #endif
4650 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4651 #elif defined COMPILE_PCRE32
4652 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4653 #endif /* COMPILE_PCRE[8|16|32] */
4654 context->sourcereg = TMP2;
4655 }
4656
4657 #ifdef SUPPORT_UTF
4658 utflength = 1;
4659 if (common->utf && HAS_EXTRALEN(*cc))
4660 utflength += GET_EXTRALEN(*cc);
4661
/* Without SUPPORT_UTF the do/while lines are compiled out and the body
below is executed exactly once. */
4662 do
4663 {
4664 #endif
4665
4666 context->length -= IN_UCHARS(1);
4667 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4668
4669 /* Unaligned read is supported. */
/* The expected value goes to c and the case bit mask to oc; several code
units are collected and compared with a single instruction. */
4670 if (othercasebit != 0 && othercasechar == cc)
4671 {
4672 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4673 context->oc.asuchars[context->ucharptr] = othercasebit;
4674 }
4675 else
4676 {
4677 context->c.asuchars[context->ucharptr] = *cc;
4678 context->oc.asuchars[context->ucharptr] = 0;
4679 }
4680 context->ucharptr++;
4681
/* Emit the compare when the collected code units fill the piece that was
loaded, or when the sequence ends. */
4682 #if defined COMPILE_PCRE8
4683 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4684 #else
4685 if (context->ucharptr >= 2 || context->length == 0)
4686 #endif
4687 {
/* Load the next piece first, then switch to the register that holds the
piece to be compared now. */
4688 if (context->length >= 4)
4689 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4690 else if (context->length >= 2)
4691 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4692 #if defined COMPILE_PCRE8
4693 else if (context->length >= 1)
4694 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4695 #endif /* COMPILE_PCRE8 */
4696 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4697
/* The case labels are numbers of collected code units that correspond to
4, 2 and 1 bytes. */
4698 switch(context->ucharptr)
4699 {
4700 case 4 / sizeof(pcre_uchar):
4701 if (context->oc.asint != 0)
4702 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4703 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4704 break;
4705
4706 case 2 / sizeof(pcre_uchar):
4707 if (context->oc.asushort != 0)
4708 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4709 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4710 break;
4711
4712 #ifdef COMPILE_PCRE8
4713 case 1:
4714 if (context->oc.asbyte != 0)
4715 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4716 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4717 break;
4718 #endif
4719
4720 default:
4721 SLJIT_ASSERT_STOP();
4722 break;
4723 }
4724 context->ucharptr = 0;
4725 }
4726
4727 #else
4728
4729 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit is compared at a time: load the next one (if any), then
compare the one loaded earlier. */
4730 if (context->length >= 1)
4731 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4732
4733 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4734
4735 if (othercasebit != 0 && othercasechar == cc)
4736 {
4737 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4738 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4739 }
4740 else
4741 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4742
4743 #endif
4744
4745 cc++;
4746 #ifdef SUPPORT_UTF
4747 utflength--;
4748 }
4749 while (utflength > 0);
4750 #endif
4751
4752 return cc;
4753 }
4754
4755 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4756
/* Changes the offset that is currently subtracted from the value held in
typereg. The register is adjusted by the difference between the old offset
(typeoffset) and the new one (value), then value is recorded as the current
offset. Nothing is emitted when the offset does not change.

The expansion is a single statement (do ... while (0)), so the macro is safe
in an unbraced if/else; it must be followed by a semicolon. The argument is
evaluated more than once, so it must not have side effects. The macro uses
"typereg", "typeoffset" and the sljit compiler of the enclosing function. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4766
/* Changes the offset that is currently subtracted from the character held in
TMP1. The register is adjusted by the difference between the old offset
(charoffset) and the new one (value), then value is recorded as the current
offset. Nothing is emitted when the offset does not change.

The expansion is a single statement (do ... while (0)), so the macro is safe
in an unbraced if/else; it must be followed by a semicolon. The argument is
evaluated more than once, so it must not have side effects. The macro uses
"charoffset" and the sljit compiler of the enclosing function. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4776
4777 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4778 {
4779 DEFINE_COMPILER;
4780 jump_list *found = NULL;
4781 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4782 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4783 struct sljit_jump *jump = NULL;
4784 pcre_uchar *ccbegin;
4785 int compares, invertcmp, numberofcmps;
4786 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4787 BOOL utf = common->utf;
4788 #endif
4789
4790 #ifdef SUPPORT_UCP
4791 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4792 BOOL charsaved = FALSE;
4793 int typereg = TMP1, scriptreg = TMP1;
4794 const pcre_uint32 *other_cases;
4795 sljit_uw typeoffset;
4796 #endif
4797
4798 /* Scanning the necessary info. */
4799 cc++;
4800 ccbegin = cc;
4801 compares = 0;
4802 if (cc[-1] & XCL_MAP)
4803 {
4804 min = 0;
4805 cc += 32 / sizeof(pcre_uchar);
4806 }
4807
4808 while (*cc != XCL_END)
4809 {
4810 compares++;
4811 if (*cc == XCL_SINGLE)
4812 {
4813 cc ++;
4814 GETCHARINCTEST(c, cc);
4815 if (c > max) max = c;
4816 if (c < min) min = c;
4817 #ifdef SUPPORT_UCP
4818 needschar = TRUE;
4819 #endif
4820 }
4821 else if (*cc == XCL_RANGE)
4822 {
4823 cc ++;
4824 GETCHARINCTEST(c, cc);
4825 if (c < min) min = c;
4826 GETCHARINCTEST(c, cc);
4827 if (c > max) max = c;
4828 #ifdef SUPPORT_UCP
4829 needschar = TRUE;
4830 #endif
4831 }
4832 #ifdef SUPPORT_UCP
4833 else
4834 {
4835 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4836 cc++;
4837 if (*cc == PT_CLIST)
4838 {
4839 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4840 while (*other_cases != NOTACHAR)
4841 {
4842 if (*other_cases > max) max = *other_cases;
4843 if (*other_cases < min) min = *other_cases;
4844 other_cases++;
4845 }
4846 }
4847 else
4848 {
4849 max = READ_CHAR_MAX;
4850 min = 0;
4851 }
4852
4853 switch(*cc)
4854 {
4855 case PT_ANY:
4856 break;
4857
4858 case PT_LAMP:
4859 case PT_GC:
4860 case PT_PC:
4861 case PT_ALNUM:
4862 needstype = TRUE;
4863 break;
4864
4865 case PT_SC:
4866 needsscript = TRUE;
4867 break;
4868
4869 case PT_SPACE:
4870 case PT_PXSPACE:
4871 case PT_WORD:
4872 case PT_PXGRAPH:
4873 case PT_PXPRINT:
4874 case PT_PXPUNCT:
4875 needstype = TRUE;
4876 needschar = TRUE;
4877 break;
4878
4879 case PT_CLIST:
4880 case PT_UCNC:
4881 needschar = TRUE;
4882 break;
4883
4884 default:
4885 SLJIT_ASSERT_STOP();
4886 break;
4887 }
4888 cc += 2;
4889 }
4890 #endif
4891 }
4892
4893 /* We are not necessary in utf mode even in 8 bit mode. */
4894 cc = ccbegin;
4895 detect_partial_match(common, backtracks);
4896 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4897
4898 if ((cc[-1] & XCL_HASPROP) == 0)
4899 {
4900 if ((cc[-1] & XCL_MAP) != 0)
4901 {
4902 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4903 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4904 {
4905 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4906 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4907 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4908 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4909 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4910 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4911 }
4912
4913 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4914 JUMPHERE(jump);
4915
4916 cc += 32 / sizeof(pcre_uchar);
4917 }
4918 else
4919 {
4920 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4921 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4922 }
4923 }
4924 else if ((cc[-1] & XCL_MAP) != 0)
4925 {
4926 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4927 #ifdef SUPPORT_UCP
4928 charsaved = TRUE;
4929 #endif
4930 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4931 {
4932 #ifdef COMPILE_PCRE8
4933 SLJIT_ASSERT(common->utf);
4934 #endif
4935 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4936
4937 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4938 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4939 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4940 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4941 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4942 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4943
4944 JUMPHERE(jump);
4945 }
4946
4947 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4948 cc += 32 / sizeof(pcre_uchar);
4949 }
4950
4951 #ifdef SUPPORT_UCP
4952 /* Simple register allocation. TMP1 is preferred if possible. */
4953 if (needstype || needsscript)
4954 {
4955 if (needschar && !charsaved)
4956 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4957 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4958 if (needschar)
4959 {
4960 if (needstype)
4961 {
4962 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4963 typereg = RETURN_ADDR;
4964 }
4965
4966 if (needsscript)
4967 scriptreg = TMP3;
4968 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4969 }
4970 else if (needstype && needsscript)
4971 scriptreg = TMP3;
4972 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4973
4974 if (needsscript)
4975 {
4976 if (scriptreg == TMP1)
4977 {
4978 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4979 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4980 }
4981 else
4982 {
4983 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4984 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4985 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4986 }
4987 }
4988 }
4989 #endif
4990
4991 /* Generating code. */
4992 charoffset = 0;
4993 numberofcmps = 0;
4994 #ifdef SUPPORT_UCP
4995 typeoffset = 0;
4996 #endif
4997
4998 while (*cc != XCL_END)
4999 {
5000 compares--;
5001 invertcmp = (compares == 0 && list != backtracks);
5002 jump = NULL;
5003
5004 if (*cc == XCL_SINGLE)
5005 {
5006 cc ++;
5007 GETCHARINCTEST(c, cc);
5008
5009 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5010 {
5011 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5012 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5013 numberofcmps++;
5014 }
5015 else if (numberofcmps > 0)
5016 {
5017 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5018 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5019 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5020 numberofcmps = 0;
5021 }
5022 else
5023 {
5024 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5025 numberofcmps = 0;
5026 }
5027 }
5028 else if (*cc == XCL_RANGE)
5029 {
5030 cc ++;
5031 GETCHARINCTEST(c, cc);
5032 SET_CHAR_OFFSET(c);
5033 GETCHARINCTEST(c, cc);
5034
5035 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5036 {
5037 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5038 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5039 numberofcmps++;
5040 }
5041 else if (numberofcmps > 0)
5042 {
5043 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5044 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5045 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5046 numberofcmps = 0;
5047 }
5048 else
5049 {
5050 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5051 numberofcmps = 0;
5052 }
5053 }
5054 #ifdef SUPPORT_UCP
5055 else
5056 {
5057 if (*cc == XCL_NOTPROP)
5058 invertcmp ^= 0x1;
5059 cc++;
5060 switch(*cc)
5061 {
5062 case PT_ANY:
5063 if (list != backtracks)
5064 {
5065 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5066 continue;
5067 }
5068 else if (cc[-1] == XCL_NOTPROP)
5069 continue;
5070 jump = JUMP(SLJIT_JUMP);
5071 break;
5072
5073 case PT_LAMP:
5074 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5075 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5077 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5079 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5080 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5081 break;
5082
5083 case PT_GC:
5084 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5085 SET_TYPE_OFFSET(c);
5086 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5087 break;
5088
5089 case PT_PC:
5090 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5091 break;
5092
5093 case PT_SC:
5094 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5095 break;
5096
5097 case PT_SPACE:
5098 case PT_PXSPACE:
5099 SET_CHAR_OFFSET(9);
5100 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5101 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5102
5103 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5104 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5105
5106 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5107 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5108
5109 SET_TYPE_OFFSET(ucp_Zl);
5110 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5111 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5112 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5113 break;
5114
5115 case PT_WORD:
5116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5117 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5118 /* Fall through. */
5119
5120 case PT_ALNUM:
5121 SET_TYPE_OFFSET(ucp_Ll);
5122 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5123 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5124 SET_TYPE_OFFSET(ucp_Nd);
5125 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5126 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5127 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5128 break;
5129
5130 case PT_CLIST:
5131 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5132
5133 /* At least three characters are required.
5134 Otherwise this case would be handled by the normal code path. */
5135 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5136 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5137
5138 /* Optimizing character pairs, if their difference is power of 2. */
5139 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5140 {
5141 if (charoffset == 0)
5142 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5143 else
5144 {
5145 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5146 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5147 }
5148 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5149 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5150 other_cases += 2;
5151 }
5152 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5153 {
5154 if (charoffset == 0)
5155 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5156 else
5157 {
5158 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5159 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5160 }
5161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5162 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5163
5164 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5165 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5166
5167 other_cases += 3;
5168 }
5169 else
5170 {
5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5172 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5173 }
5174
5175 while (*other_cases != NOTACHAR)
5176 {
5177 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5178 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5179 }
5180 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5181 break;
5182
5183 case PT_UCNC:
5184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5185 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5186 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5187 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5189 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5190
5191 SET_CHAR_OFFSET(0xa0);
5192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5193 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5194 SET_CHAR_OFFSET(0);
5195 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5196 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5197 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5198 break;
5199
5200 case PT_PXGRAPH:
5201 /* C and Z groups are the farthest two groups. */
5202 SET_TYPE_OFFSET(ucp_Ll);
5203 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5204 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5205
5206 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5207
5208 /* In case of ucp_Cf, we overwrite the result. */
5209 SET_CHAR_OFFSET(0x2066);
5210 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5211 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5212
5213 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5214 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5215
5216 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5217 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5218
5219 JUMPHERE(jump);
5220 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5221 break;
5222
5223 case PT_PXPRINT:
5224 /* C and Z groups are the farthest two groups. */
5225 SET_TYPE_OFFSET(ucp_Ll);
5226 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5227 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5228
5229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5230 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5231
5232 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5233
5234 /* In case of ucp_Cf, we overwrite the result. */
5235 SET_CHAR_OFFSET(0x2066);
5236 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5237 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5238
5239 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5240 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5241
5242 JUMPHERE(jump);
5243 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5244 break;
5245
5246 case PT_PXPUNCT:
5247 SET_TYPE_OFFSET(ucp_Sc);
5248 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5249 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5250
5251 SET_CHAR_OFFSET(0);
5252 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5253 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5254
5255 SET_TYPE_OFFSET(ucp_Pc);
5256 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5257 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5258 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5259 break;
5260 }
5261 cc += 2;
5262 }
5263 #endif
5264
5265 if (jump != NULL)
5266 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5267 }
5268
5269 if (found != NULL)
5270 set_jumps(found, LABEL());
5271 }
5272
5273 #undef SET_TYPE_OFFSET
5274 #undef SET_CHAR_OFFSET
5275
5276 #endif
5277
/* Emits the matching path code for one simple opcode.

   type       - the opcode to compile
   cc         - points at the data that follows the opcode
   backtracks - every jump taken when the item fails to match is appended here

   Returns cc advanced past the operand data owned by the opcode (cc itself for
   opcodes without operands). An opcode that is not handled by the switch
   reaches SLJIT_ASSERT_STOP(). */
5278 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5279 {
5280 DEFINE_COMPILER;
5281 int length;
5282 unsigned int c, oc, bit;
5283 compare_context context;
5284 struct sljit_jump *jump[4];
5285 jump_list *end_list;
5286 #ifdef SUPPORT_UTF
5287 struct sljit_label *label;
5288 #ifdef SUPPORT_UCP
5289 pcre_uchar propdata[5];
5290 #endif
5291 #endif /* SUPPORT_UTF */
5292
5293 switch(type)
5294 {
/* Backtracks unless STR_PTR equals jit_arguments.begin. */
5295 case OP_SOD:
5296 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5297 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5298 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5299 return cc;
5300
/* Backtracks unless STR_PTR equals jit_arguments.str. */
5301 case OP_SOM:
5302 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5304 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5305 return cc;
5306
/* Calls the shared wordboundary routine; the zero flag tested after the call
   selects the backtrack, with the sense inverted for the negated opcode. */
5307 case OP_NOT_WORD_BOUNDARY:
5308 case OP_WORD_BOUNDARY:
5309 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5310 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5311 return cc;
5312
5313 case OP_NOT_DIGIT:
5314 case OP_DIGIT:
5315 /* Digits are usually 0-9, so it is worth to optimize them. */
5316 detect_partial_match(common, backtracks);
5317 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5318 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5319 read_char7_type(common, type == OP_NOT_DIGIT);
5320 else
5321 #endif
5322 read_char8_type(common, type == OP_NOT_DIGIT);
5323 /* Flip the starting bit in the negative case. */
5324 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5325 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5326 return cc;
5327
/* Same scheme as the digit case, testing the ctype_space bit. */
5328 case OP_NOT_WHITESPACE:
5329 case OP_WHITESPACE:
5330 detect_partial_match(common, backtracks);
5331 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5332 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5333 read_char7_type(common, type == OP_NOT_WHITESPACE);
5334 else
5335 #endif
5336 read_char8_type(common, type == OP_NOT_WHITESPACE);
5337 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5338 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5339 return cc;
5340
/* Same scheme as the digit case, testing the ctype_word bit. */
5341 case OP_NOT_WORDCHAR:
5342 case OP_WORDCHAR:
5343 detect_partial_match(common, backtracks);
5344 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5345 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5346 read_char7_type(common, type == OP_NOT_WORDCHAR);
5347 else
5348 #endif
5349 read_char8_type(common, type == OP_NOT_WORDCHAR);
5350 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5351 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5352 return cc;
5353
5354 case OP_ANY:
5355 detect_partial_match(common, backtracks);
5356 read_char_range(common, common->nlmin, common->nlmax, TRUE);
/* A fixed newline above 255 is handled as two units: (newline >> 8) & 0xff
   followed by newline & 0xff. The item fails only when both are present. */
5357 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5358 {
5359 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5360 end_list = NULL;
5361 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5362 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5363 else
5364 check_str_end(common, &end_list);
5365
5366 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5367 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5368 set_jumps(end_list, LABEL());
5369 JUMPHERE(jump[0]);
5370 }
5371 else
5372 check_newlinechar(common, common->nltype, backtracks, TRUE);
5373 return cc;
5374
5375 case OP_ALLANY:
5376 detect_partial_match(common, backtracks);
5377 #ifdef SUPPORT_UTF
5378 if (common->utf)
5379 {
/* Skip one whole character: the first unit is loaded and stepped over, then
   the 8 and 16 bit variants add the extra length derived from that unit. */
5380 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5381 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5382 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5383 #if defined COMPILE_PCRE8
5384 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5385 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5386 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5387 #elif defined COMPILE_PCRE16
5388 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5389 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5391 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5392 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5393 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5394 #endif
5395 JUMPHERE(jump[0]);
5396 #endif /* COMPILE_PCRE[8|16] */
5397 return cc;
5398 }
5399 #endif
5400 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5401 return cc;
5402
5403 case OP_ANYBYTE:
5404 detect_partial_match(common, backtracks);
5405 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5406 return cc;
5407
5408 #ifdef SUPPORT_UTF
5409 #ifdef SUPPORT_UCP
/* A single property test is wrapped into a temporary extended class
   description and compiled by compile_xclass_matchingpath(). */
5410 case OP_NOTPROP:
5411 case OP_PROP:
5412 propdata[0] = XCL_HASPROP;
5413 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5414 propdata[2] = cc[0];
5415 propdata[3] = cc[1];
5416 propdata[4] = XCL_END;
5417 compile_xclass_matchingpath(common, propdata, backtracks);
5418 return cc + 2;
5419 #endif
5420 #endif
5421
/* A CR consumes a directly following NL as well; a CR at the end of the
   subject or followed by another character matches on its own. Any other
   character is checked with check_newlinechar() using bsr_nltype. */
5422 case OP_ANYNL:
5423 detect_partial_match(common, backtracks);
5424 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5425 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5426 /* We don't need to handle soft partial matching case. */
5427 end_list = NULL;
5428 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5429 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5430 else
5431 check_str_end(common, &end_list);
5432 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5433 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5434 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5435 jump[2] = JUMP(SLJIT_JUMP);
5436 JUMPHERE(jump[0]);
5437 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5438 set_jumps(end_list, LABEL());
5439 JUMPHERE(jump[1]);
5440 JUMPHERE(jump[2]);
5441 return cc;
5442
/* The shared hspace routine is called and the zero flag is tested afterwards. */
5443 case OP_NOT_HSPACE:
5444 case OP_HSPACE:
5445 detect_partial_match(common, backtracks);
5446 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5447 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5448 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5449 return cc;
5450
/* The shared vspace routine is called and the zero flag is tested afterwards. */
5451 case OP_NOT_VSPACE:
5452 case OP_VSPACE:
5453 detect_partial_match(common, backtracks);
5454 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5455 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5456 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5457 return cc;
5458
5459 #ifdef SUPPORT_UCP
/* Reads the first character, then keeps reading characters while the bit
   (1 << gbprop of the new character) is set in the ucp_gbtable entry selected
   by the gbprop of the previous character. STACK_TOP is borrowed to hold the
   previous gbprop; its value is saved in LOCALS0 and restored afterwards. */
5460 case OP_EXTUNI:
5461 detect_partial_match(common, backtracks);
5462 read_char(common);
5463 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5465 /* Optimize register allocation: use a real register. */
5466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5467 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5468
5469 label = LABEL();
5470 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the next read so it can be restored
   when the loop below is left through the fall-through path. */
5471 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5472 read_char(common);
5473 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5474 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5475 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5476
5477 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5478 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5479 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5480 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5481 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5482 JUMPTO(SLJIT_NOT_ZERO, label);
5483
5484 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5485 JUMPHERE(jump[0]);
5486 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5487
5488 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5489 {
5490 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5491 /* Since we successfully read a char above, partial matching must occur. */
5492 check_partial(common, TRUE);
5493 JUMPHERE(jump[0]);
5494 }
5495 return cc;
5496 #endif
5497
5498 case OP_EODN:
5499 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; the other
   paths verify that only a newline remains before STR_END. */
5500 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5501 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5502 {
5503 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5504 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5505 if (common->mode == JIT_COMPILE)
5506 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5507 else
5508 {
5509 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5510 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5511 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5512 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5513 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5514 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5515 check_partial(common, TRUE);
5516 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5517 JUMPHERE(jump[1]);
5518 }
5519 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5520 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5521 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5522 }
5523 else if (common->nltype == NLTYPE_FIXED)
5524 {
5525 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5526 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5527 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5528 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5529 }
5530 else
5531 {
5532 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5533 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5534 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5535 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5536 jump[2] = JUMP(SLJIT_GREATER);
5537 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5538 /* Equal. */
5539 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5540 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5541 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5542
5543 JUMPHERE(jump[1]);
5544 if (common->nltype == NLTYPE_ANYCRLF)
5545 {
5546 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5547 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5548 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5549 }
5550 else
5551 {
/* STR_PTR is saved in LOCALS1 around the read and restored afterwards, so
   the assertion itself does not move the subject pointer. */
5552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5553 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5554 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5555 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5556 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5557 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5558 }
5559 JUMPHERE(jump[2]);
5560 JUMPHERE(jump[3]);
5561 }
5562 JUMPHERE(jump[0]);
5563 check_partial(common, FALSE);
5564 return cc;
5565
/* Backtracks while STR_PTR is still below STR_END. */
5566 case OP_EOD:
5567 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5568 check_partial(common, FALSE);
5569 return cc;
5570
/* Backtracks when STR_PTR is above jit_arguments.begin or when the notbol
   field is non-zero. */
5571 case OP_CIRC:
5572 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5573 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5574 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5575 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5576 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5577 return cc;
5578
/* At jit_arguments.begin only the notbol field is checked. Elsewhere STR_PTR
   must be below STR_END and the preceding character(s) must form a newline. */
5579 case OP_CIRCM:
5580 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5581 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5582 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5583 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5584 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5585 jump[0] = JUMP(SLJIT_JUMP);
5586 JUMPHERE(jump[1]);
5587
5588 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5589 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5590 {
5591 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5592 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5593 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5594 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5595 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5596 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5597 }
5598 else
5599 {
5600 skip_char_back(common);
5601 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5602 check_newlinechar(common, common->nltype, backtracks, FALSE);
5603 }
5604 JUMPHERE(jump[0]);
5605 return cc;
5606
/* Backtracks when the noteol field is non-zero; otherwise behaves as OP_EODN,
   or as a plain end-of-subject test when common->endonly is set. */
5607 case OP_DOLL:
5608 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5609 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5610 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5611
5612 if (!common->endonly)
5613 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5614 else
5615 {
5616 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5617 check_partial(common, FALSE);
5618 }
5619 return cc;
5620
/* When STR_PTR is not below STR_END only the noteol field is checked;
   otherwise the following character(s) must form a newline. */
5621 case OP_DOLLM:
5622 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5623 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5624 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5625 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5626 check_partial(common, FALSE);
5627 jump[0] = JUMP(SLJIT_JUMP);
5628 JUMPHERE(jump[1]);
5629
5630 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5631 {
5632 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5633 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5634 if (common->mode == JIT_COMPILE)
5635 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5636 else
5637 {
5638 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5639 /* STR_PTR = STR_END - IN_UCHARS(1) */
5640 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5641 check_partial(common, TRUE);
5642 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5643 JUMPHERE(jump[1]);
5644 }
5645
5646 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5647 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5648 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5649 }
5650 else
5651 {
5652 peek_char(common, common->nlmax);
5653 check_newlinechar(common, common->nltype, backtracks, FALSE);
5654 }
5655 JUMPHERE(jump[0]);
5656 return cc;
5657
5658 case OP_CHAR:
5659 case OP_CHARI:
5660 length = 1;
5661 #ifdef SUPPORT_UTF
5662 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5663 #endif
/* In JIT_COMPILE mode a caseful character, a character without another case,
   or one with a non-zero othercase bit is handed to byte_sequence_compare()
   after a single length check. */
5664 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5665 {
5666 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5667 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5668
5669 context.length = IN_UCHARS(length);
5670 context.sourcereg = -1;
5671 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5672 context.ucharptr = 0;
5673 #endif
5674 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5675 }
5676
/* Otherwise the character is read into TMP1 and compared as a whole. */
5677 detect_partial_match(common, backtracks);
5678 #ifdef SUPPORT_UTF
5679 if (common->utf)
5680 {
5681 GETCHAR(c, cc);
5682 }
5683 else
5684 #endif
5685 c = *cc;
5686
5687 if (type == OP_CHAR || !char_has_othercase(common, cc))
5688 {
5689 read_char_range(common, c, c, FALSE);
5690 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5691 return cc + length;
5692 }
5693 oc = char_othercase(common, c);
5694 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5695 bit = c ^ oc;
/* When the two cases differ in exactly one bit, that bit is forced on and a
   single comparison covers both cases. */
5696 if (is_powerof2(bit))
5697 {
5698 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5699 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5700 return cc + length;
5701 }
5702 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5703 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5704 JUMPHERE(jump[0]);
5705 return cc + length;
5706
5707 case OP_NOT:
5708 case OP_NOTI:
5709 detect_partial_match(common, backtracks);
5710 length = 1;
5711 #ifdef SUPPORT_UTF
5712 if (common->utf)
5713 {
5714 #ifdef COMPILE_PCRE8
5715 c = *cc;
/* Fast path for a pattern character below 128: only the first subject unit
   needs to be compared, then the rest of the subject character is skipped. */
5716 if (c < 128)
5717 {
5718 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5719 if (type == OP_NOT || !char_has_othercase(common, cc))
5720 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5721 else
5722 {
5723 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5724 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5725 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5726 }
5727 /* Skip the variable-length character. */
5728 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5729 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5730 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5731 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5732 JUMPHERE(jump[0]);
5733 return cc + 1;
5734 }
5735 else
5736 #endif /* COMPILE_PCRE8 */
5737 {
5738 GETCHARLEN(c, cc, length);
5739 }
5740 }
5741 else
5742 #endif /* SUPPORT_UTF */
5743 c = *cc;
5744
5745 if (type == OP_NOT || !char_has_othercase(common, cc))
5746 {
5747 read_char_range(common, c, c, TRUE);
5748 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5749 }
5750 else
5751 {
5752 oc = char_othercase(common, c);
5753 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5754 bit = c ^ oc;
5755 if (is_powerof2(bit))
5756 {
5757 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5758 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5759 }
5760 else
5761 {
5762 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5763 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5764 }
5765 }
5766 return cc + length;
5767
5768 case OP_CLASS:
5769 case OP_NCLASS:
5770 detect_partial_match(common, backtracks);
5771
5772 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5773 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5774 read_char_range(common, 0, bit, type == OP_NCLASS);
5775 #else
5776 read_char_range(common, 0, 255, type == OP_NCLASS);
5777 #endif
5778
/* check_class_ranges() returning non-zero means nothing more is emitted here. */
5779 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5780 return cc + 32 / sizeof(pcre_uchar);
5781
/* Characters above the bitmap limit fail for OP_CLASS and bypass the bitmap
   test (through jump[0]) for OP_NCLASS. */
5782 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5783 jump[0] = NULL;
5784 if (common->utf)
5785 {
5786 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5787 if (type == OP_CLASS)
5788 {
5789 add_jump(compiler, backtracks, jump[0]);
5790 jump[0] = NULL;
5791 }
5792 }
5793 #elif !defined COMPILE_PCRE8
5794 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5795 if (type == OP_CLASS)
5796 {
5797 add_jump(compiler, backtracks, jump[0]);
5798 jump[0] = NULL;
5799 }
5800 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5801
/* Bitmap lookup: byte (c >> 3) of the class data at cc, bit (1 << (c & 7)). */
5802 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5803 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5804 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5805 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5806 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5807 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5808
5809 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5810 if (jump[0] != NULL)
5811 JUMPHERE(jump[0]);
5812 #endif
5813
5814 return cc + 32 / sizeof(pcre_uchar);
5815
5816 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5817 case OP_XCLASS:
5818 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5819 return cc + GET(cc, 0) - 1;
5820 #endif
5821
/* Moves STR_PTR back by `length` characters and backtracks if that would
   reach before jit_arguments.begin. A zero length emits nothing. */
5822 case OP_REVERSE:
5823 length = GET(cc, 0);
5824 if (length == 0)
5825 return cc + LINK_SIZE;
5826 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5827 #ifdef SUPPORT_UTF
5828 if (common->utf)
5829 {
/* UTF mode steps back one character at a time, counting down in TMP2. */
5830 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5832 label = LABEL();
5833 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5834 skip_char_back(common);
5835 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5836 JUMPTO(SLJIT_NOT_ZERO, label);
5837 }
5838 else
5839 #endif
5840 {
5841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5842 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5843 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5844 }
5845 check_start_used_ptr(common);
5846 return cc + LINK_SIZE;
5847 }
/* Only reached when `type` is not one of the opcodes handled above. */
5848 SLJIT_ASSERT_STOP();
5849 return cc;
5850 }
5851
5852 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5853 {
5854 /* This function consumes at least one input character. */
5855 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5856 DEFINE_COMPILER;
5857 pcre_uchar *ccbegin = cc;
5858 compare_context context;
5859 int size;
5860
5861 context.length = 0;
5862 do
5863 {
5864 if (cc >= ccend)
5865 break;
5866
5867 if (*cc == OP_CHAR)
5868 {
5869 size = 1;
5870 #ifdef SUPPORT_UTF
5871 if (common->utf && HAS_EXTRALEN(cc[1]))
5872 size += GET_EXTRALEN(cc[1]);
5873 #endif
5874 }
5875 else if (*cc == OP_CHARI)
5876 {
5877 size = 1;
5878 #ifdef SUPPORT_UTF
5879 if (common->utf)
5880 {
5881 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5882 size = 0;
5883 else if (HAS_EXTRALEN(cc[1]))
5884 size += GET_EXTRALEN(cc[1]);
5885 }
5886 else
5887 #endif
5888 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5889 size = 0;
5890 }
5891 else
5892 size = 0;
5893
5894 cc += 1 + size;
5895 context.length += IN_UCHARS(size);
5896 }
5897 while (size > 0 && context.length <= 128);
5898
5899 cc = ccbegin;
5900 if (context.length > 0)
5901 {
5902 /* We have a fixed-length byte sequence. */
5903 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5904 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5905
5906 context.sourcereg = -1;
5907 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5908 context.ucharptr = 0;
5909 #endif
5910 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5911 return cc;
5912 }
5913
5914 /* A non-fixed length character will be checked if length == 0. */
5915 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5916 }
5917
5918 /* Forward declarations: these two functions are referenced before their
definitions appear. */
5919 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5920 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5921
/* Allocates `size` bytes with sljit_alloc_memory(), zero-fills them, stores
   `ccstart` in the new record and makes it the new parent->top (the previous
   top is kept in ->prev). Relies on `compiler`, `backtrack` and `parent`
   being in scope at the expansion site. If the compiler reports an error
   after the allocation, the enclosing function returns `error`. */
5922 #define PUSH_BACKTRACK(size, ccstart, error) \
5923 do \
5924 { \
5925 backtrack = sljit_alloc_memory(compiler, (size)); \
5926 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5927 return error; \
5928 memset(backtrack, 0, size); \
5929 backtrack->prev = parent->top; \
5930 backtrack->cc = (ccstart); \
5931 parent->top = backtrack; \
5932 } \
5933 while (0)
5934
/* Variant of PUSH_BACKTRACK for functions returning void: the same
   allocation, zero-fill and linking into parent->top, but a compiler error
   leads to a plain `return;`. Relies on `compiler`, `backtrack` and `parent`
   being in scope at the expansion site. */
5935 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5936 do \
5937 { \
5938 backtrack = sljit_alloc_memory(compiler, (size)); \
5939 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5940 return; \
5941 memset(backtrack, 0, size); \
5942 backtrack->prev = parent->top; \
5943 backtrack->cc = (ccstart); \
5944 parent->top = backtrack; \
5945 } \
5946 while (0)
5947
/* Casts the in-scope `backtrack` pointer to a pointer to `type`. */
5948 #define BACKTRACK_AS(type) ((type *)backtrack)
5949
5950 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5951 {
5952 /* The OVECTOR offset goes to TMP2. */
5953 DEFINE_COMPILER;
5954 int count = GET2(cc, 1 + IMM2_SIZE);
5955 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5956 unsigned int offset;
5957 jump_list *found = NULL;
5958
5959 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5960
5961 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5962
5963 count--;
5964 while (count-- > 0)
5965 {
5966 offset = GET2(slot, 0) << 1;
5967 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5968 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5969 slot += common->name_entry_size;
5970 }
5971
5972 offset = GET2(slot, 0) << 1;
5973 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5974 if (backtracks != NULL && !common->jscript_compat)
5975 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5976
5977 set_jumps(found, LABEL());
5978 }
5979
5980 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5981 {
5982 DEFINE_COMPILER;
5983 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5984 int offset = 0;
5985 struct sljit_jump *jump = NULL;
5986 struct sljit_jump *partial;
5987 struct sljit_jump *nopartial;
5988
5989 if (ref)
5990 {
5991 offset = GET2(cc, 1) << 1;
5992 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5993 /* OVECTOR(1) contains the "string begin - 1" constant. */
5994 if (withchecks && !common->jscript_compat)
5995 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
5996 }
5997 else
5998 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5999
6000 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6001 if (common->utf && *cc == OP_REFI)
6002 {
6003 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6004 if (ref)
6005 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6006 else
6007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6008
6009 if (withchecks)
6010 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6011
6012 /* Needed to save important temporary registers. */
6013 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6014 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6016 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6017 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6018 if (common->mode == JIT_COMPILE)
6019 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6020 else
6021 {
6022 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6023 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6024 check_partial(common, FALSE);
6025 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6026 JUMPHERE(nopartial);
6027 }
6028 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6029 }
6030 else
6031 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6032 {
6033 if (ref)
6034 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6035 else
6036 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6037
6038 if (withchecks)
6039 jump = JUMP(SLJIT_ZERO);
6040
6041 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6042 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6043 if (common->mode == JIT_COMPILE)
6044 add_jump(compiler, backtracks, partial);
6045
6046 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6047 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6048
6049 if (common->mode != JIT_COMPILE)
6050 {
6051 nopartial = JUMP(SLJIT_JUMP);
6052 JUMPHERE(partial);
6053 /* TMP2 -= STR_END - STR_PTR */
6054 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6055 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6056 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6057 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6058 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6059 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6060 JUMPHERE(partial);
6061 check_partial(common, FALSE);
6062 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6063 JUMPHERE(nopartial);
6064 }
6065 }
6066
6067 if (jump != NULL)
6068 {
6069 if (emptyfail)
6070 add_jump(compiler, backtracks, jump);
6071 else
6072 JUMPHERE(jump);
6073 }
6074 }
6075
/* Emits the matching path of a back reference which is followed by one of
the OP_CR* repeat opcodes handled by the switch below.

Arguments:
  common  compiler state
  cc      points to the reference opcode; when it is not OP_REF / OP_REFI an
          additional IMM2_SIZE operand is skipped before the repeat is read
  parent  passed implicitly to PUSH_BACKTRACK
          (NOTE(review): the macro is defined outside this chunk; presumably
          it links the new iterator_backtrack to parent and returns its last
          argument, NULL, on allocation failure - confirm)

Returns the address of the byte code which follows the repeat.
A max value of 0 means that there is no upper limit (see the OP_CRSTAR and
OP_CRPLUS cases). */
6076 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6077 {
6078 DEFINE_COMPILER;
6079 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6080 backtrack_common *backtrack;
6081 pcre_uchar type;
6082 int offset = 0;
6083 struct sljit_label *label;
6084 struct sljit_jump *zerolength;
6085 struct sljit_jump *jump = NULL;
6086 pcre_uchar *ccbegin = cc;
6087 int min = 0, max = 0;
6088 BOOL minimize;
6089
6090 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6091
6092 if (ref)
6093 offset = GET2(cc, 1) << 1;
6094 else
6095 cc += IMM2_SIZE;
/* The repeat opcode is stored right after the reference and its operand(s). */
6096 type = cc[1 + IMM2_SIZE];
6097
6098 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
/* The lowest bit of the repeat opcode selects the minimizing variant. */
6099 minimize = (type & 0x1) != 0;
6100 switch(type)
6101 {
6102 case OP_CRSTAR:
6103 case OP_CRMINSTAR:
6104 min = 0;
6105 max = 0;
6106 cc += 1 + IMM2_SIZE + 1;
6107 break;
6108 case OP_CRPLUS:
6109 case OP_CRMINPLUS:
6110 min = 1;
6111 max = 0;
6112 cc += 1 + IMM2_SIZE + 1;
6113 break;
6114 case OP_CRQUERY:
6115 case OP_CRMINQUERY:
6116 min = 0;
6117 max = 1;
6118 cc += 1 + IMM2_SIZE + 1;
6119 break;
6120 case OP_CRRANGE:
6121 case OP_CRMINRANGE:
6122 min = GET2(cc, 1 + IMM2_SIZE + 1);
6123 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6124 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6125 break;
6126 default:
6127 SLJIT_ASSERT_STOP();
6128 break;
6129 }
6130
/* Greedy repeat. The repeat counter is kept in POSSESSIVE0, and for
references other than OP_REF / OP_REFI the pointer left in TMP2 by
compile_dnref_search is kept in POSSESSIVE1. */
6131 if (!minimize)
6132 {
6133 if (min == 0)
6134 {
6135 allocate_stack(common, 2);
6136 if (ref)
6137 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6140 /* Temporary release of STR_PTR. */
6141 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6142 /* Handles both invalid and empty cases. Since the minimum repeat
6143 is zero, the invalid case is basically the same as an empty case. */
6144 if (ref)
6145 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6146 else
6147 {
6148 compile_dnref_search(common, ccbegin, NULL);
6149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6150 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6151 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6152 }
6153 /* Restore if not zero length. */
6154 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6155 }
6156 else
6157 {
6158 allocate_stack(common, 1);
6159 if (ref)
6160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6162 if (ref)
6163 {
/* At least one match is required here, so a reference whose start equals
the OVECTOR(1) constant is a failure. */
6164 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6165 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6166 }
6167 else
6168 {
6169 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6172 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6173 }
6174 }
6175
/* A counter is only needed when a limit greater than one has to be checked. */
6176 if (min > 1 || max > 1)
6177 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6178
/* Start of the repeat loop. */
6179 label = LABEL();
6180 if (!ref)
6181 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6182 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6183
6184 if (min > 1 || max > 1)
6185 {
6186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6187 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6189 if (min > 1)
6190 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6191 if (max > 1)
6192 {
/* Below the maximum: push the current position and try one more repeat. */
6193 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6194 allocate_stack(common, 1);
6195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6196 JUMPTO(SLJIT_JUMP, label);
6197 JUMPHERE(jump);
6198 }
6199 }
6200
6201 if (max == 0)
6202 {
6203 /* Includes min > 1 case as well. */
6204 allocate_stack(common, 1);
6205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6206 JUMPTO(SLJIT_JUMP, label);
6207 }
6208
6209 JUMPHERE(zerolength);
6210 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6211
6212 count_match(common);
6213 return cc;
6214 }
6215
/* Minimizing repeat. STACK(0) receives the subject position, STACK(1) is the
repeat counter, and for references other than OP_REF / OP_REFI STACK(2) keeps
the pointer left in TMP2 by compile_dnref_search. */
6216 allocate_stack(common, ref ? 2 : 3);
6217 if (ref)
6218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6220 if (type != OP_CRMINSTAR)
6221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6222
6223 if (min == 0)
6224 {
6225 /* Handles both invalid and empty cases. Since the minimum repeat
6226 is zero, the invalid case is basically the same as an empty case. */
6227 if (ref)
6228 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6229 else
6230 {
6231 compile_dnref_search(common, ccbegin, NULL);
6232 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6234 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6235 }
6236 /* Length is non-zero, we can match real repeats. */
6237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With a zero minimum the first attempt skips the reference entirely. */
6238 jump = JUMP(SLJIT_JUMP);
6239 }
6240 else
6241 {
6242 if (ref)
6243 {
6244 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6245 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6246 }
6247 else
6248 {
6249 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6250 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6252 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6253 }
6254 }
6255
/* Code from this label matches one more repeat; the label is also the target
of the min > 1 loop below. */
6256 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6257 if (max > 0)
6258 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6259
6260 if (!ref)
6261 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6262 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6264
6265 if (min > 1)
6266 {
6267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6268 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6270 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6271 }
6272 else if (max > 0)
6273 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6274
6275 if (jump != NULL)
6276 JUMPHERE(jump);
6277 JUMPHERE(zerolength);
6278
6279 count_match(common);
6280 return cc;
6281 }
6282
6283 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6284 {
6285 DEFINE_COMPILER;
6286 backtrack_common *backtrack;
6287 recurse_entry *entry = common->entries;
6288 recurse_entry *prev = NULL;
6289 sljit_sw start = GET(cc, 1);
6290 pcre_uchar *start_cc;
6291 BOOL needs_control_head;
6292
6293 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6294
6295 /* Inlining simple patterns. */
6296 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6297 {
6298 start_cc = common->start + start;
6299 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6300 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6301 return cc + 1 + LINK_SIZE;
6302 }
6303
6304 while (entry != NULL)
6305 {
6306 if (entry->start == start)
6307 break;
6308 prev = entry;
6309 entry = entry->next;
6310 }
6311
6312 if (entry == NULL)
6313 {
6314 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6315 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6316 return NULL;
6317 entry->next = NULL;
6318 entry->entry = NULL;
6319 entry->calls = NULL;
6320 entry->start = start;
6321
6322 if (prev != NULL)
6323 prev->next = entry;
6324 else
6325 common->entries = entry;
6326 }
6327
6328 if (common->has_set_som && common->mark_ptr != 0)
6329 {
6330 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6331 allocate_stack(common, 2);
6332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6335 }
6336 else if (common->has_set_som || common->mark_ptr != 0)
6337 {
6338 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6339 allocate_stack(common, 1);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6341 }
6342
6343 if (entry->entry == NULL)
6344 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6345 else
6346 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6347 /* Leave if the match is failed. */
6348 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6349 return cc + 1 + LINK_SIZE;
6350 }
6351
6352 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6353 {
6354 const pcre_uchar *begin = arguments->begin;
6355 int *offset_vector = arguments->offsets;
6356 int offset_count = arguments->offset_count;
6357 int i;
6358
6359 if (PUBL(callout) == NULL)
6360 return 0;
6361
6362 callout_block->version = 2;
6363 callout_block->callout_data = arguments->callout_data;
6364
6365 /* Offsets in subject. */
6366 callout_block->subject_length = arguments->end - arguments->begin;
6367 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6368 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6369 #if defined COMPILE_PCRE8
6370 callout_block->subject = (PCRE_SPTR)begin;
6371 #elif defined COMPILE_PCRE16
6372 callout_block->subject = (PCRE_SPTR16)begin;
6373 #elif defined COMPILE_PCRE32
6374 callout_block->subject = (PCRE_SPTR32)begin;
6375 #endif
6376
6377 /* Convert and copy the JIT offset vector to the offset_vector array. */
6378 callout_block->capture_top = 0;
6379 callout_block->offset_vector = offset_vector;
6380 for (i = 2; i < offset_count; i += 2)
6381 {
6382 offset_vector[i] = jit_ovector[i] - begin;
6383 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6384 if (jit_ovector[i] >= begin)
6385 callout_block->capture_top = i;
6386 }
6387
6388 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6389 if (offset_count > 0)
6390 offset_vector[0] = -1;
6391 if (offset_count > 1)
6392 offset_vector[1] = -1;
6393 return (*PUBL(callout))(callout_block);
6394 }
6395
6396 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
6397 #define CALLOUT_ARG_SIZE \
6398 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6399
/* Offset of a callout block field relative to STACK_TOP. The block starts
CALLOUT_ARG_SIZE bytes below STACK_TOP, hence the negative base. */
6400 #define CALLOUT_ARG_OFFSET(arg) \
6401 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6402
/* Emits the matching path of a callout: a callout block is built on the
stack, do_callout is called, and its return value is checked.

Arguments:
  common  compiler state
  cc      points to the callout opcode; cc[1] is the callout number, followed
          by two LINK_SIZE operands (pattern position and next item length)
  parent  passed implicitly to PUSH_BACKTRACK
          (NOTE(review): the macro is defined outside this chunk - confirm
          how it uses parent and its NULL argument)

Returns the address of the next opcode (cc + 2 + 2 * LINK_SIZE). */
6403 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6404 {
6405 DEFINE_COMPILER;
6406 backtrack_common *backtrack;
6407
6408 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6409
/* Reserve stack space for the whole callout block. */
6410 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6411
6412 SLJIT_ASSERT(common->capture_last_ptr != 0);
6413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6414 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6415 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6416 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6417
6418 /* These pointer-sized fields temporarily store internal variables;
do_callout converts them to offsets and then overwrites the fields. */
6419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6420 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6422
/* The mark field is the mark_ptr member of the arguments when marks are in
use, otherwise an immediate 0. */
6423 if (common->mark_ptr != 0)
6424 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6425 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6426 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6428
6429 /* Needed to save important temporary registers. */
6430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* Call arguments: R1 is the address of the callout block and R2 the address
of the JIT offset vector. NOTE(review): the first argument is presumably the
ARGUMENTS value loaded into TMP1 above, assuming TMP1 is SLJIT_R0 - confirm. */
6431 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6432 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6433 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6434 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6435 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6436 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6437
6438 /* Check return value: positive backtracks, negative quits the match, and
zero continues on the matching path. */
6439 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6440 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6441 if (common->forced_quit_label == NULL)
6442 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6443 else
6444 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6445 return cc + 2 + 2 * LINK_SIZE;
6446 }
6447
/* The helper macros are private to the callout code above. */
6448 #undef CALLOUT_ARG_SIZE
6449 #undef CALLOUT_ARG_OFFSET
6450
6451 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6452 {
6453 DEFINE_COMPILER;
6454 int framesize;
6455 int extrasize;
6456 BOOL needs_control_head;
6457 int private_data_ptr;
6458 backtrack_common altbacktrack;
6459 pcre_uchar *ccbegin;
6460 pcre_uchar opcode;
6461 pcre_uchar bra = OP_BRA;
6462 jump_list *tmp = NULL;
6463 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6464 jump_list **found;
6465 /* Saving previous accept variables. */
6466 BOOL save_local_exit = common->local_exit;
6467 BOOL save_positive_assert = common->positive_assert;
6468 then_trap_backtrack *save_then_trap = common->then_trap;
6469 struct sljit_label *save_quit_label = common->quit_label;
6470 struct sljit_label *save_accept_label = common->accept_label;
6471 jump_list *save_quit = common->quit;
6472 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6473 jump_list *save_accept = common->accept;
6474 struct sljit_jump *jump;
6475 struct sljit_jump *brajump = NULL;
6476
6477 /* Assert captures then. */
6478 common->then_trap = NULL;
6479
6480 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6481 {
6482 SLJIT_ASSERT(!conditional);
6483 bra = *cc;
6484 cc++;
6485 }
6486 private_data_ptr = PRIVATE_DATA(cc);
6487 SLJIT_ASSERT(private_data_ptr != 0);
6488 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6489 backtrack->framesize = framesize;
6490 backtrack->private_data_ptr = private_data_ptr;
6491 opcode = *cc;
6492 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6493 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6494 ccbegin = cc;
6495 cc += GET(cc, 1);
6496
6497 if (bra == OP_BRAMINZERO)
6498 {
6499 /* This is a braminzero backtrack path. */
6500 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6501 free_stack(common, 1);
6502 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6503 }
6504
6505 if (framesize < 0)
6506 {
6507 extrasize = needs_control_head ? 2 : 1;
6508 if (framesize == no_frame)
6509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6510 allocate_stack(common, extrasize);
6511 if (needs_control_head)
6512 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6513 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6514 if (needs_control_head)
6515 {
6516 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6518 }
6519 }
6520 else
6521 {
6522 extrasize = needs_control_head ? 3 : 2;
6523 allocate_stack(common, framesize + extrasize);
6524 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6525 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6526 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6527 if (needs_control_head)
6528 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6530 if (needs_control_head)
6531 {
6532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6535 }
6536 else
6537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6538 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6539 }
6540
6541 memset(&altbacktrack, 0, sizeof(backtrack_common));
6542 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6543 {
6544 /* Negative assert is stronger than positive assert. */
6545 common->local_exit = TRUE;
6546 common->quit_label = NULL;
6547 common->quit = NULL;
6548 common->positive_assert = FALSE;
6549 }
6550 else
6551 common->positive_assert = TRUE;
6552 common->positive_assert_quit = NULL;
6553
6554 while (1)
6555 {
6556 common->accept_label = NULL;
6557 common->accept = NULL;
6558 altbacktrack.top = NULL;
6559 altbacktrack.topbacktracks = NULL;
6560
6561 if (*ccbegin == OP_ALT)
6562 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6563
6564 altbacktrack.cc = ccbegin;
6565 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6566 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6567 {
6568 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6569 {
6570 common->local_exit = save_local_exit;
6571 common->quit_label = save_quit_label;
6572 common->quit = save_quit;
6573 }
6574 common->positive_assert = save_positive_assert;
6575 common->then_trap = save_then_trap;
6576 common->accept_label = save_accept_label;
6577 common->positive_assert_quit = save_positive_assert_quit;
6578 common->accept = save_accept;
6579 return NULL;
6580 }
6581 common->accept_label = LABEL();
6582 if (common->accept != NULL)
6583 set_jumps(common->accept, common->accept_label);
6584
6585 /* Reset stack. */
6586 if (framesize < 0)
6587 {
6588 if (framesize == no_frame)
6589 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6590 else
6591 free_stack(common, extrasize);
6592 if (needs_control_head)
6593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6594 }
6595 else
6596 {
6597 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6598 {
6599 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6600 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6601 if (needs_control_head)
6602 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6603 }
6604 else
6605 {
6606 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6607 if (needs_control_head)
6608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6609 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6610 }
6611 }
6612
6613 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6614 {
6615 /* We know that STR_PTR was stored on the top of the stack. */
6616 if (conditional)
6617 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6618 else if (bra == OP_BRAZERO)
6619 {
6620 if (framesize < 0)
6621 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6622 else
6623 {
6624 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6625 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6627 }
6628 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6629 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6630 }
6631 else if (framesize >= 0)
6632 {
6633 /* For OP_BRA and OP_BRAMINZERO. */
6634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6635 }
6636 }
6637 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6638
6639 compile_backtrackingpath(common, altbacktrack.top);
6640 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6641 {
6642 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6643 {
6644 common->local_exit = save_local_exit;
6645 common->quit_label = save_quit_label;
6646 common->quit = save_quit;
6647 }
6648 common->positive_assert = save_positive_assert;
6649 common->then_trap = save_then_trap;
6650 common->accept_label = save_accept_label;
6651 common->positive_assert_quit = save_positive_assert_quit;
6652 common->accept = save_accept;
6653 return NULL;
6654 }
6655 set_jumps(altbacktrack.topbacktracks, LABEL());
6656
6657 if (*cc != OP_ALT)
6658 break;
6659
6660 ccbegin = cc;
6661 cc += GET(cc, 1);
6662 }
6663
6664 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6665 {
6666 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6667 /* Makes the check less complicated below. */
6668 common->positive_assert_quit = common->quit;
6669 }
6670
6671 /* None of them matched. */
6672 if (common->positive_assert_quit != NULL)
6673 {
6674 jump = JUMP(SLJIT_JUMP);
6675 set_jumps(common->positive_assert_quit, LABEL());
6676 SLJIT_ASSERT(framesize != no_stack);
6677 if (framesize < 0)
6678 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6679 else
6680 {
6681 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6682 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6683 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6684 }
6685 JUMPHERE(jump);
6686 }
6687
6688 if (needs_control_head)
6689 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6690
6691 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6692 {
6693 /* Assert is failed. */
6694 if (conditional || bra == OP_BRAZERO)
6695 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6696
6697 if (framesize < 0)
6698 {
6699 /* The topmost item should be 0. */
6700 if (bra == OP_BRAZERO)
6701 {
6702 if (extrasize == 2)
6703 free_stack(common, 1);
6704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6705 }
6706 else
6707 free_stack(common, extrasize);
6708 }
6709 else
6710 {
6711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6712 /* The topmost item should be 0. */
6713 if (bra == OP_BRAZERO)
6714 {
6715 free_stack(common, framesize + extrasize - 1);
6716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6717 }
6718 else
6719 free_stack(common, framesize + extrasize);
6720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6721 }
6722 jump = JUMP(SLJIT_JUMP);
6723 if (bra != OP_BRAZERO)
6724 add_jump(compiler, target, jump);
6725
6726 /* Assert is successful. */
6727 set_jumps(tmp, LABEL());
6728 if (framesize < 0)
6729 {
6730 /* We know that STR_PTR was stored on the top of the stack. */
6731 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6732 /* Keep the STR_PTR on the top of the stack. */
6733 if (bra == OP_BRAZERO)
6734 {
6735 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6736 if (extrasize == 2)
6737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6738 }
6739 else if (bra == OP_BRAMINZERO)
6740 {
6741 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6743 }
6744 }
6745 else
6746 {
6747 if (bra == OP_BRA)
6748 {
6749 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6750 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6751 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6752 }
6753 else
6754 {
6755 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6756 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6757 if (extrasize == 2)
6758 {
6759 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6760 if (bra == OP_BRAMINZERO)
6761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6762 }
6763 else
6764 {
6765 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6766 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6767 }
6768 }
6769 }
6770
6771 if (bra == OP_BRAZERO)
6772 {
6773 backtrack->matchingpath = LABEL();
6774 SET_LABEL(jump, backtrack->matchingpath);
6775 }
6776 else if (bra == OP_BRAMINZERO)
6777 {
6778 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6779 JUMPHERE(brajump);