/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1447 - (show annotations)
Mon Jan 13 20:18:33 2014 UTC (5 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 334035 byte(s)
JIT: Improved update table for the fast forward search algorithm.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
Short summary of the backtracking mechanism employed by the JIT code generator:

The code generator follows the recursive nature of Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) or
vertical (sub-expression) (see struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list; each node holds one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;   /* Following node, if any. */
} jump_list;
194
/* Node of a singly linked list; each node pairs a 'start' jump
with a 'quit' label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;   /* Following node, if any. */
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative marker values.
NOTE(review): presumably stored in the framesize members - confirm. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
211
/* Control entry kinds; the enumerators take the values 0 and 1 in
declaration order. */
enum control_types {
  type_mark,        /* == 0 */
  type_then_trap    /* == 1 */
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
318 typedef struct compiler_common {
319 /* The sljit ceneric compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
359
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* TRUE, when minlength is greater than 0. */
366 BOOL might_be_empty;
367 /* \K is found in the pattern. */
368 BOOL has_set_som;
369 /* (*SKIP:arg) is found in the pattern. */
370 BOOL has_skip_arg;
371 /* (*THEN) is found in the pattern. */
372 BOOL has_then;
373 /* Needs to know the start position anytime. */
374 BOOL needs_start_ptr;
375 /* Currently in recurse or negative assert. */
376 BOOL local_exit;
377 /* Currently in a positive assert. */
378 BOOL positive_assert;
379 /* Newline control. */
380 int nltype;
381 pcre_uint32 nlmax;
382 pcre_uint32 nlmin;
383 int newline;
384 int bsr_nltype;
385 pcre_uint32 bsr_nlmax;
386 pcre_uint32 bsr_nlmin;
387 /* Dollar endonly. */
388 int endonly;
389 /* Tables. */
390 sljit_sw ctypes;
391 /* Named capturing brackets. */
392 pcre_uchar *name_table;
393 sljit_sw name_count;
394 sljit_sw name_entry_size;
395
396 /* Labels and jump lists. */
397 struct sljit_label *partialmatchlabel;
398 struct sljit_label *quit_label;
399 struct sljit_label *forced_quit_label;
400 struct sljit_label *accept_label;
401 stub_list *stubs;
402 label_addr_list *label_addrs;
403 recurse_entry *entries;
404 recurse_entry *currententry;
405 jump_list *partialmatch;
406 jump_list *quit;
407 jump_list *positive_assert_quit;
408 jump_list *forced_quit;
409 jump_list *accept;
410 jump_list *calllimit;
411 jump_list *stackalloc;
412 jump_list *revertframes;
413 jump_list *wordboundary;
414 jump_list *anynewline;
415 jump_list *hspace;
416 jump_list *vspace;
417 jump_list *casefulcmp;
418 jump_list *caselesscmp;
419 jump_list *reset_match;
420 BOOL jscript_compat;
421 #ifdef SUPPORT_UTF
422 BOOL utf;
423 #ifdef SUPPORT_UCP
424 BOOL use_ucp;
425 #endif
426 #ifdef COMPILE_PCRE8
427 jump_list *utfreadchar;
428 jump_list *utfreadchar16;
429 jump_list *utfreadtype8;
430 #endif
431 #endif /* SUPPORT_UTF */
432 #ifdef SUPPORT_UCP
433 jump_list *getucd;
434 #endif
435 } compiler_common;
436
/* Context used by byte_sequence_compare. The two unions 'c' and 'oc'
have the same members and are only present when SLJIT_UNALIGNED is set. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
470
471 /* Undefine sljit macros. */
472 #undef CMP
473
/* Byte offset of the i-th stack element: element 0 is at
-sizeof(sljit_sw), element 1 at -2 * sizeof(sljit_sw), and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register aliases, grouped by sljit register class. */
/* Scratch registers. */
#define TMP1          SLJIT_SCRATCH_REG1
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define TMP2          SLJIT_SCRATCH_REG3
/* Saved registers. */
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
/* Temporary "E" registers. */
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
#define TMP3          SLJIT_TEMPORARY_EREG2
/* Saved "E" registers. */
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define COUNT_MATCH   SLJIT_SAVED_EREG2
487
/* Layout of the local space, expressed in sljit_sw sized words. */

/* Words 0 and 1: free for use by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Words 2 and 3: locals of possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Word 4: max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and contains pointers to
characters. Its data is split into two groups: the first holds the
start / end character pointers, the second holds the start pointers of
capturing groups whose end has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
505
506 #if defined COMPILE_PCRE8
507 #define MOV_UCHAR SLJIT_MOV_UB
508 #define MOVU_UCHAR SLJIT_MOVU_UB
509 #elif defined COMPILE_PCRE16
510 #define MOV_UCHAR SLJIT_MOV_UH
511 #define MOVU_UCHAR SLJIT_MOVU_UH
512 #elif defined COMPILE_PCRE32
513 #define MOV_UCHAR SLJIT_MOV_UI
514 #define MOVU_UCHAR SLJIT_MOVU_UI
515 #else
516 #error Unsupported compiling mode
517 #endif
518
/* Shorthand wrappers around the sljit emitter API. Except for
DEFINE_COMPILER itself, they all expect a local variable named
'compiler', which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER                                                  \
  struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(op, dst, dstw, src, srcw)                                    \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w)                     \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. */
#define LABEL()                                                          \
  sljit_emit_label(compiler)
#define JUMP(type)                                                       \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label)                                              \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump)                                                   \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label)                                           \
  sljit_set_label((jump), (label))

/* Compare-and-jump. */
#define CMP(type, src1, src1w, src2, src2w)                              \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label)                     \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Flag materialization and local base address. */
#define OP_FLAGS(op, dst, dstw, src, srcw, type)                         \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset)                                \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
544
545 #define READ_CHAR_MAX 0x7fffffff
546
547 static pcre_uchar* bracketend(pcre_uchar* cc)
548 {
549 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
550 do cc += GET(cc, 1); while (*cc == OP_ALT);
551 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
552 cc += 1 + LINK_SIZE;
553 return cc;
554 }
555
556 static int no_alternatives(pcre_uchar* cc)
557 {
558 int count = 0;
559 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
560 do
561 {
562 cc += GET(cc, 1);
563 count++;
564 }
565 while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 return count;
568 }
569
/* Number of set bits in each 4-bit value: ones_in_half_byte[v] is the
population count of v for 0 <= v <= 15. This is pure lookup data, so it
is declared const to keep it from being modified by accident. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
574
/* Functions which might need modification for every newly supported opcode:
576 next_opcode
577 check_opcode_types
578 set_private_data_ptrs
579 get_framesize
580 init_frame
581 get_private_data_copy_length
582 copy_private_data
583 compile_matchingpath
584 compile_backtrackingpath
585 */
586
587 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
588 {
589 SLJIT_UNUSED_ARG(common);
590 switch(*cc)
591 {
592 case OP_SOD:
593 case OP_SOM:
594 case OP_SET_SOM:
595 case OP_NOT_WORD_BOUNDARY:
596 case OP_WORD_BOUNDARY:
597 case OP_NOT_DIGIT:
598 case OP_DIGIT:
599 case OP_NOT_WHITESPACE:
600 case OP_WHITESPACE:
601 case OP_NOT_WORDCHAR:
602 case OP_WORDCHAR:
603 case OP_ANY:
604 case OP_ALLANY:
605 case OP_NOTPROP:
606 case OP_PROP:
607 case OP_ANYNL:
608 case OP_NOT_HSPACE:
609 case OP_HSPACE:
610 case OP_NOT_VSPACE:
611 case OP_VSPACE:
612 case OP_EXTUNI:
613 case OP_EODN:
614 case OP_EOD:
615 case OP_CIRC:
616 case OP_CIRCM:
617 case OP_DOLL:
618 case OP_DOLLM:
619 case OP_CRSTAR:
620 case OP_CRMINSTAR:
621 case OP_CRPLUS:
622 case OP_CRMINPLUS:
623 case OP_CRQUERY:
624 case OP_CRMINQUERY:
625 case OP_CRRANGE:
626 case OP_CRMINRANGE:
627 case OP_CRPOSSTAR:
628 case OP_CRPOSPLUS:
629 case OP_CRPOSQUERY:
630 case OP_CRPOSRANGE:
631 case OP_CLASS:
632 case OP_NCLASS:
633 case OP_REF:
634 case OP_REFI:
635 case OP_DNREF:
636 case OP_DNREFI:
637 case OP_RECURSE:
638 case OP_CALLOUT:
639 case OP_ALT:
640 case OP_KET:
641 case OP_KETRMAX:
642 case OP_KETRMIN:
643 case OP_KETRPOS:
644 case OP_REVERSE:
645 case OP_ASSERT:
646 case OP_ASSERT_NOT:
647 case OP_ASSERTBACK:
648 case OP_ASSERTBACK_NOT:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_CBRA:
654 case OP_CBRAPOS:
655 case OP_COND:
656 case OP_SBRA:
657 case OP_SBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 case OP_SCOND:
661 case OP_CREF:
662 case OP_DNCREF:
663 case OP_RREF:
664 case OP_DNRREF:
665 case OP_DEF:
666 case OP_BRAZERO:
667 case OP_BRAMINZERO:
668 case OP_BRAPOSZERO:
669 case OP_PRUNE:
670 case OP_SKIP:
671 case OP_THEN:
672 case OP_COMMIT:
673 case OP_FAIL:
674 case OP_ACCEPT:
675 case OP_ASSERT_ACCEPT:
676 case OP_CLOSE:
677 case OP_SKIPZERO:
678 return cc + PRIV(OP_lengths)[*cc];
679
680 case OP_CHAR:
681 case OP_CHARI:
682 case OP_NOT:
683 case OP_NOTI:
684 case OP_STAR:
685 case OP_MINSTAR:
686 case OP_PLUS:
687 case OP_MINPLUS:
688 case OP_QUERY:
689 case OP_MINQUERY:
690 case OP_UPTO:
691 case OP_MINUPTO:
692 case OP_EXACT:
693 case OP_POSSTAR:
694 case OP_POSPLUS:
695 case OP_POSQUERY:
696 case OP_POSUPTO:
697 case OP_STARI:
698 case OP_MINSTARI:
699 case OP_PLUSI:
700 case OP_MINPLUSI:
701 case OP_QUERYI:
702 case OP_MINQUERYI:
703 case OP_UPTOI:
704 case OP_MINUPTOI:
705 case OP_EXACTI:
706 case OP_POSSTARI:
707 case OP_POSPLUSI:
708 case OP_POSQUERYI:
709 case OP_POSUPTOI:
710 case OP_NOTSTAR:
711 case OP_NOTMINSTAR:
712 case OP_NOTPLUS:
713 case OP_NOTMINPLUS:
714 case OP_NOTQUERY:
715 case OP_NOTMINQUERY:
716 case OP_NOTUPTO:
717 case OP_NOTMINUPTO:
718 case OP_NOTEXACT:
719 case OP_NOTPOSSTAR:
720 case OP_NOTPOSPLUS:
721 case OP_NOTPOSQUERY:
722 case OP_NOTPOSUPTO:
723 case OP_NOTSTARI:
724 case OP_NOTMINSTARI:
725 case OP_NOTPLUSI:
726 case OP_NOTMINPLUSI:
727 case OP_NOTQUERYI:
728 case OP_NOTMINQUERYI:
729 case OP_NOTUPTOI:
730 case OP_NOTMINUPTOI:
731 case OP_NOTEXACTI:
732 case OP_NOTPOSSTARI:
733 case OP_NOTPOSPLUSI:
734 case OP_NOTPOSQUERYI:
735 case OP_NOTPOSUPTOI:
736 cc += PRIV(OP_lengths)[*cc];
737 #ifdef SUPPORT_UTF
738 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
739 #endif
740 return cc;
741
742 /* Special cases. */
743 case OP_TYPESTAR:
744 case OP_TYPEMINSTAR:
745 case OP_TYPEPLUS:
746 case OP_TYPEMINPLUS:
747 case OP_TYPEQUERY:
748 case OP_TYPEMINQUERY:
749 case OP_TYPEUPTO:
750 case OP_TYPEMINUPTO:
751 case OP_TYPEEXACT:
752 case OP_TYPEPOSSTAR:
753 case OP_TYPEPOSPLUS:
754 case OP_TYPEPOSQUERY:
755 case OP_TYPEPOSUPTO:
756 return cc + PRIV(OP_lengths)[*cc] - 1;
757
758 case OP_ANYBYTE:
759 #ifdef SUPPORT_UTF
760 if (common->utf) return NULL;
761 #endif
762 return cc + 1;
763
764 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
765 case OP_XCLASS:
766 return cc + GET(cc, 1);
767 #endif
768
769 case OP_MARK:
770 case OP_PRUNE_ARG:
771 case OP_SKIP_ARG:
772 case OP_THEN_ARG:
773 return cc + 1 + 2 + cc[1];
774
775 default:
776 /* All opcodes are supported now! */
777 SLJIT_ASSERT_STOP();
778 return NULL;
779 }
780 }
781
782 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
783 {
784 int count;
785 pcre_uchar *slot;
786
787 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
788 while (cc < ccend)
789 {
790 switch(*cc)
791 {
792 case OP_SET_SOM:
793 common->has_set_som = TRUE;
794 common->might_be_empty = TRUE;
795 cc += 1;
796 break;
797
798 case OP_REF:
799 case OP_REFI:
800 common->optimized_cbracket[GET2(cc, 1)] = 0;
801 cc += 1 + IMM2_SIZE;
802 break;
803
804 case OP_BRA:
805 case OP_CBRA:
806 case OP_SBRA:
807 case OP_SCBRA:
808 count = no_alternatives(cc);
809 if (count > 4)
810 common->read_only_data_size += count * sizeof(sljit_uw);
811 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
812 break;
813
814 case OP_CBRAPOS:
815 case OP_SCBRAPOS:
816 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
817 cc += 1 + LINK_SIZE + IMM2_SIZE;
818 break;
819
820 case OP_COND:
821 case OP_SCOND:
822 /* Only AUTO_CALLOUT can insert this opcode. We do
823 not intend to support this case. */
824 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
825 return FALSE;
826 cc += 1 + LINK_SIZE;
827 break;
828
829 case OP_CREF:
830 common->optimized_cbracket[GET2(cc, 1)] = 0;
831 cc += 1 + IMM2_SIZE;
832 break;
833
834 case OP_DNREF:
835 case OP_DNREFI:
836 case OP_DNCREF:
837 count = GET2(cc, 1 + IMM2_SIZE);
838 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
839 while (count-- > 0)
840 {
841 common->optimized_cbracket[GET2(slot, 0)] = 0;
842 slot += common->name_entry_size;
843 }
844 cc += 1 + 2 * IMM2_SIZE;
845 break;
846
847 case OP_RECURSE:
848 /* Set its value only once. */
849 if (common->recursive_head_ptr == 0)
850 {
851 common->recursive_head_ptr = common->ovector_start;
852 common->ovector_start += sizeof(sljit_sw);
853 }
854 cc += 1 + LINK_SIZE;
855 break;
856
857 case OP_CALLOUT:
858 if (common->capture_last_ptr == 0)
859 {
860 common->capture_last_ptr = common->ovector_start;
861 common->ovector_start += sizeof(sljit_sw);
862 }
863 cc += 2 + 2 * LINK_SIZE;
864 break;
865
866 case OP_THEN_ARG:
867 common->has_then = TRUE;
868 common->control_head_ptr = 1;
869 /* Fall through. */
870
871 case OP_PRUNE_ARG:
872 common->needs_start_ptr = TRUE;
873 /* Fall through. */
874
875 case OP_MARK:
876 if (common->mark_ptr == 0)
877 {
878 common->mark_ptr = common->ovector_start;
879 common->ovector_start += sizeof(sljit_sw);
880 }
881 cc += 1 + 2 + cc[1];
882 break;
883
884 case OP_THEN:
885 common->has_then = TRUE;
886 common->control_head_ptr = 1;
887 /* Fall through. */
888
889 case OP_PRUNE:
890 case OP_SKIP:
891 common->needs_start_ptr = TRUE;
892 cc += 1;
893 break;
894
895 case OP_SKIP_ARG:
896 common->control_head_ptr = 1;
897 common->has_skip_arg = TRUE;
898 cc += 1 + 2 + cc[1];
899 break;
900
901 default:
902 cc = next_opcode(common, cc);
903 if (cc == NULL)
904 return FALSE;
905 break;
906 }
907 }
908 return TRUE;
909 }
910
911 static int get_class_iterator_size(pcre_uchar *cc)
912 {
913 switch(*cc)
914 {
915 case OP_CRSTAR:
916 case OP_CRPLUS:
917 return 2;
918
919 case OP_CRMINSTAR:
920 case OP_CRMINPLUS:
921 case OP_CRQUERY:
922 case OP_CRMINQUERY:
923 return 1;
924
925 case OP_CRRANGE:
926 case OP_CRMINRANGE:
927 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
928 return 0;
929 return 2;
930
931 default:
932 return 0;
933 }
934 }
935
936 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
937 {
938 pcre_uchar *end = bracketend(begin);
939 pcre_uchar *next;
940 pcre_uchar *next_end;
941 pcre_uchar *max_end;
942 pcre_uchar type;
943 sljit_sw length = end - begin;
944 int min, max, i;
945
946 /* Detect fixed iterations first. */
947 if (end[-(1 + LINK_SIZE)] != OP_KET)
948 return FALSE;
949
950 /* Already detected repeat. */
951 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
952 return TRUE;
953
954 next = end;
955 min = 1;
956 while (1)
957 {
958 if (*next != *begin)
959 break;
960 next_end = bracketend(next);
961 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
962 break;
963 next = next_end;
964 min++;
965 }
966
967 if (min == 2)
968 return FALSE;
969
970 max = 0;
971 max_end = next;
972 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
973 {
974 type = *next;
975 while (1)
976 {
977 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
978 break;
979 next_end = bracketend(next + 2 + LINK_SIZE);
980 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
981 break;
982 next = next_end;
983 max++;
984 }
985
986 if (next[0] == type && next[1] == *begin && max >= 1)
987 {
988 next_end = bracketend(next + 1);
989 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
990 {
991 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
992 if (*next_end != OP_KET)
993 break;
994
995 if (i == max)
996 {
997 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
998 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
999 /* +2 the original and the last. */
1000 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1001 if (min == 1)
1002 return TRUE;
1003 min--;
1004 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1005 }
1006 }
1007 }
1008 }
1009
1010 if (min >= 3)
1011 {
1012 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1013 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1015 return TRUE;
1016 }
1017
1018 return FALSE;
1019 }
1020
/* Groups of case labels shared by several switch statements. In
set_private_data_ptrs the _1 group selects space = 1 and the _2A / _2B
groups select space = 2. */

/* Single character iterators, space = 1. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators, space = 2 (star and plus). */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators, space = 2 (upto). */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* The same three groups for the OP_TYPE* iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1072
1073 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1074 {
1075 pcre_uchar *cc = common->start;
1076 pcre_uchar *alternative;
1077 pcre_uchar *end = NULL;
1078 int private_data_ptr = *private_data_start;
1079 int space, size, bracketlen;
1080
1081 while (cc < ccend)
1082 {
1083 space = 0;
1084 size = 0;
1085 bracketlen = 0;
1086 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1087 return;
1088
1089 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1090 if (detect_repeat(common, cc))
1091 {
1092 /* These brackets are converted to repeats, so no global
1093 based single character repeat is allowed. */
1094 if (cc >= end)
1095 end = bracketend(cc);
1096 }
1097
1098 switch(*cc)
1099 {
1100 case OP_KET:
1101 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1102 {
1103 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1104 private_data_ptr += sizeof(sljit_sw);
1105 cc += common->private_data_ptrs[cc + 1 - common->start];
1106 }
1107 cc += 1 + LINK_SIZE;
1108 break;
1109
1110 case OP_ASSERT:
1111 case OP_ASSERT_NOT:
1112 case OP_ASSERTBACK:
1113 case OP_ASSERTBACK_NOT:
1114 case OP_ONCE:
1115 case OP_ONCE_NC:
1116 case OP_BRAPOS:
1117 case OP_SBRA:
1118 case OP_SBRAPOS:
1119 case OP_SCOND:
1120 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1121 private_data_ptr += sizeof(sljit_sw);
1122 bracketlen = 1 + LINK_SIZE;
1123 break;
1124
1125 case OP_CBRAPOS:
1126 case OP_SCBRAPOS:
1127 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1128 private_data_ptr += sizeof(sljit_sw);
1129 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1130 break;
1131
1132 case OP_COND:
1133 /* Might be a hidden SCOND. */
1134 alternative = cc + GET(cc, 1);
1135 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1136 {
1137 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1138 private_data_ptr += sizeof(sljit_sw);
1139 }
1140 bracketlen = 1 + LINK_SIZE;
1141 break;
1142
1143 case OP_BRA:
1144 bracketlen = 1 + LINK_SIZE;
1145 break;
1146
1147 case OP_CBRA:
1148 case OP_SCBRA:
1149 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1150 break;
1151
1152 CASE_ITERATOR_PRIVATE_DATA_1
1153 space = 1;
1154 size = -2;
1155 break;
1156
1157 CASE_ITERATOR_PRIVATE_DATA_2A
1158 space = 2;
1159 size = -2;
1160 break;
1161
1162 CASE_ITERATOR_PRIVATE_DATA_2B
1163 space = 2;
1164 size = -(2 + IMM2_SIZE);
1165 break;
1166
1167 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1168 space = 1;
1169 size = 1;
1170 break;
1171
1172 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1173 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1174 space = 2;
1175 size = 1;
1176 break;
1177
1178 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1179 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1180 space = 2;
1181 size = 1 + IMM2_SIZE;
1182 break;
1183
1184 case OP_CLASS:
1185 case OP_NCLASS:
1186 size += 1 + 32 / sizeof(pcre_uchar);
1187 space = get_class_iterator_size(cc + size);
1188 break;
1189
1190 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1191 case OP_XCLASS:
1192 size = GET(cc, 1);
1193 space = get_class_iterator_size(cc + size);
1194 break;
1195 #endif
1196
1197 default:
1198 cc = next_opcode(common, cc);
1199 SLJIT_ASSERT(cc != NULL);
1200 break;
1201 }
1202
1203 /* Character iterators, which are not inside a repeated bracket,
1204 gets a private slot instead of allocating it on the stack. */
1205 if (space > 0 && cc >= end)
1206 {
1207 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1208 private_data_ptr += sizeof(sljit_sw) * space;
1209 }
1210
1211 if (size != 0)
1212 {
1213 if (size < 0)
1214 {
1215 cc += -size;
1216 #ifdef SUPPORT_UTF
1217 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1218 #endif
1219 }
1220 else
1221 cc += size;
1222 }
1223
1224 if (bracketlen > 0)
1225 {
1226 if (cc >= end)
1227 {
1228 end = bracketend(cc);
1229 if (end[-1 - LINK_SIZE] == OP_KET)
1230 end = NULL;
1231 }
1232 cc += bracketlen;
1233 }
1234 }
1235 *private_data_start = private_data_ptr;
1236 }
1237
1238 /* Returns with a frame_types (always < 0) if no need for frame. */
1239 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1240 {
1241 int length = 0;
1242 int possessive = 0;
1243 BOOL stack_restore = FALSE;
1244 BOOL setsom_found = recursive;
1245 BOOL setmark_found = recursive;
1246 /* The last capture is a local variable even for recursions. */
1247 BOOL capture_last_found = FALSE;
1248
1249 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1250 SLJIT_ASSERT(common->control_head_ptr != 0);
1251 *needs_control_head = TRUE;
1252 #else
1253 *needs_control_head = FALSE;
1254 #endif
1255
1256 if (ccend == NULL)
1257 {
1258 ccend = bracketend(cc) - (1 + LINK_SIZE);
1259 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1260 {
1261 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1262 /* This is correct regardless of common->capture_last_ptr. */
1263 capture_last_found = TRUE;
1264 }
1265 cc = next_opcode(common, cc);
1266 }
1267
1268 SLJIT_ASSERT(cc != NULL);
1269 while (cc < ccend)
1270 switch(*cc)
1271 {
1272 case OP_SET_SOM:
1273 SLJIT_ASSERT(common->has_set_som);
1274 stack_restore = TRUE;
1275 if (!setsom_found)
1276 {
1277 length += 2;
1278 setsom_found = TRUE;
1279 }
1280 cc += 1;
1281 break;
1282
1283 case OP_MARK:
1284 case OP_PRUNE_ARG:
1285 case OP_THEN_ARG:
1286 SLJIT_ASSERT(common->mark_ptr != 0);
1287 stack_restore = TRUE;
1288 if (!setmark_found)
1289 {
1290 length += 2;
1291 setmark_found = TRUE;
1292 }
1293 if (common->control_head_ptr != 0)
1294 *needs_control_head = TRUE;
1295 cc += 1 + 2 + cc[1];
1296 break;
1297
1298 case OP_RECURSE:
1299 stack_restore = TRUE;
1300 if (common->has_set_som && !setsom_found)
1301 {
1302 length += 2;
1303 setsom_found = TRUE;
1304 }
1305 if (common->mark_ptr != 0 && !setmark_found)
1306 {
1307 length += 2;
1308 setmark_found = TRUE;
1309 }
1310 if (common->capture_last_ptr != 0 && !capture_last_found)
1311 {
1312 length += 2;
1313 capture_last_found = TRUE;
1314 }
1315 cc += 1 + LINK_SIZE;
1316 break;
1317
1318 case OP_CBRA:
1319 case OP_CBRAPOS:
1320 case OP_SCBRA:
1321 case OP_SCBRAPOS:
1322 stack_restore = TRUE;
1323 if (common->capture_last_ptr != 0 && !capture_last_found)
1324 {
1325 length += 2;
1326 capture_last_found = TRUE;
1327 }
1328 length += 3;
1329 cc += 1 + LINK_SIZE + IMM2_SIZE;
1330 break;
1331
1332 default:
1333 stack_restore = TRUE;
1334 /* Fall through. */
1335
1336 case OP_NOT_WORD_BOUNDARY:
1337 case OP_WORD_BOUNDARY:
1338 case OP_NOT_DIGIT:
1339 case OP_DIGIT:
1340 case OP_NOT_WHITESPACE:
1341 case OP_WHITESPACE:
1342 case OP_NOT_WORDCHAR:
1343 case OP_WORDCHAR:
1344 case OP_ANY:
1345 case OP_ALLANY:
1346 case OP_ANYBYTE:
1347 case OP_NOTPROP:
1348 case OP_PROP:
1349 case OP_ANYNL:
1350 case OP_NOT_HSPACE:
1351 case OP_HSPACE:
1352 case OP_NOT_VSPACE:
1353 case OP_VSPACE:
1354 case OP_EXTUNI:
1355 case OP_EODN:
1356 case OP_EOD:
1357 case OP_CIRC:
1358 case OP_CIRCM:
1359 case OP_DOLL:
1360 case OP_DOLLM:
1361 case OP_CHAR:
1362 case OP_CHARI:
1363 case OP_NOT:
1364 case OP_NOTI:
1365
1366 case OP_EXACT:
1367 case OP_POSSTAR:
1368 case OP_POSPLUS:
1369 case OP_POSQUERY:
1370 case OP_POSUPTO:
1371
1372 case OP_EXACTI:
1373 case OP_POSSTARI:
1374 case OP_POSPLUSI:
1375 case OP_POSQUERYI:
1376 case OP_POSUPTOI:
1377
1378 case OP_NOTEXACT:
1379 case OP_NOTPOSSTAR:
1380 case OP_NOTPOSPLUS:
1381 case OP_NOTPOSQUERY:
1382 case OP_NOTPOSUPTO:
1383
1384 case OP_NOTEXACTI:
1385 case OP_NOTPOSSTARI:
1386 case OP_NOTPOSPLUSI:
1387 case OP_NOTPOSQUERYI:
1388 case OP_NOTPOSUPTOI:
1389
1390 case OP_TYPEEXACT:
1391 case OP_TYPEPOSSTAR:
1392 case OP_TYPEPOSPLUS:
1393 case OP_TYPEPOSQUERY:
1394 case OP_TYPEPOSUPTO:
1395
1396 case OP_CLASS:
1397 case OP_NCLASS:
1398 case OP_XCLASS:
1399
1400 cc = next_opcode(common, cc);
1401 SLJIT_ASSERT(cc != NULL);
1402 break;
1403 }
1404
1405 /* Possessive quantifiers can use a special case. */
1406 if (SLJIT_UNLIKELY(possessive == length))
1407 return stack_restore ? no_frame : no_stack;
1408
1409 if (length > 0)
1410 return length + 1;
1411 return stack_restore ? no_frame : no_stack;
1412 }
1413
/* Emits the code which writes a frame onto the stack for the opcodes in
[cc, ccend); when ccend is NULL the content of the bracket at cc is used.
The same opcodes are recognised as in get_framesize, so the number of words
written matches the size computed there.

Each frame item starts with a tag word followed by the saved value(s):
  - start of match:  -OVECTOR(0), then the current OVECTOR(0) local
  - mark:            -common->mark_ptr, then the current mark local
  - last capture:    -common->capture_last_ptr, then the current local
  - capture bracket: OVECTOR(offset), then both ovector locals of the pair
The frame is closed by a zero word.

stackpos is the index of the first word to be written and stacktop the
index of the closing zero; both are converted with STACK(). Start of match
and mark are not saved when "recursive" is set. */
1414 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1415 {
1416 DEFINE_COMPILER;
1417 BOOL setsom_found = recursive;
1418 BOOL setmark_found = recursive;
1419 /* The last capture is a local variable even for recursions. */
1420 BOOL capture_last_found = FALSE;
1421 int offset;
1422
1423 /* >= 1 + shortest item size (2) */
1424 SLJIT_UNUSED_ARG(stacktop);
1425 SLJIT_ASSERT(stackpos >= stacktop + 2);
1426
/* From here on stackpos is the STACK() converted offset, which is advanced
by sizeof(sljit_sw) after every emitted store. */
1427 stackpos = STACK(stackpos);
1428 if (ccend == NULL)
1429 {
1430 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive (S)CBRAPOS is not skipped: the loop below processes the
opening bracket itself as a capturing bracket. */
1431 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1432 cc = next_opcode(common, cc);
1433 }
1434
1435 SLJIT_ASSERT(cc != NULL);
1436 while (cc < ccend)
1437 switch(*cc)
1438 {
1439 case OP_SET_SOM:
1440 SLJIT_ASSERT(common->has_set_som);
/* Saved only once per frame. */
1441 if (!setsom_found)
1442 {
1443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1445 stackpos += (int)sizeof(sljit_sw);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1447 stackpos += (int)sizeof(sljit_sw);
1448 setsom_found = TRUE;
1449 }
1450 cc += 1;
1451 break;
1452
1453 case OP_MARK:
1454 case OP_PRUNE_ARG:
1455 case OP_THEN_ARG:
1456 SLJIT_ASSERT(common->mark_ptr != 0);
/* Saved only once per frame. */
1457 if (!setmark_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 setmark_found = TRUE;
1465 }
1466 cc += 1 + 2 + cc[1];
1467 break;
1468
/* A recursion saves every tracked value which is in use by the pattern
and has not been saved yet. */
1469 case OP_RECURSE:
1470 if (common->has_set_som && !setsom_found)
1471 {
1472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477 setsom_found = TRUE;
1478 }
1479 if (common->mark_ptr != 0 && !setmark_found)
1480 {
1481 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1483 stackpos += (int)sizeof(sljit_sw);
1484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1485 stackpos += (int)sizeof(sljit_sw);
1486 setmark_found = TRUE;
1487 }
1488 if (common->capture_last_ptr != 0 && !capture_last_found)
1489 {
1490 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1492 stackpos += (int)sizeof(sljit_sw);
1493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1494 stackpos += (int)sizeof(sljit_sw);
1495 capture_last_found = TRUE;
1496 }
1497 cc += 1 + LINK_SIZE;
1498 break;
1499
1500 case OP_CBRA:
1501 case OP_CBRAPOS:
1502 case OP_SCBRA:
1503 case OP_SCBRAPOS:
1504 if (common->capture_last_ptr != 0 && !capture_last_found)
1505 {
1506 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1508 stackpos += (int)sizeof(sljit_sw);
1509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1510 stackpos += (int)sizeof(sljit_sw);
1511 capture_last_found = TRUE;
1512 }
/* Three words: the (positive) tag and the two values of the ovector pair
which belongs to this bracket number. */
1513 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1514 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1515 stackpos += (int)sizeof(sljit_sw);
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1517 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1519 stackpos += (int)sizeof(sljit_sw);
1520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1521 stackpos += (int)sizeof(sljit_sw);
1522
1523 cc += 1 + LINK_SIZE + IMM2_SIZE;
1524 break;
1525
1526 default:
1527 cc = next_opcode(common, cc);
1528 SLJIT_ASSERT(cc != NULL);
1529 break;
1530 }
1531
/* Closing zero word. It must land exactly on the slot named by stacktop,
otherwise the size computed for the frame and this function disagree. */
1532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1533 SLJIT_ASSERT(stackpos == STACK(stacktop));
1534 }
1535
1536 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1537 {
1538 int private_data_length = needs_control_head ? 3 : 2;
1539 int size;
1540 pcre_uchar *alternative;
1541 /* Calculate the sum of the private machine words. */
1542 while (cc < ccend)
1543 {
1544 size = 0;
1545 switch(*cc)
1546 {
1547 case OP_KET:
1548 if (PRIVATE_DATA(cc) != 0)
1549 private_data_length++;
1550 cc += 1 + LINK_SIZE;
1551 break;
1552
1553 case OP_ASSERT:
1554 case OP_ASSERT_NOT:
1555 case OP_ASSERTBACK:
1556 case OP_ASSERTBACK_NOT:
1557 case OP_ONCE:
1558 case OP_ONCE_NC:
1559 case OP_BRAPOS:
1560 case OP_SBRA:
1561 case OP_SBRAPOS:
1562 case OP_SCOND:
1563 private_data_length++;
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_CBRA:
1568 case OP_SCBRA:
1569 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1570 private_data_length++;
1571 cc += 1 + LINK_SIZE + IMM2_SIZE;
1572 break;
1573
1574 case OP_CBRAPOS:
1575 case OP_SCBRAPOS:
1576 private_data_length += 2;
1577 cc += 1 + LINK_SIZE + IMM2_SIZE;
1578 break;
1579
1580 case OP_COND:
1581 /* Might be a hidden SCOND. */
1582 alternative = cc + GET(cc, 1);
1583 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1584 private_data_length++;
1585 cc += 1 + LINK_SIZE;
1586 break;
1587
1588 CASE_ITERATOR_PRIVATE_DATA_1
1589 if (PRIVATE_DATA(cc))
1590 private_data_length++;
1591 cc += 2;
1592 #ifdef SUPPORT_UTF
1593 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1594 #endif
1595 break;
1596
1597 CASE_ITERATOR_PRIVATE_DATA_2A
1598 if (PRIVATE_DATA(cc))
1599 private_data_length += 2;
1600 cc += 2;
1601 #ifdef SUPPORT_UTF
1602 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1603 #endif
1604 break;
1605
1606 CASE_ITERATOR_PRIVATE_DATA_2B
1607 if (PRIVATE_DATA(cc))
1608 private_data_length += 2;
1609 cc += 2 + IMM2_SIZE;
1610 #ifdef SUPPORT_UTF
1611 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1612 #endif
1613 break;
1614
1615 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1616 if (PRIVATE_DATA(cc))
1617 private_data_length++;
1618 cc += 1;
1619 break;
1620
1621 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 1;
1625 break;
1626
1627 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1628 if (PRIVATE_DATA(cc))
1629 private_data_length += 2;
1630 cc += 1 + IMM2_SIZE;
1631 break;
1632
1633 case OP_CLASS:
1634 case OP_NCLASS:
1635 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1636 case OP_XCLASS:
1637 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1638 #else
1639 size = 1 + 32 / (int)sizeof(pcre_uchar);
1640 #endif
1641 if (PRIVATE_DATA(cc))
1642 private_data_length += get_class_iterator_size(cc + size);
1643 cc += size;
1644 break;
1645
1646 default:
1647 cc = next_opcode(common, cc);
1648 SLJIT_ASSERT(cc != NULL);
1649 break;
1650 }
1651 }
1652 SLJIT_ASSERT(cc == ccend);
1653 return private_data_length;
1654 }
1655
/* Emits the code which copies private data words between the locals
(addressed through SLJIT_LOCALS_REG) and the stack (addressed through
STACK_TOP). When "save" is set the locals are written to the stack,
otherwise the stack content is written back to the locals.

The words to be copied are found by scanning the opcodes in [cc, ccend)
with the same case groups as get_private_data_copy_length. stackptr and
stacktop delimit the stack area (both are converted with STACK() below);
the closing assertion requires that the area is consumed completely.

The copy is pipelined through TMP1 and TMP2, which are used alternately:
a value is loaded into one temporary while the other one is written out. */
1656 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1657 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1658 {
1659 DEFINE_COMPILER;
/* Offsets of the locals selected by the current scan step (at most two). */
1660 int srcw[2];
1661 int count, size;
/* tmp1next: TMP1 is the temporary used by the next transfer.
tmp1empty / tmp2empty: the temporary holds no pending value. */
1662 BOOL tmp1next = TRUE;
1663 BOOL tmp1empty = TRUE;
1664 BOOL tmp2empty = TRUE;
1665 pcre_uchar *alternative;
1666 enum {
1667 start,
1668 loop,
1669 end
1670 } status;
1671
/* The start state (recursive head / control head) is only executed when
saving. */
1672 status = save ? start : loop;
1673 stackptr = STACK(stackptr - 2);
1674 stacktop = STACK(stacktop - 1);
1675
1676 if (!save)
1677 {
/* Step over the one or two words which the save pass stores in its start
state; they are not written back by this function. Then preload both
temporaries as far as the stack area allows. */
1678 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1679 if (stackptr < stacktop)
1680 {
1681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1682 stackptr += sizeof(sljit_sw);
1683 tmp1empty = FALSE;
1684 }
1685 if (stackptr < stacktop)
1686 {
1687 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1688 stackptr += sizeof(sljit_sw);
1689 tmp2empty = FALSE;
1690 }
1691 /* The tmp1next must be TRUE in either way. */
1692 }
1693
/* Every iteration selects zero, one or two locals (count, srcw) and the
inner while loop below transfers them. */
1694 do
1695 {
1696 count = 0;
1697 switch(status)
1698 {
1699 case start:
1700 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1701 count = 1;
1702 srcw[0] = common->recursive_head_ptr;
1703 if (needs_control_head)
1704 {
1705 SLJIT_ASSERT(common->control_head_ptr != 0);
1706 count = 2;
1707 srcw[1] = common->control_head_ptr;
1708 }
1709 status = loop;
1710 break;
1711
1712 case loop:
1713 if (cc >= ccend)
1714 {
1715 status = end;
1716 break;
1717 }
1718
1719 switch(*cc)
1720 {
1721 case OP_KET:
1722 if (PRIVATE_DATA(cc) != 0)
1723 {
1724 count = 1;
1725 srcw[0] = PRIVATE_DATA(cc);
1726 }
1727 cc += 1 + LINK_SIZE;
1728 break;
1729
1730 case OP_ASSERT:
1731 case OP_ASSERT_NOT:
1732 case OP_ASSERTBACK:
1733 case OP_ASSERTBACK_NOT:
1734 case OP_ONCE:
1735 case OP_ONCE_NC:
1736 case OP_BRAPOS:
1737 case OP_SBRA:
1738 case OP_SBRAPOS:
1739 case OP_SCOND:
1740 count = 1;
1741 srcw[0] = PRIVATE_DATA(cc);
1742 SLJIT_ASSERT(srcw[0] != 0);
1743 cc += 1 + LINK_SIZE;
1744 break;
1745
1746 case OP_CBRA:
1747 case OP_SCBRA:
1748 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1749 {
1750 count = 1;
1751 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1752 }
1753 cc += 1 + LINK_SIZE + IMM2_SIZE;
1754 break;
1755
1756 case OP_CBRAPOS:
1757 case OP_SCBRAPOS:
1758 count = 2;
1759 srcw[0] = PRIVATE_DATA(cc);
1760 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1761 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1762 cc += 1 + LINK_SIZE + IMM2_SIZE;
1763 break;
1764
1765 case OP_COND:
1766 /* Might be a hidden SCOND. */
1767 alternative = cc + GET(cc, 1);
1768 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1769 {
1770 count = 1;
1771 srcw[0] = PRIVATE_DATA(cc);
1772 SLJIT_ASSERT(srcw[0] != 0);
1773 }
1774 cc += 1 + LINK_SIZE;
1775 break;
1776
1777 CASE_ITERATOR_PRIVATE_DATA_1
1778 if (PRIVATE_DATA(cc))
1779 {
1780 count = 1;
1781 srcw[0] = PRIVATE_DATA(cc);
1782 }
1783 cc += 2;
1784 #ifdef SUPPORT_UTF
1785 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1786 #endif
1787 break;
1788
/* Two word iterators: the second word directly follows the first one. */
1789 CASE_ITERATOR_PRIVATE_DATA_2A
1790 if (PRIVATE_DATA(cc))
1791 {
1792 count = 2;
1793 srcw[0] = PRIVATE_DATA(cc);
1794 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1795 }
1796 cc += 2;
1797 #ifdef SUPPORT_UTF
1798 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1799 #endif
1800 break;
1801
1802 CASE_ITERATOR_PRIVATE_DATA_2B
1803 if (PRIVATE_DATA(cc))
1804 {
1805 count = 2;
1806 srcw[0] = PRIVATE_DATA(cc);
1807 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1808 }
1809 cc += 2 + IMM2_SIZE;
1810 #ifdef SUPPORT_UTF
1811 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1812 #endif
1813 break;
1814
1815 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1816 if (PRIVATE_DATA(cc))
1817 {
1818 count = 1;
1819 srcw[0] = PRIVATE_DATA(cc);
1820 }
1821 cc += 1;
1822 break;
1823
1824 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1825 if (PRIVATE_DATA(cc))
1826 {
1827 count = 2;
1828 srcw[0] = PRIVATE_DATA(cc);
1829 srcw[1] = srcw[0] + sizeof(sljit_sw);
1830 }
1831 cc += 1;
1832 break;
1833
1834 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1835 if (PRIVATE_DATA(cc))
1836 {
1837 count = 2;
1838 srcw[0] = PRIVATE_DATA(cc);
1839 srcw[1] = srcw[0] + sizeof(sljit_sw);
1840 }
1841 cc += 1 + IMM2_SIZE;
1842 break;
1843
1844 case OP_CLASS:
1845 case OP_NCLASS:
1846 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1847 case OP_XCLASS:
1848 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1849 #else
1850 size = 1 + 32 / (int)sizeof(pcre_uchar);
1851 #endif
/* The iterator following the class decides whether one or two words
belong to it. */
1852 if (PRIVATE_DATA(cc))
1853 switch(get_class_iterator_size(cc + size))
1854 {
1855 case 1:
1856 count = 1;
1857 srcw[0] = PRIVATE_DATA(cc);
1858 break;
1859
1860 case 2:
1861 count = 2;
1862 srcw[0] = PRIVATE_DATA(cc);
1863 srcw[1] = srcw[0] + sizeof(sljit_sw);
1864 break;
1865
1866 default:
1867 SLJIT_ASSERT_STOP();
1868 break;
1869 }
1870 cc += size;
1871 break;
1872
1873 default:
1874 cc = next_opcode(common, cc);
1875 SLJIT_ASSERT(cc != NULL);
1876 break;
1877 }
1878 break;
1879
1880 case end:
1881 SLJIT_ASSERT_STOP();
1882 break;
1883 }
1884
/* Transfer the selected words. Note that srcw is walked from the highest
index down, so srcw[1] is handled before srcw[0]. */
1885 while (count > 0)
1886 {
1887 count--;
1888 if (save)
1889 {
/* Saving: write out the value loaded earlier into the temporary (if any),
then load the next local into it. */
1890 if (tmp1next)
1891 {
1892 if (!tmp1empty)
1893 {
1894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1895 stackptr += sizeof(sljit_sw);
1896 }
1897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1898 tmp1empty = FALSE;
1899 tmp1next = FALSE;
1900 }
1901 else
1902 {
1903 if (!tmp2empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1909 tmp2empty = FALSE;
1910 tmp1next = TRUE;
1911 }
1912 }
1913 else
1914 {
/* Restoring: store the preloaded temporary into the local, then refill it
from the stack unless the stack area is exhausted. */
1915 if (tmp1next)
1916 {
1917 SLJIT_ASSERT(!tmp1empty);
1918 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1919 tmp1empty = stackptr >= stacktop;
1920 if (!tmp1empty)
1921 {
1922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 tmp1next = FALSE;
1926 }
1927 else
1928 {
1929 SLJIT_ASSERT(!tmp2empty);
1930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1931 tmp2empty = stackptr >= stacktop;
1932 if (!tmp2empty)
1933 {
1934 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1935 stackptr += sizeof(sljit_sw);
1936 }
1937 tmp1next = TRUE;
1938 }
1939 }
1940 }
1941 }
1942 while (status != end);
1943
/* Saving: flush the values still held in the temporaries. The temporary
which would be used next holds the older value, so it is written first. */
1944 if (save)
1945 {
1946 if (tmp1next)
1947 {
1948 if (!tmp1empty)
1949 {
1950 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1951 stackptr += sizeof(sljit_sw);
1952 }
1953 if (!tmp2empty)
1954 {
1955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1956 stackptr += sizeof(sljit_sw);
1957 }
1958 }
1959 else
1960 {
1961 if (!tmp2empty)
1962 {
1963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1964 stackptr += sizeof(sljit_sw);
1965 }
1966 if (!tmp1empty)
1967 {
1968 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1969 stackptr += sizeof(sljit_sw);
1970 }
1971 }
1972 }
/* The scan must end exactly at ccend with the stack area fully used; when
restoring, nothing may be left in the temporaries. */
1973 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1974 }
1975
1976 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1977 {
1978 pcre_uchar *end = bracketend(cc);
1979 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1980
1981 /* Assert captures then. */
1982 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1983 current_offset = NULL;
1984 /* Conditional block does not. */
1985 if (*cc == OP_COND || *cc == OP_SCOND)
1986 has_alternatives = FALSE;
1987
1988 cc = next_opcode(common, cc);
1989 if (has_alternatives)
1990 current_offset = common->then_offsets + (cc - common->start);
1991
1992 while (cc < end)
1993 {
1994 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1995 cc = set_then_offsets(common, cc, current_offset);
1996 else
1997 {
1998 if (*cc == OP_ALT && has_alternatives)
1999 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2000 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2001 *current_offset = 1;
2002 cc = next_opcode(common, cc);
2003 }
2004 }
2005
2006 return end;
2007 }
2008
/* The case-label groups are not used by any function defined below this
point. */
2009 #undef CASE_ITERATOR_PRIVATE_DATA_1
2010 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2011 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2012 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2013 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2014 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2015
2016 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2017 {
2018 return (value & (value - 1)) == 0;
2019 }
2020
2021 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2022 {
2023 while (list)
2024 {
2025 /* sljit_set_label is clever enough to do nothing
2026 if either the jump or the label is NULL. */
2027 SET_LABEL(list->jump, label);
2028 list = list->next;
2029 }
2030 }
2031
2032 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2033 {
2034 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2035 if (list_item)
2036 {
2037 list_item->next = *list;
2038 list_item->jump = jump;
2039 *list = list_item;
2040 }
2041 }
2042
2043 static void add_stub(compiler_common *common, struct sljit_jump *start)
2044 {
2045 DEFINE_COMPILER;
2046 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2047
2048 if (list_item)
2049 {
2050 list_item->start = start;
2051 list_item->quit = LABEL();
2052 list_item->next = common->stubs;
2053 common->stubs = list_item;
2054 }
2055 }
2056
2057 static void flush_stubs(compiler_common *common)
2058 {
2059 DEFINE_COMPILER;
2060 stub_list* list_item = common->stubs;
2061
2062 while (list_item)
2063 {
2064 JUMPHERE(list_item->start);
2065 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2066 JUMPTO(SLJIT_JUMP, list_item->quit);
2067 list_item = list_item->next;
2068 }
2069 common->stubs = NULL;
2070 }
2071
2072 static void add_label_addr(compiler_common *common)
2073 {
2074 DEFINE_COMPILER;
2075 label_addr_list *label_addr;
2076
2077 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2078 if (label_addr == NULL)
2079 return;
2080 label_addr->label = LABEL();
2081 label_addr->addr = common->read_only_data_ptr;
2082 label_addr->next = common->label_addrs;
2083 common->label_addrs = label_addr;
2084 common->read_only_data_ptr++;
2085 }
2086
2087 static SLJIT_INLINE void count_match(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090
2091 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093 }
2094
2095 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2096 {
2097 /* May destroy all locals and registers except TMP2. */
2098 DEFINE_COMPILER;
2099
2100 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2101 #ifdef DESTROY_REGISTERS
2102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2103 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2104 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2107 #endif
2108 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2109 }
2110
2111 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2112 {
2113 DEFINE_COMPILER;
2114 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2115 }
2116
2117 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2118 {
2119 DEFINE_COMPILER;
2120 struct sljit_label *loop;
2121 int i;
2122
2123 /* At this point we can freely use all temporary registers. */
2124 SLJIT_ASSERT(length > 1);
2125 /* TMP1 returns with begin - 1. */
2126 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2127 if (length < 8)
2128 {
2129 for (i = 1; i < length; i++)
2130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2131 }
2132 else
2133 {
2134 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2135 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2136 loop = LABEL();
2137 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2139 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2140 }
2141 }
2142
2143 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2144 {
2145 DEFINE_COMPILER;
2146 struct sljit_label *loop;
2147 int i;
2148
2149 SLJIT_ASSERT(length > 1);
2150 /* OVECTOR(1) contains the "string begin - 1" constant. */
2151 if (length > 2)
2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2153 if (length < 8)
2154 {
2155 for (i = 2; i < length; i++)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2157 }
2158 else
2159 {
2160 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2161 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2162 loop = LABEL();
2163 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2164 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2165 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2166 }
2167
2168 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2169 if (common->mark_ptr != 0)
2170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2171 if (common->control_head_ptr != 0)
2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2176 }
2177
2178 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2179 {
2180 while (current != NULL)
2181 {
2182 switch (current[-2])
2183 {
2184 case type_then_trap:
2185 break;
2186
2187 case type_mark:
2188 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2189 return current[-4];
2190 break;
2191
2192 default:
2193 SLJIT_ASSERT_STOP();
2194 break;
2195 }
2196 current = (sljit_sw*)current[-1];
2197 }
2198 return -1;
2199 }
2200
/* Emits the code which publishes a successful match: the end of the match
is stored, the mark (if used) is copied to the arguments, the locally kept
ovector is converted to offsets and written to the output vector, and the
return value is computed in SLJIT_RETURN_REG. topbracket is the highest
capturing bracket number to be considered for the return value. */
2201 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2202 {
2203 DEFINE_COMPILER;
2204 struct sljit_label *loop;
2205 struct sljit_jump *early_quit;
2206
2207 /* At this point we can freely use all registers. */
/* Keep the previous OVECTOR(1) content in SLJIT_SAVED_REG3 (it is compared
against below) before STR_PTR is stored there. */
2208 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2210
2211 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2212 if (common->mark_ptr != 0)
2213 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2214 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2215 if (common->mark_ptr != 0)
2216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* SLJIT_SCRATCH_REG3 starts one int before the output vector; presumably
the SLJIT_MOVU store in the loop advances the address before writing -
confirm against the sljit documentation. SLJIT_SCRATCH_REG1 is overwritten
with "begin" and SLJIT_SCRATCH_REG2 holds the number of values to copy. */
2217 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2218 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2219 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2220 /* Unlikely, but possible */
2221 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: each local ovector value has "begin" subtracted and, for 16
and 32 bit code units, is shifted right by UCHAR_SHIFT, before it is stored
as an int. */
2222 loop = LABEL();
2223 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2224 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2225 /* Copy the integer value to the output buffer */
2226 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2227 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2228 #endif
2229 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2231 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2232 JUMPHERE(early_quit);
2233
2234 /* Calculate the return value, which is the maximum ovector value. */
2235 if (topbracket > 1)
2236 {
/* Walk down from the pair just above topbracket, decrementing the counter
for every pair whose first value still equals the saved OVECTOR(1) content;
the counter left over is the return value. */
2237 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2238 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2239
2240 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2241 loop = LABEL();
2242 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2243 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2244 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2245 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2246 }
2247 else
2248 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2249 }
2250
2251 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2252 {
2253 DEFINE_COMPILER;
2254 struct sljit_jump *jump;
2255
2256 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2257 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2258 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2259
2260 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2262 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2263 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2264
2265 /* Store match begin and end. */
2266 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2267 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2268
2269 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2270 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2271 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2272 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2273 #endif
2274 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2275 JUMPHERE(jump);
2276
2277 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2278 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2279 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2280 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2281 #endif
2282 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2283
2284 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2285 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2286 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2287 #endif
2288 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2289
2290 JUMPTO(SLJIT_JUMP, quit);
2291 }
2292
2293 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2294 {
2295 /* May destroy TMP1. */
2296 DEFINE_COMPILER;
2297 struct sljit_jump *jump;
2298
2299 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2300 {
2301 /* The value of -1 must be kept for start_used_ptr! */
2302 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2303 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2304 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2305 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2307 JUMPHERE(jump);
2308 }
2309 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2310 {
2311 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2313 JUMPHERE(jump);
2314 }
2315 }
2316
2317 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2318 {
2319 /* Detects if the character has an othercase. */
2320 unsigned int c;
2321
2322 #ifdef SUPPORT_UTF
2323 if (common->utf)
2324 {
2325 GETCHAR(c, cc);
2326 if (c > 127)
2327 {
2328 #ifdef SUPPORT_UCP
2329 return c != UCD_OTHERCASE(c);
2330 #else
2331 return FALSE;
2332 #endif
2333 }
2334 #ifndef COMPILE_PCRE8
2335 return common->fcc[c] != c;
2336 #endif
2337 }
2338 else
2339 #endif
2340 c = *cc;
2341 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2342 }
2343
2344 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2345 {
2346 /* Returns with the othercase. */
2347 #ifdef SUPPORT_UTF
2348 if (common->utf && c > 127)
2349 {
2350 #ifdef SUPPORT_UCP
2351 return UCD_OTHERCASE(c);
2352 #else
2353 return c;
2354 #endif
2355 }
2356 #endif
2357 return TABLE_GET(c, common->fcc, c);
2358 }
2359
2360 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2361 {
2362 /* Detects if the character and its othercase has only 1 bit difference. */
2363 unsigned int c, oc, bit;
2364 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2365 int n;
2366 #endif
2367
2368 #ifdef SUPPORT_UTF
2369 if (common->utf)
2370 {
2371 GETCHAR(c, cc);
2372 if (c <= 127)
2373 oc = common->fcc[c];
2374 else
2375 {
2376 #ifdef SUPPORT_UCP
2377 oc = UCD_OTHERCASE(c);
2378 #else
2379 oc = c;
2380 #endif
2381 }
2382 }
2383 else
2384 {
2385 c = *cc;
2386 oc = TABLE_GET(c, common->fcc, c);
2387 }
2388 #else
2389 c = *cc;
2390 oc = TABLE_GET(c, common->fcc, c);
2391 #endif
2392
2393 SLJIT_ASSERT(c != oc);
2394
2395 bit = c ^ oc;
2396 /* Optimized for English alphabet. */
2397 if (c <= 127 && bit == 0x20)
2398 return (0 << 8) | 0x20;
2399
2400 /* Since c != oc, they must have at least 1 bit difference. */
2401 if (!is_powerof2(bit))
2402 return 0;
2403
2404 #if defined COMPILE_PCRE8
2405
2406 #ifdef SUPPORT_UTF
2407 if (common->utf && c > 127)
2408 {
2409 n = GET_EXTRALEN(*cc);
2410 while ((bit & 0x3f) == 0)
2411 {
2412 n--;
2413 bit >>= 6;
2414 }
2415 return (n << 8) | bit;
2416 }
2417 #endif /* SUPPORT_UTF */
2418 return (0 << 8) | bit;
2419
2420 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2421
2422 #ifdef SUPPORT_UTF
2423 if (common->utf && c > 65535)
2424 {
2425 if (bit >= (1 << 10))
2426 bit >>= 10;
2427 else
2428 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2429 }
2430 #endif /* SUPPORT_UTF */
2431 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2432
2433 #endif /* COMPILE_PCRE[8|16|32] */
2434 }
2435
2436 static void check_partial(compiler_common *common, BOOL force)
2437 {
2438 /* Checks whether a partial matching is occurred. Does not modify registers. */
2439 DEFINE_COMPILER;
2440 struct sljit_jump *jump = NULL;
2441
2442 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2443
2444 if (common->mode == JIT_COMPILE)
2445 return;
2446
2447 if (!force)
2448 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2449 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2451
2452 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2454 else
2455 {
2456 if (common->partialmatchlabel != NULL)
2457 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458 else
2459 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460 }
2461
2462 if (jump != NULL)
2463 JUMPHERE(jump);
2464 }
2465
2466 static void check_str_end(compiler_common *common, jump_list **end_reached)
2467 {
2468 /* Does not affect registers. Usually used in a tight spot. */
2469 DEFINE_COMPILER;
2470 struct sljit_jump *jump;
2471
2472 if (common->mode == JIT_COMPILE)
2473 {
2474 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2475 return;
2476 }
2477
2478 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2479 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2480 {
2481 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2483 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2484 }
2485 else
2486 {
2487 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2488 if (common->partialmatchlabel != NULL)
2489 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2490 else
2491 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2492 }
2493 JUMPHERE(jump);
2494 }
2495
2496 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2497 {
2498 DEFINE_COMPILER;
2499 struct sljit_jump *jump;
2500
2501 if (common->mode == JIT_COMPILE)
2502 {
2503 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2504 return;
2505 }
2506
2507 /* Partial matching mode. */
2508 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2509 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2510 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2511 {
2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2513 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2514 }
2515 else
2516 {
2517 if (common->partialmatchlabel != NULL)
2518 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2519 else
2520 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2521 }
2522 JUMPHERE(jump);
2523 }
2524
2525 static void peek_char(compiler_common *common, pcre_uint32 max)
2526 {
2527 /* Reads the character into TMP1, keeps STR_PTR.
2528 Does not check STR_END. TMP2 Destroyed. */
2529 DEFINE_COMPILER;
2530 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2531 struct sljit_jump *jump;
2532 #endif
2533
2534 SLJIT_UNUSED_ARG(max);
2535
2536 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2537 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2538 if (common->utf)
2539 {
2540 if (max < 128) return;
2541
2542 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2544 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2545 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546 JUMPHERE(jump);
2547 }
2548 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2549
2550 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2551 if (common->utf)
2552 {
2553 if (max < 0xd800) return;
2554
2555 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2556 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2557 /* TMP2 contains the high surrogate. */
2558 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2559 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2560 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2563 JUMPHERE(jump);
2564 }
2565 #endif
2566 }
2567
2568 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569
2570 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2571 {
2572 /* Tells whether the character codes below 128 are enough
2573 to determine a match. */
2574 const pcre_uint8 value = nclass ? 0xff : 0;
2575 const pcre_uint8* end = bitset + 32;
2576
2577 bitset += 16;
2578 do
2579 {
2580 if (*bitset++ != value)
2581 return FALSE;
2582 }
2583 while (bitset < end);
2584 return TRUE;
2585 }
2586
2587 static void read_char7_type(compiler_common *common, BOOL full_read)
2588 {
2589 /* Reads the precise character type of a character into TMP1, if the character
2590 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2591 full_read argument tells whether characters above max are accepted or not. */
2592 DEFINE_COMPILER;
2593 struct sljit_jump *jump;
2594
2595 SLJIT_ASSERT(common->utf);
2596
2597 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599
2600 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601
2602 if (full_read)
2603 {
2604 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2605 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2607 JUMPHERE(jump);
2608 }
2609 }
2610
2611 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612
2613 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2614 {
2615 /* Reads the precise value of a character into TMP1, if the character is
2616 between min and max (c >= min && c <= max). Otherwise it returns with a value
2617 outside the range. Does not check STR_END. */
2618 DEFINE_COMPILER;
2619 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2620 struct sljit_jump *jump;
2621 #endif
2622 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623 struct sljit_jump *jump2;
2624 #endif
2625
2626 SLJIT_UNUSED_ARG(update_str_ptr);
2627 SLJIT_UNUSED_ARG(min);
2628 SLJIT_UNUSED_ARG(max);
2629 SLJIT_ASSERT(min <= max);
2630
2631 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633
2634 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2635 if (common->utf)
2636 {
2637 if (max < 128 && !update_str_ptr) return;
2638
2639 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2640 if (min >= 0x10000)
2641 {
2642 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2643 if (update_str_ptr)
2644 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2646 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2652 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2655 if (!update_str_ptr)
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2657 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660 JUMPHERE(jump2);
2661 if (update_str_ptr)
2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2663 }
2664 else if (min >= 0x800 && max <= 0xffff)
2665 {
2666 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2667 if (update_str_ptr)
2668 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2670 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2675 if (!update_str_ptr)
2676 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 JUMPHERE(jump2);
2681 if (update_str_ptr)
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683 }
2684 else if (max >= 0x800)
2685 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2686 else if (max < 128)
2687 {
2688 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2690 }
2691 else
2692 {
2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694 if (!update_str_ptr)
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696 else
2697 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2700 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2701 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2702 if (update_str_ptr)
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2704 }
2705 JUMPHERE(jump);
2706 }
2707 #endif
2708
2709 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2710 if (common->utf)
2711 {
2712 if (max >= 0x10000)
2713 {
2714 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2715 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2716 /* TMP2 contains the high surrogate. */
2717 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2718 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2719 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2722 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723 JUMPHERE(jump);
2724 return;
2725 }
2726
2727 if (max < 0xd800 && !update_str_ptr) return;
2728
2729 /* Skip low surrogate if necessary. */
2730 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2732 if (update_str_ptr)
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2734 if (max >= 0xd800)
2735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2736 JUMPHERE(jump);
2737 }
2738 #endif
2739 }
2740
2741 static SLJIT_INLINE void read_char(compiler_common *common)
2742 {
2743 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744 }
2745
2746 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2747 {
2748 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2749 DEFINE_COMPILER;
2750 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2751 struct sljit_jump *jump;
2752 #endif
2753 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2754 struct sljit_jump *jump2;
2755 #endif
2756
2757 SLJIT_UNUSED_ARG(update_str_ptr);
2758
2759 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761
2762 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2763 if (common->utf)
2764 {
2765 /* This can be an extra read in some situations, but hopefully
2766 it is needed in most cases. */
2767 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2768 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2769 if (!update_str_ptr)
2770 {
2771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2774 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2775 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2776 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2778 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2780 JUMPHERE(jump2);
2781 }
2782 else
2783 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2784 JUMPHERE(jump);
2785 return;
2786 }
2787 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2788
2789 #if !defined COMPILE_PCRE8
2790 /* The ctypes array contains only 256 values. */
2791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2792 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2793 #endif
2794 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795 #if !defined COMPILE_PCRE8
2796 JUMPHERE(jump);
2797 #endif
2798
2799 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2800 if (common->utf && update_str_ptr)
2801 {
2802 /* Skip low surrogate if necessary. */
2803 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2804 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806 JUMPHERE(jump);
2807 }
2808 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2809 }
2810
2811 static void skip_char_back(compiler_common *common)
2812 {
2813 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2814 DEFINE_COMPILER;
2815 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2816 #if defined COMPILE_PCRE8
2817 struct sljit_label *label;
2818
2819 if (common->utf)
2820 {
2821 label = LABEL();
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2823 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2825 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2826 return;
2827 }
2828 #elif defined COMPILE_PCRE16
2829 if (common->utf)
2830 {
2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2832 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2833 /* Skip low surrogate if necessary. */
2834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2836 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2837 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2838 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2839 return;
2840 }
2841 #endif /* COMPILE_PCRE[8|16] */
2842 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2843 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 }
2845
2846 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2847 {
2848 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2849 DEFINE_COMPILER;
2850 struct sljit_jump *jump;
2851
2852 if (nltype == NLTYPE_ANY)
2853 {
2854 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2855 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2856 }
2857 else if (nltype == NLTYPE_ANYCRLF)
2858 {
2859 if (jumpifmatch)
2860 {
2861 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2863 }
2864 else
2865 {
2866 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2867 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2868 JUMPHERE(jump);
2869 }
2870 }
2871 else
2872 {
2873 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2874 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2875 }
2876 }
2877
2878 #ifdef SUPPORT_UTF
2879
2880 #if defined COMPILE_PCRE8
2881 static void do_utfreadchar(compiler_common *common)
2882 {
2883 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2884 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2885 DEFINE_COMPILER;
2886 struct sljit_jump *jump;
2887
2888 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2889 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2891 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2892 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2893 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894
2895 /* Searching for the first zero. */
2896 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2897 jump = JUMP(SLJIT_C_NOT_ZERO);
2898 /* Two byte sequence. */
2899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2901 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2902
2903 JUMPHERE(jump);
2904 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2905 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2906 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2907 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2908 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2909
2910 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2911 jump = JUMP(SLJIT_C_NOT_ZERO);
2912 /* Three byte sequence. */
2913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2915 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2916
2917 /* Four byte sequence. */
2918 JUMPHERE(jump);
2919 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2921 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2922 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2923 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2924 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927 }
2928
2929 static void do_utfreadchar16(compiler_common *common)
2930 {
2931 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2932 of the character (>= 0xc0). Return value in TMP1. */
2933 DEFINE_COMPILER;
2934 struct sljit_jump *jump;
2935
2936 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2937 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942
2943 /* Searching for the first zero. */
2944 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945 jump = JUMP(SLJIT_C_NOT_ZERO);
2946 /* Two byte sequence. */
2947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949
2950 JUMPHERE(jump);
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953 /* This code runs only in 8 bit mode. No need to shift the value. */
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960 /* Three byte sequence. */
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963 }
2964
2965 static void do_utfreadtype8(compiler_common *common)
2966 {
2967 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2968 of the character (>= 0xc0). Return value in TMP1. */
2969 DEFINE_COMPILER;
2970 struct sljit_jump *jump;
2971 struct sljit_jump *compare;
2972
2973 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2974
2975 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2976 jump = JUMP(SLJIT_C_NOT_ZERO);
2977 /* Two byte sequence. */
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981 /* The upper 5 bits are known at this point. */
2982 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2983 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2986 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988
2989 JUMPHERE(compare);
2990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2992
2993 /* We only have types for characters less than 256. */
2994 JUMPHERE(jump);
2995 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999 }
3000
3001 #endif /* COMPILE_PCRE8 */
3002
3003 #endif /* SUPPORT_UTF */
3004
3005 #ifdef SUPPORT_UCP
3006
3007 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3008 #define UCD_BLOCK_MASK 127
3009 #define UCD_BLOCK_SHIFT 7
3010
3011 static void do_getucd(compiler_common *common)
3012 {
3013 /* Search the UCD record for the character comes in TMP1.
3014 Returns chartype in TMP1 and UCD offset in TMP2. */
3015 DEFINE_COMPILER;
3016
3017 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3018
3019 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3020 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3021 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3023 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3024 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3026 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3028 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3030 }
3031 #endif
3032
3033 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3034 {
3035 DEFINE_COMPILER;
3036 struct sljit_label *mainloop;
3037 struct sljit_label *newlinelabel = NULL;
3038 struct sljit_jump *start;
3039 struct sljit_jump *end = NULL;
3040 struct sljit_jump *nl = NULL;
3041 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3042 struct sljit_jump *singlechar;
3043 #endif
3044 jump_list *newline = NULL;
3045 BOOL newlinecheck = FALSE;
3046 BOOL readuchar = FALSE;
3047
3048 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3049 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3050 newlinecheck = TRUE;
3051
3052 if (firstline)
3053 {
3054 /* Search for the end of the first line. */
3055 SLJIT_ASSERT(common->first_line_end != 0);
3056 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3057
3058 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3059 {
3060 mainloop = LABEL();
3061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3062 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3064 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3065 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3067 JUMPHERE(end);
3068 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3069 }
3070 else
3071 {
3072 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3073 mainloop = LABEL();
3074 /* Continual stores does not cause data dependency. */
3075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3076 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077 check_newlinechar(common, common->nltype, &newline, TRUE);
3078 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079 JUMPHERE(end);
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3081 set_jumps(newline, LABEL());
3082 }
3083
3084 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3085 }
3086
3087 start = JUMP(SLJIT_JUMP);
3088
3089 if (newlinecheck)
3090 {
3091 newlinelabel = LABEL();
3092 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3093 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3094 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3096 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3097 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3098 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3099 #endif
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3101 nl = JUMP(SLJIT_JUMP);
3102 }
3103
3104 mainloop = LABEL();
3105
3106 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3107 #ifdef SUPPORT_UTF
3108 if (common->utf) readuchar = TRUE;
3109 #endif
3110 if (newlinecheck) readuchar = TRUE;
3111
3112 if (readuchar)
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3114
3115 if (newlinecheck)
3116 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3117
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3120 #if defined COMPILE_PCRE8
3121 if (common->utf)
3122 {
3123 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3124 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3126 JUMPHERE(singlechar);
3127 }
3128 #elif defined COMPILE_PCRE16
3129 if (common->utf)
3130 {
3131 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3132 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3134 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3135 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3136 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3137 JUMPHERE(singlechar);
3138 }
3139 #endif /* COMPILE_PCRE[8|16] */
3140 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3141 JUMPHERE(start);
3142
3143 if (newlinecheck)
3144 {
3145 JUMPHERE(end);
3146 JUMPHERE(nl);
3147 }
3148
3149 return mainloop;
3150 }
3151
3152 #define MAX_N_CHARS 16
3153 #define MAX_N_BYTES 8
3154
3155 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3156 {
3157 pcre_uint8 len = bytes[0];
3158 int i;
3159
3160 if (len == 255)
3161 return;
3162
3163 if (len == 0)
3164 {
3165 bytes[0] = 1;
3166 bytes[1] = byte;
3167 return;
3168 }
3169
3170 for (i = len; i > 0; i--)
3171 if (bytes[i] == byte)
3172 return;
3173
3174 if (len >= MAX_N_BYTES - 1)
3175 {
3176 bytes[0] = 255;
3177 return;
3178 }
3179
3180 len++;
3181 bytes[len] = byte;
3182 bytes[0] = len;
3183 }
3184
/* Collects information about the characters a match must start with.

Starting at the compiled opcode cc, the function walks the pattern and fills
one entry per subject position:
  chars: two pcre_uint32 values per position. chars[0] is the expected
         character with every "don't care" bit set, chars[1] is the mask of
         those "don't care" bits. An untouched entry is recognised by
         chars[0] == NOTACHAR (and chars[1] == 0 in the 32 bit library).
  bytes: MAX_N_BYTES bytes per position, maintained by add_prefix_byte();
         bytes[0] == 255 means "any byte".
At most max_chars positions are filled. The return value is the number of
positions this invocation consumed. Alternatives of a bracket are merged into
the same entries through recursive calls. */
3185 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3186 {
3187 /* Recursive function, which scans prefix literals. */
3188 BOOL last, any, caseless;
3189 int len, repeat, len_save, consumed = 0;
3190 pcre_uint32 chr, mask;
3191 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the encoded other case of one character; its size depends on
the code unit width and on UTF support. */
3192 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3193 pcre_uchar othercase[8];
3194 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3195 pcre_uchar othercase[2];
3196 #else
3197 pcre_uchar othercase[1];
3198 #endif
3199
3200 repeat = 1;
3201 while (TRUE)
3202 {
/* Per-opcode state: "last" stops the scan after this item, "any" marks an
item that matches a whole set of characters, "caseless" requests the other
case to be merged in. */
3203 last = TRUE;
3204 any = FALSE;
3205 caseless = FALSE;
3206 switch (*cc)
3207 {
3208 case OP_CHARI:
3209 caseless = TRUE;
/* Fall through. */
3210 case OP_CHAR:
3211 last = FALSE;
3212 cc++;
3213 break;
3214
3215 case OP_SOD:
3216 case OP_SOM:
3217 case OP_SET_SOM:
3218 case OP_NOT_WORD_BOUNDARY:
3219 case OP_WORD_BOUNDARY:
3220 case OP_EODN:
3221 case OP_EOD:
3222 case OP_CIRC:
3223 case OP_CIRCM:
3224 case OP_DOLL:
3225 case OP_DOLLM:
3226 /* Zero width assertions. */
3227 cc++;
3228 continue;
3229
/* Assertions are skipped as a whole; they contribute no position. */
3230 case OP_ASSERT:
3231 case OP_ASSERT_NOT:
3232 case OP_ASSERTBACK:
3233 case OP_ASSERTBACK_NOT:
3234 cc = bracketend(cc);
3235 continue;
3236
/* One-or-more repeats: the first occurrence is recorded and, because
"last" stays TRUE, the scan ends after it. */
3237 case OP_PLUS:
3238 case OP_MINPLUS:
3239 case OP_POSPLUS:
3240 cc++;
3241 break;
3242
3243 case OP_EXACTI:
3244 caseless = TRUE;
/* Fall through. */
3245 case OP_EXACT:
/* The character that follows is recorded "repeat" times. */
3246 repeat = GET2(cc, 1);
3247 last = FALSE;
3248 cc += 1 + IMM2_SIZE;
3249 break;
3250
3251 case OP_PLUSI:
3252 case OP_MINPLUSI:
3253 case OP_POSPLUSI:
3254 caseless = TRUE;
3255 cc++;
3256 break;
3257
3258 case OP_KET:
3259 cc += 1 + LINK_SIZE;
3260 continue;
3261
/* Reaching OP_ALT means the current alternative has ended; the link is
followed to get past the next alternative. */
3262 case OP_ALT:
3263 cc += GET(cc, 1);
3264 continue;
3265
3266 case OP_ONCE:
3267 case OP_ONCE_NC:
3268 case OP_BRA:
3269 case OP_BRAPOS:
3270 case OP_CBRA:
3271 case OP_CBRAPOS:
/* Every further alternative is scanned recursively into the same chars
and bytes entries. The value returned by the recursive call becomes the
new limit for the remaining scan. */
3272 alternative = cc + GET(cc, 1);
3273 while (*alternative == OP_ALT)
3274 {
3275 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3276 if (max_chars == 0)
3277 return consumed;
3278 alternative += GET(alternative, 1);
3279 }
3280
/* Continue with the first alternative; capturing brackets carry an
extra IMM2_SIZE operand. */
3281 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3282 cc += IMM2_SIZE;
3283 cc += 1 + LINK_SIZE;
3284 continue;
3285
3286 case OP_CLASS:
3287 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3288 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3289 #endif
3290 any = TRUE;
3291 cc += 1 + 32 / sizeof(pcre_uchar);
3292 break;
3293
3294 case OP_NCLASS:
3295 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3296 if (common->utf) return consumed;
3297 #endif
3298 any = TRUE;
3299 cc += 1 + 32 / sizeof(pcre_uchar);
3300 break;
3301
3302 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3303 case OP_XCLASS:
3304 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3305 if (common->utf) return consumed;
3306 #endif
3307 any = TRUE;
3308 cc += GET(cc, 1);
3309 break;
3310 #endif
3311
/* For the three character types below, UTF-8 mode gives up unless
is_char7_bitset() accepts the corresponding class bitmap. */
3312 case OP_DIGIT:
3313 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3314 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3315 return consumed;
3316 #endif
3317 any = TRUE;
3318 cc++;
3319 break;
3320
3321 case OP_WHITESPACE:
3322 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3323 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3324 return consumed;
3325 #endif
3326 any = TRUE;
3327 cc++;
3328 break;
3329
3330 case OP_WORDCHAR:
3331 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3332 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3333 return consumed;
3334 #endif
3335 any = TRUE;
3336 cc++;
3337 break;
3338
3339 case OP_NOT_DIGIT:
3340 case OP_NOT_WHITESPACE:
3341 case OP_NOT_WORDCHAR:
3342 case OP_ANY:
3343 case OP_ALLANY:
3344 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3345 if (common->utf) return consumed;
3346 #endif
3347 any = TRUE;
3348 cc++;
3349 break;
3350
3351 #ifdef SUPPORT_UCP
3352 case OP_NOTPROP:
3353 case OP_PROP:
3354 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3355 if (common->utf) return consumed;
3356 #endif
3357 any = TRUE;
3358 cc += 1 + 2;
3359 break;
3360 #endif
3361
/* Only the repeat count is taken here; the type opcode that follows is
handled by the next loop iteration. */
3362 case OP_TYPEEXACT:
3363 repeat = GET2(cc, 1);
3364 cc += 1 + IMM2_SIZE;
3365 continue;
3366
/* Any other opcode ends the prefix. */
3367 default:
3368 return consumed;
3369 }
3370
3371 if (any)
3372 {
/* A "match anything" position: every bit of the code unit is a don't
care bit and the byte set is marked as overflowed. */
3373 #if defined COMPILE_PCRE8
3374 mask = 0xff;
3375 #elif defined COMPILE_PCRE16
3376 mask = 0xffff;
3377 #elif defined COMPILE_PCRE32
3378 mask = 0xffffffff;
3379 #else
3380 SLJIT_ASSERT_STOP();
3381 #endif
3382
3383 do
3384 {
3385 chars[0] = mask;
3386 chars[1] = mask;
3387 bytes[0] = 255;
3388
3389 consumed++;
3390 if (--max_chars == 0)
3391 return consumed;
3392 chars += 2;
3393 bytes += MAX_N_BYTES;
3394 }
3395 while (--repeat > 0);
3396
3397 repeat = 1;
3398 continue;
3399 }
3400
/* A literal character: len is its length in code units. */
3401 len = 1;
3402 #ifdef SUPPORT_UTF
3403 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3404 #endif
3405
3406 if (caseless && char_has_othercase(common, cc))
3407 {
3408 #ifdef SUPPORT_UTF
3409 if (common->utf)
3410 {
/* The other case is only usable when it encodes to the same number of
code units as the character itself. */
3411 GETCHAR(chr, cc);
3412 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3413 return consumed;
3414 }
3415 else
3416 #endif
3417 {
3418 chr = *cc;
3419 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3420 }
3421 }
3422 else
3423 caseless = FALSE;
3424
/* Remember the start of the character so that it can be replayed for each
repetition. */
3425 len_save = len;
3426 cc_save = cc;
3427 while (TRUE)
3428 {
3429 oc = othercase;
3430 do
3431 {
3432 chr = *cc;
3433 #ifdef COMPILE_PCRE32
3434 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3435 return consumed;
3436 #endif
3437 add_prefix_byte((pcre_uint8)chr, bytes);
3438
3439 mask = 0;
3440 if (caseless)
3441 {
/* Bits that differ between the two cases become don't care bits. */
3442 add_prefix_byte((pcre_uint8)*oc, bytes);
3443 mask = *cc ^ *oc;
3444 chr |= mask;
3445 }
3446
3447 #ifdef COMPILE_PCRE32
3448 if (chars[0] == NOTACHAR && chars[1] == 0)
3449 #else
3450 if (chars[0] == NOTACHAR)
3451 #endif
3452 {
/* First value seen at this position. */
3453 chars[0] = chr;
3454 chars[1] = mask;
3455 }
3456 else
3457 {
/* Merge with the value recorded earlier (by another alternative): the
bits in which the two values differ are added to the mask. */
3458 mask |= chars[0] ^ chr;
3459 chr |= mask;
3460 chars[0] = chr;
3461 chars[1] |= mask;
3462 }
3463
3464 len--;
3465 consumed++;
3466 if (--max_chars == 0)
3467 return consumed;
3468 chars += 2;
3469 bytes += MAX_N_BYTES;
3470 cc++;
3471 oc++;
3472 }
3473 while (len > 0);
3474
3475 if (--repeat == 0)
3476 break;
3477
3478 len = len_save;
3479 cc = cc_save;
3480 }
3481
3482 repeat = 1;
3483 if (last)
3484 return consumed;
3485 }
3486 }
3487
3488 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3489 {
3490 DEFINE_COMPILER;
3491 struct sljit_label *start;
3492 struct sljit_jump *quit;
3493 pcre_uint32 chars[MAX_N_CHARS * 2];
3494 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3495 pcre_uint8 ones[MAX_N_CHARS];
3496 int offsets[3];
3497 pcre_uint32 mask;
3498 pcre_uint8 *byte_set, *byte_set_end;
3499 int i, max, from;
3500 int range_right = -1, range_len = 4 - 1;
3501 sljit_ub *update_table = NULL;
3502 BOOL in_range;
3503
3504 /* This is even TRUE, if both are NULL. */
3505 SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3506
3507 for (i = 0; i < MAX_N_CHARS; i++)
3508 {
3509 chars[i << 1] = NOTACHAR;
3510 chars[(i << 1) + 1] = 0;
3511 bytes[i * MAX_N_BYTES] = 0;
3512 }
3513
3514 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3515
3516 if (max <= 1)
3517 return FALSE;
3518
3519 for (i = 0; i < max; i++)
3520 {
3521 mask = chars[(i << 1) + 1];
3522 ones[i] = ones_in_half_byte[mask & 0xf];
3523 mask >>= 4;
3524 while (mask != 0)
3525 {
3526 ones[i] += ones_in_half_byte[mask & 0xf];
3527 mask >>= 4;
3528 }
3529 }
3530
3531 in_range = FALSE;
3532 for (i = 0; i <= max; i++)
3533 {
3534 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3535 {
3536 range_len = i - from;
3537 range_right = i - 1;
3538 }
3539
3540 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3541 {
3542 if (!in_range)
3543 {
3544 in_range = TRUE;
3545 from = i;
3546 }
3547 }
3548 else if (in_range)
3549 in_range = FALSE;
3550 }
3551
3552 if (range_right >= 0)
3553 {
3554 /* Since no data is consumed (see the assert in the beginning
3555 of this function), this space can be reallocated. */
3556 if (common->read_only_data)
3557 SLJIT_FREE(common->read_only_data);
3558
3559 common->read_only_data_size += 256;
3560 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3561 if (common->read_only_data == NULL)
3562 return TRUE;
3563
3564 update_table = (sljit_ub *)common->read_only_data;
3565 common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3566 memset(update_table, IN_UCHARS(range_len), 256);
3567
3568 for (i = 0; i < range_len; i++)
3569 {
3570 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3571 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3572 byte_set_end = byte_set + byte_set[0];
3573 byte_set++;
3574 while (byte_set <= byte_set_end)
3575 {
3576 if (update_table[*byte_set] > IN_UCHARS(i))
3577 update_table[*byte_set] = IN_UCHARS(i);
3578 byte_set++;
3579 }
3580 }
3581 }
3582
3583 offsets[0] = -1;
3584 /* Scan forward. */
3585 for (i = 0; i < max; i++)
3586 if (ones[i] <= 2) {
3587 offsets[0] = i;
3588 break;
3589 }
3590
3591 if (offsets[0] < 0 && range_right < 0)
3592 return FALSE;
3593
3594 if (offsets[0] >= 0)
3595 {
3596 /* Scan backward. */
3597 offsets[1] = -1;
3598 for (i = max - 1; i > offsets[0]; i--)
3599 if (ones[i] <= 2 && i != range_right)
3600 {
3601 offsets[1] = i;
3602 break;
3603 }
3604
3605 /* This case is handled better by fast_forward_first_char. */
3606 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3607 return FALSE;
3608
3609 offsets[2] = -1;
3610 /* We only search for a middle character if there is no range check. */
3611 if (offsets[1] >= 0 && range_right == -1)
3612 {
3613 /* Scan from middle. */
3614 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3615 if (ones[i] <= 2)
3616 {
3617 offsets[2] = i;
3618 break;
3619 }
3620
3621 if (offsets[2] == -1)
3622 {
3623 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3624 if (ones[i] <= 2)
3625 {
3626 offsets[2] = i;
3627 break;
3628 }
3629 }
3630 }
3631
3632 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3633 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3634
3635 chars[0] = chars[offsets[0] << 1];
3636 chars[1] = chars[(offsets[0] << 1) + 1];
3637 if (offsets[2] >= 0)
3638 {
3639 chars[2] = chars[offsets[2] << 1];
3640 chars[3] = chars[(offsets[2] << 1) + 1];
3641 }
3642 if (offsets[1] >= 0)
3643 {
3644 chars[4] = chars[offsets[1] << 1];
3645 chars[5] = chars[(offsets[1] << 1) + 1];
3646 }
3647 }
3648
3649 max -= 1;
3650 if (firstline)
3651 {
3652 SLJIT_ASSERT(common->first_line_end != 0);
3653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3654 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3655 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3656 quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3657 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3658 JUMPHERE(quit);
3659 }
3660 else
3661 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3662
3663 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3664 if (range_right >= 0)
3665 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3666 #endif
3667
3668 start = LABEL();
3669 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3670
3671 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3672
3673 if (range_right >= 0)
3674 {
3675 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3676 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3677 #else
3678 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3679 #endif
3680
3681 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3682 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3683 #else
3684 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3685 #endif
3686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3687 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3688 }
3689
3690 if (offsets[0] >= 0)
3691 {
3692 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3693 if (offsets[1] >= 0)
3694 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3696
3697 if (chars[1] != 0)
3698 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3699 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3700 if (offsets[2] >= 0)
3701 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3702
3703 if (offsets[1] >= 0)
3704 {
3705 if (chars[5] != 0)
3706 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3707 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3708 }
3709
3710 if (offsets[2] >= 0)
3711 {
3712 if (chars[3] != 0)
3713 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3714 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3715 }
3716 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3717 }
3718
3719 JUMPHERE(quit);
3720
3721 if (firstline)
3722 {
3723 if (range_right >= 0)
3724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3725 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3726 if (range_right >= 0)
3727 {
3728 quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3729 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3730 JUMPHERE(quit);
3731 }
3732 }
3733 else
3734 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3735 return TRUE;
3736 }
3737
3738 #undef MAX_N_CHARS
3739 #undef MAX_N_BYTES
3740
3741 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3742 {
3743 DEFINE_COMPILER;
3744 struct sljit_label *start;
3745 struct sljit_jump *quit;
3746 struct sljit_jump *found;
3747 pcre_uchar oc, bit;
3748
3749 if (firstline)
3750 {
3751 SLJIT_ASSERT(common->first_line_end != 0);
3752 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3753 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3754 }
3755
3756 start = LABEL();
3757 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3758 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3759
3760 oc = first_char;
3761 if (caseless)
3762 {
3763 oc = TABLE_GET(first_char, common->fcc, first_char);
3764 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3765 if (first_char > 127 && common->utf)
3766 oc = UCD_OTHERCASE(first_char);
3767 #endif
3768 }
3769 if (first_char == oc)
3770 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3771 else
3772 {
3773 bit = first_char ^ oc;
3774 if (is_powerof2(bit))
3775 {
3776 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3777 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3778 }
3779 else
3780 {
3781 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3782 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3783 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3784 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3785 found = JUMP(SLJIT_C_NOT_ZERO);
3786 }
3787 }
3788
3789 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3790 JUMPTO(SLJIT_JUMP, start);
3791 JUMPHERE(found);
3792 JUMPHERE(quit);
3793
3794 if (firstline)
3795 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3796 }
3797
3798 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3799 {
3800 DEFINE_COMPILER;
3801 struct sljit_label *loop;
3802 struct sljit_jump *lastchar;
3803 struct sljit_jump *firstchar;
3804 struct sljit_jump *quit;
3805 struct sljit_jump *foundcr = NULL;
3806 struct sljit_jump *notfoundnl;
3807 jump_list *newline = NULL;
3808
3809 if (firstline)
3810 {
3811 SLJIT_ASSERT(common->first_line_end != 0);
3812 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3813 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3814 }
3815
3816 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3817 {
3818 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3819 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3820 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3821 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3822 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3823
3824 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3825 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3826 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3827 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3828 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3829 #endif
3830 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3831
3832 loop = LABEL();
3833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3834 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3835 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3837 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3838 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3839
3840 JUMPHERE(quit);
3841 JUMPHERE(firstchar);
3842 JUMPHERE(lastchar);
3843
3844 if (firstline)
3845 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3846 return;
3847 }
3848
3849 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3850 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3851 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3852 skip_char_back(common);
3853
3854 loop = LABEL();
3855 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3856 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3857 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3858 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3859 check_newlinechar(common, common->nltype, &newline, FALSE);
3860 set_jumps(newline, loop);
3861
3862 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3863 {
3864 quit = JUMP(SLJIT_JUMP);
3865 JUMPHERE(foundcr);
3866 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3867 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3868 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3869 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3870 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3871 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3872 #endif
3873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3874 JUMPHERE(notfoundnl);
3875 JUMPHERE(quit);
3876 }
3877 JUMPHERE(lastchar);
3878 JUMPHERE(firstchar);
3879
3880 if (firstline)
3881 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3882 }
3883
3884 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3885
3886 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3887 {
3888 DEFINE_COMPILER;
3889 struct sljit_label *start;
3890 struct sljit_jump *quit;
3891 struct sljit_jump *found = NULL;
3892 jump_list *matches = NULL;
3893 #ifndef COMPILE_PCRE8
3894 struct sljit_jump *jump;
3895 #endif
3896
3897 if (firstline)
3898 {
3899 SLJIT_ASSERT(common->first_line_end != 0);
3900 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3901 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3902 }
3903
3904 start = LABEL();
3905 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3906 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3907 #ifdef SUPPORT_UTF
3908 if (common->utf)
3909 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3910 #endif
3911
3912 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3913 {
3914 #ifndef COMPILE_PCRE8
3915 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3917 JUMPHERE(jump);
3918 #endif
3919 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3920 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3921 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3922 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3923 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3924 found = JUMP(SLJIT_C_NOT_ZERO);
3925 }
3926
3927 #ifdef SUPPORT_UTF
3928 if (common->utf)
3929 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3930 #endif
3931 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3932 #ifdef SUPPORT_UTF
3933 #if defined COMPILE_PCRE8
3934 if (common->utf)
3935 {
3936 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3937 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3938 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3939 }
3940 #elif defined COMPILE_PCRE16
3941 if (common->utf)
3942 {
3943 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3944 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3945 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3946 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3947 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3948 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3949 }
3950 #endif /* COMPILE_PCRE[8|16] */
3951 #endif /* SUPPORT_UTF */
3952 JUMPTO(SLJIT_JUMP, start);
3953 if (found != NULL)
3954 JUMPHERE(found);
3955 if (matches != NULL)
3956 set_jumps(matches, LABEL());
3957 JUMPHERE(quit);
3958
3959 if (firstline)
3960 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3961 }
3962
3963 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3964 {
3965 DEFINE_COMPILER;
3966 struct sljit_label *loop;
3967 struct sljit_jump *toolong;
3968 struct sljit_jump *alreadyfound;
3969 struct sljit_jump *found;
3970 struct sljit_jump *foundoc = NULL;
3971 struct sljit_jump *notfound;
3972 pcre_uint32 oc, bit;
3973
3974 SLJIT_ASSERT(common->req_char_ptr != 0);
3975 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3976 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3977 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3978 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3979
3980 if (has_firstchar)
3981 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3982 else
3983 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3984
3985 loop = LABEL();
3986 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3987
3988 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3989 oc = req_char;
3990 if (caseless)
3991 {
3992 oc = TABLE_GET(req_char, common->fcc, req_char);
3993 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3994 if (req_char > 127 && common->utf)
3995 oc = UCD_OTHERCASE(req_char);
3996 #endif
3997 }
3998 if (req_char == oc)
3999 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4000 else
4001 {
4002 bit = req_char ^ oc;
4003 if (is_powerof2(bit))
4004 {
4005 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4006 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4007 }
4008 else
4009 {
4010 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4011 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4012 }
4013 }
4014 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4015 JUMPTO(SLJIT_JUMP, loop);
4016
4017 JUMPHERE(found);
4018 if (foundoc)
4019 JUMPHERE(foundoc);
4020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
4021 JUMPHERE(alreadyfound);
4022 JUMPHERE(toolong);
4023 return notfound;
4024 }
4025
/* Emits the shared "revert frames" helper, which is entered and left with
the sljit fast call convention (return address in RETURN_ADDR).

The generated code walks a list of machine words starting at STACK_TOP. The
first word of every record decides what is done:
  > 0   it is an offset relative to the local base: the next two words are
        copied to that local and the one after it; the record is three
        words long.
  == 0  end of the list: the helper returns.
  < 0   its negated value is an offset relative to the local base: the next
        word is copied to that local; the record is two words long.
Registers written: TMP1 (walk pointer), TMP2 (record head / destination)
and TMP3 (local base). */
4026 static void do_revertframes(compiler_common *common)
4027 {
4028 DEFINE_COMPILER;
4029 struct sljit_jump *jump;
4030 struct sljit_label *mainloop;
4031
4032 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4033 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4034 GET_LOCAL_BASE(TMP3, 0, 0);
4035
4036 /* Drop frames until we reach STACK_TOP. */
4037 mainloop = LABEL();
/* Load the head word of the record and compare it (signed) with zero. */
4038 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4039 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4040 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
4041
/* Positive head: restore two consecutive locals, then skip three words. */
4042 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4043 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4044 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4045 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4046 JUMPTO(SLJIT_JUMP, mainloop);
4047
/* Head <= 0. The flags of the compare above are still in effect here, so
"less" separates the negative case from the terminating zero. */
4048 JUMPHERE(jump);
4049 jump = JUMP(SLJIT_C_SIG_LESS);
4050 /* End of dropping frames. */
4051 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4052
/* Negative head: restore a single local, then skip two words. */
4053 JUMPHERE(jump);
4054 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4055 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4056 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4057 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4058 JUMPTO(SLJIT_JUMP, mainloop);
4059 }
4060
/* Emits the shared word boundary helper, which is entered and left with the
sljit fast call convention; the return address is kept in LOCALS0.

The generated code computes a 0/1 "is a word character" value for the
character before STR_PTR (stored in LOCALS1; 0 when STR_PTR is at or before
jit_arguments.begin) and for the character at STR_PTR (kept in TMP2; 0 when
the end of the subject is reached). The two values are XOR-ed with the equal
flag requested, and the helper returns. The XOR is non-zero exactly when the
two values differ; how the caller consumes the flag is not visible here. */
4061 static void check_wordboundary(compiler_common *common)
4062 {
4063 DEFINE_COMPILER;
4064 struct sljit_jump *skipread;
4065 jump_list *skipread_list = NULL;
4066 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4067 struct sljit_jump *jump;
4068 #endif
4069
/* The table lookups below shift the ctypes entry right by 4, which relies
on ctype_word being bit 4. */
4070 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4071
4072 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4073 /* Get type of the previous char, and put it to LOCALS1. */
4074 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character exists at the beginning: LOCALS1 stays zero. */
4077 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4078 skip_char_back(common);
4079 check_start_used_ptr(common);
4080 read_char(common);
4081
4082 /* Testing char type. */
4083 #ifdef SUPPORT_UCP
4084 if (common->use_ucp)
4085 {
/* TMP2 is preset to 1 for the underscore. Otherwise the getucd helper is
called (presumably leaving the character type in TMP1) and TMP2 becomes 1
when the type lies in the ucp_Ll..ucp_Lu or in the ucp_Nd..ucp_No range;
each range is tested with one unsigned compare after a subtraction. */
4086 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4087 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4088 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4089 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4090 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4091 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4092 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4093 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4094 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4095 JUMPHERE(jump);
4096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
4097 }
4098 else
4099 #endif
4100 {
/* Table based test: characters above 255 skip the lookup, so LOCALS1
keeps its zero. In the 8 bit library this is only needed in UTF mode. */
4101 #ifndef COMPILE_PCRE8
4102 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4103 #elif defined SUPPORT_UTF
4104 /* Here LOCALS1 has already been zeroed. */
4105 jump = NULL;
4106 if (common->utf)
4107 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4108 #endif /* COMPILE_PCRE8 */
4109 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4110 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4111 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
4113 #ifndef COMPILE_PCRE8
4114 JUMPHERE(jump);
4115 #elif defined SUPPORT_UTF
4116 if (jump != NULL)
4117 JUMPHERE(jump);
4118 #endif /* COMPILE_PCRE8 */
4119 }
4120 JUMPHERE(skipread);
4121
/* Now the character at STR_PTR. TMP2 is preset to zero for the case when
check_str_end() jumps out through skipread_list. */
4122 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4123 check_str_end(common, &skipread_list);
4124 peek_char(common, READ_CHAR_MAX);
4125
4126 /* Testing char type. This is a code duplication. */
4127 #ifdef SUPPORT_UCP
4128 if (common->use_ucp)
4129 {
4130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4131 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4132 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4133 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4134 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4135 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4136 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4137 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4138 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4139 JUMPHERE(jump);
4140 }
4141 else
4142 #endif
4143 {
4144 #ifndef COMPILE_PCRE8
4145 /* TMP2 may be destroyed by peek_char. */
4146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4147 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4148 #elif defined SUPPORT_UTF
4149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4150 jump = NULL;
4151 if (common->utf)
4152 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4153 #endif
4154 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4155 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4156 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4157 #ifndef COMPILE_PCRE8
4158 JUMPHERE(jump);
4159 #elif defined SUPPORT_UTF
4160 if (jump != NULL)
4161 JUMPHERE(jump);
4162 #endif /* COMPILE_PCRE8 */
4163 }
4164 set_jumps(skipread_list, LABEL());
4165
/* Compare the two 0/1 values and return. */
4166 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4167 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4168 }
4169
4170 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4171 {
4172 DEFINE_COMPILER;
4173 int ranges[MAX_RANGE_SIZE];
4174 pcre_uint8 bit, cbit, all;
4175 int i, byte, length = 0;
4176
4177 bit = bits[0] & 0x1;
4178 /* All bits will be zero or one (since bit is zero or one). */
4179 all = -bit;
4180
4181 for (i = 0; i < 256; )
4182 {
4183 byte = i >> 3;
4184 if ((i & 0x7) == 0 && bits[byte] == all)
4185 i += 8;
4186 else
4187 {
4188 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4189 if (cbit != bit)
4190 {
4191 if (length >= MAX_RANGE_SIZE)
4192 return FALSE;
4193 ranges[length] = i;
4194 length++;
4195 bit = cbit;
4196 all = -cbit;
4197 }
4198 i++;
4199 }
4200 }
4201
4202 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4203 {
4204 if (length >= MAX_RANGE_SIZE)
4205 return FALSE;
4206 ranges[length] = 256;
4207 length++;
4208 }
4209
4210 if (length < 0 || length > 4)
4211 return FALSE;
4212
4213 bit = bits[0] & 0x1;
4214 if (invert) bit ^= 0x1;
4215
4216 /* No character is accepted. */
4217 if (length == 0 && bit == 0)
4218 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4219
4220 switch(length)
4221 {
4222 case 0:
4223 /* When bit != 0, all characters are accepted. */
4224 return TRUE;
4225
4226 case 1:
4227 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4228 return TRUE;
4229
4230 case 2:
4231 if (ranges[0] + 1 != ranges[1])
4232 {
4233 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4234 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4235 }
4236 else
4237 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4238 return TRUE;
4239
4240 case 3:
4241 if (bit != 0)
4242 {
4243 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4244 if (ranges[0] + 1 != ranges[1])
4245 {
4246 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4247 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4248 }
4249 else
4250 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4251 return TRUE;
4252 }
4253
4254 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4255 if (ranges[1] + 1 != ranges[2])
4256 {
4257 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4258 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4259 }
4260 else
4261 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4262 return TRUE;
4263
4264 case 4:
4265 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4266 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4267 && is_powerof2(ranges[2] - ranges[0]))
4268 {
4269 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4270 if (ranges[2] + 1 != ranges[3])
4271 {
4272 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4273 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4274 }
4275 else
4276 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4277 return TRUE;
4278 }
4279
4280 if (bit != 0)
4281 {
4282 i = 0;
4283 if (ranges[0] + 1 != ranges[1])
4284 {
4285 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4286 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4287 i = ranges[0];
4288 }
4289 else
4290 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4291
4292 if (ranges[2] + 1 != ranges[3])
4293 {
4294 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4295 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4296 }
4297 else
4298 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4299 return TRUE;
4300 }
4301
4302 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4303 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4304 if (ranges[1] + 1 != ranges[2])
4305 {
4306 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4307 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4308 }
4309 else
4310 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4311 return TRUE;
4312
4313 default:
4314 SLJIT_ASSERT_STOP();
4315 return FALSE;
4316 }
4317 }
4318
/* Emits the shared helper (entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return) that tests the character in TMP1 against the "any
newline" set. The characters compared are 0x0a..0x0d and 0x85, plus 0x2028
and 0x2029 when the wide part below is compiled in (and, in the 8 bit
library, only when common->utf is set). */
4319 static void check_anynewline(compiler_common *common)
4320 {
4321 /* Check whether TMP1 contains a newline character. The outcome of the
compares is accumulated in TMP2 with OP_FLAGS, so TMP2 is destroyed. TMP1 is
modified as well: it is rebased by 0x0a and, in the wide part, OR-ed with 1. */
4322 DEFINE_COMPILER;
4323
4324 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4325
/* After rebasing TMP1 by 0x0a, one unsigned compare covers 0x0a..0x0d. */
4326 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4327 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4328 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Compare with 0x85. The condition is merged into TMP2 by the next OP_FLAGS,
which is either the one inside the wide part or the final one. */
4329 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4330 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4331 #ifdef COMPILE_PCRE8
4332 if (common->utf)
4333 {
4334 #endif
4335 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps the rebased 0x2028 onto the rebased 0x2029, so
a single equality test covers both. */
4336 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4337 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4338 #ifdef COMPILE_PCRE8
4339 }
4340 #endif
4341 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare. SLJIT_SET_E is requested on this final
operation, presumably so that the caller can branch on the flags directly
(TODO confirm against the call sites). */
4342 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4343 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4344 }
4345
/* Emits the shared helper (entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return) that tests the character in TMP1 against a set of
horizontal space characters. The characters compared are 0x09, 0x20 and 0xa0,
plus 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 when the wide
part below is compiled in (and, in the 8 bit library, only when common->utf
is set). */
4346 static void check_hspace(compiler_common *common)
4347 {
4348 /* Check whether TMP1 contains a horizontal space character. The outcome of
the compares is accumulated in TMP2 with OP_FLAGS, so TMP2 is destroyed. In
the wide part TMP1 is modified too (it is rebased by 0x2000). */
4349 DEFINE_COMPILER;
4350
4351 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4352
/* Each equality test is merged into TMP2 by the OP_FLAGS that follows it. */
4353 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4354 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4355 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4356 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4357 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4358 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4359 #ifdef COMPILE_PCRE8
4360 if (common->utf)
4361 {
4362 #endif
4363 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4364 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4365 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4366 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4367 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased by 0x2000: one unsigned compare covers
0x2000..0x200a and the remaining constants are written relative to 0x2000. */
4368 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4369 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4370 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4371 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4372 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4373 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4374 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4375 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4376 #ifdef COMPILE_PCRE8
4377 }
4378 #endif
4379 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare (0xa0 or 0x3000). SLJIT_SET_E is requested
on this final operation, presumably so that the caller can branch on the
flags directly (TODO confirm against the call sites). */
4380 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4381
4382 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4383 }
4384
/* Emits the shared helper (entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return) that tests the character in TMP1 against a set of
vertical space characters. The characters compared are 0x0a..0x0d and 0x85,
plus 0x2028 and 0x2029 when the wide part below is compiled in (and, in the
8 bit library, only when common->utf is set). */
4385 static void check_vspace(compiler_common *common)
4386 {
4387 /* Check whether TMP1 contains a vertical space character. The outcome of
the compares is accumulated in TMP2 with OP_FLAGS, so TMP2 is destroyed. TMP1
is modified as well: it is rebased by 0x0a and, in the wide part, OR-ed
with 1. */
4388 DEFINE_COMPILER;
4389
4390 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4391
/* After rebasing TMP1 by 0x0a, one unsigned compare covers 0x0a..0x0d. */
4392 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4393 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4394 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Compare with 0x85. The condition is merged into TMP2 by the next OP_FLAGS,
which is either the one inside the wide part or the final one. */
4395 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4396 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4397 #ifdef COMPILE_PCRE8
4398 if (common->utf)
4399 {
4400 #endif
/* NOTE(review): this OP_FLAGS also carries SLJIT_SET_E, although the flags
are set again by the SLJIT_SUB two lines below before they are consumed. */
4401 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps the rebased 0x2028 onto the rebased 0x2029, so
a single equality test covers both. */
4402 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4403 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4404 #ifdef COMPILE_PCRE8
4405 }
4406 #endif
4407 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare. SLJIT_SET_E is requested on this final
operation, presumably so that the caller can branch on the flags directly
(TODO confirm against the call sites). */
4408 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4409
4410 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4411 }
4412
/* The compare helpers below use the STR_END and STACK_TOP registers as
scratch registers for the two characters being compared. Each helper saves
their previous contents on entry and restores them before returning. */
4413 #define CHAR1 STR_END
4414 #define CHAR2 STACK_TOP
4415
/* Emits the shared helper that compares two character sequences code unit by
code unit, case sensitively. As used by the code: TMP1 addresses the first
sequence, STR_PTR is first moved back by TMP2 and then addresses the second
one, and TMP2 is the remaining length, decreased by IN_UCHARS(1) for each
compared code unit. The loop tests TMP2 only after a compare, so TMP2 is
presumably non-zero on entry (TODO confirm against the call sites). The loop
ends either when TMP2 reaches zero or at the first difference, in which case
TMP2 is still non-zero. */
4416 static void do_casefulcmp(compiler_common *common)
4417 {
4418 DEFINE_COMPILER;
4419 struct sljit_jump *jump;
4420 struct sljit_label *label;
4421
4422 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4423 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers borrowed as CHAR1 and CHAR2. */
4424 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4425 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one code unit, because the loads in the
loop address memory at offset IN_UCHARS(1). MOVU_UCHAR is presumably the
"move with update" form that also advances the base register - confirm with
the SLJIT documentation. */
4426 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4427 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4428
4429 label = LABEL();
4430 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4431 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first difference. */
4432 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4433 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4434 JUMPTO(SLJIT_C_NOT_ZERO, label);
4435
4436 JUMPHERE(jump);
/* Undo the one code unit bias of STR_PTR and restore the borrowed
registers. */
4437 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4438 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4439 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4440 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4441 }
4442
/* The caseless helper additionally uses the STACK_LIMIT register to hold the
address of the lower casing table. */
4443 #define LCC_TABLE STACK_LIMIT
4444
/* Emits the shared helper that compares two character sequences code unit by
code unit after mapping both sides through the common->lcc table. The
register usage is the same as in do_casefulcmp: TMP1 addresses the first
sequence, STR_PTR is first moved back by TMP2 and then addresses the second
one, and TMP2 is the remaining length, decreased by IN_UCHARS(1) for each
compared code unit. When COMPILE_PCRE8 is not defined, code units above 255
skip the table lookup and are compared unchanged. The loop ends either when
TMP2 reaches zero or at the first difference, in which case TMP2 is still
non-zero. */
4445 static void do_caselesscmp(compiler_common *common)
4446 {
4447 DEFINE_COMPILER;
4448 struct sljit_jump *jump;
4449 struct sljit_label *label;
4450
4451 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4452 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4453
/* Save the three borrowed registers, then load the table address. */
4454 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4457 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one code unit, because the loads in the
loop address memory at offset IN_UCHARS(1). MOVU_UCHAR is presumably the
"move with update" form that also advances the base register - confirm with
the SLJIT documentation. */
4458 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4459 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4460
4461 label = LABEL();
4462 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4463 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each side through the table; values above 255 are left as they are in
the builds where they can occur. */
4464 #ifndef COMPILE_PCRE8
4465 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4466 #endif
4467 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4468 #ifndef COMPILE_PCRE8
4469 JUMPHERE(jump);
4470 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4471 #endif
4472 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4473 #ifndef COMPILE_PCRE8
4474 JUMPHERE(jump);
4475 #endif
/* Leave the loop at the first difference. */
4476 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4477 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4478 JUMPTO(SLJIT_C_NOT_ZERO, label);
4479
4480 JUMPHERE(jump);
/* Undo the one code unit bias of STR_PTR and restore the borrowed
registers. */
4481 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4482 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4483 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4484 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4485 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4486 }
4487
/* The register aliases are private to the two compare helpers above. */
4488 #undef LCC_TABLE
4489 #undef CHAR1
4490 #undef CHAR2
4491
4492 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4493
4494 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4495 {
4496 /* This function would be ineffective to do in JIT level. */
4497 pcre_uint32 c1, c2;
4498 const pcre_uchar *src2 = args->uchar_ptr;
4499 const pcre_uchar *end2 = args->end;
4500 const ucd_record *ur;
4501 const pcre_uint32 *pp;
4502
4503 while (src1 < end1)
4504 {
4505 if (src2 >= end2)
4506 return (pcre_uchar*)1;
4507 GETCHARINC(c1, src1);
4508 GETCHARINC(c2, src2);
4509 ur = GET_UCD(c2);
4510 if (c1 != c2 && c1 != c2 + ur->other_case)
4511 {
4512 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4513 for (;;)
4514 {
4515 if (c1 < *pp) return NULL;
4516 if (c1 == *pp++) break;
4517 }
4518 }
4519 }
4520 return src2;
4521 }
4522
4523 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4524
/* Emits the compare for one character of a literal character sequence.

cc points to the character in the pattern. It is one code unit long unless
common->utf is set and HAS_EXTRALEN(*cc) reports extra code units. Every code
unit is compared with the subject, which is read at negative offsets from
STR_PTR (so STR_PTR presumably already points past the whole sequence - TODO
confirm against the callers). A mismatch jumps to the backtracks list.

When caseless is set and char_has_othercase() is true for the character, the
bit returned by char_get_othercase_bit() is OR-ed into the subject code unit
and into the expected value before they are compared.

The context keeps the state between consecutive calls: length is the part of
the sequence not compared yet (decreased by IN_UCHARS(1) per code unit),
sourcereg is the register that receives the next load (-1 before the first
call), and, on the unaligned read path, c / oc / ucharptr collect the
expected code units and their case masks until a whole chunk can be compared
at once.

Returns cc advanced past the character. */
4525 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4526 compare_context* context, jump_list **backtracks)
4527 {
4528 DEFINE_COMPILER;
4529 unsigned int othercasebit = 0;
4530 pcre_uchar *othercasechar = NULL;
4531 #ifdef SUPPORT_UTF
4532 int utflength;
4533 #endif
4534
4535 if (caseless && char_has_othercase(common, cc))
4536 {
4537 othercasebit = char_get_othercase_bit(common, cc);
4538 SLJIT_ASSERT(othercasebit);
4539 /* Extracting bit difference info. The upper part of the returned value
selects the code unit (relative to cc) that carries the differing bit, the
low 8 bits are the mask itself. */
4540 #if defined COMPILE_PCRE8
4541 othercasechar = cc + (othercasebit >> 8);
4542 othercasebit &= 0xff;
4543 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4544 /* Note that this code only handles characters in the BMP. If there
4545 ever are characters outside the BMP whose othercase differs in only one
4546 bit from itself (there currently are none), this code will need to be
4547 revised for COMPILE_PCRE32. */
4548 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 tells that the mask applies to the upper byte of the code unit. */
4549 if ((othercasebit & 0x100) != 0)
4550 othercasebit = (othercasebit & 0xff) << 8;
4551 else
4552 othercasebit &= 0xff;
4553 #endif /* COMPILE_PCRE[8|16|32] */
4554 }
4555
/* Nothing has been loaded yet: fetch the first chunk of the subject into TMP1
and make TMP2 the target of the next load. */
4556 if (context->sourcereg == -1)
4557 {
4558 #if defined COMPILE_PCRE8
4559 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4560 if (context->length >= 4)
4561 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4562 else if (context->length >= 2)
4563 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4564 else
4565 #endif
4566 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4567 #elif defined COMPILE_PCRE16
4568 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4569 if (context->length >= 4)
4570 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4571 else
4572 #endif
4573 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4574 #elif defined COMPILE_PCRE32
4575 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4576 #endif /* COMPILE_PCRE[8|16|32] */
4577 context->sourcereg = TMP2;
4578 }
4579
/* With UTF support the body below is repeated for every code unit of the
character; without it the body runs exactly once. */
4580 #ifdef SUPPORT_UTF
4581 utflength = 1;
4582 if (common->utf && HAS_EXTRALEN(*cc))
4583 utflength += GET_EXTRALEN(*cc);
4584
4585 do
4586 {
4587 #endif
4588
4589 context->length -= IN_UCHARS(1);
4590 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4591
4592 /* Unaligned read is supported. The expected code unit (with the case bit
forced on) and its case mask are only recorded here; the compare is emitted
when a whole chunk has been collected. */
4593 if (othercasebit != 0 && othercasechar == cc)
4594 {
4595 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4596 context->oc.asuchars[context->ucharptr] = othercasebit;
4597 }
4598 else
4599 {
4600 context->c.asuchars[context->ucharptr] = *cc;
4601 context->oc.asuchars[context->ucharptr] = 0;
4602 }
4603 context->ucharptr++;
4604
/* The chunk is complete when it fills the widest load, when the sequence
ends, or (8 bit only) when two bytes are collected and one byte is left. */
4605 #if defined COMPILE_PCRE8
4606 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4607 #else
4608 if (context->ucharptr >= 2 || context->length == 0)
4609 #endif
4610 {
/* Load the following chunk into the register that is not in use, then switch
sourcereg so that it names the register holding the chunk checked below. */
4611 if (context->length >= 4)
4612 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4613 else if (context->length >= 2)
4614 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4615 #if defined COMPILE_PCRE8
4616 else if (context->length >= 1)
4617 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4618 #endif /* COMPILE_PCRE8 */
4619 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4620
/* ucharptr counts code units, hence the division by sizeof(pcre_uchar) in the
labels. The case mask is OR-ed in only when it is not zero. */
4621 switch(context->ucharptr)
4622 {
4623 case 4 / sizeof(pcre_uchar):
4624 if (context->oc.asint != 0)
4625 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4626 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4627 break;
4628
4629 case 2 / sizeof(pcre_uchar):
4630 if (context->oc.asushort != 0)
4631 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4632 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4633 break;
4634
4635 #ifdef COMPILE_PCRE8
4636 case 1:
4637 if (context->oc.asbyte != 0)
4638 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4639 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4640 break;
4641 #endif
4642
4643 default:
4644 SLJIT_ASSERT_STOP();
4645 break;
4646 }
4647 context->ucharptr = 0;
4648 }
4649
4650 #else
4651
4652 /* Unaligned read is unsupported or in 32 bit mode. Every code unit is
compared on its own: the next one is loaded into the free register first,
then the register holding the current one is checked. */
4653 if (context->length >= 1)
4654 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4655
4656 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4657
4658 if (othercasebit != 0 && othercasechar == cc)
4659 {
4660 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4661 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4662 }
4663 else
4664 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4665
4666 #endif
4667
4668 cc++;
4669 #ifdef SUPPORT_UTF
4670 utflength--;
4671 }
4672 while (utflength > 0);
4673 #endif
4674
4675 return cc;
4676 }
4677
4678 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4679
/* Changes the bias of the character type register. When value differs from
the typeoffset variable of the enclosing function, one instruction is emitted
that adds or subtracts the difference to/from typereg; value is then recorded
in typeoffset. The expansion is wrapped in do { } while (0), so that the macro
followed by a semicolon is a single statement and can be used safely as the
body of an unbraced if / else. The argument is evaluated more than once and
must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4689
/* Changes the bias of the character held in TMP1. When value differs from the
charoffset variable of the enclosing function, one instruction is emitted that
adds or subtracts the difference to/from TMP1; value is then recorded in
charoffset. The expansion is wrapped in do { } while (0), so that the macro
followed by a semicolon is a single statement and can be used safely as the
body of an unbraced if / else. The argument is evaluated more than once and
must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4699
4700 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4701 {
4702 DEFINE_COMPILER;
4703 jump_list *found = NULL;
4704 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4705 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4706 struct sljit_jump *jump = NULL;
4707 pcre_uchar *ccbegin;
4708 int compares, invertcmp, numberofcmps;
4709 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4710 BOOL utf = common->utf;
4711 #endif
4712
4713 #ifdef SUPPORT_UCP
4714 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4715 BOOL charsaved = FALSE;
4716 int typereg = TMP1, scriptreg = TMP1;
4717 const pcre_uint32 *other_cases;
4718 sljit_uw typeoffset;
4719 #endif
4720
4721 /* Scanning the necessary info. */
4722 cc++;
4723 ccbegin = cc;
4724 compares = 0;
4725 if (cc[-1] & XCL_MAP)
4726 {
4727 min = 0;
4728 cc += 32 / sizeof(pcre_uchar);
4729 }
4730
4731 while (*cc != XCL_END)
4732 {
4733 compares++;
4734 if (*cc == XCL_SINGLE)
4735 {
4736 cc ++;
4737 GETCHARINCTEST(c, cc);
4738 if (c > max) max = c;
4739 if (c < min) min = c;
4740 #ifdef SUPPORT_UCP
4741 needschar = TRUE;
4742 #endif
4743 }
4744 else if (*cc == XCL_RANGE)
4745 {
4746 cc ++;
4747 GETCHARINCTEST(c, cc);
4748 if (c < min) min = c;
4749 GETCHARINCTEST(c, cc);
4750 if (c > max) max = c;
4751 #ifdef SUPPORT_UCP
4752 needschar = TRUE;
4753 #endif
4754 }
4755 #ifdef SUPPORT_UCP
4756 else
4757 {
4758 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4759 cc++;
4760 if (*cc == PT_CLIST)
4761 {
4762 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4763 while (*other_cases != NOTACHAR)
4764 {
4765 if (*other_cases > max) max = *other_cases;
4766 if (*other_cases < min) min = *other_cases;
4767 other_cases++;
4768 }
4769 }
4770 else
4771 {
4772 max = READ_CHAR_MAX;
4773 min = 0;
4774 }
4775
4776 switch(*cc)
4777 {
4778 case PT_ANY:
4779 break;
4780
4781 case PT_LAMP:
4782 case PT_GC:
4783 case PT_PC:
4784 case PT_ALNUM:
4785 needstype = TRUE;
4786 break;
4787
4788 case PT_SC:
4789 needsscript = TRUE;
4790 break;
4791
4792 case PT_SPACE:
4793 case PT_PXSPACE:
4794 case PT_WORD:
4795 case PT_PXGRAPH:
4796 case PT_PXPRINT:
4797 case PT_PXPUNCT:
4798 needstype = TRUE;
4799 needschar = TRUE;
4800 break;
4801
4802 case PT_CLIST:
4803 case PT_UCNC:
4804 needschar = TRUE;
4805 break;
4806
4807 default:
4808 SLJIT_ASSERT_STOP();
4809 break;
4810 }
4811 cc += 2;
4812 }
4813 #endif
4814 }
4815
4816 /* We are not necessary in utf mode even in 8 bit mode. */
4817 cc = ccbegin;
4818 detect_partial_match(common, backtracks);
4819 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4820
4821 if ((cc[-1] & XCL_HASPROP) == 0)
4822 {
4823 if ((cc[-1] & XCL_MAP) != 0)
4824 {
4825 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4826 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4827 {
4828 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4829 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4830 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4831 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4832 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4833 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4834 }
4835
4836 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4837 JUMPHERE(jump);
4838
4839 cc += 32 / sizeof(pcre_uchar);
4840 }
4841 else
4842 {
4843 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4844 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4845 }
4846 }
4847 else if ((cc[-1] & XCL_MAP) != 0)
4848 {
4849 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4850 #ifdef SUPPORT_UCP
4851 charsaved = TRUE;
4852 #endif
4853 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4854 {
4855 #ifdef COMPILE_PCRE8
4856 SLJIT_ASSERT(common->utf);
4857 #endif
4858 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4859
4860 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4861 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4862 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4863 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4864 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4865 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4866
4867 JUMPHERE(jump);
4868 }
4869
4870 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4871 cc += 32 / sizeof(pcre_uchar);
4872 }
4873
4874 #ifdef SUPPORT_UCP
4875 /* Simple register allocation. TMP1 is preferred if possible. */
4876 if (needstype || needsscript)
4877 {
4878 if (needschar && !charsaved)
4879 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4880 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4881 if (needschar)
4882 {
4883 if (needstype)
4884 {
4885 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4886 typereg = RETURN_ADDR;
4887 }
4888
4889 if (needsscript)
4890 scriptreg = TMP3;
4891 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4892 }
4893 else if (needstype && needsscript)
4894 scriptreg = TMP3;
4895 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4896
4897 if (needsscript)
4898 {
4899 if (scriptreg == TMP1)
4900 {
4901 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4902 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4903 }
4904 else
4905 {
4906 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4907 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4908 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4909 }
4910 }
4911 }
4912 #endif
4913
4914 /* Generating code. */
4915 charoffset = 0;
4916 numberofcmps = 0;
4917 #ifdef SUPPORT_UCP
4918 typeoffset = 0;
4919 #endif
4920
4921 while (*cc != XCL_END)
4922 {
4923 compares--;
4924 invertcmp = (compares == 0 && list != backtracks);
4925 jump = NULL;
4926
4927 if (*cc == XCL_SINGLE)
4928 {
4929 cc ++;
4930 GETCHARINCTEST(c, cc);
4931
4932 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4933 {
4934 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4935 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4936 numberofcmps++;
4937 }
4938 else if (numberofcmps > 0)
4939 {
4940 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4941 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4942 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4943 numberofcmps = 0;
4944 }
4945 else
4946 {
4947 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4948 numberofcmps = 0;
4949 }
4950 }
4951 else if (*cc == XCL_RANGE)
4952 {
4953 cc ++;
4954 GETCHARINCTEST(c, cc);
4955 SET_CHAR_OFFSET(c);
4956 GETCHARINCTEST(c, cc);
4957
4958 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4959 {
4960 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4961 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4962 numberofcmps++;
4963 }
4964 else if (numberofcmps > 0)
4965 {
4966 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4967 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4968 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4969 numberofcmps = 0;
4970 }
4971 else
4972 {
4973 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4974 numberofcmps = 0;
4975 }
4976 }
4977 #ifdef SUPPORT_UCP
4978 else
4979 {
4980 if (*cc == XCL_NOTPROP)
4981 invertcmp ^= 0x1;
4982 cc++;
4983 switch(*cc)
4984 {
4985 case PT_ANY:
4986 if (list != backtracks)
4987 {
4988 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4989 continue;
4990 }
4991 else if (cc[-1] == XCL_NOTPROP)
4992 continue;
4993 jump = JUMP(SLJIT_JUMP);
4994 break;
4995
4996 case PT_LAMP:
4997 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4998 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4999 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5000 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5001 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5002 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5003 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5004 break;
5005
5006 case PT_GC:
5007 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5008 SET_TYPE_OFFSET(c);
5009 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5010 break;
5011
5012 case PT_PC:
5013 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5014 break;
5015
5016 case PT_SC:
5017 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5018 break;
5019
5020 case PT_SPACE:
5021 case PT_PXSPACE:
5022 SET_CHAR_OFFSET(9);
5023 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5024 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5025
5026 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5027 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5028
5029 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5030 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5031
5032 SET_TYPE_OFFSET(ucp_Zl);
5033 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5034 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5035 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5036 break;
5037
5038 case PT_WORD:
5039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5040 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5041 /* Fall through. */
5042
5043 case PT_ALNUM:
5044 SET_TYPE_OFFSET(ucp_Ll);
5045 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5046 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
5047 SET_TYPE_OFFSET(ucp_Nd);
5048 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5049 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5050 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5051 break;
5052
5053 case PT_CLIST:
5054 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5055
5056 /* At least three characters are required.
5057 Otherwise this case would be handled by the normal code path. */
5058 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5059 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5060
5061 /* Optimizing character pairs, if their difference is power of 2. */
5062 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5063 {
5064 if (charoffset == 0)
5065 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5066 else
5067 {
5068 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5069 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5070 }
5071 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5072 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5073 other_cases += 2;
5074 }
5075 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5076 {
5077 if (charoffset == 0)
5078 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5079 else
5080 {
5081 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5082 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5083 }
5084 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5085 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5086
5087 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5088 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5089
5090 other_cases += 3;
5091 }
5092 else
5093 {
5094 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5095 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5096 }
5097
5098 while (*other_cases != NOTACHAR)
5099 {
5100 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5101 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5102 }
5103 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5104 break;
5105
5106 case PT_UCNC:
5107 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5110 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5111 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5112 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5113
5114 SET_CHAR_OFFSET(0xa0);
5115 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5116 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5117 SET_CHAR_OFFSET(0);
5118 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5119 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5120 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5121 break;
5122
5123 case PT_PXGRAPH:
5124 /* C and Z groups are the farthest two groups. */
5125 SET_TYPE_OFFSET(ucp_Ll);
5126 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5127 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5128
5129 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5130
5131 /* In case of ucp_Cf, we overwrite the result. */
5132 SET_CHAR_OFFSET(0x2066);
5133 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5134 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5135
5136 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5137 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5138
5139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5140 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5141
5142 JUMPHERE(jump);
5143 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5144 break;
5145
5146 case PT_PXPRINT:
5147 /* C and Z groups are the farthest two groups. */
5148 SET_TYPE_OFFSET(ucp_Ll);
5149 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5150 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5151
5152 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5153 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5154
5155 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5156
5157 /* In case of ucp_Cf, we overwrite the result. */
5158 SET_CHAR_OFFSET(0x2066);
5159 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5160 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5161
5162 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5163 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5164
5165 JUMPHERE(jump);
5166 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5167 break;
5168
5169 case PT_PXPUNCT:
5170 SET_TYPE_OFFSET(ucp_Sc);
5171 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5172 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5173
5174 SET_CHAR_OFFSET(0);
5175 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5176 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5177
5178 SET_TYPE_OFFSET(ucp_Pc);
5179 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5180 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5181 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5182 break;
5183 }
5184 cc += 2;
5185 }
5186 #endif
5187
5188 if (jump != NULL)
5189 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5190 }
5191
5192 if (found != NULL)
5193 set_jumps(found, LABEL());
5194 }
5195
5196 #undef SET_TYPE_OFFSET
5197 #undef SET_CHAR_OFFSET
5198
5199 #endif
5200
/* Emits the matching-path code for one opcode. `type` is the opcode and `cc`
points at whatever follows it in the compiled pattern; every emitted failure
jump is appended to `backtracks`. The return value is `cc` advanced past any
operand data the opcode consumed (several cases return `cc` unchanged). If
`type` is not handled by the switch, SLJIT_ASSERT_STOP() is reached. */
5201 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5202 {
5203 DEFINE_COMPILER;
5204 int length;
5205 unsigned int c, oc, bit;
5206 compare_context context;
5207 struct sljit_jump *jump[4];
5208 jump_list *end_list;
5209 #ifdef SUPPORT_UTF
5210 struct sljit_label *label;
5211 #ifdef SUPPORT_UCP
5212 pcre_uchar propdata[5];
5213 #endif
5214 #endif /* SUPPORT_UTF */
5215
5216 switch(type)
5217 {
/* Fails unless STR_PTR equals the 'begin' field of the arguments. */
5218 case OP_SOD:
5219 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5220 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5221 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5222 return cc;
5223
/* Fails unless STR_PTR equals the 'str' field of the arguments. */
5224 case OP_SOM:
5225 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5227 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5228 return cc;
5229
/* Calls the shared wordboundary routine; the zero flag it leaves behind
decides whether to backtrack. */
5230 case OP_NOT_WORD_BOUNDARY:
5231 case OP_WORD_BOUNDARY:
5232 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5233 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5234 return cc;
5235
5236 case OP_NOT_DIGIT:
5237 case OP_DIGIT:
5238 /* Digits are usually 0-9, so it is worth to optimize them. */
5239 detect_partial_match(common, backtracks);
5240 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5241 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5242 read_char7_type(common, type == OP_NOT_DIGIT);
5243 else
5244 #endif
5245 read_char8_type(common, type == OP_NOT_DIGIT);
5246 /* Flip the starting bit in the negative case. */
5247 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5248 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5249 return cc;
5250
/* Same scheme as the digit case, testing the ctype_space bit. */
5251 case OP_NOT_WHITESPACE:
5252 case OP_WHITESPACE:
5253 detect_partial_match(common, backtracks);
5254 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5255 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5256 read_char7_type(common, type == OP_NOT_WHITESPACE);
5257 else
5258 #endif
5259 read_char8_type(common, type == OP_NOT_WHITESPACE);
5260 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5261 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5262 return cc;
5263
/* Same scheme as the digit case, testing the ctype_word bit. */
5264 case OP_NOT_WORDCHAR:
5265 case OP_WORDCHAR:
5266 detect_partial_match(common, backtracks);
5267 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5268 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5269 read_char7_type(common, type == OP_NOT_WORDCHAR);
5270 else
5271 #endif
5272 read_char8_type(common, type == OP_NOT_WORDCHAR);
5273 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5274 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5275 return cc;
5276
5277 case OP_ANY:
5278 detect_partial_match(common, backtracks);
5279 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5280 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5281 {
/* Two-unit fixed newline: backtrack only when the unit just read equals
the high byte of common->newline and the next unit equals the low byte.
Reaching the end of input after the first unit skips the second test. */
5282 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5283 end_list = NULL;
5284 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5285 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5286 else
5287 check_str_end(common, &end_list);
5288
5289 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5290 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5291 set_jumps(end_list, LABEL());
5292 JUMPHERE(jump[0]);
5293 }
5294 else
5295 check_newlinechar(common, common->nltype, backtracks, TRUE);
5296 return cc;
5297
5298 case OP_ALLANY:
5299 detect_partial_match(common, backtracks);
5300 #ifdef SUPPORT_UTF
5301 if (common->utf)
5302 {
/* Advance one code unit, then add the extra length implied by the first
unit: a utf8_table4 lookup in 8-bit mode, or one more unit in 16-bit mode
when (unit & 0xfc00) == 0xd800. */
5303 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5304 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5305 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5306 #if defined COMPILE_PCRE8
5307 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5308 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5309 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5310 #elif defined COMPILE_PCRE16
5311 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5312 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5313 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5314 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5315 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5316 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5317 #endif
5318 JUMPHERE(jump[0]);
5319 #endif /* COMPILE_PCRE[8|16] */
5320 return cc;
5321 }
5322 #endif
5323 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5324 return cc;
5325
/* Always advances by exactly one code unit. */
5326 case OP_ANYBYTE:
5327 detect_partial_match(common, backtracks);
5328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5329 return cc;
5330
5331 #ifdef SUPPORT_UTF
5332 #ifdef SUPPORT_UCP
/* Wraps the two operand units in a one-item property list and hands it to
compile_xclass_matchingpath. */
5333 case OP_NOTPROP:
5334 case OP_PROP:
5335 propdata[0] = XCL_HASPROP;
5336 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5337 propdata[2] = cc[0];
5338 propdata[3] = cc[1];
5339 propdata[4] = XCL_END;
5340 compile_xclass_matchingpath(common, propdata, backtracks);
5341 return cc + 2;
5342 #endif
5343 #endif
5344
/* After a CR, a directly following NL is consumed as well when one is
present; any other character goes through check_newlinechar. */
5345 case OP_ANYNL:
5346 detect_partial_match(common, backtracks);
5347 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5348 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5349 /* We don't need to handle soft partial matching case. */
5350 end_list = NULL;
5351 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5352 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5353 else
5354 check_str_end(common, &end_list);
5355 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5356 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5357 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5358 jump[2] = JUMP(SLJIT_JUMP);
5359 JUMPHERE(jump[0]);
5360 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5361 set_jumps(end_list, LABEL());
5362 JUMPHERE(jump[1]);
5363 JUMPHERE(jump[2]);
5364 return cc;
5365
/* Reads a character and calls the shared hspace routine; the zero flag it
leaves behind decides whether to backtrack. */
5366 case OP_NOT_HSPACE:
5367 case OP_HSPACE:
5368 detect_partial_match(common, backtracks);
5369 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5370 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5371 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5372 return cc;
5373
/* Same scheme as the hspace case, using the shared vspace routine. */
5374 case OP_NOT_VSPACE:
5375 case OP_VSPACE:
5376 detect_partial_match(common, backtracks);
5377 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5378 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5379 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5380 return cc;
5381
5382 #ifdef SUPPORT_UCP
5383 case OP_EXTUNI:
5384 detect_partial_match(common, backtracks);
5385 read_char(common);
5386 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5388 /* Optimize register allocation: use a real register. */
5389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5390 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5391
/* Loop: STACK_TOP holds the gbprop of the previous character and TMP2 the
gbprop of the one just read. Continue while bit TMP2 is set in the
ucp_gbtable entry indexed by STACK_TOP. TMP3 remembers STR_PTR before each
read so the last read can be undone when the loop stops. */
5392 label = LABEL();
5393 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5394 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5395 read_char(common);
5396 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5398 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5399
5400 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5401 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5402 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5403 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5404 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5405 JUMPTO(SLJIT_C_NOT_ZERO, label);
5406
5407 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5408 JUMPHERE(jump[0]);
5409 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5410
5411 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5412 {
5413 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5414 /* Since we successfully read a char above, partial matching must occur. */
5415 check_partial(common, TRUE);
5416 JUMPHERE(jump[0]);
5417 }
5418 return cc;
5419 #endif
5420
5421 case OP_EODN:
5422 /* Requires rather complex checks. */
5423 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5424 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5425 {
5426 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5427 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5428 if (common->mode == JIT_COMPILE)
5429 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5430 else
5431 {
5432 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5433 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5434 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5436 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5437 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5438 check_partial(common, TRUE);
5439 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5440 JUMPHERE(jump[1]);
5441 }
5442 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5443 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5444 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5445 }
5446 else if (common->nltype == NLTYPE_FIXED)
5447 {
5448 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5449 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5451 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5452 }
5453 else
5454 {
5455 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5456 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5457 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5458 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5459 jump[2] = JUMP(SLJIT_C_GREATER);
5460 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5461 /* Equal. */
5462 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5463 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5464 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5465
5466 JUMPHERE(jump[1]);
5467 if (common->nltype == NLTYPE_ANYCRLF)
5468 {
5469 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5470 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5471 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5472 }
5473 else
5474 {
/* STR_PTR is saved in LOCALS1 around the read and restored afterwards. */
5475 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5476 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5477 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5478 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5479 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5480 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5481 }
5482 JUMPHERE(jump[2]);
5483 JUMPHERE(jump[3]);
5484 }
5485 JUMPHERE(jump[0]);
5486 check_partial(common, FALSE);
5487 return cc;
5488
/* Fails while STR_PTR is still below STR_END. */
5489 case OP_EOD:
5490 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5491 check_partial(common, FALSE);
5492 return cc;
5493
/* Fails if STR_PTR is past 'begin' or the 'notbol' argument is non-zero. */
5494 case OP_CIRC:
5495 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5497 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5498 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5499 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5500 return cc;
5501
/* At 'begin' only the 'notbol' flag is checked. Past 'begin' the subject
must not be at STR_END and the preceding input must be a newline. */
5502 case OP_CIRCM:
5503 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5504 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5505 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5506 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5507 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5508 jump[0] = JUMP(SLJIT_JUMP);
5509 JUMPHERE(jump[1]);
5510
5511 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5512 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5513 {
5514 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5515 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5516 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5517 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5518 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5519 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5520 }
5521 else
5522 {
5523 skip_char_back(common);
5524 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5525 check_newlinechar(common, common->nltype, backtracks, FALSE);
5526 }
5527 JUMPHERE(jump[0]);
5528 return cc;
5529
/* Fails if the 'noteol' argument is non-zero. Otherwise emits the OP_EODN
code, or a plain end-of-subject test when common->endonly is set. */
5530 case OP_DOLL:
5531 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5532 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5533 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5534
5535 if (!common->endonly)
5536 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5537 else
5538 {
5539 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5540 check_partial(common, FALSE);
5541 }
5542 return cc;
5543
/* At STR_END only the 'noteol' flag is checked. Before STR_END the upcoming
input must be a newline. */
5544 case OP_DOLLM:
5545 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5546 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5547 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5548 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5549 check_partial(common, FALSE);
5550 jump[0] = JUMP(SLJIT_JUMP);
5551 JUMPHERE(jump[1]);
5552
5553 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5554 {
5555 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5556 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5557 if (common->mode == JIT_COMPILE)
5558 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5559 else
5560 {
5561 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5562 /* STR_PTR = STR_END - IN_UCHARS(1) */
5563 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5564 check_partial(common, TRUE);
5565 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5566 JUMPHERE(jump[1]);
5567 }
5568
5569 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5570 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5571 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5572 }
5573 else
5574 {
5575 peek_char(common, common->nlmax);
5576 check_newlinechar(common, common->nltype, backtracks, FALSE);
5577 }
5578 JUMPHERE(jump[0]);
5579 return cc;
5580
5581 case OP_CHAR:
5582 case OP_CHARI:
5583 length = 1;
5584 #ifdef SUPPORT_UTF
5585 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5586 #endif
/* In JIT_COMPILE mode, when the character is caseful, has no other case, or
char_get_othercase_bit() returns non-zero, advance STR_PTR by the whole
length up front and compare through byte_sequence_compare. */
5587 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5588 {
5589 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5590 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5591
5592 context.length = IN_UCHARS(length);
5593 context.sourcereg = -1;
5594 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5595 context.ucharptr = 0;
5596 #endif
5597 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5598 }
5599
5600 detect_partial_match(common, backtracks);
5601 #ifdef SUPPORT_UTF
5602 if (common->utf)
5603 {
5604 GETCHAR(c, cc);
5605 }
5606 else
5607 #endif
5608 c = *cc;
5609
5610 if (type == OP_CHAR || !char_has_othercase(common, cc))
5611 {
5612 read_char_range(common, c, c, FALSE);
5613 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5614 return cc + length;
5615 }
5616 oc = char_othercase(common, c);
5617 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5618 bit = c ^ oc;
/* When the two cases differ in exactly one bit, force that bit on in the
subject character and compare once against c with the same bit set. */
5619 if (is_powerof2(bit))
5620 {
5621 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5622 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5623 return cc + length;
5624 }
5625 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5626 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5627 JUMPHERE(jump[0]);
5628 return cc + length;
5629
5630 case OP_NOT:
5631 case OP_NOTI:
5632 detect_partial_match(common, backtracks);
5633 length = 1;
5634 #ifdef SUPPORT_UTF
5635 if (common->utf)
5636 {
5637 #ifdef COMPILE_PCRE8
5638 c = *cc;
5639 if (c < 128)
5640 {
/* The excluded character is a single byte, so only the first subject byte
needs comparing before the rest of the subject character is skipped. */
5641 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5642 if (type == OP_NOT || !char_has_othercase(common, cc))
5643 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5644 else
5645 {
5646 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5647 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5648 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5649 }
5650 /* Skip the variable-length character. */
5651 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5652 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5654 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5655 JUMPHERE(jump[0]);
5656 return cc + 1;
5657 }
5658 else
5659 #endif /* COMPILE_PCRE8 */
5660 {
5661 GETCHARLEN(c, cc, length);
5662 }
5663 }
5664 else
5665 #endif /* SUPPORT_UTF */
5666 c = *cc;
5667
5668 if (type == OP_NOT || !char_has_othercase(common, cc))
5669 {
5670 read_char_range(common, c, c, TRUE);
5671 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5672 }
5673 else
5674 {
5675 oc = char_othercase(common, c);
5676 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5677 bit = c ^ oc;
5678 if (is_powerof2(bit))
5679 {
5680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5681 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5682 }
5683 else
5684 {
5685 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5686 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5687 }
5688 }
5689 return cc + length;
5690
5691 case OP_CLASS:
5692 case OP_NCLASS:
5693 detect_partial_match(common, backtracks);
5694
5695 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5696 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5697 read_char_range(common, 0, bit, type == OP_NCLASS);
5698 #else
5699 read_char_range(common, 0, 255, type == OP_NCLASS);
5700 #endif
5701
/* If check_class_ranges() reports success, nothing more is emitted here. */
5702 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5703 return cc + 32 / sizeof(pcre_uchar);
5704
/* Characters above the bitmap limit fail for OP_CLASS; for OP_NCLASS they
jump over the bitmap test below. */
5705 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5706 jump[0] = NULL;
5707 if (common->utf)
5708 {
5709 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5710 if (type == OP_CLASS)
5711 {
5712 add_jump(compiler, backtracks, jump[0]);
5713 jump[0] = NULL;
5714 }
5715 }
5716 #elif !defined COMPILE_PCRE8
5717 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5718 if (type == OP_CLASS)
5719 {
5720 add_jump(compiler, backtracks, jump[0]);
5721 jump[0] = NULL;
5722 }
5723 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5724
/* Test bit (TMP1 & 7) of byte (TMP1 >> 3) in the bitmap located at cc. */
5725 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5726 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5727 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5728 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5729 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5730 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5731
5732 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5733 if (jump[0] != NULL)
5734 JUMPHERE(jump[0]);
5735 #endif
5736
5737 return cc + 32 / sizeof(pcre_uchar);
5738
5739 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5740 case OP_XCLASS:
5741 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5742 return cc + GET(cc, 0) - 1;
5743 #endif
5744
/* Moves STR_PTR backwards: `length` calls to skip_char_back in UTF mode,
otherwise IN_UCHARS(length) in one step. Fails rather than moving before
the 'begin' argument. */
5745 case OP_REVERSE:
5746 length = GET(cc, 0);
5747 if (length == 0)
5748 return cc + LINK_SIZE;
5749 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5750 #ifdef SUPPORT_UTF
5751 if (common->utf)
5752 {
5753 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5754 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5755 label = LABEL();
5756 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5757 skip_char_back(common);
5758 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5759 JUMPTO(SLJIT_C_NOT_ZERO, label);
5760 }
5761 else
5762 #endif
5763 {
5764 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5765 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5766 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5767 }
5768 check_start_used_ptr(common);
5769 return cc + LINK_SIZE;
5770 }
/* Reached only when `type` matched none of the cases above. */
5771 SLJIT_ASSERT_STOP();
5772 return cc;
5773 }
5774
5775 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5776 {
5777 /* This function consumes at least one input character. */
5778 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5779 DEFINE_COMPILER;
5780 pcre_uchar *ccbegin = cc;
5781 compare_context context;
5782 int size;
5783
5784 context.length = 0;
5785 do
5786 {
5787 if (cc >= ccend)
5788 break;
5789
5790 if (*cc == OP_CHAR)
5791 {
5792 size = 1;
5793 #ifdef SUPPORT_UTF
5794 if (common->utf && HAS_EXTRALEN(cc[1]))
5795 size += GET_EXTRALEN(cc[1]);
5796 #endif
5797 }
5798 else if (*cc == OP_CHARI)
5799 {
5800 size = 1;
5801 #ifdef SUPPORT_UTF
5802 if (common->utf)
5803 {
5804 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5805 size = 0;
5806 else if (HAS_EXTRALEN(cc[1]))
5807 size += GET_EXTRALEN(cc[1]);
5808 }
5809 else
5810 #endif
5811 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5812 size = 0;
5813 }
5814 else
5815 size = 0;
5816
5817 cc += 1 + size;
5818 context.length += IN_UCHARS(size);
5819 }
5820 while (size > 0 && context.length <= 128);
5821
5822 cc = ccbegin;
5823 if (context.length > 0)
5824 {
5825 /* We have a fixed-length byte sequence. */
5826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5827 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5828
5829 context.sourcereg = -1;
5830 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5831 context.ucharptr = 0;
5832 #endif
5833 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5834 return cc;
5835 }
5836
5837 /* A non-fixed length character will be checked if length == 0. */
5838 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5839 }
5840
5841 /* Forward declarations. */
5842 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5843 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5844
/* Allocates `size` bytes through sljit_alloc_memory(), zero-fills them and
pushes the new record onto the parent's backtrack list: its `prev` takes the
old parent->top, its `cc` is set to `ccstart`, and parent->top then points at
it. The macro relies on `compiler`, `parent` and `backtrack` being in scope
where it is used. If the compiler reports an error after the allocation, the
enclosing function executes `return error;`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5857
/* Variant of the backtrack-push macro for functions returning void. It
allocates `size` bytes through sljit_alloc_memory(), zero-fills them, sets
the record's `prev` to the old parent->top and its `cc` to `ccstart`, and
makes parent->top point at it. The macro relies on `compiler`, `parent` and
`backtrack` being in scope where it is used. If the compiler reports an error
after the allocation, the enclosing function executes a plain `return;`. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5870
/* Casts the `backtrack` variable that is in scope at the point of use to a
pointer to `type`. */
5871 #define BACKTRACK_AS(type) ((type *)backtrack)
5872
5873 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5874 {
5875 /* The OVECTOR offset goes to TMP2. */
5876 DEFINE_COMPILER;
5877 int count = GET2(cc, 1 + IMM2_SIZE);
5878 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5879 unsigned int offset;
5880 jump_list *found = NULL;
5881
5882 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5883
5884 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5885
5886 count--;
5887 while (count-- > 0)
5888 {
5889 offset = GET2(slot, 0) << 1;
5890 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5891 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5892 slot += common->name_entry_size;
5893 }
5894
5895 offset = GET2(slot, 0) << 1;
5896 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5897 if (backtracks != NULL && !common->jscript_compat)
5898 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5899
5900 set_jumps(found, LABEL());
5901 }
5902
/* Generates the code which matches a back reference once at the current
subject position (STR_PTR).

  common      the compiler state
  cc          points to the back reference opcode; for OP_REF / OP_REFI the
              capture index is read from the opcode, for any other opcode
              TMP2 must already hold the address of the start / end pointer
              pair of the selected capture
  backtracks  jump list which receives every no-match jump
  withchecks  when TRUE, code is emitted to detect an unset capture (only for
              OP_REF / OP_REFI and only if jscript_compat is off) and an
              empty capture
  emptyfail   only used when the empty capture jump was emitted: when TRUE
              an empty capture is a failure, otherwise the jump lands at the
              end of the generated code

On the success path STR_PTR has been advanced past the matched text. */
5903 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5904 {
5905 DEFINE_COMPILER;
5906 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5907 int offset = 0;
5908 struct sljit_jump *jump = NULL;
5909 struct sljit_jump *partial;
5910 struct sljit_jump *nopartial;
5911
/* TMP1 = start of the captured text. */
5912 if (ref)
5913 {
5914 offset = GET2(cc, 1) << 1;
5915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5916 /* OVECTOR(1) contains the "string begin - 1" constant. */
5917 if (withchecks && !common->jscript_compat)
5918 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5919 }
5920 else
5921 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5922
5923 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* NOTE(review): when ref is FALSE, *cc is neither OP_REF nor OP_REFI, so this
UTF caseless path is never taken for the other opcodes, and the
(*cc == OP_REF ? casefulcmp : caselesscmp) selections below always pick
caselesscmp for them. Confirm that this is intended. */
5924 if (common->utf && *cc == OP_REFI)
5925 {
5926 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
/* TMP2 = end of the captured text. */
5927 if (ref)
5928 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5929 else
5930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5931
/* Start equals end: the captured text is empty. */
5932 if (withchecks)
5933 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5934
5935 /* Needed to save important temporary registers. */
5936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* The second scratch register (asserted above to be STACK_TOP) is reused
for the ARGUMENTS pointer, and the current STR_PTR is passed through
its uchar_ptr field. */
5937 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5938 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5939 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5940 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are both treated as a failure. In the partial
matching modes the value 1 additionally runs check_partial before
backtracking. Any other value becomes the new STR_PTR. */
5941 if (common->mode == JIT_COMPILE)
5942 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5943 else
5944 {
5945 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5946 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5947 check_partial(common, FALSE);
5948 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5949 JUMPHERE(nopartial);
5950 }
5951 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5952 }
5953 else
5954 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5955 {
/* TMP2 = end - start of the captured text, i.e. its length. The flags
are set so that the zero length case can be tested right after. */
5956 if (ref)
5957 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5958 else
5959 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5960
5961 if (withchecks)
5962 jump = JUMP(SLJIT_C_ZERO);
5963
/* STR_PTR is moved to the position where a successful match would end.
If that is beyond STR_END the captured text does not fit into the rest
of the subject. */
5964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5965 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5966 if (common->mode == JIT_COMPILE)
5967 add_jump(compiler, backtracks, partial);
5968
/* Call the shared comparison routine. A non-zero TMP2 afterwards is
treated as a mismatch. */
5969 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5970 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5971
5972 if (common->mode != JIT_COMPILE)
5973 {
/* Partial matching modes: the full comparison above succeeded, so skip
the handling of the truncated case. */
5974 nopartial = JUMP(SLJIT_JUMP);
5975 JUMPHERE(partial);
/* Reduce the length to what is still available before STR_END. */
5976 /* TMP2 -= STR_END - STR_PTR */
5977 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5978 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing is left in the subject: no comparison is needed. */
5979 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5980 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5981 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5982 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5983 JUMPHERE(partial);
/* The available part matched: run check_partial, then backtrack. */
5984 check_partial(common, FALSE);
5985 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5986 JUMPHERE(nopartial);
5987 }
5988 }
5989
/* Resolve the empty capture jump (only emitted when withchecks is TRUE). */
5990 if (jump != NULL)
5991 {
5992 if (emptyfail)
5993 add_jump(compiler, backtracks, jump);
5994 else
5995 JUMPHERE(jump);
5996 }
5997 }
5998
/* Generates the matching path of a back reference which is followed by a
repeat operator (OP_CRSTAR ... OP_CRMINRANGE).

  common  the compiler state
  cc      points to the back reference opcode
  parent  not referenced directly in this function; presumably consumed by
          the PUSH_BACKTRACK macro (confirm against its definition)

Returns the address of the byte code which follows the repeat operator.
The variable named backtrack is never assigned here, so PUSH_BACKTRACK
presumably sets it up (and may leave the function early on failure). */
5999 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6000 {
6001 DEFINE_COMPILER;
6002 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6003 backtrack_common *backtrack;
6004 pcre_uchar type;
6005 int offset = 0;
6006 struct sljit_label *label;
6007 struct sljit_jump *zerolength;
6008 struct sljit_jump *jump = NULL;
6009 pcre_uchar *ccbegin = cc;
6010 int min = 0, max = 0;
6011 BOOL minimize;
6012
6013 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6014
/* For the opcodes other than OP_REF / OP_REFI cc is advanced by an extra
IMM2_SIZE, so the repeat operator is found at the same relative position
for both forms. ccbegin keeps the address of the reference opcode. */
6015 if (ref)
6016 offset = GET2(cc, 1) << 1;
6017 else
6018 cc += IMM2_SIZE;
6019 type = cc[1 + IMM2_SIZE];
6020
/* The minimizing variants are the odd repeat opcodes. */
6021 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6022 minimize = (type & 0x1) != 0;
/* Decode the repeat limits. A max of 0 means that there is no upper limit. */
6023 switch(type)
6024 {
6025 case OP_CRSTAR:
6026 case OP_CRMINSTAR:
6027 min = 0;
6028 max = 0;
6029 cc += 1 + IMM2_SIZE + 1;
6030 break;
6031 case OP_CRPLUS:
6032 case OP_CRMINPLUS:
6033 min = 1;
6034 max = 0;
6035 cc += 1 + IMM2_SIZE + 1;
6036 break;
6037 case OP_CRQUERY:
6038 case OP_CRMINQUERY:
6039 min = 0;
6040 max = 1;
6041 cc += 1 + IMM2_SIZE + 1;
6042 break;
6043 case OP_CRRANGE:
6044 case OP_CRMINRANGE:
6045 min = GET2(cc, 1 + IMM2_SIZE + 1);
6046 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6047 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6048 break;
6049 default:
6050 SLJIT_ASSERT_STOP();
6051 break;
6052 }
6053
/* Greedy repeat: match as many times as allowed, pushing the STR_PTR of
each optional iteration onto the stack. */
6054 if (!minimize)
6055 {
6056 if (min == 0)
6057 {
/* Two slots: STACK(0) receives the starting STR_PTR, STACK(1) a zero. */
6058 allocate_stack(common, 2);
6059 if (ref)
6060 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6061 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6062 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6063 /* Temporary release of STR_PTR. */
6064 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6065 /* Handles both invalid and empty cases. Since the minimum repeat,
6066 is zero the invalid case is basically the same as an empty case. */
6067 if (ref)
6068 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6069 else
6070 {
/* compile_dnref_search leaves the address of the selected capture pair
in TMP2; it is saved in POSSESSIVE1 because the loop below reloads it
before every iteration. */
6071 compile_dnref_search(common, ccbegin, NULL);
6072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6073 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6074 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6075 }
6076 /* Restore if not zero length. */
6077 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6078 }
6079 else
6080 {
/* At least one match is required: a single zero slot is pushed, and an
unset capture is a failure. */
6081 allocate_stack(common, 1);
6082 if (ref)
6083 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6085 if (ref)
6086 {
6087 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6088 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6089 }
6090 else
6091 {
6092 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6093 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6095 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6096 }
6097 }
6098
/* POSSESSIVE0 is used as the iteration counter when a count is needed. */
6099 if (min > 1 || max > 1)
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6101
/* Start of the repeat loop. The empty and unset cases were handled above,
so the single match is generated without checks. */
6102 label = LABEL();
6103 if (!ref)
6104 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6105 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6106
6107 if (min > 1 || max > 1)
6108 {
6109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6110 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing a backtrack position. */
6112 if (min > 1)
6113 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
6114 if (max > 1)
6115 {
/* Leave the loop when the maximum is reached, otherwise push the
current STR_PTR and try another iteration. */
6116 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6117 allocate_stack(common, 1);
6118 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6119 JUMPTO(SLJIT_JUMP, label);
6120 JUMPHERE(jump);
6121 }
6122 }
6123
6124 if (max == 0)
6125 {
6126 /* Includes min > 1 case as well. */
6127 allocate_stack(common, 1);
6128 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6129 JUMPTO(SLJIT_JUMP, label);
6130 }
6131
6132 JUMPHERE(zerolength);
6133 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6134
6135 count_match(common);
6136 return cc;
6137 }
6138
/* Minimizing repeat. Stack layout: STACK(0) holds the STR_PTR (0 at first),
STACK(1) the repeat counter (not initialized for OP_CRMINSTAR), and, for
the opcodes other than OP_REF / OP_REFI, STACK(2) the address of the
selected capture pair. */
6139 allocate_stack(common, ref ? 2 : 3);
6140 if (ref)
6141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6142 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6143 if (type != OP_CRMINSTAR)
6144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6145
6146 if (min == 0)
6147 {
6148 /* Handles both invalid and empty cases. Since the minimum repeat,
6149 is zero the invalid case is basically the same as an empty case. */
6150 if (ref)
6151 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6152 else
6153 {
6154 compile_dnref_search(common, ccbegin, NULL);
6155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6156 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6157 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6158 }
6159 /* Length is non-zero, we can match real repeats. */
6160 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* Zero repeats are tried first: skip the code generated below. */
6161 jump = JUMP(SLJIT_JUMP);
6162 }
6163 else
6164 {
6165 if (ref)
6166 {
6167 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6168 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6169 }
6170 else
6171 {
6172 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6175 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6176 }
6177 }
6178
/* The code from this label matches one more repeat. The label is recorded
in the iterator_backtrack structure and is also the target of the
min > 1 loop below. */
6179 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6180 if (max > 0)
6181 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6182
6183 if (!ref)
6184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6185 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6187
/* Update the repeat counter in STACK(1). While it is below min the
next repeat is matched immediately. */
6188 if (min > 1)
6189 {
6190 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6191 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6193 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6194 }
6195 else if (max > 0)
6196 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6197
6198 if (jump != NULL)
6199 JUMPHERE(jump);
6200 JUMPHERE(zerolength);
6201
6202 count_match(common);
6203 return cc;
6204 }
6205
/* Generates the matching path of a recursion item whose target is found at
common->start + GET(cc, 1).

  common  the compiler state
  cc      points to the recursion opcode
  parent  not referenced directly in this function; presumably consumed by
          the PUSH_BACKTRACK macro (confirm against its definition)

Returns cc + 1 + LINK_SIZE, or NULL when the compiler reports an error after
allocating a new recurse_entry. The variable named backtrack is never
assigned here, so PUSH_BACKTRACK presumably sets it up. */
6206 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6207 {
6208 DEFINE_COMPILER;
6209 backtrack_common *backtrack;
6210 recurse_entry *entry = common->entries;
6211 recurse_entry *prev = NULL;
6212 sljit_sw start = GET(cc, 1);
6213 pcre_uchar *start_cc;
6214 BOOL needs_control_head;
6215
6216 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6217
/* When get_framesize reports no_stack for the target, its body is compiled
in place instead of being called. */
6218 /* Inlining simple patterns. */
6219 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6220 {
6221 start_cc = common->start + start;
6222 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6223 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6224 return cc + 1 + LINK_SIZE;
6225 }
6226
/* Look for an existing entry of this target in the common->entries list.
prev ends up at the last visited entry. */
6227 while (entry != NULL)
6228 {
6229 if (entry->start == start)
6230 break;
6231 prev = entry;
6232 entry = entry->next;
6233 }
6234
/* First reference to this target: append a new entry to the list. */
6235 if (entry == NULL)
6236 {
6237 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6238 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6239 return NULL;
6240 entry->next = NULL;
6241 entry->entry = NULL;
6242 entry->calls = NULL;
6243 entry->start = start;
6244
6245 if (prev != NULL)
6246 prev->next = entry;
6247 else
6248 common->entries = entry;
6249 }
6250
/* Push OVECTOR(0) and / or the value stored at mark_ptr onto the stack
before the call, depending on which of them the pattern uses. */
6251 if (common->has_set_som && common->mark_ptr != 0)
6252 {
6253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6254 allocate_stack(common, 2);
6255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6258 }
6259 else if (common->has_set_som || common->mark_ptr != 0)
6260 {
6261 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6262 allocate_stack(common, 1);
6263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6264 }
6265
/* Call the target. If its label is not known yet, the jump is queued in
entry->calls. */
6266 if (entry->entry == NULL)
6267 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6268 else
6269 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6270 /* Leave if the match has failed (TMP1 is zero after the call). */
6271 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6272 return cc + 1 + LINK_SIZE;
6273 }
6274
/* Helper called from the generated code (see compile_callout_matchingpath)
which completes a callout block and invokes the user callout function.

  arguments      the JIT argument block of the current match
  callout_block  partially filled callout block; on entry its subject and
                 offset_vector fields hold raw pointers, which are converted
                 to offsets here before the fields get their final values
  jit_ovector    the offset vector of the generated code, which stores
                 pointers rather than offsets

Returns 0 when no callout function is installed, otherwise the value
returned by the callout function. */
6275 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6276 {
6277 const pcre_uchar *begin = arguments->begin;
6278 int *offset_vector = arguments->offsets;
6279 int offset_count = arguments->offset_count;
6280 int i;
6281
6282 if (PUBL(callout) == NULL)
6283 return 0;
6284
6285 callout_block->version = 2;
6286 callout_block->callout_data = arguments->callout_data;
6287
/* The two pointer fields read below must be consumed before they are
overwritten further down, so the order of these statements matters. */
6288 /* Offsets in subject. */
6289 callout_block->subject_length = arguments->end - arguments->begin;
6290 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6291 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6292 #if defined COMPILE_PCRE8
6293 callout_block->subject = (PCRE_SPTR)begin;
6294 #elif defined COMPILE_PCRE16
6295 callout_block->subject = (PCRE_SPTR16)begin;
6296 #elif defined COMPILE_PCRE32
6297 callout_block->subject = (PCRE_SPTR32)begin;
6298 #endif
6299
6300 /* Convert and copy the JIT offset vector to the offset_vector array. */
6301 callout_block->capture_top = 0;
6302 callout_block->offset_vector = offset_vector;
/* Pair 0 is skipped here and set to -1 below. capture_top records the
index of the last pair whose start pointer is not below begin.
NOTE(review): offset_vector[i + 1] is written for every i below
offset_count, so this assumes offset_count is even -- confirm against
the caller. */
6303 for (i = 2; i < offset_count; i += 2)
6304 {
6305 offset_vector[i] = jit_ovector[i] - begin;
6306 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6307 if (jit_ovector[i] >= begin)
6308 callout_block->capture_top = i;
6309 }
6310
/* Convert the vector index to a pair number, plus one. */
6311 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6312 if (offset_count > 0)
6313 offset_vector[0] = -1;
6314 if (offset_count > 1)
6315 offset_vector[1] = -1;
6316 return (*PUBL(callout))(callout_block);
6317 }
6318
6319 /* The size of the callout block, rounded up to a multiple of 8 bytes. */
6320 #define CALLOUT_ARG_SIZE \
6321 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6322
/* The offset of a callout block field, relative to a base which is
CALLOUT_ARG_SIZE bytes above the start of the block. It is used with
STACK_TOP as the base below. */
6323 #define CALLOUT_ARG_OFFSET(arg) \
6324 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6325
/* Generates the matching path of a callout item: a callout block is built
on the stack, do_callout is called, and its return value is checked.

  common  the compiler state
  cc      points to the callout opcode; cc[1] is the callout number,
          followed by two LINK_SIZE sized values (pattern position and
          next item length)
  parent  not referenced directly in this function; presumably consumed by
          the PUSH_BACKTRACK macro (confirm against its definition)

Returns cc + 2 + 2 * LINK_SIZE. The variable named backtrack is never
assigned here, so PUSH_BACKTRACK presumably sets it up. */
6326 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6327 {
6328 DEFINE_COMPILER;
6329 backtrack_common *backtrack;
6330
6331 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6332
/* Reserve the stack space of the callout block. */
6333 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6334
6335 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
/* TMP1 keeps the ARGUMENTS pointer. It is read for mark_ptr below and is
presumably also the first argument of the do_callout call -- confirm
that TMP1 is the first scratch register. */
6336 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6337 SLJIT_ASSERT(common->capture_last_ptr != 0);
6338 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6339 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6340
6341 /* These pointer sized fields temporarily store internal variables:
STR_PTR goes into offset_vector and OVECTOR(0) into subject. do_callout
converts them to offsets and then overwrites both fields. */
6342 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6345
/* The mark field is copied from the argument block when the pattern uses
marks, otherwise it is set to zero. */
6346 if (common->mark_ptr != 0)
6347 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6348 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6349 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6351
6352 /* Needed to save important temporary registers. */
6353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: the address of the callout block. Third argument: the
address of the local area starting at OVECTOR_START. */
6354 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6355 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6356 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout returns an int; this move presumably widens the result to
a full machine word before it is compared. */
6357 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6358 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6359 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6360
6361 /* Check return value: a positive value backtracks, a negative value
jumps to the forced quit path, and zero continues with the next item. */
6362 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6363 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6364 if (common->forced_quit_label == NULL)
6365 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6366 else
6367 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6368 return cc + 2 + 2 * LINK_SIZE;
6369 }
6370
6371 #undef CALLOUT_ARG_SIZE
6372 #undef CALLOUT_ARG_OFFSET
6373
6374 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6375 {
6376 DEFINE_COMPILER;
6377 int framesize;
6378 int extrasize;
6379 BOOL needs_control_head;
6380 int private_data_ptr;
6381 backtrack_common altbacktrack;
6382 pcre_uchar *ccbegin;
6383 pcre_uchar opcode;
6384 pcre_uchar bra = OP_BRA;
6385 jump_list *tmp = NULL;
6386 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6387 jump_list **found;
6388 /* Saving previous accept variables. */
6389 BOOL save_local_exit = common->local_exit;
6390 BOOL save_positive_assert = common->positive_assert;
6391 then_trap_backtrack *save_then_trap = common->then_trap;
6392 struct sljit_label *save_quit_label = common->quit_label;
6393 struct sljit_label *save_accept_label = common->accept_label;
6394 jump_list *save_quit = common->quit;
6395 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6396 jump_list *save_accept = common->accept;
6397 struct sljit_jump *jump;
6398 struct sljit_jump *brajump = NULL;
6399
6400 /* Assert captures then. */
6401 common->then_trap = NULL;
6402
6403 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6404 {
6405 SLJIT_ASSERT(!conditional);
6406 bra = *cc;
6407 cc++;
6408 }
6409 private_data_ptr = PRIVATE_DATA(cc);
6410 SLJIT_ASSERT(private_data_ptr != 0);
6411 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6412 backtrack->framesize = framesize;
6413 backtrack->private_data_ptr = private_data_ptr;
6414 opcode = *cc;
6415 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6416 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6417 ccbegin = cc;
6418 cc += GET(cc, 1);
6419
6420 if (bra == OP_BRAMINZERO)
6421 {
6422 /* This is a braminzero backtrack path. */
6423 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6424 free_stack(common, 1);
6425 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6426 }
6427
6428 if (framesize < 0)
6429 {
6430 extrasize = needs_control_head ? 2 : 1;
6431 if (framesize == no_frame)
6432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6433 allocate_stack(common, extrasize);
6434 if (needs_control_head)
6435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6437 if (needs_control_head)
6438 {
6439 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6441 }
6442 }
6443 else
6444 {
6445 extrasize = needs_control_head ? 3 : 2;
6446 allocate_stack(common, framesize + extrasize);
6447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6448 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6450 if (needs_control_head)
6451 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6453 if (needs_control_head)
6454 {
6455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6458 }
6459 else
6460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6461 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6462 }
6463
6464 memset(&altbacktrack, 0, sizeof(backtrack_common));
6465 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6466 {
6467 /* Negative assert is stronger than positive assert. */
6468 common->local_exit = TRUE;
6469 common->quit_label = NULL;
6470 common->quit = NULL;
6471 common->positive_assert = FALSE;
6472 }
6473 else
6474 common->positive_assert = TRUE;
6475 common->positive_assert_quit = NULL;
6476
6477 while (1)
6478 {
6479 common->accept_label = NULL;
6480 common->accept = NULL;
6481 altbacktrack.top = NULL;
6482 altbacktrack.topbacktracks = NULL;
6483
6484 if (*ccbegin == OP_ALT)
6485 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6486
6487 altbacktrack.cc = ccbegin;
6488 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6489 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6490 {
6491 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6492 {
6493 common->local_exit = save_local_exit;
6494 common->quit_label = save_quit_label;
6495 common->quit = save_quit;
6496 }
6497 common->positive_assert = save_positive_assert;
6498 common->then_trap = save_then_trap;
6499 common->accept_label = save_accept_label;
6500 common->positive_assert_quit = save_positive_assert_quit;
6501 common->accept = save_accept;
6502 return NULL;
6503 }
6504 common->accept_label = LABEL();
6505 if (common->accept != NULL)
6506 set_jumps(common->accept, common->accept_label);
6507
6508 /* Reset stack. */
6509 if (framesize < 0)
6510 {
6511 if (framesize == no_frame)
6512 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6513 else
6514 free_stack(common, extrasize);
6515 if (needs_control_head)
6516 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6517 }
6518 else
6519 {
6520 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6521 {
6522 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6523 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6524 if (needs_control_head)
6525 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6526 }
6527 else
6528 {
6529 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6530 if (needs_control_head)
6531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6532 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6533 }
6534 }
6535
6536 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6537 {
6538 /* We know that STR_PTR was stored on the top of the stack. */
6539 if (conditional)
6540 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6541 else if (bra == OP_BRAZERO)
6542 {
6543 if (framesize < 0)
6544 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6545 else
6546 {
6547 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6548 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6550 }
6551 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6552 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6553 }
6554 else if (framesize >= 0)
6555 {
6556 /* For OP_BRA and OP_BRAMINZERO. */
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6558 }
6559 }
6560 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6561
6562 compile_backtrackingpath(common, altbacktrack.top);
6563 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6564 {
6565 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6566 {
6567 common->local_exit = save_local_exit;
6568 common->quit_label = save_quit_label;
6569 common->quit = save_quit;
6570 }
6571 common->positive_assert = save_positive_assert;
6572 common->then_trap = save_then_trap;
6573 common->accept_label = save_accept_label;
6574 common->positive_assert_quit = save_positive_assert_quit;
6575 common->accept = save_accept;
6576 return NULL;
6577 }
6578 set_jumps(altbacktrack.topbacktracks, LABEL());
6579
6580 if (*cc != OP_ALT)
6581 break;
6582
6583 ccbegin = cc;
6584 cc += GET(cc, 1);
6585 }
6586
6587 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6588 {
6589 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6590 /* Makes the check less complicated below. */
6591 common->positive_assert_quit = common->quit;
6592 }
6593
6594 /* None of them matched. */
6595 if (common->positive_assert_quit != NULL)
6596 {
6597 jump = JUMP(SLJIT_JUMP);
6598 set_jumps(common->positive_assert_quit, LABEL());
6599 SLJIT_ASSERT(framesize != no_stack);
6600 if (framesize < 0)
6601 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6602 else
6603 {
6604 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6605 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6606 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6607 }
6608 JUMPHERE(jump);
6609 }
6610
6611 if (needs_control_head)
6612 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6613
6614 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6615 {
6616 /* Assert is failed. */
6617 if (conditional || bra == OP_BRAZERO)
6618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6619
6620 if (framesize < 0)
6621 {
6622 /* The topmost item should be 0. */
6623 if (bra == OP_BRAZERO)
6624 {
6625 if (extrasize == 2)
6626 free_stack(common, 1);
6627 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6628 }
6629 else
6630 free_stack(common, extrasize);
6631 }
6632 else
6633 {
6634 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6635 /* The topmost item should be 0. */
6636 if (bra == OP_BRAZERO)
6637 {
6638 free_stack(common, framesize + extrasize - 1);
6639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6640 }
6641 else
6642 free_stack(common, framesize + extrasize);
6643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6644 }
6645 jump = JUMP(SLJIT_JUMP);
6646 if (bra != OP_BRAZERO)
6647 add_jump(compiler, target, jump);
6648
6649 /* Assert is successful. */
6650 set_jumps(tmp, LABEL());
6651 if (framesize < 0)
6652 {
6653 /* We know that STR_PTR was stored on the top of the stack. */
6654 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6655 /* Keep the STR_PTR on the top of the stack. */
6656 if (bra == OP_BRAZERO)
6657 {
6658 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6659 if (extrasize == 2)
6660 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6661 }
6662 else if (bra == OP_BRAMINZERO)
6663 {
6664 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6666 }
6667 }
6668 else
6669 {
6670 if (bra == OP_BRA)
6671 {
6672 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6673 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6674 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6675 }
6676 else
6677 {
6678 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6679 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6680 if (extrasize == 2)
6681 {
6682 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6683 if (bra == OP_BRAMINZERO)
6684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6685 }
6686 else
6687 {
6688 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6689 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6690 }
6691 }
6692 }
6693
6694 if (bra == OP_BRAZERO)
6695 {
6696 backtrack->matchingpath = LABEL();
6697 SET_LABEL(jump, backtrack->matchingpath);
6698 }
6699 else if (bra == OP_BRAMINZERO)
6700 {
6701 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6702 JUMPHERE(brajump);
6703 if (framesize >= 0)
6704 {
6705 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6706 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6708 }
6709 set_jumps(backtrack->common.topbacktracks, LABEL());
6710 }
6711 }
6712 else
6713 {
6714 /* AssertNot is successful. */
6715 if (framesize < 0)
6716 {
6717 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6718 if (bra != OP_BRA)
6719 {
6720 if (extrasize == 2)
6721 free_stack(common, 1);
6722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6723 }
6724 else
6725 free_stack(common, extrasize);
6726 }
6727 else
6728 {
6729 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6731 /* The topmost item should be 0. */
6732 if (bra != OP_BRA)
6733 {
6734 free_stack(common, framesize + extrasize - 1);
6735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6736 }
6737 else
6738 free_stack(common, framesize + extrasize);
6739 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6740 }
6741
6742 if (bra == OP_BRAZERO)
6743 backtrack->matchingpath = LABEL();
6744 else if (bra == OP_BRAMINZERO)
6745 {
6746 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6747 JUMPHERE(brajump);
6748 }
6749
6750 if (bra != OP_BRA)
6751 {
6752 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6753 set_jumps(backtrack->common.topbacktracks, LABEL());
6754 backtrack->common.topbacktracks = NULL;
6755 }
6756 }
6757
6758 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)