/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1474 - (show annotations)
Thu Apr 24 06:43:50 2014 UTC (5 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 335103 byte(s)
Add missing match limit test to JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for a compiled matcher: jit_function takes a pointer to this
structure. Pointer members are grouped first and the integer members follow,
as the inline comments below state.
NOTE(review): the meaning of the individual fields is not visible in this part
of the file; confirm it against the code that fills in the structure. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds the generated code for a pattern. The first three members are arrays
with one entry per JIT compile mode (JIT_NUMBER_OF_COMPILE_MODES entries each).
NOTE(review): going by the names, these hold the executable function, its
read-only data and its size; callback/userdata, top_bracket and limit_match
are presumably consulted when a match is started. Confirm at the users. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps.
NOTE(review): presumably used to collect jumps whose target label is only
known later; confirm at the users. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list that pairs a jump (start) with a label (quit).
NOTE(review): presumably describes out-of-line stub code, entered through
start and left through quit; confirm at the users. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list that pairs a label with a pointer to a word.
NOTE(review): presumably the address of the label is written to *addr once
code generation has finished; confirm at the users. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Two negative marker values.
NOTE(review): presumably special frame size results (several framesize fields
in this file are documented as "less than 0 if not needed"); confirm against
the code that computes frame sizes. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* NOTE(review): presumably tags for the entries of the control verb chain
(see control_head_ptr in compiler_common); confirm at the users. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Signature of a generated matcher: it takes a pointer to the argument block
and returns an int, using the SLJIT_CALL calling convention. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants (every *_backtrack structure in this file starts with
a backtrack_common member named "common"). */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack record that extends backtrack_common (which must stay the first
member). NOTE(review): by its name it is used for assertions; confirm at the
users. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack record for brackets; extends backtrack_common (which must stay
the first member). The members of the union u share storage, so only the one
that belongs to the current opcode is meaningful. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack record that extends backtrack_common (which must stay the first
member). NOTE(review): by its name it is used for the possessive bracket
opcodes (the *POS variants); confirm at the users. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack record that extends backtrack_common with a single label.
NOTE(review): by its name it is used for OP_BRAMINZERO; confirm at the
users. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack record for iterators; extends backtrack_common (which must stay
the first member) with the label of the next iteration. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Node of a singly linked list with one entry per recursion target.
NOTE(review): "start" is stored as an sljit_sw; presumably it is an offset
relative to the first opcode rather than a pointer — confirm at the users. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack record that extends backtrack_common with one flag.
NOTE(review): inlined_pattern presumably tells whether the recursed pattern
was compiled in place instead of being called; confirm at the users. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* Pseudo opcode numbered one past the regular opcodes (OP_TABLE_LENGTH), so
it cannot collide with an opcode found in a compiled pattern. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack record for the OP_THEN_TRAP pseudo opcode; extends
backtrack_common (which must stay the first member). */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by all code generator functions for one compilation. Most
helpers in this file receive it as their "common" argument, and several macros
(OVECTOR, OVECTOR_PRIV, PRIVATE_DATA, DEFINE_COMPILER) read its members
through a variable of that name. */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
359
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* NOTE(review): the original comment read "TRUE, when minlength is greater
than 0", which contradicts the field name; check_opcode_types also sets this
to TRUE when it finds OP_SET_SOM. Presumably TRUE means that an empty match is
possible. Confirm where the structure is initialised. */
366 BOOL might_be_empty;
367 /* \K is found in the pattern. */
368 BOOL has_set_som;
369 /* (*SKIP:arg) is found in the pattern. */
370 BOOL has_skip_arg;
371 /* (*THEN) is found in the pattern. */
372 BOOL has_then;
373 /* Needs to know the start position anytime. */
374 BOOL needs_start_ptr;
375 /* Currently in recurse or negative assert. */
376 BOOL local_exit;
377 /* Currently in a positive assert. */
378 BOOL positive_assert;
379 /* Newline control. */
380 int nltype;
381 pcre_uint32 nlmax;
382 pcre_uint32 nlmin;
383 int newline;
384 int bsr_nltype;
385 pcre_uint32 bsr_nlmax;
386 pcre_uint32 bsr_nlmin;
387 /* Dollar endonly. */
388 int endonly;
389 /* Tables. */
390 sljit_sw ctypes;
391 /* Named capturing brackets. */
392 pcre_uchar *name_table;
393 sljit_sw name_count;
394 sljit_sw name_entry_size;
395
396 /* Labels and jump lists. */
397 struct sljit_label *partialmatchlabel;
398 struct sljit_label *quit_label;
399 struct sljit_label *forced_quit_label;
400 struct sljit_label *accept_label;
401 struct sljit_label *ff_newline_shortcut;
402 stub_list *stubs;
403 label_addr_list *label_addrs;
404 recurse_entry *entries;
405 recurse_entry *currententry;
406 jump_list *partialmatch;
407 jump_list *quit;
408 jump_list *positive_assert_quit;
409 jump_list *forced_quit;
410 jump_list *accept;
411 jump_list *calllimit;
412 jump_list *stackalloc;
413 jump_list *revertframes;
414 jump_list *wordboundary;
415 jump_list *anynewline;
416 jump_list *hspace;
417 jump_list *vspace;
418 jump_list *casefulcmp;
419 jump_list *caselesscmp;
420 jump_list *reset_match;
421 BOOL jscript_compat;
/* The members below exist only in builds with UTF / UCP support. */
422 #ifdef SUPPORT_UTF
423 BOOL utf;
424 #ifdef SUPPORT_UCP
425 BOOL use_ucp;
426 #endif
427 #ifdef COMPILE_PCRE8
428 jump_list *utfreadchar;
429 jump_list *utfreadchar16;
430 jump_list *utfreadtype8;
431 #endif
432 #endif /* SUPPORT_UTF */
433 #ifdef SUPPORT_UCP
434 jump_list *getucd;
435 #endif
436 } compiler_common;
437
438 /* For byte_sequence_compare. */
439
/* Working state for byte_sequence_compare. The ucharptr member and the two
unions exist only when SLJIT_UNALIGNED is enabled. Each union overlays an
integer view, a 16-bit view and (8-bit library only) a byte view on an array
of code units: 4 entries for the 8-bit library, 2 for the 16-bit and 1 for the
32-bit library.
NOTE(review): c and oc presumably hold the characters to compare and their
other-case variants; confirm against byte_sequence_compare. */
440 typedef struct compare_context {
441 int length;
442 int sourcereg;
443 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
444 int ucharptr;
445 union {
446 sljit_si asint;
447 sljit_uh asushort;
448 #if defined COMPILE_PCRE8
449 sljit_ub asbyte;
450 sljit_ub asuchars[4];
451 #elif defined COMPILE_PCRE16
452 sljit_uh asuchars[2];
453 #elif defined COMPILE_PCRE32
454 sljit_ui asuchars[1];
455 #endif
456 } c;
457 union {
458 sljit_si asint;
459 sljit_uh asushort;
460 #if defined COMPILE_PCRE8
461 sljit_ub asbyte;
462 sljit_ub asuchars[4];
463 #elif defined COMPILE_PCRE16
464 sljit_uh asuchars[2];
465 #elif defined COMPILE_PCRE32
466 sljit_ui asuchars[1];
467 #endif
468 } oc;
469 #endif
470 } compare_context;
471
472 /* Undefine sljit macros. CMP is defined again further down in this file as a
shortcut for sljit_emit_cmp. */
473 #undef CMP
474
475 /* Used for accessing the elements of the stack. STACK(i) is a negative byte
offset: element 0 is at -sizeof(sljit_sw), element 1 at -2 * sizeof(sljit_sw),
and so on. */
476 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
477
/* Symbolic names for the sljit registers used by the generated code. */
478 #define TMP1 SLJIT_SCRATCH_REG1
479 #define TMP2 SLJIT_SCRATCH_REG3
480 #define TMP3 SLJIT_TEMPORARY_EREG2
481 #define STR_PTR SLJIT_SAVED_REG1
482 #define STR_END SLJIT_SAVED_REG2
483 #define STACK_TOP SLJIT_SCRATCH_REG2
484 #define STACK_LIMIT SLJIT_SAVED_REG3
485 #define ARGUMENTS SLJIT_SAVED_EREG1
486 #define COUNT_MATCH SLJIT_SAVED_EREG2
487 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
488
489 /* Local space layout. The first five machine words have fixed roles; the
macros below are byte offsets. */
490 /* These two locals can be used by the current opcode. */
491 #define LOCALS0 (0 * sizeof(sljit_sw))
492 #define LOCALS1 (1 * sizeof(sljit_sw))
493 /* Two local variables for possessive quantifiers (char1 cannot use them). */
494 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
495 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
496 /* Max limit of recursions. */
497 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
498 /* The output vector is stored on the stack, and contains pointers
499 to characters. The vector data is divided into two groups: the first
500 group contains the start / end character pointers, and the second is
501 the start pointers when the end of the capturing group has not yet been
reached. The macros below need a variable named "common" in scope. */
502 #define OVECTOR_START (common->ovector_start)
503 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
504 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc; the table is indexed by
the distance of cc from the first opcode (common->start). */
505 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
506
/* Move opcodes for one code unit of the library being built: unsigned byte
for the 8-bit, unsigned half word for the 16-bit and unsigned int for the
32-bit library. MOVU_UCHAR selects the SLJIT_MOVU_* variant of the same
width. */
507 #if defined COMPILE_PCRE8
508 #define MOV_UCHAR SLJIT_MOV_UB
509 #define MOVU_UCHAR SLJIT_MOVU_UB
510 #elif defined COMPILE_PCRE16
511 #define MOV_UCHAR SLJIT_MOV_UH
512 #define MOVU_UCHAR SLJIT_MOVU_UH
513 #elif defined COMPILE_PCRE32
514 #define MOV_UCHAR SLJIT_MOV_UI
515 #define MOVU_UCHAR SLJIT_MOVU_UI
516 #else
517 #error Unsupported compiling mode
518 #endif
519
520 /* Shortcuts. DEFINE_COMPILER declares a local variable named "compiler"
taken from common->compiler; every other macro in this group expects that
variable to be in scope and forwards to the sljit function of similar name. */
521 #define DEFINE_COMPILER \
522 struct sljit_compiler *compiler = common->compiler
523 #define OP1(op, dst, dstw, src, srcw) \
524 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
525 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
526 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
527 #define LABEL() \
528 sljit_emit_label(compiler)
529 #define JUMP(type) \
530 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
531 #define JUMPTO(type, label) \
532 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
533 #define JUMPHERE(jump) \
534 sljit_set_label((jump), sljit_emit_label(compiler))
535 #define SET_LABEL(jump, label) \
536 sljit_set_label((jump), (label))
537 #define CMP(type, src1, src1w, src2, src2w) \
538 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
539 #define CMPTO(type, src1, src1w, src2, src2w, label) \
540 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
541 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
542 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
543 #define GET_LOCAL_BASE(dst, dstw, offset) \
544 sljit_get_local_base(compiler, (dst), (dstw), (offset))
545
/* NOTE(review): presumably the "no upper limit" value passed to the character
reading helpers; its users are outside this part of the file — confirm. */
546 #define READ_CHAR_MAX 0x7fffffff
547
548 static pcre_uchar* bracketend(pcre_uchar* cc)
549 {
550 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
551 do cc += GET(cc, 1); while (*cc == OP_ALT);
552 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
553 cc += 1 + LINK_SIZE;
554 return cc;
555 }
556
557 static int no_alternatives(pcre_uchar* cc)
558 {
559 int count = 0;
560 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
561 do
562 {
563 cc += GET(cc, 1);
564 count++;
565 }
566 while (*cc == OP_ALT);
567 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
568 return count;
569 }
570
/* Number of set bits in each 4-bit value: entry i holds the population count
of i. Declared const because it is a fixed lookup table. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
575
576 /* Functions which might need modification for all new supported opcodes:
577 next_opcode
578 check_opcode_types
579 set_private_data_ptrs
580 get_framesize
581 init_frame
582 get_private_data_copy_length
583 copy_private_data
584 compile_matchingpath
585 compile_backtrackingpath
586 */
587
/* Returns the address of the opcode that follows the one at cc.
Returns NULL when the opcode cannot be handled: OP_ANYBYTE in UTF mode, or an
opcode that is not listed below (which also triggers SLJIT_ASSERT_STOP).
The common argument is only read in builds with SUPPORT_UTF. */
588 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
589 {
590 SLJIT_UNUSED_ARG(common);
591 switch(*cc)
592 {
/* Opcodes whose length is taken directly from the OP_lengths table. */
593 case OP_SOD:
594 case OP_SOM:
595 case OP_SET_SOM:
596 case OP_NOT_WORD_BOUNDARY:
597 case OP_WORD_BOUNDARY:
598 case OP_NOT_DIGIT:
599 case OP_DIGIT:
600 case OP_NOT_WHITESPACE:
601 case OP_WHITESPACE:
602 case OP_NOT_WORDCHAR:
603 case OP_WORDCHAR:
604 case OP_ANY:
605 case OP_ALLANY:
606 case OP_NOTPROP:
607 case OP_PROP:
608 case OP_ANYNL:
609 case OP_NOT_HSPACE:
610 case OP_HSPACE:
611 case OP_NOT_VSPACE:
612 case OP_VSPACE:
613 case OP_EXTUNI:
614 case OP_EODN:
615 case OP_EOD:
616 case OP_CIRC:
617 case OP_CIRCM:
618 case OP_DOLL:
619 case OP_DOLLM:
620 case OP_CRSTAR:
621 case OP_CRMINSTAR:
622 case OP_CRPLUS:
623 case OP_CRMINPLUS:
624 case OP_CRQUERY:
625 case OP_CRMINQUERY:
626 case OP_CRRANGE:
627 case OP_CRMINRANGE:
628 case OP_CRPOSSTAR:
629 case OP_CRPOSPLUS:
630 case OP_CRPOSQUERY:
631 case OP_CRPOSRANGE:
632 case OP_CLASS:
633 case OP_NCLASS:
634 case OP_REF:
635 case OP_REFI:
636 case OP_DNREF:
637 case OP_DNREFI:
638 case OP_RECURSE:
639 case OP_CALLOUT:
640 case OP_ALT:
641 case OP_KET:
642 case OP_KETRMAX:
643 case OP_KETRMIN:
644 case OP_KETRPOS:
645 case OP_REVERSE:
646 case OP_ASSERT:
647 case OP_ASSERT_NOT:
648 case OP_ASSERTBACK:
649 case OP_ASSERTBACK_NOT:
650 case OP_ONCE:
651 case OP_ONCE_NC:
652 case OP_BRA:
653 case OP_BRAPOS:
654 case OP_CBRA:
655 case OP_CBRAPOS:
656 case OP_COND:
657 case OP_SBRA:
658 case OP_SBRAPOS:
659 case OP_SCBRA:
660 case OP_SCBRAPOS:
661 case OP_SCOND:
662 case OP_CREF:
663 case OP_DNCREF:
664 case OP_RREF:
665 case OP_DNRREF:
666 case OP_DEF:
667 case OP_BRAZERO:
668 case OP_BRAMINZERO:
669 case OP_BRAPOSZERO:
670 case OP_PRUNE:
671 case OP_SKIP:
672 case OP_THEN:
673 case OP_COMMIT:
674 case OP_FAIL:
675 case OP_ACCEPT:
676 case OP_ASSERT_ACCEPT:
677 case OP_CLOSE:
678 case OP_SKIPZERO:
679 return cc + PRIV(OP_lengths)[*cc];
680
/* Opcodes that end with a character: the table length is used first, and in
UTF mode the extra code units of a multi-unit character are added. */
681 case OP_CHAR:
682 case OP_CHARI:
683 case OP_NOT:
684 case OP_NOTI:
685 case OP_STAR:
686 case OP_MINSTAR:
687 case OP_PLUS:
688 case OP_MINPLUS:
689 case OP_QUERY:
690 case OP_MINQUERY:
691 case OP_UPTO:
692 case OP_MINUPTO:
693 case OP_EXACT:
694 case OP_POSSTAR:
695 case OP_POSPLUS:
696 case OP_POSQUERY:
697 case OP_POSUPTO:
698 case OP_STARI:
699 case OP_MINSTARI:
700 case OP_PLUSI:
701 case OP_MINPLUSI:
702 case OP_QUERYI:
703 case OP_MINQUERYI:
704 case OP_UPTOI:
705 case OP_MINUPTOI:
706 case OP_EXACTI:
707 case OP_POSSTARI:
708 case OP_POSPLUSI:
709 case OP_POSQUERYI:
710 case OP_POSUPTOI:
711 case OP_NOTSTAR:
712 case OP_NOTMINSTAR:
713 case OP_NOTPLUS:
714 case OP_NOTMINPLUS:
715 case OP_NOTQUERY:
716 case OP_NOTMINQUERY:
717 case OP_NOTUPTO:
718 case OP_NOTMINUPTO:
719 case OP_NOTEXACT:
720 case OP_NOTPOSSTAR:
721 case OP_NOTPOSPLUS:
722 case OP_NOTPOSQUERY:
723 case OP_NOTPOSUPTO:
724 case OP_NOTSTARI:
725 case OP_NOTMINSTARI:
726 case OP_NOTPLUSI:
727 case OP_NOTMINPLUSI:
728 case OP_NOTQUERYI:
729 case OP_NOTMINQUERYI:
730 case OP_NOTUPTOI:
731 case OP_NOTMINUPTOI:
732 case OP_NOTEXACTI:
733 case OP_NOTPOSSTARI:
734 case OP_NOTPOSPLUSI:
735 case OP_NOTPOSQUERYI:
736 case OP_NOTPOSUPTOI:
737 cc += PRIV(OP_lengths)[*cc];
738 #ifdef SUPPORT_UTF
739 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
740 #endif
741 return cc;
742
743 /* Special cases. */
/* The result is one unit short of the table length.
NOTE(review): presumably so that the character type opcode that follows the
repeat is visited as an opcode of its own — confirm. */
744 case OP_TYPESTAR:
745 case OP_TYPEMINSTAR:
746 case OP_TYPEPLUS:
747 case OP_TYPEMINPLUS:
748 case OP_TYPEQUERY:
749 case OP_TYPEMINQUERY:
750 case OP_TYPEUPTO:
751 case OP_TYPEMINUPTO:
752 case OP_TYPEEXACT:
753 case OP_TYPEPOSSTAR:
754 case OP_TYPEPOSPLUS:
755 case OP_TYPEPOSQUERY:
756 case OP_TYPEPOSUPTO:
757 return cc + PRIV(OP_lengths)[*cc] - 1;
758
/* Rejected (NULL) in UTF mode, a single unit otherwise. */
759 case OP_ANYBYTE:
760 #ifdef SUPPORT_UTF
761 if (common->utf) return NULL;
762 #endif
763 return cc + 1;
764
/* The length of an extended class is stored in its link field. */
765 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
766 case OP_XCLASS:
767 return cc + GET(cc, 1);
768 #endif
769
/* Variable length: three fixed units plus the count stored in cc[1].
NOTE(review): presumably opcode, length unit, the name itself and its
terminator — confirm against the opcode layout. */
770 case OP_MARK:
771 case OP_PRUNE_ARG:
772 case OP_SKIP_ARG:
773 case OP_THEN_ARG:
774 return cc + 1 + 2 + cc[1];
775
776 default:
777 /* All opcodes are supported now! */
778 SLJIT_ASSERT_STOP();
779 return NULL;
780 }
781 }
782
783 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 {
785 int count;
786 pcre_uchar *slot;
787
788 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
789 while (cc < ccend)
790 {
791 switch(*cc)
792 {
793 case OP_SET_SOM:
794 common->has_set_som = TRUE;
795 common->might_be_empty = TRUE;
796 cc += 1;
797 break;
798
799 case OP_REF:
800 case OP_REFI:
801 common->optimized_cbracket[GET2(cc, 1)] = 0;
802 cc += 1 + IMM2_SIZE;
803 break;
804
805 case OP_BRA:
806 case OP_CBRA:
807 case OP_SBRA:
808 case OP_SCBRA:
809 count = no_alternatives(cc);
810 if (count > 4)
811 common->read_only_data_size += count * sizeof(sljit_uw);
812 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
813 break;
814
815 case OP_CBRAPOS:
816 case OP_SCBRAPOS:
817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818 cc += 1 + LINK_SIZE + IMM2_SIZE;
819 break;
820
821 case OP_COND:
822 case OP_SCOND:
823 /* Only AUTO_CALLOUT can insert this opcode. We do
824 not intend to support this case. */
825 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826 return FALSE;
827 cc += 1 + LINK_SIZE;
828 break;
829
830 case OP_CREF:
831 common->optimized_cbracket[GET2(cc, 1)] = 0;
832 cc += 1 + IMM2_SIZE;
833 break;
834
835 case OP_DNREF:
836 case OP_DNREFI:
837 case OP_DNCREF:
838 count = GET2(cc, 1 + IMM2_SIZE);
839 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840 while (count-- > 0)
841 {
842 common->optimized_cbracket[GET2(slot, 0)] = 0;
843 slot += common->name_entry_size;
844 }
845 cc += 1 + 2 * IMM2_SIZE;
846 break;
847
848 case OP_RECURSE:
849 /* Set its value only once. */
850 if (common->recursive_head_ptr == 0)
851 {
852 common->recursive_head_ptr = common->ovector_start;
853 common->ovector_start += sizeof(sljit_sw);
854 }
855 cc += 1 + LINK_SIZE;
856 break;
857
858 case OP_CALLOUT:
859 if (common->capture_last_ptr == 0)
860 {
861 common->capture_last_ptr = common->ovector_start;
862 common->ovector_start += sizeof(sljit_sw);
863 }
864 cc += 2 + 2 * LINK_SIZE;
865 break;
866
867 case OP_THEN_ARG:
868 common->has_then = TRUE;
869 common->control_head_ptr = 1;
870 /* Fall through. */
871
872 case OP_PRUNE_ARG:
873 common->needs_start_ptr = TRUE;
874 /* Fall through. */
875
876 case OP_MARK:
877 if (common->mark_ptr == 0)
878 {
879 common->mark_ptr = common->ovector_start;
880 common->ovector_start += sizeof(sljit_sw);
881 }
882 cc += 1 + 2 + cc[1];
883 break;
884
885 case OP_THEN:
886 common->has_then = TRUE;
887 common->control_head_ptr = 1;
888 /* Fall through. */
889
890 case OP_PRUNE:
891 case OP_SKIP:
892 common->needs_start_ptr = TRUE;
893 cc += 1;
894 break;
895
896 case OP_SKIP_ARG:
897 common->control_head_ptr = 1;
898 common->has_skip_arg = TRUE;
899 cc += 1 + 2 + cc[1];
900 break;
901
902 default:
903 cc = next_opcode(common, cc);
904 if (cc == NULL)
905 return FALSE;
906 break;
907 }
908 }
909 return TRUE;
910 }
911
912 static int get_class_iterator_size(pcre_uchar *cc)
913 {
914 switch(*cc)
915 {
916 case OP_CRSTAR:
917 case OP_CRPLUS:
918 return 2;
919
920 case OP_CRMINSTAR:
921 case OP_CRMINPLUS:
922 case OP_CRQUERY:
923 case OP_CRMINQUERY:
924 return 1;
925
926 case OP_CRRANGE:
927 case OP_CRMINRANGE:
928 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
929 return 0;
930 return 2;
931
932 default:
933 return 0;
934 }
935 }
936
/* Looks for a bracket that appears as several identical copies in the
compiled pattern and records the repeat in common->private_data_ptrs. begin
points to the opening opcode of the first copy.

Only brackets closed by a plain OP_KET are considered. Two forms are detected:
 - a run of identical consecutive copies (counted in min);
 - nested optional copies, each introduced by the same OP_BRAZERO or
   OP_BRAMINZERO opcode (counted in max).
The result is stored in the three private_data_ptrs slots that start one unit
after a closing OP_KET: [0] the distance to skip, [1] the iterator kind
(OP_EXACT, OP_UPTO or OP_MINUPTO), [2] the repeat count.
Returns TRUE when a repeat has been recorded for this bracket (now or by an
earlier call), FALSE otherwise. */
937 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
938 {
939 pcre_uchar *end = bracketend(begin);
940 pcre_uchar *next;
941 pcre_uchar *next_end;
942 pcre_uchar *max_end;
943 pcre_uchar type;
944 sljit_sw length = end - begin;
945 int min, max, i;
946
947 /* Detect fixed iterations first. */
948 if (end[-(1 + LINK_SIZE)] != OP_KET)
949 return FALSE;
950
951 /* Already detected repeat. */
952 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
953 return TRUE;
954
/* Count the copies that directly follow: same opening opcode, same length
and identical content. */
955 next = end;
956 min = 1;
957 while (1)
958 {
959 if (*next != *begin)
960 break;
961 next_end = bracketend(next);
962 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
963 break;
964 next = next_end;
965 min++;
966 }
967
/* Exactly two copies are not treated as a repeat. */
968 if (min == 2)
969 return FALSE;
970
971 max = 0;
972 max_end = next;
973 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
974 {
975 type = *next;
/* Each nesting level has the form: type, OP_BRA, copy of the bracket. */
976 while (1)
977 {
978 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
979 break;
980 next_end = bracketend(next + 2 + LINK_SIZE);
981 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
982 break;
983 next = next_end;
984 max++;
985 }
986
/* The innermost level has no OP_BRA wrapper: type followed by the copy. */
987 if (next[0] == type && next[1] == *begin && max >= 1)
988 {
989 next_end = bracketend(next + 1);
990 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
991 {
/* One OP_KET must follow for every OP_BRA wrapper counted above. */
992 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
993 if (*next_end != OP_KET)
994 break;
995
996 if (i == max)
997 {
998 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
999 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1000 /* +2 the original and the last. */
1001 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1002 if (min == 1)
1003 return TRUE;
/* The copy just before the optional part is now counted by the UPTO repeat:
remove it from min and move max_end back over it (the link stored before
max_end gives the length of that copy without its closing OP_KET).
NOTE(review): if min becomes 2 here the function returns FALSE below although
the UPTO data has already been stored — confirm this is intended. */
1004 min--;
1005 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1006 }
1007 }
1008 }
1009 }
1010
/* Record the fixed repeat on the OP_KET of the first copy; the skip distance
leads from the end of the first copy to the end of the last fixed copy. */
1011 if (min >= 3)
1012 {
1013 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1015 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1016 return TRUE;
1017 }
1018
1019 return FALSE;
1020 }
1021
/* Groups of case labels for the single character and character type repeat
opcodes, so that several switch statements can share the same lists. In
set_private_data_ptrs the *_1 group gets space = 1 and the *_2A / *_2B groups
get space = 2. */

/* Character repeats handled with space = 1. */
1022 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1023 case OP_MINSTAR: \
1024 case OP_MINPLUS: \
1025 case OP_QUERY: \
1026 case OP_MINQUERY: \
1027 case OP_MINSTARI: \
1028 case OP_MINPLUSI: \
1029 case OP_QUERYI: \
1030 case OP_MINQUERYI: \
1031 case OP_NOTMINSTAR: \
1032 case OP_NOTMINPLUS: \
1033 case OP_NOTQUERY: \
1034 case OP_NOTMINQUERY: \
1035 case OP_NOTMINSTARI: \
1036 case OP_NOTMINPLUSI: \
1037 case OP_NOTQUERYI: \
1038 case OP_NOTMINQUERYI:
1039
/* Greedy star / plus character repeats, handled with space = 2. */
1040 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1041 case OP_STAR: \
1042 case OP_PLUS: \
1043 case OP_STARI: \
1044 case OP_PLUSI: \
1045 case OP_NOTSTAR: \
1046 case OP_NOTPLUS: \
1047 case OP_NOTSTARI: \
1048 case OP_NOTPLUSI:
1049
/* Bounded (upto) character repeats, handled with space = 2. */
1050 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1051 case OP_UPTO: \
1052 case OP_MINUPTO: \
1053 case OP_UPTOI: \
1054 case OP_MINUPTOI: \
1055 case OP_NOTUPTO: \
1056 case OP_NOTMINUPTO: \
1057 case OP_NOTUPTOI: \
1058 case OP_NOTMINUPTOI:
1059
/* The same three groups for the character type repeats.
NOTE(review): their handling is outside this part of the file; presumably it
mirrors the character repeat groups above — confirm. */
1060 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1061 case OP_TYPEMINSTAR: \
1062 case OP_TYPEMINPLUS: \
1063 case OP_TYPEQUERY: \
1064 case OP_TYPEMINQUERY:
1065
1066 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1067 case OP_TYPESTAR: \
1068 case OP_TYPEPLUS:
1069
1070 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1071 case OP_TYPEUPTO: \
1072 case OP_TYPEMINUPTO:
1073
1074 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1075 {
1076 pcre_uchar *cc = common->start;
1077 pcre_uchar *alternative;
1078 pcre_uchar *end = NULL;
1079 int private_data_ptr = *private_data_start;
1080 int space, size, bracketlen;
1081
1082 while (cc < ccend)
1083 {
1084 space = 0;
1085 size = 0;
1086 bracketlen = 0;
1087 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1088 return;
1089
1090 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1091 if (detect_repeat(common, cc))
1092 {
1093 /* These brackets are converted to repeats, so no global
1094 based single character repeat is allowed. */
1095 if (cc >= end)
1096 end = bracketend(cc);
1097 }
1098
1099 switch(*cc)
1100 {
1101 case OP_KET:
1102 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1103 {
1104 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1105 private_data_ptr += sizeof(sljit_sw);
1106 cc += common->private_data_ptrs[cc + 1 - common->start];
1107 }
1108 cc += 1 + LINK_SIZE;
1109 break;
1110
1111 case OP_ASSERT:
1112 case OP_ASSERT_NOT:
1113 case OP_ASSERTBACK:
1114 case OP_ASSERTBACK_NOT:
1115 case OP_ONCE:
1116 case OP_ONCE_NC:
1117 case OP_BRAPOS:
1118 case OP_SBRA:
1119 case OP_SBRAPOS:
1120 case OP_SCOND:
1121 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1122 private_data_ptr += sizeof(sljit_sw);
1123 bracketlen = 1 + LINK_SIZE;
1124 break;
1125
1126 case OP_CBRAPOS:
1127 case OP_SCBRAPOS:
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1131 break;
1132
1133 case OP_COND:
1134 /* Might be a hidden SCOND. */
1135 alternative = cc + GET(cc, 1);
1136 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1137 {
1138 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1139 private_data_ptr += sizeof(sljit_sw);
1140 }
1141 bracketlen = 1 + LINK_SIZE;
1142 break;
1143
1144 case OP_BRA:
1145 bracketlen = 1 + LINK_SIZE;
1146 break;
1147
1148 case OP_CBRA:
1149 case OP_SCBRA:
1150 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1151 break;
1152
1153 CASE_ITERATOR_PRIVATE_DATA_1
1154 space = 1;
1155 size = -2;
1156 break;
1157
1158 CASE_ITERATOR_PRIVATE_DATA_2A
1159 space = 2;
1160 size = -2;
1161 break;
1162
1163 CASE_ITERATOR_PRIVATE_DATA_2B
1164 space = 2;
1165 size = -(2 + IMM2_SIZE);
1166 break;
1167
1168 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1169 space = 1;
1170 size = 1;
1171 break;
1172
1173 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1174 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1175 space = 2;
1176 size = 1;
1177 break;
1178
1179 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1180 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1181 space = 2;
1182 size = 1 + IMM2_SIZE;
1183 break;
1184
1185 case OP_CLASS:
1186 case OP_NCLASS:
1187 size += 1 + 32 / sizeof(pcre_uchar);
1188 space = get_class_iterator_size(cc + size);
1189 break;
1190
1191 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1192 case OP_XCLASS:
1193 size = GET(cc, 1);
1194 space = get_class_iterator_size(cc + size);
1195 break;
1196 #endif
1197
1198 default:
1199 cc = next_opcode(common, cc);
1200 SLJIT_ASSERT(cc != NULL);
1201 break;
1202 }
1203
1204 /* Character iterators, which are not inside a repeated bracket,
1205 gets a private slot instead of allocating it on the stack. */
1206 if (space > 0 && cc >= end)
1207 {
1208 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1209 private_data_ptr += sizeof(sljit_sw) * space;
1210 }
1211
1212 if (size != 0)
1213 {
1214 if (size < 0)
1215 {
1216 cc += -size;
1217 #ifdef SUPPORT_UTF
1218 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1219 #endif
1220 }
1221 else
1222 cc += size;
1223 }
1224
1225 if (bracketlen > 0)
1226 {
1227 if (cc >= end)
1228 {
1229 end = bracketend(cc);
1230 if (end[-1 - LINK_SIZE] == OP_KET)
1231 end = NULL;
1232 }
1233 cc += bracketlen;
1234 }
1235 }
1236 *private_data_start = private_data_ptr;
1237 }
1238
1239 /* Returns with a frame_types (always < 0) if no need for frame. */
1240 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1241 {
1242 int length = 0;
1243 int possessive = 0;
1244 BOOL stack_restore = FALSE;
1245 BOOL setsom_found = recursive;
1246 BOOL setmark_found = recursive;
1247 /* The last capture is a local variable even for recursions. */
1248 BOOL capture_last_found = FALSE;
1249
1250 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1251 SLJIT_ASSERT(common->control_head_ptr != 0);
1252 *needs_control_head = TRUE;
1253 #else
1254 *needs_control_head = FALSE;
1255 #endif
1256
1257 if (ccend == NULL)
1258 {
1259 ccend = bracketend(cc) - (1 + LINK_SIZE);
1260 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1261 {
1262 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1263 /* This is correct regardless of common->capture_last_ptr. */
1264 capture_last_found = TRUE;
1265 }
1266 cc = next_opcode(common, cc);
1267 }
1268
1269 SLJIT_ASSERT(cc != NULL);
1270 while (cc < ccend)
1271 switch(*cc)
1272 {
1273 case OP_SET_SOM:
1274 SLJIT_ASSERT(common->has_set_som);
1275 stack_restore = TRUE;
1276 if (!setsom_found)
1277 {
1278 length += 2;
1279 setsom_found = TRUE;
1280 }
1281 cc += 1;
1282 break;
1283
1284 case OP_MARK:
1285 case OP_PRUNE_ARG:
1286 case OP_THEN_ARG:
1287 SLJIT_ASSERT(common->mark_ptr != 0);
1288 stack_restore = TRUE;
1289 if (!setmark_found)
1290 {
1291 length += 2;
1292 setmark_found = TRUE;
1293 }
1294 if (common->control_head_ptr != 0)
1295 *needs_control_head = TRUE;
1296 cc += 1 + 2 + cc[1];
1297 break;
1298
1299 case OP_RECURSE:
1300 stack_restore = TRUE;
1301 if (common->has_set_som && !setsom_found)
1302 {
1303 length += 2;
1304 setsom_found = TRUE;
1305 }
1306 if (common->mark_ptr != 0 && !setmark_found)
1307 {
1308 length += 2;
1309 setmark_found = TRUE;
1310 }
1311 if (common->capture_last_ptr != 0 && !capture_last_found)
1312 {
1313 length += 2;
1314 capture_last_found = TRUE;
1315 }
1316 cc += 1 + LINK_SIZE;
1317 break;
1318
1319 case OP_CBRA:
1320 case OP_CBRAPOS:
1321 case OP_SCBRA:
1322 case OP_SCBRAPOS:
1323 stack_restore = TRUE;
1324 if (common->capture_last_ptr != 0 && !capture_last_found)
1325 {
1326 length += 2;
1327 capture_last_found = TRUE;
1328 }
1329 length += 3;
1330 cc += 1 + LINK_SIZE + IMM2_SIZE;
1331 break;
1332
1333 default:
1334 stack_restore = TRUE;
1335 /* Fall through. */
1336
1337 case OP_NOT_WORD_BOUNDARY:
1338 case OP_WORD_BOUNDARY:
1339 case OP_NOT_DIGIT:
1340 case OP_DIGIT:
1341 case OP_NOT_WHITESPACE:
1342 case OP_WHITESPACE:
1343 case OP_NOT_WORDCHAR:
1344 case OP_WORDCHAR:
1345 case OP_ANY:
1346 case OP_ALLANY:
1347 case OP_ANYBYTE:
1348 case OP_NOTPROP:
1349 case OP_PROP:
1350 case OP_ANYNL:
1351 case OP_NOT_HSPACE:
1352 case OP_HSPACE:
1353 case OP_NOT_VSPACE:
1354 case OP_VSPACE:
1355 case OP_EXTUNI:
1356 case OP_EODN:
1357 case OP_EOD:
1358 case OP_CIRC:
1359 case OP_CIRCM:
1360 case OP_DOLL:
1361 case OP_DOLLM:
1362 case OP_CHAR:
1363 case OP_CHARI:
1364 case OP_NOT:
1365 case OP_NOTI:
1366
1367 case OP_EXACT:
1368 case OP_POSSTAR:
1369 case OP_POSPLUS:
1370 case OP_POSQUERY:
1371 case OP_POSUPTO:
1372
1373 case OP_EXACTI:
1374 case OP_POSSTARI:
1375 case OP_POSPLUSI:
1376 case OP_POSQUERYI:
1377 case OP_POSUPTOI:
1378
1379 case OP_NOTEXACT:
1380 case OP_NOTPOSSTAR:
1381 case OP_NOTPOSPLUS:
1382 case OP_NOTPOSQUERY:
1383 case OP_NOTPOSUPTO:
1384
1385 case OP_NOTEXACTI:
1386 case OP_NOTPOSSTARI:
1387 case OP_NOTPOSPLUSI:
1388 case OP_NOTPOSQUERYI:
1389 case OP_NOTPOSUPTOI:
1390
1391 case OP_TYPEEXACT:
1392 case OP_TYPEPOSSTAR:
1393 case OP_TYPEPOSPLUS:
1394 case OP_TYPEPOSQUERY:
1395 case OP_TYPEPOSUPTO:
1396
1397 case OP_CLASS:
1398 case OP_NCLASS:
1399 case OP_XCLASS:
1400
1401 cc = next_opcode(common, cc);
1402 SLJIT_ASSERT(cc != NULL);
1403 break;
1404 }
1405
1406 /* Possessive quantifiers can use a special case. */
1407 if (SLJIT_UNLIKELY(possessive == length))
1408 return stack_restore ? no_frame : no_stack;
1409
1410 if (length > 0)
1411 return length + 1;
1412 return stack_restore ? no_frame : no_stack;
1413 }
1414
1415 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1416 {
1417 DEFINE_COMPILER;
1418 BOOL setsom_found = recursive;
1419 BOOL setmark_found = recursive;
1420 /* The last capture is a local variable even for recursions. */
1421 BOOL capture_last_found = FALSE;
1422 int offset;
1423
1424 /* >= 1 + shortest item size (2) */
1425 SLJIT_UNUSED_ARG(stacktop);
1426 SLJIT_ASSERT(stackpos >= stacktop + 2);
1427
1428 stackpos = STACK(stackpos);
1429 if (ccend == NULL)
1430 {
1431 ccend = bracketend(cc) - (1 + LINK_SIZE);
1432 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1433 cc = next_opcode(common, cc);
1434 }
1435
1436 SLJIT_ASSERT(cc != NULL);
1437 while (cc < ccend)
1438 switch(*cc)
1439 {
1440 case OP_SET_SOM:
1441 SLJIT_ASSERT(common->has_set_som);
1442 if (!setsom_found)
1443 {
1444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1446 stackpos += (int)sizeof(sljit_sw);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448 stackpos += (int)sizeof(sljit_sw);
1449 setsom_found = TRUE;
1450 }
1451 cc += 1;
1452 break;
1453
1454 case OP_MARK:
1455 case OP_PRUNE_ARG:
1456 case OP_THEN_ARG:
1457 SLJIT_ASSERT(common->mark_ptr != 0);
1458 if (!setmark_found)
1459 {
1460 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1462 stackpos += (int)sizeof(sljit_sw);
1463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464 stackpos += (int)sizeof(sljit_sw);
1465 setmark_found = TRUE;
1466 }
1467 cc += 1 + 2 + cc[1];
1468 break;
1469
1470 case OP_RECURSE:
1471 if (common->has_set_som && !setsom_found)
1472 {
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1475 stackpos += (int)sizeof(sljit_sw);
1476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1477 stackpos += (int)sizeof(sljit_sw);
1478 setsom_found = TRUE;
1479 }
1480 if (common->mark_ptr != 0 && !setmark_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setmark_found = TRUE;
1488 }
1489 if (common->capture_last_ptr != 0 && !capture_last_found)
1490 {
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 capture_last_found = TRUE;
1497 }
1498 cc += 1 + LINK_SIZE;
1499 break;
1500
1501 case OP_CBRA:
1502 case OP_CBRAPOS:
1503 case OP_SCBRA:
1504 case OP_SCBRAPOS:
1505 if (common->capture_last_ptr != 0 && !capture_last_found)
1506 {
1507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1509 stackpos += (int)sizeof(sljit_sw);
1510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1511 stackpos += (int)sizeof(sljit_sw);
1512 capture_last_found = TRUE;
1513 }
1514 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1516 stackpos += (int)sizeof(sljit_sw);
1517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1522 stackpos += (int)sizeof(sljit_sw);
1523
1524 cc += 1 + LINK_SIZE + IMM2_SIZE;
1525 break;
1526
1527 default:
1528 cc = next_opcode(common, cc);
1529 SLJIT_ASSERT(cc != NULL);
1530 break;
1531 }
1532
1533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1534 SLJIT_ASSERT(stackpos == STACK(stacktop));
1535 }
1536
1537 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1538 {
1539 int private_data_length = needs_control_head ? 3 : 2;
1540 int size;
1541 pcre_uchar *alternative;
1542 /* Calculate the sum of the private machine words. */
1543 while (cc < ccend)
1544 {
1545 size = 0;
1546 switch(*cc)
1547 {
1548 case OP_KET:
1549 if (PRIVATE_DATA(cc) != 0)
1550 private_data_length++;
1551 cc += 1 + LINK_SIZE;
1552 break;
1553
1554 case OP_ASSERT:
1555 case OP_ASSERT_NOT:
1556 case OP_ASSERTBACK:
1557 case OP_ASSERTBACK_NOT:
1558 case OP_ONCE:
1559 case OP_ONCE_NC:
1560 case OP_BRAPOS:
1561 case OP_SBRA:
1562 case OP_SBRAPOS:
1563 case OP_SCOND:
1564 private_data_length++;
1565 cc += 1 + LINK_SIZE;
1566 break;
1567
1568 case OP_CBRA:
1569 case OP_SCBRA:
1570 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1571 private_data_length++;
1572 cc += 1 + LINK_SIZE + IMM2_SIZE;
1573 break;
1574
1575 case OP_CBRAPOS:
1576 case OP_SCBRAPOS:
1577 private_data_length += 2;
1578 cc += 1 + LINK_SIZE + IMM2_SIZE;
1579 break;
1580
1581 case OP_COND:
1582 /* Might be a hidden SCOND. */
1583 alternative = cc + GET(cc, 1);
1584 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE;
1587 break;
1588
1589 CASE_ITERATOR_PRIVATE_DATA_1
1590 if (PRIVATE_DATA(cc))
1591 private_data_length++;
1592 cc += 2;
1593 #ifdef SUPPORT_UTF
1594 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596 break;
1597
1598 CASE_ITERATOR_PRIVATE_DATA_2A
1599 if (PRIVATE_DATA(cc))
1600 private_data_length += 2;
1601 cc += 2;
1602 #ifdef SUPPORT_UTF
1603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605 break;
1606
1607 CASE_ITERATOR_PRIVATE_DATA_2B
1608 if (PRIVATE_DATA(cc))
1609 private_data_length += 2;
1610 cc += 2 + IMM2_SIZE;
1611 #ifdef SUPPORT_UTF
1612 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1613 #endif
1614 break;
1615
1616 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1617 if (PRIVATE_DATA(cc))
1618 private_data_length++;
1619 cc += 1;
1620 break;
1621
1622 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1623 if (PRIVATE_DATA(cc))
1624 private_data_length += 2;
1625 cc += 1;
1626 break;
1627
1628 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1629 if (PRIVATE_DATA(cc))
1630 private_data_length += 2;
1631 cc += 1 + IMM2_SIZE;
1632 break;
1633
1634 case OP_CLASS:
1635 case OP_NCLASS:
1636 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1637 case OP_XCLASS:
1638 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1639 #else
1640 size = 1 + 32 / (int)sizeof(pcre_uchar);
1641 #endif
1642 if (PRIVATE_DATA(cc))
1643 private_data_length += get_class_iterator_size(cc + size);
1644 cc += size;
1645 break;
1646
1647 default:
1648 cc = next_opcode(common, cc);
1649 SLJIT_ASSERT(cc != NULL);
1650 break;
1651 }
1652 }
1653 SLJIT_ASSERT(cc == ccend);
1654 return private_data_length;
1655 }
1656
1657 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1658 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1659 {
1660 DEFINE_COMPILER;
1661 int srcw[2];
1662 int count, size;
1663 BOOL tmp1next = TRUE;
1664 BOOL tmp1empty = TRUE;
1665 BOOL tmp2empty = TRUE;
1666 pcre_uchar *alternative;
1667 enum {
1668 start,
1669 loop,
1670 end
1671 } status;
1672
1673 status = save ? start : loop;
1674 stackptr = STACK(stackptr - 2);
1675 stacktop = STACK(stacktop - 1);
1676
1677 if (!save)
1678 {
1679 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1680 if (stackptr < stacktop)
1681 {
1682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1683 stackptr += sizeof(sljit_sw);
1684 tmp1empty = FALSE;
1685 }
1686 if (stackptr < stacktop)
1687 {
1688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1689 stackptr += sizeof(sljit_sw);
1690 tmp2empty = FALSE;
1691 }
1692 /* The tmp1next must be TRUE in either way. */
1693 }
1694
1695 do
1696 {
1697 count = 0;
1698 switch(status)
1699 {
1700 case start:
1701 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1702 count = 1;
1703 srcw[0] = common->recursive_head_ptr;
1704 if (needs_control_head)
1705 {
1706 SLJIT_ASSERT(common->control_head_ptr != 0);
1707 count = 2;
1708 srcw[1] = common->control_head_ptr;
1709 }
1710 status = loop;
1711 break;
1712
1713 case loop:
1714 if (cc >= ccend)
1715 {
1716 status = end;
1717 break;
1718 }
1719
1720 switch(*cc)
1721 {
1722 case OP_KET:
1723 if (PRIVATE_DATA(cc) != 0)
1724 {
1725 count = 1;
1726 srcw[0] = PRIVATE_DATA(cc);
1727 }
1728 cc += 1 + LINK_SIZE;
1729 break;
1730
1731 case OP_ASSERT:
1732 case OP_ASSERT_NOT:
1733 case OP_ASSERTBACK:
1734 case OP_ASSERTBACK_NOT:
1735 case OP_ONCE:
1736 case OP_ONCE_NC:
1737 case OP_BRAPOS:
1738 case OP_SBRA:
1739 case OP_SBRAPOS:
1740 case OP_SCOND:
1741 count = 1;
1742 srcw[0] = PRIVATE_DATA(cc);
1743 SLJIT_ASSERT(srcw[0] != 0);
1744 cc += 1 + LINK_SIZE;
1745 break;
1746
1747 case OP_CBRA:
1748 case OP_SCBRA:
1749 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1750 {
1751 count = 1;
1752 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1753 }
1754 cc += 1 + LINK_SIZE + IMM2_SIZE;
1755 break;
1756
1757 case OP_CBRAPOS:
1758 case OP_SCBRAPOS:
1759 count = 2;
1760 srcw[0] = PRIVATE_DATA(cc);
1761 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1762 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1763 cc += 1 + LINK_SIZE + IMM2_SIZE;
1764 break;
1765
1766 case OP_COND:
1767 /* Might be a hidden SCOND. */
1768 alternative = cc + GET(cc, 1);
1769 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1770 {
1771 count = 1;
1772 srcw[0] = PRIVATE_DATA(cc);
1773 SLJIT_ASSERT(srcw[0] != 0);
1774 }
1775 cc += 1 + LINK_SIZE;
1776 break;
1777
1778 CASE_ITERATOR_PRIVATE_DATA_1
1779 if (PRIVATE_DATA(cc))
1780 {
1781 count = 1;
1782 srcw[0] = PRIVATE_DATA(cc);
1783 }
1784 cc += 2;
1785 #ifdef SUPPORT_UTF
1786 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1787 #endif
1788 break;
1789
1790 CASE_ITERATOR_PRIVATE_DATA_2A
1791 if (PRIVATE_DATA(cc))
1792 {
1793 count = 2;
1794 srcw[0] = PRIVATE_DATA(cc);
1795 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1796 }
1797 cc += 2;
1798 #ifdef SUPPORT_UTF
1799 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1800 #endif
1801 break;
1802
1803 CASE_ITERATOR_PRIVATE_DATA_2B
1804 if (PRIVATE_DATA(cc))
1805 {
1806 count = 2;
1807 srcw[0] = PRIVATE_DATA(cc);
1808 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1809 }
1810 cc += 2 + IMM2_SIZE;
1811 #ifdef SUPPORT_UTF
1812 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1813 #endif
1814 break;
1815
1816 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1817 if (PRIVATE_DATA(cc))
1818 {
1819 count = 1;
1820 srcw[0] = PRIVATE_DATA(cc);
1821 }
1822 cc += 1;
1823 break;
1824
1825 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1826 if (PRIVATE_DATA(cc))
1827 {
1828 count = 2;
1829 srcw[0] = PRIVATE_DATA(cc);
1830 srcw[1] = srcw[0] + sizeof(sljit_sw);
1831 }
1832 cc += 1;
1833 break;
1834
1835 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1836 if (PRIVATE_DATA(cc))
1837 {
1838 count = 2;
1839 srcw[0] = PRIVATE_DATA(cc);
1840 srcw[1] = srcw[0] + sizeof(sljit_sw);
1841 }
1842 cc += 1 + IMM2_SIZE;
1843 break;
1844
1845 case OP_CLASS:
1846 case OP_NCLASS:
1847 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1848 case OP_XCLASS:
1849 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1850 #else
1851 size = 1 + 32 / (int)sizeof(pcre_uchar);
1852 #endif
1853 if (PRIVATE_DATA(cc))
1854 switch(get_class_iterator_size(cc + size))
1855 {
1856 case 1:
1857 count = 1;
1858 srcw[0] = PRIVATE_DATA(cc);
1859 break;
1860
1861 case 2:
1862 count = 2;
1863 srcw[0] = PRIVATE_DATA(cc);
1864 srcw[1] = srcw[0] + sizeof(sljit_sw);
1865 break;
1866
1867 default:
1868 SLJIT_ASSERT_STOP();
1869 break;
1870 }
1871 cc += size;
1872 break;
1873
1874 default:
1875 cc = next_opcode(common, cc);
1876 SLJIT_ASSERT(cc != NULL);
1877 break;
1878 }
1879 break;
1880
1881 case end:
1882 SLJIT_ASSERT_STOP();
1883 break;
1884 }
1885
1886 while (count > 0)
1887 {
1888 count--;
1889 if (save)
1890 {
1891 if (tmp1next)
1892 {
1893 if (!tmp1empty)
1894 {
1895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1896 stackptr += sizeof(sljit_sw);
1897 }
1898 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1899 tmp1empty = FALSE;
1900 tmp1next = FALSE;
1901 }
1902 else
1903 {
1904 if (!tmp2empty)
1905 {
1906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1907 stackptr += sizeof(sljit_sw);
1908 }
1909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1910 tmp2empty = FALSE;
1911 tmp1next = TRUE;
1912 }
1913 }
1914 else
1915 {
1916 if (tmp1next)
1917 {
1918 SLJIT_ASSERT(!tmp1empty);
1919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1920 tmp1empty = stackptr >= stacktop;
1921 if (!tmp1empty)
1922 {
1923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1924 stackptr += sizeof(sljit_sw);
1925 }
1926 tmp1next = FALSE;
1927 }
1928 else
1929 {
1930 SLJIT_ASSERT(!tmp2empty);
1931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1932 tmp2empty = stackptr >= stacktop;
1933 if (!tmp2empty)
1934 {
1935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936 stackptr += sizeof(sljit_sw);
1937 }
1938 tmp1next = TRUE;
1939 }
1940 }
1941 }
1942 }
1943 while (status != end);
1944
1945 if (save)
1946 {
1947 if (tmp1next)
1948 {
1949 if (!tmp1empty)
1950 {
1951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1952 stackptr += sizeof(sljit_sw);
1953 }
1954 if (!tmp2empty)
1955 {
1956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1957 stackptr += sizeof(sljit_sw);
1958 }
1959 }
1960 else
1961 {
1962 if (!tmp2empty)
1963 {
1964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1965 stackptr += sizeof(sljit_sw);
1966 }
1967 if (!tmp1empty)
1968 {
1969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1970 stackptr += sizeof(sljit_sw);
1971 }
1972 }
1973 }
1974 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1975 }
1976
1977 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1978 {
1979 pcre_uchar *end = bracketend(cc);
1980 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1981
1982 /* Assert captures then. */
1983 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1984 current_offset = NULL;
1985 /* Conditional block does not. */
1986 if (*cc == OP_COND || *cc == OP_SCOND)
1987 has_alternatives = FALSE;
1988
1989 cc = next_opcode(common, cc);
1990 if (has_alternatives)
1991 current_offset = common->then_offsets + (cc - common->start);
1992
1993 while (cc < end)
1994 {
1995 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1996 cc = set_then_offsets(common, cc, current_offset);
1997 else
1998 {
1999 if (*cc == OP_ALT && has_alternatives)
2000 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2001 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2002 *current_offset = 1;
2003 cc = next_opcode(common, cc);
2004 }
2005 }
2006
2007 return end;
2008 }
2009
/* The case label groups are not used below this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2016
2017 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2018 {
2019 return (value & (value - 1)) == 0;
2020 }
2021
2022 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2023 {
2024 while (list)
2025 {
2026 /* sljit_set_label is clever enough to do nothing
2027 if either the jump or the label is NULL. */
2028 SET_LABEL(list->jump, label);
2029 list = list->next;
2030 }
2031 }
2032
2033 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2034 {
2035 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2036 if (list_item)
2037 {
2038 list_item->next = *list;
2039 list_item->jump = jump;
2040 *list = list_item;
2041 }
2042 }
2043
2044 static void add_stub(compiler_common *common, struct sljit_jump *start)
2045 {
2046 DEFINE_COMPILER;
2047 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2048
2049 if (list_item)
2050 {
2051 list_item->start = start;
2052 list_item->quit = LABEL();
2053 list_item->next = common->stubs;
2054 common->stubs = list_item;
2055 }
2056 }
2057
2058 static void flush_stubs(compiler_common *common)
2059 {
2060 DEFINE_COMPILER;
2061 stub_list* list_item = common->stubs;
2062
2063 while (list_item)
2064 {
2065 JUMPHERE(list_item->start);
2066 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2067 JUMPTO(SLJIT_JUMP, list_item->quit);
2068 list_item = list_item->next;
2069 }
2070 common->stubs = NULL;
2071 }
2072
2073 static void add_label_addr(compiler_common *common)
2074 {
2075 DEFINE_COMPILER;
2076 label_addr_list *label_addr;
2077
2078 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2079 if (label_addr == NULL)
2080 return;
2081 label_addr->label = LABEL();
2082 label_addr->addr = common->read_only_data_ptr;
2083 label_addr->next = common->label_addrs;
2084 common->label_addrs = label_addr;
2085 common->read_only_data_ptr++;
2086 }
2087
2088 static SLJIT_INLINE void count_match(compiler_common *common)
2089 {
2090 DEFINE_COMPILER;
2091
2092 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2093 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2094 }
2095
2096 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2097 {
2098 /* May destroy all locals and registers except TMP2. */
2099 DEFINE_COMPILER;
2100
2101 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2102 #ifdef DESTROY_REGISTERS
2103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2104 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2108 #endif
2109 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2110 }
2111
2112 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2113 {
2114 DEFINE_COMPILER;
2115 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2116 }
2117
2118 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2119 {
2120 DEFINE_COMPILER;
2121 struct sljit_label *loop;
2122 int i;
2123
2124 /* At this point we can freely use all temporary registers. */
2125 SLJIT_ASSERT(length > 1);
2126 /* TMP1 returns with begin - 1. */
2127 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2128 if (length < 8)
2129 {
2130 for (i = 1; i < length; i++)
2131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2132 }
2133 else
2134 {
2135 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2136 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2137 loop = LABEL();
2138 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2140 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2141 }
2142 }
2143
2144 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2145 {
2146 DEFINE_COMPILER;
2147 struct sljit_label *loop;
2148 int i;
2149
2150 SLJIT_ASSERT(length > 1);
2151 /* OVECTOR(1) contains the "string begin - 1" constant. */
2152 if (length > 2)
2153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2154 if (length < 8)
2155 {
2156 for (i = 2; i < length; i++)
2157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2158 }
2159 else
2160 {
2161 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2162 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2163 loop = LABEL();
2164 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2165 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2166 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2167 }
2168
2169 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2170 if (common->mark_ptr != 0)
2171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2172 if (common->control_head_ptr != 0)
2173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2174 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2176 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2177 }
2178
2179 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2180 {
2181 while (current != NULL)
2182 {
2183 switch (current[-2])
2184 {
2185 case type_then_trap:
2186 break;
2187
2188 case type_mark:
2189 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2190 return current[-4];
2191 break;
2192
2193 default:
2194 SLJIT_ASSERT_STOP();
2195 break;
2196 }
2197 current = (sljit_sw*)current[-1];
2198 }
2199 return -1;
2200 }
2201
2202 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2203 {
2204 DEFINE_COMPILER;
2205 struct sljit_label *loop;
2206 struct sljit_jump *early_quit;
2207
2208 /* At this point we can freely use all registers. */
2209 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2211
2212 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2213 if (common->mark_ptr != 0)
2214 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2215 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2216 if (common->mark_ptr != 0)
2217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2218 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2219 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2220 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2221 /* Unlikely, but possible */
2222 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2223 loop = LABEL();
2224 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2225 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2226 /* Copy the integer value to the output buffer */
2227 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2228 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2229 #endif
2230 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2232 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2233 JUMPHERE(early_quit);
2234
2235 /* Calculate the return value, which is the maximum ovector value. */
2236 if (topbracket > 1)
2237 {
2238 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2239 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2240
2241 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2242 loop = LABEL();
2243 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2244 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2245 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2246 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2247 }
2248 else
2249 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2250 }
2251
2252 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2253 {
2254 DEFINE_COMPILER;
2255 struct sljit_jump *jump;
2256
2257 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2258 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2259 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2260
2261 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2262 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2263 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2264 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2265
2266 /* Store match begin and end. */
2267 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2268 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2269
2270 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2271 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2272 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2273 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2274 #endif
2275 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2276 JUMPHERE(jump);
2277
2278 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2279 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2280 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2281 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2282 #endif
2283 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2284
2285 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2286 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2287 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2288 #endif
2289 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2290
2291 JUMPTO(SLJIT_JUMP, quit);
2292 }
2293
2294 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2295 {
2296 /* May destroy TMP1. */
2297 DEFINE_COMPILER;
2298 struct sljit_jump *jump;
2299
2300 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2301 {
2302 /* The value of -1 must be kept for start_used_ptr! */
2303 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2304 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2305 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2306 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2307 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2308 JUMPHERE(jump);
2309 }
2310 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2311 {
2312 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2313 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2314 JUMPHERE(jump);
2315 }
2316 }
2317
2318 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2319 {
2320 /* Detects if the character has an othercase. */
2321 unsigned int c;
2322
2323 #ifdef SUPPORT_UTF
2324 if (common->utf)
2325 {
2326 GETCHAR(c, cc);
2327 if (c > 127)
2328 {
2329 #ifdef SUPPORT_UCP
2330 return c != UCD_OTHERCASE(c);
2331 #else
2332 return FALSE;
2333 #endif
2334 }
2335 #ifndef COMPILE_PCRE8
2336 return common->fcc[c] != c;
2337 #endif
2338 }
2339 else
2340 #endif
2341 c = *cc;
2342 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2343 }
2344
2345 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2346 {
2347 /* Returns with the othercase. */
2348 #ifdef SUPPORT_UTF
2349 if (common->utf && c > 127)
2350 {
2351 #ifdef SUPPORT_UCP
2352 return UCD_OTHERCASE(c);
2353 #else
2354 return c;
2355 #endif
2356 }
2357 #endif
2358 return TABLE_GET(c, common->fcc, c);
2359 }
2360
2361 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2362 {
2363 /* Detects if the character and its othercase has only 1 bit difference. */
2364 unsigned int c, oc, bit;
2365 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2366 int n;
2367 #endif
2368
2369 #ifdef SUPPORT_UTF
2370 if (common->utf)
2371 {
2372 GETCHAR(c, cc);
2373 if (c <= 127)
2374 oc = common->fcc[c];
2375 else
2376 {
2377 #ifdef SUPPORT_UCP
2378 oc = UCD_OTHERCASE(c);
2379 #else
2380 oc = c;
2381 #endif
2382 }
2383 }
2384 else
2385 {
2386 c = *cc;
2387 oc = TABLE_GET(c, common->fcc, c);
2388 }
2389 #else
2390 c = *cc;
2391 oc = TABLE_GET(c, common->fcc, c);
2392 #endif
2393
2394 SLJIT_ASSERT(c != oc);
2395
2396 bit = c ^ oc;
2397 /* Optimized for English alphabet. */
2398 if (c <= 127 && bit == 0x20)
2399 return (0 << 8) | 0x20;
2400
2401 /* Since c != oc, they must have at least 1 bit difference. */
2402 if (!is_powerof2(bit))
2403 return 0;
2404
2405 #if defined COMPILE_PCRE8
2406
2407 #ifdef SUPPORT_UTF
2408 if (common->utf && c > 127)
2409 {
2410 n = GET_EXTRALEN(*cc);
2411 while ((bit & 0x3f) == 0)
2412 {
2413 n--;
2414 bit >>= 6;
2415 }
2416 return (n << 8) | bit;
2417 }
2418 #endif /* SUPPORT_UTF */
2419 return (0 << 8) | bit;
2420
2421 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2422
2423 #ifdef SUPPORT_UTF
2424 if (common->utf && c > 65535)
2425 {
2426 if (bit >= (1 << 10))
2427 bit >>= 10;
2428 else
2429 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2430 }
2431 #endif /* SUPPORT_UTF */
2432 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2433
2434 #endif /* COMPILE_PCRE[8|16|32] */
2435 }
2436
2437 static void check_partial(compiler_common *common, BOOL force)
2438 {
2439 /* Checks whether a partial matching is occurred. Does not modify registers. */
2440 DEFINE_COMPILER;
2441 struct sljit_jump *jump = NULL;
2442
2443 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2444
2445 if (common->mode == JIT_COMPILE)
2446 return;
2447
2448 if (!force)
2449 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2450 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2451 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2452
2453 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2455 else
2456 {
2457 if (common->partialmatchlabel != NULL)
2458 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2459 else
2460 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2461 }
2462
2463 if (jump != NULL)
2464 JUMPHERE(jump);
2465 }
2466
2467 static void check_str_end(compiler_common *common, jump_list **end_reached)
2468 {
2469 /* Does not affect registers. Usually used in a tight spot. */
2470 DEFINE_COMPILER;
2471 struct sljit_jump *jump;
2472
2473 if (common->mode == JIT_COMPILE)
2474 {
2475 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2476 return;
2477 }
2478
2479 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2480 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2481 {
2482 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2484 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2485 }
2486 else
2487 {
2488 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2489 if (common->partialmatchlabel != NULL)
2490 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2491 else
2492 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2493 }
2494 JUMPHERE(jump);
2495 }
2496
2497 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2498 {
2499 DEFINE_COMPILER;
2500 struct sljit_jump *jump;
2501
2502 if (common->mode == JIT_COMPILE)
2503 {
2504 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2505 return;
2506 }
2507
2508 /* Partial matching mode. */
2509 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2510 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2511 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2512 {
2513 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2514 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2515 }
2516 else
2517 {
2518 if (common->partialmatchlabel != NULL)
2519 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2520 else
2521 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2522 }
2523 JUMPHERE(jump);
2524 }
2525
2526 static void peek_char(compiler_common *common, pcre_uint32 max)
2527 {
2528 /* Reads the character into TMP1, keeps STR_PTR.
2529 Does not check STR_END. TMP2 Destroyed. */
2530 DEFINE_COMPILER;
2531 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2532 struct sljit_jump *jump;
2533 #endif
2534
2535 SLJIT_UNUSED_ARG(max);
2536
2537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2538 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2539 if (common->utf)
2540 {
2541 if (max < 128) return;
2542
2543 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2545 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2546 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2547 JUMPHERE(jump);
2548 }
2549 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2550
2551 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2552 if (common->utf)
2553 {
2554 if (max < 0xd800) return;
2555
2556 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2557 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2558 /* TMP2 contains the high surrogate. */
2559 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2560 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2561 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2562 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2563 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2564 JUMPHERE(jump);
2565 }
2566 #endif
2567 }
2568
2569 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2570
2571 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2572 {
2573 /* Tells whether the character codes below 128 are enough
2574 to determine a match. */
2575 const pcre_uint8 value = nclass ? 0xff : 0;
2576 const pcre_uint8* end = bitset + 32;
2577
2578 bitset += 16;
2579 do
2580 {
2581 if (*bitset++ != value)
2582 return FALSE;
2583 }
2584 while (bitset < end);
2585 return TRUE;
2586 }
2587
2588 static void read_char7_type(compiler_common *common, BOOL full_read)
2589 {
2590 /* Reads the precise character type of a character into TMP1, if the character
2591 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2592 full_read argument tells whether characters above max are accepted or not. */
2593 DEFINE_COMPILER;
2594 struct sljit_jump *jump;
2595
2596 SLJIT_ASSERT(common->utf);
2597
2598 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2599 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600
2601 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2602
2603 if (full_read)
2604 {
2605 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2606 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2607 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2608 JUMPHERE(jump);
2609 }
2610 }
2611
2612 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2613
2614 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2615 {
2616 /* Reads the precise value of a character into TMP1, if the character is
2617 between min and max (c >= min && c <= max). Otherwise it returns with a value
2618 outside the range. Does not check STR_END. */
2619 DEFINE_COMPILER;
2620 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2621 struct sljit_jump *jump;
2622 #endif
2623 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2624 struct sljit_jump *jump2;
2625 #endif
2626
2627 SLJIT_UNUSED_ARG(update_str_ptr);
2628 SLJIT_UNUSED_ARG(min);
2629 SLJIT_UNUSED_ARG(max);
2630 SLJIT_ASSERT(min <= max);
2631
2632 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2634
2635 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2636 if (common->utf)
2637 {
2638 if (max < 128 && !update_str_ptr) return;
2639
2640 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2641 if (min >= 0x10000)
2642 {
2643 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2644 if (update_str_ptr)
2645 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2646 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2647 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2648 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2649 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2650 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2652 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2653 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2654 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2655 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2656 if (!update_str_ptr)
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2658 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2659 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661 JUMPHERE(jump2);
2662 if (update_str_ptr)
2663 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2664 }
2665 else if (min >= 0x800 && max <= 0xffff)
2666 {
2667 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2668 if (update_str_ptr)
2669 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2670 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2671 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2672 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2673 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2674 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2675 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2676 if (!update_str_ptr)
2677 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2678 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2679 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681 JUMPHERE(jump2);
2682 if (update_str_ptr)
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2684 }
2685 else if (max >= 0x800)
2686 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2687 else if (max < 128)
2688 {
2689 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2690 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2691 }
2692 else
2693 {
2694 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2695 if (!update_str_ptr)
2696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697 else
2698 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2701 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2702 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2703 if (update_str_ptr)
2704 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2705 }
2706 JUMPHERE(jump);
2707 }
2708 #endif
2709
2710 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2711 if (common->utf)
2712 {
2713 if (max >= 0x10000)
2714 {
2715 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2716 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2717 /* TMP2 contains the high surrogate. */
2718 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2719 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2720 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2722 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2723 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2724 JUMPHERE(jump);
2725 return;
2726 }
2727
2728 if (max < 0xd800 && !update_str_ptr) return;
2729
2730 /* Skip low surrogate if necessary. */
2731 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2732 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2733 if (update_str_ptr)
2734 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735 if (max >= 0xd800)
2736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2737 JUMPHERE(jump);
2738 }
2739 #endif
2740 }
2741
2742 static SLJIT_INLINE void read_char(compiler_common *common)
2743 {
2744 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2745 }
2746
2747 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2748 {
2749 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2750 DEFINE_COMPILER;
2751 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2752 struct sljit_jump *jump;
2753 #endif
2754 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2755 struct sljit_jump *jump2;
2756 #endif
2757
2758 SLJIT_UNUSED_ARG(update_str_ptr);
2759
2760 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2762
2763 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2764 if (common->utf)
2765 {
2766 /* This can be an extra read in some situations, but hopefully
2767 it is needed in most cases. */
2768 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2769 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2770 if (!update_str_ptr)
2771 {
2772 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2773 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2774 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2775 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2776 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2777 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2779 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2780 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2781 JUMPHERE(jump2);
2782 }
2783 else
2784 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2785 JUMPHERE(jump);
2786 return;
2787 }
2788 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2789
2790 #if !defined COMPILE_PCRE8
2791 /* The ctypes array contains only 256 values. */
2792 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2793 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2794 #endif
2795 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2796 #if !defined COMPILE_PCRE8
2797 JUMPHERE(jump);
2798 #endif
2799
2800 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2801 if (common->utf && update_str_ptr)
2802 {
2803 /* Skip low surrogate if necessary. */
2804 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2805 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2806 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2807 JUMPHERE(jump);
2808 }
2809 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2810 }
2811
2812 static void skip_char_back(compiler_common *common)
2813 {
2814 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2815 DEFINE_COMPILER;
2816 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2817 #if defined COMPILE_PCRE8
2818 struct sljit_label *label;
2819
2820 if (common->utf)
2821 {
2822 label = LABEL();
2823 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2824 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2825 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2826 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2827 return;
2828 }
2829 #elif defined COMPILE_PCRE16
2830 if (common->utf)
2831 {
2832 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2833 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2834 /* Skip low surrogate if necessary. */
2835 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2837 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2838 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2839 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2840 return;
2841 }
2842 #endif /* COMPILE_PCRE[8|16] */
2843 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2844 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2845 }
2846
2847 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2848 {
2849 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2850 DEFINE_COMPILER;
2851 struct sljit_jump *jump;
2852
2853 if (nltype == NLTYPE_ANY)
2854 {
2855 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2856 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2857 }
2858 else if (nltype == NLTYPE_ANYCRLF)
2859 {
2860 if (jumpifmatch)
2861 {
2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2863 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2864 }
2865 else
2866 {
2867 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2868 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2869 JUMPHERE(jump);
2870 }
2871 }
2872 else
2873 {
2874 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2875 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2876 }
2877 }
2878
2879 #ifdef SUPPORT_UTF
2880
2881 #if defined COMPILE_PCRE8
2882 static void do_utfreadchar(compiler_common *common)
2883 {
2884 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2885 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2886 DEFINE_COMPILER;
2887 struct sljit_jump *jump;
2888
2889 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2890 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2891 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2892 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2893 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2894 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2895
2896 /* Searching for the first zero. */
2897 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2898 jump = JUMP(SLJIT_C_NOT_ZERO);
2899 /* Two byte sequence. */
2900 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2901 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2902 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2903
2904 JUMPHERE(jump);
2905 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2906 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2907 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2908 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2909 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2910
2911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2912 jump = JUMP(SLJIT_C_NOT_ZERO);
2913 /* Three byte sequence. */
2914 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2915 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2916 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2917
2918 /* Four byte sequence. */
2919 JUMPHERE(jump);
2920 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2921 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2922 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2923 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2924 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2925 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2927 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2928 }
2929
2930 static void do_utfreadchar16(compiler_common *common)
2931 {
2932 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2933 of the character (>= 0xc0). Return value in TMP1. */
2934 DEFINE_COMPILER;
2935 struct sljit_jump *jump;
2936
2937 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2938 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2939 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2940 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2941 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2942 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2943
2944 /* Searching for the first zero. */
2945 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2946 jump = JUMP(SLJIT_C_NOT_ZERO);
2947 /* Two byte sequence. */
2948 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2949 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2950
2951 JUMPHERE(jump);
2952 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2953 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2954 /* This code runs only in 8 bit mode. No need to shift the value. */
2955 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2956 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2957 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2958 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2959 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2960 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2961 /* Three byte sequence. */
2962 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2963 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2964 }
2965
2966 static void do_utfreadtype8(compiler_common *common)
2967 {
2968 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2969 of the character (>= 0xc0). Return value in TMP1. */
2970 DEFINE_COMPILER;
2971 struct sljit_jump *jump;
2972 struct sljit_jump *compare;
2973
2974 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2975
2976 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2977 jump = JUMP(SLJIT_C_NOT_ZERO);
2978 /* Two byte sequence. */
2979 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2981 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2982 /* The upper 5 bits are known at this point. */
2983 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2984 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2985 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2986 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2987 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2988 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2989
2990 JUMPHERE(compare);
2991 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2992 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2993
2994 /* We only have types for characters less than 256. */
2995 JUMPHERE(jump);
2996 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2997 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3000 }
3001
3002 #endif /* COMPILE_PCRE8 */
3003
3004 #endif /* SUPPORT_UTF */
3005
3006 #ifdef SUPPORT_UCP
3007
3008 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3009 #define UCD_BLOCK_MASK 127
3010 #define UCD_BLOCK_SHIFT 7
3011
3012 static void do_getucd(compiler_common *common)
3013 {
3014 /* Search the UCD record for the character comes in TMP1.
3015 Returns chartype in TMP1 and UCD offset in TMP2. */
3016 DEFINE_COMPILER;
3017
3018 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3019
3020 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3021 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3022 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3023 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3024 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3025 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3026 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3027 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3028 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3029 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3030 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3031 }
3032 #endif
3033
3034 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3035 {
3036 DEFINE_COMPILER;
3037 struct sljit_label *mainloop;
3038 struct sljit_label *newlinelabel = NULL;
3039 struct sljit_jump *start;
3040 struct sljit_jump *end = NULL;
3041 struct sljit_jump *nl = NULL;
3042 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3043 struct sljit_jump *singlechar;
3044 #endif
3045 jump_list *newline = NULL;
3046 BOOL newlinecheck = FALSE;
3047 BOOL readuchar = FALSE;
3048
3049 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3050 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3051 newlinecheck = TRUE;
3052
3053 if (firstline)
3054 {
3055 /* Search for the end of the first line. */
3056 SLJIT_ASSERT(common->first_line_end != 0);
3057 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3058
3059 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3060 {
3061 mainloop = LABEL();
3062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3063 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3064 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3065 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3067 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3068 JUMPHERE(end);
3069 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3070 }
3071 else
3072 {
3073 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3074 mainloop = LABEL();
3075 /* Continual stores does not cause data dependency. */
3076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3077 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3078 check_newlinechar(common, common->nltype, &newline, TRUE);
3079 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3080 JUMPHERE(end);
3081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3082 set_jumps(newline, LABEL());
3083 }
3084
3085 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3086 }
3087
3088 start = JUMP(SLJIT_JUMP);
3089
3090 if (newlinecheck)
3091 {
3092 newlinelabel = LABEL();
3093 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3094 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3095 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3096 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3097 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3098 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3099 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3100 #endif
3101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3102 nl = JUMP(SLJIT_JUMP);
3103 }
3104
3105 mainloop = LABEL();
3106
3107 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3108 #ifdef SUPPORT_UTF
3109 if (common->utf) readuchar = TRUE;
3110 #endif
3111 if (newlinecheck) readuchar = TRUE;
3112
3113 if (readuchar)
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3115
3116 if (newlinecheck)
3117 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3118
3119 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3121 #if defined COMPILE_PCRE8
3122 if (common->utf)
3123 {
3124 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3125 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3126 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3127 JUMPHERE(singlechar);
3128 }
3129 #elif defined COMPILE_PCRE16
3130 if (common->utf)
3131 {
3132 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3133 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3135 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3136 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3137 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3138 JUMPHERE(singlechar);
3139 }
3140 #endif /* COMPILE_PCRE[8|16] */
3141 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3142 JUMPHERE(start);
3143
3144 if (newlinecheck)
3145 {
3146 JUMPHERE(end);
3147 JUMPHERE(nl);
3148 }
3149
3150 return mainloop;
3151 }
3152
/* Limits of the prefix scan: at most MAX_N_CHARS character positions are
examined, and each position owns MAX_N_BYTES entries of the "bytes" array
(entry 0 holds the number of stored bytes, or 255 when the set is unbounded). */
3153 #define MAX_N_CHARS 16
3154 #define MAX_N_BYTES 8
3155
3156 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3157 {
3158 pcre_uint8 len = bytes[0];
3159 int i;
3160
3161 if (len == 255)
3162 return;
3163
3164 if (len == 0)
3165 {
3166 bytes[0] = 1;
3167 bytes[1] = byte;
3168 return;
3169 }
3170
3171 for (i = len; i > 0; i--)
3172 if (bytes[i] == byte)
3173 return;
3174
3175 if (len >= MAX_N_BYTES - 1)
3176 {
3177 bytes[0] = 255;
3178 return;
3179 }
3180
3181 len++;
3182 bytes[len] = byte;
3183 bytes[0] = len;
3184 }
3185
/* Collects information about the characters a match has to start with.

The compiled pattern is walked from cc and one slot of each output array is
filled for every leading character position that can be described:
  chars - two entries per position: the first is the character value with
          every uncertain bit set, the second is the mask of those uncertain
          bits. A position that can hold any character gets the full
          character mask in both entries.
  bytes - MAX_N_BYTES entries per position, maintained by add_prefix_byte()
          from the low 8 bits of each code unit; 255 in the first entry means
          the set is unbounded.
max_chars is the number of positions which may still be filled. The return
value is the number of positions consumed by this invocation. Optional items
and alternatives are handled by recursing on the same output slots; the value
returned by the recursive call then becomes the new max_chars limit. */
3186 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3187 {
3188 /* Recursive function, which scans prefix literals. */
3189 BOOL last, any, caseless;
3190 int len, repeat, len_save, consumed = 0;
3191 pcre_uint32 chr, mask;
3192 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the code units of the other case of the current character. */
3193 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3194 pcre_uchar othercase[8];
3195 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3196 pcre_uchar othercase[2];
3197 #else
3198 pcre_uchar othercase[1];
3199 #endif
3200
3201 repeat = 1;
3202 while (TRUE)
3203 {
/* Per-opcode state: "last" stops the scan after this item, "any" marks a
position that accepts any character, "caseless" requests other case handling. */
3204 last = TRUE;
3205 any = FALSE;
3206 caseless = FALSE;
3207 switch (*cc)
3208 {
/* A literal character; the caseless form falls through to the common code. */
3209 case OP_CHARI:
3210 caseless = TRUE;
3211 case OP_CHAR:
3212 last = FALSE;
3213 cc++;
3214 break;
3215
3216 case OP_SOD:
3217 case OP_SOM:
3218 case OP_SET_SOM:
3219 case OP_NOT_WORD_BOUNDARY:
3220 case OP_WORD_BOUNDARY:
3221 case OP_EODN:
3222 case OP_EOD:
3223 case OP_CIRC:
3224 case OP_CIRCM:
3225 case OP_DOLL:
3226 case OP_DOLLM:
3227 /* Zero width assertions. */
3228 cc++;
3229 continue;
3230
/* Assertion groups are stepped over as a whole. NOTE(review): bracketend()
is defined elsewhere; presumably it returns the address following the group. */
3231 case OP_ASSERT:
3232 case OP_ASSERT_NOT:
3233 case OP_ASSERTBACK:
3234 case OP_ASSERTBACK_NOT:
3235 cc = bracketend(cc);
3236 continue;
3237
/* One-or-more repeats: the character is recorded once and, since "last"
remains TRUE, the scan ends after it. */
3238 case OP_PLUSI:
3239 case OP_MINPLUSI:
3240 case OP_POSPLUSI:
3241 caseless = TRUE;
3242 case OP_PLUS:
3243 case OP_MINPLUS:
3244 case OP_POSPLUS:
3245 cc++;
3246 break;
3247
/* Exact repeat: the character is recorded "repeat" times. */
3248 case OP_EXACTI:
3249 caseless = TRUE;
3250 case OP_EXACT:
3251 repeat = GET2(cc, 1);
3252 last = FALSE;
3253 cc += 1 + IMM2_SIZE;
3254 break;
3255
/* Optional character: the code following the character is scanned first
into the same slots (the case where the character is absent), then the
character itself is merged in by the common code below. */
3256 case OP_QUERYI:
3257 case OP_MINQUERYI:
3258 case OP_POSQUERYI:
3259 caseless = TRUE;
3260 case OP_QUERY:
3261 case OP_MINQUERY:
3262 case OP_POSQUERY:
3263 len = 1;
3264 cc++;
3265 #ifdef SUPPORT_UTF
3266 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3267 #endif
3268 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3269 if (max_chars == 0)
3270 return consumed;
3271 last = FALSE;
3272 break;
3273
/* End of a group: continue with whatever follows it. */
3274 case OP_KET:
3275 cc += 1 + LINK_SIZE;
3276 continue;
3277
/* End of the current alternative: follow the link stored after the opcode. */
3278 case OP_ALT:
3279 cc += GET(cc, 1);
3280 continue;
3281
/* Group start: every alternative after the first one is scanned recursively
into the same slots, then the loop continues inside the first alternative. */
3282 case OP_ONCE:
3283 case OP_ONCE_NC:
3284 case OP_BRA:
3285 case OP_BRAPOS:
3286 case OP_CBRA:
3287 case OP_CBRAPOS:
3288 alternative = cc + GET(cc, 1);
3289 while (*alternative == OP_ALT)
3290 {
3291 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3292 if (max_chars == 0)
3293 return consumed;
3294 alternative += GET(alternative, 1);
3295 }
3296
3297 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3298 cc += IMM2_SIZE;
3299 cc += 1 + LINK_SIZE;
3300 continue;
3301
/* Character classes and character types are recorded as "any character"
positions. In UTF mode the scan stops instead whenever the item is not known
to be restricted by the is_char7_bitset() test (8 bit) or unconditionally
(see the individual #if blocks). */
3302 case OP_CLASS:
3303 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3304 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3305 #endif
3306 any = TRUE;
3307 cc += 1 + 32 / sizeof(pcre_uchar);
3308 break;
3309
3310 case OP_NCLASS:
3311 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3312 if (common->utf) return consumed;
3313 #endif
3314 any = TRUE;
3315 cc += 1 + 32 / sizeof(pcre_uchar);
3316 break;
3317
3318 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3319 case OP_XCLASS:
3320 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3321 if (common->utf) return consumed;
3322 #endif
3323 any = TRUE;
3324 cc += GET(cc, 1);
3325 break;
3326 #endif
3327
3328 case OP_DIGIT:
3329 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3330 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3331 return consumed;
3332 #endif
3333 any = TRUE;
3334 cc++;
3335 break;
3336
3337 case OP_WHITESPACE:
3338 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3339 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3340 return consumed;
3341 #endif
3342 any = TRUE;
3343 cc++;
3344 break;
3345
3346 case OP_WORDCHAR:
3347 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3348 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3349 return consumed;
3350 #endif
3351 any = TRUE;
3352 cc++;
3353 break;
3354
/* Negated single character: the extra increment skips the opcode so that the
shared code below steps over the character operand. */
3355 case OP_NOT:
3356 case OP_NOTI:
3357 cc++;
3358 /* Fall through. */
3359 case OP_NOT_DIGIT:
3360 case OP_NOT_WHITESPACE:
3361 case OP_NOT_WORDCHAR:
3362 case OP_ANY:
3363 case OP_ALLANY:
3364 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3365 if (common->utf) return consumed;
3366 #endif
3367 any = TRUE;
3368 cc++;
3369 break;
3370
3371 #ifdef SUPPORT_UCP
3372 case OP_NOTPROP:
3373 case OP_PROP:
3374 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375 if (common->utf) return consumed;
3376 #endif
3377 any = TRUE;
3378 cc += 1 + 2;
3379 break;
3380 #endif
3381
/* Exact repeat of a character type: only the count is taken here, the type
opcode which follows is processed by the next iteration. */
3382 case OP_TYPEEXACT:
3383 repeat = GET2(cc, 1);
3384 cc += 1 + IMM2_SIZE;
3385 continue;
3386
3387 case OP_NOTEXACT:
3388 case OP_NOTEXACTI:
3389 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3390 if (common->utf) return consumed;
3391 #endif
3392 any = TRUE;
3393 repeat = GET2(cc, 1);
3394 cc += 1 + IMM2_SIZE + 1;
3395 break;
3396
/* Anything else cannot be described: the prefix ends here. */
3397 default:
3398 return consumed;
3399 }
3400
/* Fill "repeat" positions which accept any character. */
3401 if (any)
3402 {
3403 #if defined COMPILE_PCRE8
3404 mask = 0xff;
3405 #elif defined COMPILE_PCRE16
3406 mask = 0xffff;
3407 #elif defined COMPILE_PCRE32
3408 mask = 0xffffffff;
3409 #else
3410 SLJIT_ASSERT_STOP();
3411 #endif
3412
3413 do
3414 {
3415 chars[0] = mask;
3416 chars[1] = mask;
3417 bytes[0] = 255;
3418
3419 consumed++;
3420 if (--max_chars == 0)
3421 return consumed;
3422 chars += 2;
3423 bytes += MAX_N_BYTES;
3424 }
3425 while (--repeat > 0);
3426
3427 repeat = 1;
3428 continue;
3429 }
3430
/* From here on cc points to a literal character; len is its length in code
units. */
3431 len = 1;
3432 #ifdef SUPPORT_UTF
3433 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3434 #endif
3435
/* Prepare the other case in othercase[]. In UTF mode the scan is abandoned
when the encoded other case has a different length. */
3436 if (caseless && char_has_othercase(common, cc))
3437 {
3438 #ifdef SUPPORT_UTF
3439 if (common->utf)
3440 {
3441 GETCHAR(chr, cc);
3442 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3443 return consumed;
3444 }
3445 else
3446 #endif
3447 {
3448 chr = *cc;
3449 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3450 }
3451 }
3452 else
3453 caseless = FALSE;
3454
/* Record the character "repeat" times; the saved values rewind cc and len
for each repetition. */
3455 len_save = len;
3456 cc_save = cc;
3457 while (TRUE)
3458 {
3459 oc = othercase;
3460 do
3461 {
3462 chr = *cc;
3463 #ifdef COMPILE_PCRE32
3464 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3465 return consumed;
3466 #endif
3467 add_prefix_byte((pcre_uint8)chr, bytes);
3468
/* For caseless characters the bits which differ between the two cases are
uncertain. */
3469 mask = 0;
3470 if (caseless)
3471 {
3472 add_prefix_byte((pcre_uint8)*oc, bytes);
3473 mask = *cc ^ *oc;
3474 chr |= mask;
3475 }
3476
/* An untouched slot is simply set; otherwise the new value is merged with
the one stored earlier by widening the mask with every differing bit. */
3477 #ifdef COMPILE_PCRE32
3478 if (chars[0] == NOTACHAR && chars[1] == 0)
3479 #else
3480 if (chars[0] == NOTACHAR)
3481 #endif
3482 {
3483 chars[0] = chr;
3484 chars[1] = mask;
3485 }
3486 else
3487 {
3488 mask |= chars[0] ^ chr;
3489 chr |= mask;
3490 chars[0] = chr;
3491 chars[1] |= mask;
3492 }
3493
3494 len--;
3495 consumed++;
3496 if (--max_chars == 0)
3497 return consumed;
3498 chars += 2;
3499 bytes += MAX_N_BYTES;
3500 cc++;
3501 oc++;
3502 }
3503 while (len > 0);
3504
3505 if (--repeat == 0)
3506 break;
3507
3508 len = len_save;
3509 cc = cc_save;
3510 }
3511
3512 repeat = 1;
3513 if (last)
3514 return consumed;
3515 }
3516 }
3517
/* Emits a fast-forward loop driven by the literal prefix of the pattern.

scan_prefix() is asked for up to MAX_N_CHARS leading positions. From its
result two independent accelerators may be generated:
  - a 256-entry byte table (stored in common->read_only_data) which tells how
    far STR_PTR can be advanced, built from a run of at least three
    consecutive positions whose byte sets are bounded;
  - up to three direct character comparisons at positions whose mask has at
    most two uncertain bits.
Returns FALSE, without emitting anything, when neither is available or when
the plain first character search is the better choice; returns TRUE
otherwise. TRUE is also returned when the table allocation fails, in which
case common->read_only_data is left NULL (NOTE(review): the caller presumably
detects this - confirm). With firstline set the scan is limited by the stored
first_line_end value. */
3518 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3519 {
3520 DEFINE_COMPILER;
3521 struct sljit_label *start;
3522 struct sljit_jump *quit;
3523 pcre_uint32 chars[MAX_N_CHARS * 2];
3524 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3525 pcre_uint8 ones[MAX_N_CHARS];
3526 int offsets[3];
3527 pcre_uint32 mask;
3528 pcre_uint8 *byte_set, *byte_set_end;
3529 int i, max, from;
3530 int range_right = -1, range_len = 3 - 1;
3531 sljit_ub *update_table = NULL;
3532 BOOL in_range;
3533
3534 /* This holds even if both are NULL. */
3535 SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3536
/* Mark every slot as unset before the scan. */
3537 for (i = 0; i < MAX_N_CHARS; i++)
3538 {
3539 chars[i << 1] = NOTACHAR;
3540 chars[(i << 1) + 1] = 0;
3541 bytes[i * MAX_N_BYTES] = 0;
3542 }
3543
3544 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3545
3546 if (max <= 1)
3547 return FALSE;
3548
/* ones[i] is the sum of the ones_in_half_byte[] entries of every 4 bit group
of the mask of position i (presumably the number of uncertain bits; the table
is defined elsewhere). */
3549 for (i = 0; i < max; i++)
3550 {
3551 mask = chars[(i << 1) + 1];
3552 ones[i] = ones_in_half_byte[mask & 0xf];
3553 mask >>= 4;
3554 while (mask != 0)
3555 {
3556 ones[i] += ones_in_half_byte[mask & 0xf];
3557 mask >>= 4;
3558 }
3559 }
3560
/* Look for the longest run of consecutive positions with a bounded byte set
(first entry below 255). A run is accepted only if it is longer than the best
one so far (initially 2) and its last position has at most four candidates.
The extra iteration with i == max closes a run reaching the last position. */
3561 in_range = FALSE;
3562 from = 0; /* Prevent compiler "uninitialized" warning */
3563 for (i = 0; i <= max; i++)
3564 {
3565 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3566 {
3567 range_len = i - from;
3568 range_right = i - 1;
3569 }
3570
3571 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3572 {
3573 if (!in_range)
3574 {
3575 in_range = TRUE;
3576 from = i;
3577 }
3578 }
3579 else if (in_range)
3580 in_range = FALSE;
3581 }
3582
3583 if (range_right >= 0)
3584 {
3585 /* Since no data is consumed (see the assert in the beginning
3586 of this function), this space can be reallocated. */
3587 if (common->read_only_data)
3588 SLJIT_FREE(common->read_only_data);
3589
3590 common->read_only_data_size += 256;
3591 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3592 if (common->read_only_data == NULL)
3593 return TRUE;
3594
/* The table occupies the first 256 bytes of the block. Every entry starts as
the byte length of the whole run and is lowered to the byte distance between
range_right and the rightmost run position where that byte value may occur. */
3595 update_table = (sljit_ub *)common->read_only_data;
3596 common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3597 memset(update_table, IN_UCHARS(range_len), 256);
3598
3599 for (i = 0; i < range_len; i++)
3600 {
3601 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3602 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3603 byte_set_end = byte_set + byte_set[0];
3604 byte_set++;
3605 while (byte_set <= byte_set_end)
3606 {
3607 if (update_table[*byte_set] > IN_UCHARS(i))
3608 update_table[*byte_set] = IN_UCHARS(i);
3609 byte_set++;
3610 }
3611 }
3612 }
3613
/* offsets[0] is the first position whose mask has at most two bits set. */
3614 offsets[0] = -1;
3615 /* Scan forward. */
3616 for (i = 0; i < max; i++)
3617 if (ones[i] <= 2) {
3618 offsets[0] = i;
3619 break;
3620 }
3621
3622 if (offsets[0] < 0 && range_right < 0)
3623 return FALSE;
3624
3625 if (offsets[0] >= 0)
3626 {
/* offsets[1] is the last such position after offsets[0], excluding the
position already covered by the table lookup. */
3627 /* Scan backward. */
3628 offsets[1] = -1;
3629 for (i = max - 1; i > offsets[0]; i--)
3630 if (ones[i] <= 2 && i != range_right)
3631 {
3632 offsets[1] = i;
3633 break;
3634 }
3635
3636 /* This case is handled better by fast_forward_first_char. */
3637 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3638 return FALSE;
3639
3640 offsets[2] = -1;
3641 /* We only search for a middle character if there is no range check. */
3642 if (offsets[1] >= 0 && range_right == -1)
3643 {
3644 /* Scan from middle. */
3645 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3646 if (ones[i] <= 2)
3647 {
3648 offsets[2] = i;
3649 break;
3650 }
3651
3652 if (offsets[2] == -1)
3653 {
3654 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3655 if (ones[i] <= 2)
3656 {
3657 offsets[2] = i;
3658 break;
3659 }
3660 }
3661 }
3662
3663 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3664 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3665
/* The beginning of chars[] is reused for the selected value/mask pairs:
[0],[1] belong to offsets[0], [2],[3] to offsets[2] and [4],[5] to offsets[1]. */
3666 chars[0] = chars[offsets[0] << 1];
3667 chars[1] = chars[(offsets[0] << 1) + 1];
3668 if (offsets[2] >= 0)
3669 {
3670 chars[2] = chars[offsets[2] << 1];
3671 chars[3] = chars[(offsets[2] << 1) + 1];
3672 }
3673 if (offsets[1] >= 0)
3674 {
3675 chars[4] = chars[offsets[1] << 1];
3676 chars[5] = chars[(offsets[1] << 1) + 1];
3677 }
3678 }
3679
/* From here on code is emitted. STR_END is lowered by max - 1 characters for
the duration of the loop; with firstline the original STR_END is kept in TMP3
and the smaller of the lowered STR_END and first_line_end is used. */
3680 max -= 1;
3681 if (firstline)
3682 {
3683 SLJIT_ASSERT(common->first_line_end != 0);
3684 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3685 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3686 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3687 quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3688 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3689 JUMPHERE(quit);
3690 }
3691 else
3692 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3693
/* Except on 32 bit x86 the table address is kept in RETURN_ADDR; on 32 bit
x86 it is used as an immediate displacement instead (see below). */
3694 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3695 if (range_right >= 0)
3696 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3697 #endif
3698
3699 start = LABEL();
3700 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3701
3702 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3703
/* Table step: load one byte of the code unit at position range_right (the
second form addresses the last byte of that code unit), look up the advance
and restart the loop while the advance is not zero. */
3704 if (range_right >= 0)
3705 {
3706 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3707 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3708 #else
3709 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3710 #endif
3711
3712 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3713 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3714 #else
3715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3716 #endif
3717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3718 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3719 }
3720
/* Direct comparisons: each selected character is ORed with its mask and
compared with the stored value; any mismatch restarts the loop. STR_PTR is
advanced by one character before the comparisons (hence the "- 1" in the
offsets[2] load) and moved back once all of them succeeded. */
3721 if (offsets[0] >= 0)
3722 {
3723 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3724 if (offsets[1] >= 0)
3725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3726 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3727
3728 if (chars[1] != 0)
3729 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3730 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3731 if (offsets[2] >= 0)
3732 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3733
3734 if (offsets[1] >= 0)
3735 {
3736 if (chars[5] != 0)
3737 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3738 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3739 }
3740
3741 if (offsets[2] >= 0)
3742 {
3743 if (chars[3] != 0)
3744 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3745 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3746 }
3747 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3748 }
3749
3750 JUMPHERE(quit);
3751
/* Restore STR_END. With firstline and a table step, STR_PTR is also clamped
to first_line_end when it went beyond it. */
3752 if (firstline)
3753 {
3754 if (range_right >= 0)
3755 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3756 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3757 if (range_right >= 0)
3758 {
3759 quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3760 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3761 JUMPHERE(quit);
3762 }
3763 }
3764 else
3765 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3766 return TRUE;
3767 }
3768
3769 #undef MAX_N_CHARS
3770 #undef MAX_N_BYTES
3771
3772 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3773 {
3774 DEFINE_COMPILER;
3775 struct sljit_label *start;
3776 struct sljit_jump *quit;
3777 struct sljit_jump *found;
3778 pcre_uchar oc, bit;
3779
3780 if (firstline)
3781 {
3782 SLJIT_ASSERT(common->first_line_end != 0);
3783 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3784 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3785 }
3786
3787 start = LABEL();
3788 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3789 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3790
3791 oc = first_char;
3792 if (caseless)
3793 {
3794 oc = TABLE_GET(first_char, common->fcc, first_char);
3795 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3796 if (first_char > 127 && common->utf)
3797 oc = UCD_OTHERCASE(first_char);
3798 #endif
3799 }
3800 if (first_char == oc)
3801 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3802 else
3803 {
3804 bit = first_char ^ oc;
3805 if (is_powerof2(bit))
3806 {
3807 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3808 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3809 }
3810 else
3811 {
3812 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3813 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3814 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3815 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3816 found = JUMP(SLJIT_C_NOT_ZERO);
3817 }
3818 }
3819
3820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3821 JUMPTO(SLJIT_JUMP, start);
3822 JUMPHERE(found);
3823 JUMPHERE(quit);
3824
3825 if (firstline)
3826 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3827 }
3828
/* Emits code which moves STR_PTR forward to a position that follows a
newline. Nothing is skipped when STR_PTR is not above the "str" field of the
arguments. With firstline set STR_END is replaced by the stored
first_line_end value for the duration of the search and restored from TMP3
afterwards. In the general (non fixed two character) case the loop label is
also stored in common->ff_newline_shortcut. */
3829 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3830 {
3831 DEFINE_COMPILER;
3832 struct sljit_label *loop;
3833 struct sljit_jump *lastchar;
3834 struct sljit_jump *firstchar;
3835 struct sljit_jump *quit;
3836 struct sljit_jump *foundcr = NULL;
3837 struct sljit_jump *notfoundnl;
3838 jump_list *newline = NULL;
3839
3840 if (firstline)
3841 {
3842 SLJIT_ASSERT(common->first_line_end != 0);
3843 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3844 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3845 }
3846
/* Fixed newline made of two characters (high byte and low byte of
common->newline). */
3847 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3848 {
3849 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3850 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3851 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3853 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3854
/* Step back one character when at least two characters precede STR_PTR, so
that the first loop iteration tests the two characters just before the
current position. */
3855 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3856 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3857 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3858 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3859 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3860 #endif
3861 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3862
/* Advance until the two characters before STR_PTR form the newline, or
STR_END is reached. */
3863 loop = LABEL();
3864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3865 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3866 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3867 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3868 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3869 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3870
3871 JUMPHERE(quit);
3872 JUMPHERE(firstchar);
3873 JUMPHERE(lastchar);
3874
3875 if (firstline)
3876 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3877 return;
3878 }
3879
/* All other newline types: go back one character, then read characters until
one of them is a newline. */
3880 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3881 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3882 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3883 skip_char_back(common);
3884
3885 loop = LABEL();
3886 common->ff_newline_shortcut = loop;
3887
3888 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3889 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3890 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3891 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* NOTE(review): check_newlinechar() is defined elsewhere; with FALSE as its
last argument the collected jumps are presumably the "not a newline" exits,
which is why they are bound to the loop label - confirm. */
3892 check_newlinechar(common, common->nltype, &newline, FALSE);
3893 set_jumps(newline, loop);
3894
/* After a CR, a directly following NL is skipped as well. */
3895 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3896 {
3897 quit = JUMP(SLJIT_JUMP);
3898 JUMPHERE(foundcr);
3899 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3900 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3901 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3902 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3905 #endif
3906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3907 JUMPHERE(notfoundnl);
3908 JUMPHERE(quit);
3909 }
3910 JUMPHERE(lastchar);
3911 JUMPHERE(firstchar);
3912
3913 if (firstline)
3914 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3915 }
3916
3917 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3918
/* Emits a loop which moves STR_PTR forward until the character it points to
is accepted by the start_bits bitmap (one bit per character value, eight
values per byte) or STR_END is reached. check_class_ranges() is tried first;
only when it returns FALSE is an explicit bitmap test generated, and in that
case the address of start_bits is embedded into the generated code. With
firstline set STR_END is replaced by the stored first_line_end value and
restored afterwards; RETURN_ADDR holds the saved STR_END here because TMP3 is
used for the character copy in UTF mode. */
3919 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3920 {
3921 DEFINE_COMPILER;
3922 struct sljit_label *start;
3923 struct sljit_jump *quit;
3924 struct sljit_jump *found = NULL;
3925 jump_list *matches = NULL;
3926 #ifndef COMPILE_PCRE8
3927 struct sljit_jump *jump;
3928 #endif
3929
3930 if (firstline)
3931 {
3932 SLJIT_ASSERT(common->first_line_end != 0);
3933 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3934 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3935 }
3936
3937 start = LABEL();
3938 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3939 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode the code unit is kept in TMP3, since TMP1 is overwritten by
the test below and is needed again to compute the character length. */
3940 #ifdef SUPPORT_UTF
3941 if (common->utf)
3942 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3943 #endif
3944
3945 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3946 {
/* Values above 255 are tested as 255 in the 16 and 32 bit libraries. */
3947 #ifndef COMPILE_PCRE8
3948 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3949 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3950 JUMPHERE(jump);
3951 #endif
/* Bitmap test: byte index is the value divided by eight, bit index is the
low three bits. */
3952 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3953 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3954 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3955 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3956 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3957 found = JUMP(SLJIT_C_NOT_ZERO);
3958 }
3959
/* Not accepted: advance by one character (a whole UTF-8 or UTF-16 sequence
in UTF mode) and try again. */
3960 #ifdef SUPPORT_UTF
3961 if (common->utf)
3962 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3963 #endif
3964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3965 #ifdef SUPPORT_UTF
3966 #if defined COMPILE_PCRE8
3967 if (common->utf)
3968 {
3969 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3970 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3971 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3972 }
3973 #elif defined COMPILE_PCRE16
3974 if (common->utf)
3975 {
3976 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3977 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3978 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3979 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3980 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3981 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3982 }
3983 #endif /* COMPILE_PCRE[8|16] */
3984 #endif /* SUPPORT_UTF */
3985 JUMPTO(SLJIT_JUMP, start);
/* Exit points: an accepted character was found, or STR_END was reached. */
3986 if (found != NULL)
3987 JUMPHERE(found);
3988 if (matches != NULL)
3989 set_jumps(matches, LABEL());
3990 JUMPHERE(quit);
3991
3992 if (firstline)
3993 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3994 }
3995
3996 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3997 {
3998 DEFINE_COMPILER;
3999 struct sljit_label *loop;
4000 struct sljit_jump *toolong;
4001 struct sljit_jump *alreadyfound;
4002 struct sljit_jump *found;
4003 struct sljit_jump *foundoc = NULL;
4004 struct sljit_jump *notfound;
4005 pcre_uint32 oc, bit;
4006
4007 SLJIT_ASSERT(common->req_char_ptr != 0);
4008 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
4009 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4010 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
4011 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
4012
4013 if (has_firstchar)
4014 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4015 else
4016 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4017
4018 loop = LABEL();
4019 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4020
4021 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4022 oc = req_char;
4023 if (caseless)
4024 {
4025 oc = TABLE_GET(req_char, common->fcc, req_char);
4026 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4027 if (req_char > 127 && common->utf)
4028 oc = UCD_OTHERCASE(req_char);
4029 #endif
4030 }
4031 if (req_char == oc)
4032 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4033 else
4034 {
4035 bit = req_char ^ oc;
4036 if (is_powerof2(bit))
4037 {
4038 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4039 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4040 }
4041 else
4042 {
4043 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4044 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4045 }
4046 }
4047 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4048 JUMPTO(SLJIT_JUMP, loop);
4049
4050 JUMPHERE(found);
4051 if (foundoc)
4052 JUMPHERE(foundoc);
4053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
4054 JUMPHERE(alreadyfound);
4055 JUMPHERE(toolong);
4056 return notfound;
4057 }
4058
/* Emits a fast-call subroutine (return address kept in RETURN_ADDR) which
walks the records starting at STACK_TOP and writes saved values back into the
local area. The first word of each record selects its kind:
  > 0: the word is an offset from the local base; the next two words are
       copied to that location and the one following it (3 word record);
  = 0: end of the records, the subroutine returns;
  < 0: the negated word is an offset from the local base; the next word is
       copied to that location (2 word record).
NOTE(review): the meaning of the individual records is defined by the code
that pushes them, which is not visible here. */
4059 static void do_revertframes(compiler_common *common)
4060 {
4061 DEFINE_COMPILER;
4062 struct sljit_jump *jump;
4063 struct sljit_label *mainloop;
4064
4065 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 walks the records, TMP3 holds the base address of the locals. */
4066 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4067 GET_LOCAL_BASE(TMP3, 0, 0);
4068
4069 /* Drop frames until we reach STACK_TOP. */
4070 mainloop = LABEL();
4071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4072 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4073 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
4074
/* Positive selector: restore two consecutive words. */
4075 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4076 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4077 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4078 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4079 JUMPTO(SLJIT_JUMP, mainloop);
4080
/* Zero or negative selector. The second conditional jump relies on the flags
of the comparison above still being valid. */
4081 JUMPHERE(jump);
4082 jump = JUMP(SLJIT_C_SIG_LESS);
4083 /* End of dropping frames. */
4084 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4085
/* Negative selector: restore a single word. */
4086 JUMPHERE(jump);
4087 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4088 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4089 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4090 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4091 JUMPTO(SLJIT_JUMP, mainloop);
4092 }
4093
/* Emits a fast-call subroutine (return address kept in LOCALS0) which tests
whether STR_PTR is at a word boundary. The "word character" property of the
previous character is computed into LOCALS1 (0 when STR_PTR is not above the
"begin" field of the arguments) and that of the current character into TMP2
(0 when the end check jumps out). The two values are XORed with the equal
flag requested just before returning, so the result is delivered in the
flags (NOTE(review): callers presumably branch on them - confirm).
With use_ucp a character is a word character when it is an underscore or when
its type, obtained through the getucd subroutine, is in the ucp_Ll..ucp_Lu or
ucp_Nd..ucp_No range; otherwise the ctype_word bit of the ctypes table is
used and characters above 255 are not word characters. */
4094 static void check_wordboundary(compiler_common *common)
4095 {
4096 DEFINE_COMPILER;
4097 struct sljit_jump *skipread;
4098 jump_list *skipread_list = NULL;
4099 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4100 struct sljit_jump *jump;
4101 #endif
4102
/* The shift by 4 below depends on this value. */
4103 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4104
4105 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4106 /* Get type of the previous char, and put it to LOCALS1. */
4107 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4108 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
4110 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; the read leaves STR_PTR at its
original position. */
4111 skip_char_back(common);
4112 check_start_used_ptr(common);
4113 read_char(common);
4114
4115 /* Testing char type. */
4116 #ifdef SUPPORT_UCP
4117 if (common->use_ucp)
4118 {
/* TMP2 is preset to 1 for the underscore shortcut. */
4119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4120 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4121 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks on the character type. */
4122 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4123 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4125 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4126 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4127 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4128 JUMPHERE(jump);
4129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
4130 }
4131 else
4132 #endif
4133 {
4134 #ifndef COMPILE_PCRE8
4135 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4136 #elif defined SUPPORT_UTF
4137 /* Here LOCALS1 has already been zeroed. */
4138 jump = NULL;
4139 if (common->utf)
4140 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4141 #endif /* COMPILE_PCRE8 */
/* Table lookup: extract the ctype_word bit as 0 or 1. */
4142 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4143 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4144 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
4146 #ifndef COMPILE_PCRE8
4147 JUMPHERE(jump);
4148 #elif defined SUPPORT_UTF
4149 if (jump != NULL)
4150 JUMPHERE(jump);
4151 #endif /* COMPILE_PCRE8 */
4152 }
4153 JUMPHERE(skipread);
4154
/* Now the current character, which is only peeked at. TMP2 stays 0 when the
end check jumps out. */
4155 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4156 check_str_end(common, &skipread_list);
4157 peek_char(common, READ_CHAR_MAX);
4158
4159 /* Testing char type. This is a code duplication. */
4160 #ifdef SUPPORT_UCP
4161 if (common->use_ucp)
4162 {
4163 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4164 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4165 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4166 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4167 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4168 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4169 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4170 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4171 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4172 JUMPHERE(jump);
4173 }
4174 else
4175 #endif
4176 {
4177 #ifndef COMPILE_PCRE8
4178 /* TMP2 may be destroyed by peek_char. */
4179 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4180 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4181 #elif defined SUPPORT_UTF
4182 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4183 jump = NULL;
4184 if (common->utf)
4185 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4186 #endif
4187 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4188 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4189 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4190 #ifndef COMPILE_PCRE8
4191 JUMPHERE(jump);
4192 #elif defined SUPPORT_UTF
4193 if (jump != NULL)
4194 JUMPHERE(jump);
4195 #endif /* COMPILE_PCRE8 */
4196 }
4197 set_jumps(skipread_list, LABEL());
4198
/* The two properties differ exactly when the XOR result is not zero. */
4199 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4200 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4201 }
4202
4203 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4204 {
4205 DEFINE_COMPILER;
4206 int ranges[MAX_RANGE_SIZE];
4207 pcre_uint8 bit, cbit, all;
4208 int i, byte, length = 0;
4209
4210 bit = bits[0] & 0x1;
4211 /* All bits will be zero or one (since bit is zero or one). */
4212 all = -bit;
4213
4214 for (i = 0; i < 256; )
4215 {
4216 byte = i >> 3;
4217 if ((i & 0x7) == 0 && bits[byte] == all)
4218 i += 8;
4219 else
4220 {
4221 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4222 if (cbit != bit)
4223 {
4224 if (length >= MAX_RANGE_SIZE)
4225 return FALSE;
4226 ranges[length] = i;
4227 length++;
4228 bit = cbit;
4229 all = -cbit;
4230 }
4231 i++;
4232 }
4233 }
4234
4235 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4236 {
4237 if (length >= MAX_RANGE_SIZE)
4238 return FALSE;
4239 ranges[length] = 256;
4240 length++;
4241 }
4242
4243 if (length < 0 || length > 4)
4244 return FALSE;
4245
4246 bit = bits[0] & 0x1;
4247 if (invert) bit ^= 0x1;
4248
4249 /* No character is accepted. */
4250 if (length == 0 && bit == 0)
4251 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4252
4253 switch(length)
4254 {
4255 case 0:
4256 /* When bit != 0, all characters are accepted. */
4257 return TRUE;
4258
4259 case 1:
4260 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4261 return TRUE;
4262
4263 case 2:
4264 if (ranges[0] + 1 != ranges[1])
4265 {
4266 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4267 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4268 }
4269 else
4270 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4271 return TRUE;
4272
4273 case 3:
4274 if (bit != 0)
4275 {
4276 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4277 if (ranges[0] + 1 != ranges[1])
4278 {
4279 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4280 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4281 }
4282 else
4283 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4284 return TRUE;
4285 }
4286
4287 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4288 if (ranges[1] + 1 != ranges[2])
4289 {
4290 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4291 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4292 }
4293 else
4294 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4295 return TRUE;
4296
4297 case 4:
4298 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4299 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4300 && is_powerof2(ranges[2] - ranges[0]))
4301 {
4302 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4303 if (ranges[2] + 1 != ranges[3])
4304 {
4305 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4306 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4307 }
4308 else
4309 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4310 return TRUE;
4311 }
4312
4313 if (bit != 0)
4314 {
4315 i = 0;
4316 if (ranges[0] + 1 != ranges[1])
4317 {
4318 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4319 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4320 i = ranges[0];
4321 }
4322 else
4323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4324
4325 if (ranges[2] + 1 != ranges[3])
4326 {
4327 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4328 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4329 }
4330 else
4331 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4332 return TRUE;
4333 }
4334
4335 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4336 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4337 if (ranges[1] + 1 != ranges[2])
4338 {
4339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4340 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4341 }
4342 else
4343 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4344 return TRUE;
4345
4346 default:
4347 SLJIT_ASSERT_STOP();
4348 return FALSE;
4349 }
4350 }
4351
4352 static void check_anynewline(compiler_common *common)
4353 {
4354 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4355 DEFINE_COMPILER;
4356
4357 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4358
4359 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4360 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4361 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4362 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4363 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4364 #ifdef COMPILE_PCRE8
4365 if (common->utf)
4366 {
4367 #endif
4368 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4369 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4370 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4371 #ifdef COMPILE_PCRE8
4372 }
4373 #endif
4374 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4375 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4376 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4377 }
4378
4379 static void check_hspace(compiler_common *common)
4380 {
4381 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4382 DEFINE_COMPILER;
4383
4384 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4385
4386 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4387 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4388 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4389 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4391 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4392 #ifdef COMPILE_PCRE8
4393 if (common->utf)
4394 {
4395 #endif
4396 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4398 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4400 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4401 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4402 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4403 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4405 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4407 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4409 #ifdef COMPILE_PCRE8
4410 }
4411 #endif
4412 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4413 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4414
4415 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4416 }
4417
4418 static void check_vspace(compiler_common *common)
4419 {
4420 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4421 DEFINE_COMPILER;
4422
4423 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4424
4425 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4426 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4427 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4428 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4429 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4430 #ifdef COMPILE_PCRE8
4431 if (common->utf)
4432 {
4433 #endif
4434 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4435 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4436 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4437 #ifdef COMPILE_PCRE8
4438 }
4439 #endif
4440 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4441 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4442
4443 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4444 }
4445
/* Working registers of the string compare subroutines below. They alias
STR_END and STACK_TOP, so do_casefulcmp and do_caselesscmp save the original
values on entry and restore them before returning. */
4446 #define CHAR1 STR_END
4447 #define CHAR2 STACK_TOP
4448
4449 static void do_casefulcmp(compiler_common *common)
4450 {
4451 DEFINE_COMPILER;
4452 struct sljit_jump *jump;
4453 struct sljit_label *label;
4454
4455 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4456 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4457 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4460 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4461
4462 label = LABEL();
4463 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4464 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4465 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4466 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4467 JUMPTO(SLJIT_C_NOT_ZERO, label);
4468
4469 JUMPHERE(jump);
4470 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4471 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4472 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4473 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4474 }
4475
/* Register which holds common->lcc inside do_caselesscmp. It aliases
STACK_LIMIT, whose value is kept in TMP3 while the comparison runs. */
4476 #define LCC_TABLE STACK_LIMIT
4477
4478 static void do_caselesscmp(compiler_common *common)
4479 {
4480 DEFINE_COMPILER;
4481 struct sljit_jump *jump;
4482 struct sljit_label *label;
4483
4484 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4485 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4486
4487 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4488 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4490 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4491 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4492 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4493
4494 label = LABEL();
4495 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4496 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4497 #ifndef COMPILE_PCRE8
4498 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4499 #endif
4500 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4501 #ifndef COMPILE_PCRE8
4502 JUMPHERE(jump);
4503 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4504 #endif
4505 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4506 #ifndef COMPILE_PCRE8
4507 JUMPHERE(jump);
4508 #endif
4509 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4510 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4511 JUMPTO(SLJIT_C_NOT_ZERO, label);
4512
4513 JUMPHERE(jump);
4514 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4515 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4516 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4517 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4518 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4519 }
4520
4521 #undef LCC_TABLE
4522 #undef CHAR1
4523 #undef CHAR2
4524
#if defined SUPPORT_UTF && defined SUPPORT_UCP

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* Caseless comparison of two UTF strings, implemented in C because it would
be inefficient to do at the JIT level. The first string is [src1, end1), the
second one starts at args->uchar_ptr and must not run past args->end.

Returns the position reached in the second string when every character of the
first string was matched, NULL when two characters differ, and
(pcre_uchar*)1 when the second string ends before the first one. */
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *prop;
const pcre_uint32 *set;
pcre_uint32 c1, c2;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  prop = GET_UCD(c2);

  /* Same character, or the simple other case of c2. */
  if (c1 == c2 || c1 == c2 + prop->other_case)
    continue;

  /* Otherwise c1 must be a member of the caseless set of c2. The loop
  skips the smaller entries and stops at the first entry which is not
  smaller than c1. */
  set = PRIV(ucd_caseless_sets) + prop->caseset;
  while (c1 > *set)
    set++;
  if (c1 != *set)
    return NULL;
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4557
/* Emits the code which compares the code units of one pattern character,
starting at cc, with the subject. A mismatch jumps to backtracks. The subject
is read relative to STR_PTR at offset -context->length, and context->length is
decreased by IN_UCHARS(1) for every unit which has been processed. In UTF mode
all units of the character are processed (1 + GET_EXTRALEN(*cc) units). When
caseless is set and the character has an other case, the differing bit
returned by char_get_othercase_bit() is set in the subject data before the
comparison. The state which is carried between calls (source register,
buffered units) is kept in context. Returns cc advanced past the processed
units. */
4558 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4559 compare_context* context, jump_list **backtracks)
4560 {
4561 DEFINE_COMPILER;
4562 unsigned int othercasebit = 0;
4563 pcre_uchar *othercasechar = NULL;
4564 #ifdef SUPPORT_UTF
4565 int utflength;
4566 #endif
4567
4568 if (caseless && char_has_othercase(common, cc))
4569 {
4570 othercasebit = char_get_othercase_bit(common, cc);
4571 SLJIT_ASSERT(othercasebit);
4572 /* Extracting bit difference info. */
4573 #if defined COMPILE_PCRE8
/* The bits above the lowest eight select the unit which differs, the lowest
eight bits are the mask of the differing bit. */
4574 othercasechar = cc + (othercasebit >> 8);
4575 othercasebit &= 0xff;
4576 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4577 /* Note that this code only handles characters in the BMP. If there
4578 ever are characters outside the BMP whose othercase differs in only one
4579 bit from itself (there currently are none), this code will need to be
4580 revised for COMPILE_PCRE32. */
/* The bits from bit 9 select the unit which differs. When bit 8 is set, the
mask in the lowest eight bits applies to the upper byte of that unit. */
4581 othercasechar = cc + (othercasebit >> 9);
4582 if ((othercasebit & 0x100) != 0)
4583 othercasebit = (othercasebit & 0xff) << 8;
4584 else
4585 othercasebit &= 0xff;
4586 #endif /* COMPILE_PCRE[8|16|32] */
4587 }
4588
/* First call for this context: the first chunk of the subject is loaded into
TMP1, and the next load will use TMP2. */
4589 if (context->sourcereg == -1)
4590 {
4591 #if defined COMPILE_PCRE8
4592 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4593 if (context->length >= 4)
4594 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4595 else if (context->length >= 2)
4596 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4597 else
4598 #endif
4599 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4600 #elif defined COMPILE_PCRE16
4601 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4602 if (context->length >= 4)
4603 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4604 else
4605 #endif
4606 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4607 #elif defined COMPILE_PCRE32
4608 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4609 #endif /* COMPILE_PCRE[8|16|32] */
4610 context->sourcereg = TMP2;
4611 }
4612
/* Without UTF support exactly one unit is processed; with it, the loop below
runs once for every unit of the character. */
4613 #ifdef SUPPORT_UTF
4614 utflength = 1;
4615 if (common->utf && HAS_EXTRALEN(*cc))
4616 utflength += GET_EXTRALEN(*cc);
4617
4618 do
4619 {
4620 #endif
4621
4622 context->length -= IN_UCHARS(1);
4623 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4624
4625 /* Unaligned read is supported. */
/* The expected unit and its case bit mask are buffered in context, so that
several units can be checked by a single wide comparison. */
4626 if (othercasebit != 0 && othercasechar == cc)
4627 {
4628 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4629 context->oc.asuchars[context->ucharptr] = othercasebit;
4630 }
4631 else
4632 {
4633 context->c.asuchars[context->ucharptr] = *cc;
4634 context->oc.asuchars[context->ucharptr] = 0;
4635 }
4636 context->ucharptr++;
4637
/* The buffer is flushed when it is full, when the sequence ends, or (8 bit
only) when two units are buffered and a single unit remains. */
4638 #if defined COMPILE_PCRE8
4639 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4640 #else
4641 if (context->ucharptr >= 2 || context->length == 0)
4642 #endif
4643 {
/* Load the next chunk of the subject (if any) before the comparison. */
4644 if (context->length >= 4)
4645 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4646 else if (context->length >= 2)
4647 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4648 #if defined COMPILE_PCRE8
4649 else if (context->length >= 1)
4650 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4651 #endif /* COMPILE_PCRE8 */
/* Switch to the other register: it holds the data which was loaded earlier
and is compared now. */
4652 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4653
/* The number of buffered units selects the width of the comparison. */
4654 switch(context->ucharptr)
4655 {
4656 case 4 / sizeof(pcre_uchar):
4657 if (context->oc.asint != 0)
4658 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4660 break;
4661
4662 case 2 / sizeof(pcre_uchar):
4663 if (context->oc.asushort != 0)
4664 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4666 break;
4667
4668 #ifdef COMPILE_PCRE8
4669 case 1:
4670 if (context->oc.asbyte != 0)
4671 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4672 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4673 break;
4674 #endif
4675
4676 default:
4677 SLJIT_ASSERT_STOP();
4678 break;
4679 }
4680 context->ucharptr = 0;
4681 }
4682
4683 #else
4684
4685 /* Unaligned read is unsupported or in 32 bit mode. */
/* Units are compared one by one: the next unit (if any) is loaded first, then
the other register, which holds the current unit, is compared. */
4686 if (context->length >= 1)
4687 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4688
4689 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4690
4691 if (othercasebit != 0 && othercasechar == cc)
4692 {
4693 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4694 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4695 }
4696 else
4697 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4698
4699 #endif
4700
4701 cc++;
4702 #ifdef SUPPORT_UTF
4703 utflength--;
4704 }
4705 while (utflength > 0);
4706 #endif
4707
4708 return cc;
4709 }
4710
4711 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4712
/* Makes typereg hold the character type minus "value". The amount which is
currently subtracted is tracked in typeoffset, so only the difference is
added or subtracted, and nothing is emitted when the offset is unchanged.
Expects typereg, typeoffset and the variables needed by OP2 to be in scope.
The body is wrapped in do / while (0), so the macro is a single statement
and is safe after an unbraced if / else. It must be followed by a semicolon.
The argument is evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4722
/* Makes TMP1 hold the character minus "value". The amount which is currently
subtracted is tracked in charoffset, so only the difference is added or
subtracted, and nothing is emitted when the offset is unchanged. Expects
charoffset and the variables needed by OP2 to be in scope. The body is
wrapped in do / while (0), so the macro is a single statement and is safe
after an unbraced if / else. It must be followed by a semicolon. The argument
is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4732
4733 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4734 {
4735 DEFINE_COMPILER;
4736 jump_list *found = NULL;
4737 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4738 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4739 struct sljit_jump *jump = NULL;
4740 pcre_uchar *ccbegin;
4741 int compares, invertcmp, numberofcmps;
4742 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4743 BOOL utf = common->utf;
4744 #endif
4745
4746 #ifdef SUPPORT_UCP
4747 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4748 BOOL charsaved = FALSE;
4749 int typereg = TMP1, scriptreg = TMP1;
4750 const pcre_uint32 *other_cases;
4751 sljit_uw typeoffset;
4752 #endif
4753
4754 /* Scanning the necessary info. */
4755 cc++;
4756 ccbegin = cc;
4757 compares = 0;
4758 if (cc[-1] & XCL_MAP)
4759 {
4760 min = 0;
4761 cc += 32 / sizeof(pcre_uchar);
4762 }
4763
4764 while (*cc != XCL_END)
4765 {
4766 compares++;
4767 if (*cc == XCL_SINGLE)
4768 {
4769 cc ++;
4770 GETCHARINCTEST(c, cc);
4771 if (c > max) max = c;
4772 if (c < min) min = c;
4773 #ifdef SUPPORT_UCP
4774 needschar = TRUE;
4775 #endif
4776 }
4777 else if (*cc == XCL_RANGE)
4778 {
4779 cc ++;
4780 GETCHARINCTEST(c, cc);
4781 if (c < min) min = c;
4782 GETCHARINCTEST(c, cc);
4783 if (c > max) max = c;
4784 #ifdef SUPPORT_UCP
4785 needschar = TRUE;
4786 #endif
4787 }
4788 #ifdef SUPPORT_UCP
4789 else
4790 {
4791 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4792 cc++;
4793 if (*cc == PT_CLIST)
4794 {
4795 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4796 while (*other_cases != NOTACHAR)
4797 {
4798 if (*other_cases > max) max = *other_cases;
4799 if (*other_cases < min) min = *other_cases;
4800 other_cases++;
4801 }
4802 }
4803 else
4804 {
4805 max = READ_CHAR_MAX;
4806 min = 0;
4807 }
4808
4809 switch(*cc)
4810 {
4811 case PT_ANY:
4812 break;
4813
4814 case PT_LAMP:
4815 case PT_GC:
4816 case PT_PC:
4817 case PT_ALNUM:
4818 needstype = TRUE;
4819 break;
4820
4821 case PT_SC:
4822 needsscript = TRUE;
4823 break;
4824
4825 case PT_SPACE:
4826 case PT_PXSPACE:
4827 case PT_WORD:
4828 case PT_PXGRAPH:
4829 case PT_PXPRINT:
4830 case PT_PXPUNCT:
4831 needstype = TRUE;
4832 needschar = TRUE;
4833 break;
4834
4835 case PT_CLIST:
4836 case PT_UCNC:
4837 needschar = TRUE;
4838 break;
4839
4840 default:
4841 SLJIT_ASSERT_STOP();
4842 break;
4843 }
4844 cc += 2;
4845 }
4846 #endif
4847 }
4848
4849 /* We are not necessarily in UTF mode, even in 8 bit mode. */
4850 cc = ccbegin;
4851 detect_partial_match(common, backtracks);
4852 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4853
4854 if ((cc[-1] & XCL_HASPROP) == 0)
4855 {
4856 if ((cc[-1] & XCL_MAP) != 0)
4857 {
4858 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4859 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4860 {
4861 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4862 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4863 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4864 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4865 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4866 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4867 }
4868
4869 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4870 JUMPHERE(jump);
4871
4872 cc += 32 / sizeof(pcre_uchar);
4873 }
4874 else
4875 {
4876 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4877 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4878 }
4879 }
4880 else if ((cc[-1] & XCL_MAP) != 0)
4881 {
4882 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4883 #ifdef SUPPORT_UCP
4884 charsaved = TRUE;
4885 #endif
4886 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4887 {
4888 #ifdef COMPILE_PCRE8
4889 SLJIT_ASSERT(common->utf);
4890 #endif
4891 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4892
4893 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4894 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4895 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4896 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4897 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4898 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4899
4900 JUMPHERE(jump);
4901 }
4902
4903 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4904 cc += 32 / sizeof(pcre_uchar);
4905 }
4906
4907 #ifdef SUPPORT_UCP
4908 /* Simple register allocation. TMP1 is preferred if possible. */
4909 if (needstype || needsscript)
4910 {
4911 if (needschar && !charsaved)
4912 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4913 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4914 if (needschar)
4915 {
4916 if (needstype)
4917 {
4918 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4919 typereg = RETURN_ADDR;
4920 }
4921
4922 if (needsscript)
4923 scriptreg = TMP3;
4924 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4925 }
4926 else if (needstype && needsscript)
4927 scriptreg = TMP3;
4928 /* In all other cases only one of them was specified, and that can go to TMP1. */
4929
4930 if (needsscript)
4931 {
4932 if (scriptreg == TMP1)
4933 {
4934 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4935 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4936 }
4937 else
4938 {
4939 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4940 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4941 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4942 }
4943 }
4944 }
4945 #endif
4946
4947 /* Generating code. */
4948 charoffset = 0;
4949 numberofcmps = 0;
4950 #ifdef SUPPORT_UCP
4951 typeoffset = 0;
4952 #endif
4953
4954 while (*cc != XCL_END)
4955 {
4956 compares--;
4957 invertcmp = (compares == 0 && list != backtracks);
4958 jump = NULL;
4959
4960 if (*cc == XCL_SINGLE)
4961 {
4962 cc ++;
4963 GETCHARINCTEST(c, cc);
4964
4965 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4966 {
4967 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4968 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4969 numberofcmps++;
4970 }
4971 else if (numberofcmps > 0)
4972 {
4973 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4974 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4975 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4976 numberofcmps = 0;
4977 }
4978 else
4979 {
4980 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4981 numberofcmps = 0;
4982 }
4983 }
4984 else if (*cc == XCL_RANGE)
4985 {
4986 cc ++;
4987 GETCHARINCTEST(c, cc);
4988 SET_CHAR_OFFSET(c);
4989 GETCHARINCTEST(c, cc);
4990
4991 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4992 {
4993 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4994 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4995 numberofcmps++;
4996 }
4997 else if (numberofcmps > 0)
4998 {
4999 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5000 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5001 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5002 numberofcmps = 0;
5003 }
5004 else
5005 {
5006 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5007 numberofcmps = 0;
5008 }
5009 }
5010 #ifdef SUPPORT_UCP
5011 else
5012 {
5013 if (*cc == XCL_NOTPROP)
5014 invertcmp ^= 0x1;
5015 cc++;
5016 switch(*cc)
5017 {
5018 case PT_ANY:
5019 if (list != backtracks)
5020 {
5021 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5022 continue;
5023 }
5024 else if (cc[-1] == XCL_NOTPROP)
5025 continue;
5026 jump = JUMP(SLJIT_JUMP);
5027 break;
5028
5029 case PT_LAMP:
5030 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5031 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5032 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5033 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5034 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5035 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5036 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5037 break;
5038
5039 case PT_GC:
5040 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5041 SET_TYPE_OFFSET(c);
5042 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5043 break;
5044
5045 case PT_PC:
5046 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5047 break;
5048
5049 case PT_SC:
5050 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5051 break;
5052
5053 case PT_SPACE:
5054 case PT_PXSPACE:
5055 SET_CHAR_OFFSET(9);
5056 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5057 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5058
5059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5060 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5061
5062 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5063 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5064
5065 SET_TYPE_OFFSET(ucp_Zl);
5066 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5067 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5068 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5069 break;
5070
5071 case PT_WORD:
5072 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5073 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5074 /* Fall through. */
5075
5076 case PT_ALNUM:
5077 SET_TYPE_OFFSET(ucp_Ll);
5078 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5079 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
5080 SET_TYPE_OFFSET(ucp_Nd);
5081 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5082 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5083 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5084 break;
5085
5086 case PT_CLIST:
5087 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5088
5089 /* At least three characters are required.
5090 Otherwise this case would be handled by the normal code path. */
5091 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5092 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5093
5094 /* Optimizing character pairs, if their difference is power of 2. */
5095 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5096 {
5097 if (charoffset == 0)
5098 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5099 else
5100 {
5101 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5102 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5103 }
5104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5105 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5106 other_cases += 2;
5107 }
5108 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5109 {
5110 if (charoffset == 0)
5111 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5112 else
5113 {
5114 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5115 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5116 }
5117 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5118 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5119
5120 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5121 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5122
5123 other_cases += 3;
5124 }
5125 else
5126 {
5127 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5128 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5129 }
5130
5131 while (*other_cases != NOTACHAR)
5132 {
5133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5134 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5135 }
5136 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5137 break;
5138
5139 case PT_UCNC:
5140 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5141 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5142 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5143 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5144 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5145 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5146
5147 SET_CHAR_OFFSET(0xa0);
5148 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5149 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5150 SET_CHAR_OFFSET(0);
5151 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5152 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5153 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5154 break;
5155
5156 case PT_PXGRAPH:
5157 /* C and Z groups are the farthest two groups. */
5158 SET_TYPE_OFFSET(ucp_Ll);
5159 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5160 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5161
5162 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5163
5164 /* In case of ucp_Cf, we overwrite the result. */
5165 SET_CHAR_OFFSET(0x2066);
5166 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5168
5169 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5170 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5171
5172 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5173 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5174
5175 JUMPHERE(jump);
5176 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5177 break;
5178
5179 case PT_PXPRINT:
5180 /* C and Z groups are the farthest two groups. */
5181 SET_TYPE_OFFSET(ucp_Ll);
5182 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5183 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5184
5185 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5186 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5187
5188 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5189
5190 /* In case of ucp_Cf, we overwrite the result. */
5191 SET_CHAR_OFFSET(0x2066);
5192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5193 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5194
5195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5197
5198 JUMPHERE(jump);
5199 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5200 break;
5201
5202 case PT_PXPUNCT:
5203 SET_TYPE_OFFSET(ucp_Sc);
5204 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5205 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5206
5207 SET_CHAR_OFFSET(0);
5208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5209 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5210
5211 SET_TYPE_OFFSET(ucp_Pc);
5212 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5213 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5214 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5215 break;
5216 }
5217 cc += 2;
5218 }
5219 #endif
5220
5221 if (jump != NULL)
5222 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5223 }
5224
5225 if (found != NULL)
5226 set_jumps(found, LABEL());
5227 }
5228
5229 #undef SET_TYPE_OFFSET
5230 #undef SET_CHAR_OFFSET
5231
5232 #endif
5233
/* Generates the matching-path code for one simple opcode (anchors, character
   types, single characters, character classes, OP_REVERSE).

   common      the shared compiler state
   type        the opcode to compile
   cc          code pointer to the opcode's data (compile_charn_matchingpath
               passes the position just after the opcode)
   backtracks  list that collects the jumps taken when the generated code
               fails to match

   Returns the code pointer after the data used by this opcode. */
5234 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5235 {
5236 DEFINE_COMPILER;
5237 int length;
5238 unsigned int c, oc, bit;
5239 compare_context context;
5240 struct sljit_jump *jump[4];
5241 jump_list *end_list;
5242 #ifdef SUPPORT_UTF
5243 struct sljit_label *label;
5244 #ifdef SUPPORT_UCP
5245 pcre_uchar propdata[5];
5246 #endif
5247 #endif /* SUPPORT_UTF */
5248
5249 switch(type)
5250 {
5251 case OP_SOD:
/* Backtrack unless STR_PTR equals the 'begin' field of jit_arguments. */
5252 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5254 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5255 return cc;
5256
5257 case OP_SOM:
/* Backtrack unless STR_PTR equals the 'str' field of jit_arguments. */
5258 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5260 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5261 return cc;
5262
5263 case OP_NOT_WORD_BOUNDARY:
5264 case OP_WORD_BOUNDARY:
/* Fast-call the routine collected in common->wordboundary, then backtrack on
   the zero / non-zero flag, depending on which of the two opcodes this is. */
5265 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5266 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5267 return cc;
5268
5269 case OP_NOT_DIGIT:
5270 case OP_DIGIT:
5271 /* Digits are usually 0-9, so it is worth optimizing them. */
5272 detect_partial_match(common, backtracks);
5273 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5274 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5275 read_char7_type(common, type == OP_NOT_DIGIT);
5276 else
5277 #endif
5278 read_char8_type(common, type == OP_NOT_DIGIT);
5279 /* Flip the starting bit in the negative case. */
/* TMP1 is tested against ctype_digit; presumably the read_char*_type helper
   left the character's ctype flags in TMP1 (helper not visible here). */
5280 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5281 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5282 return cc;
5283
5284 case OP_NOT_WHITESPACE:
5285 case OP_WHITESPACE:
/* Same scheme as the digit case, testing the ctype_space bit. */
5286 detect_partial_match(common, backtracks);
5287 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5288 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5289 read_char7_type(common, type == OP_NOT_WHITESPACE);
5290 else
5291 #endif
5292 read_char8_type(common, type == OP_NOT_WHITESPACE);
5293 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5294 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5295 return cc;
5296
5297 case OP_NOT_WORDCHAR:
5298 case OP_WORDCHAR:
/* Same scheme as the digit case, testing the ctype_word bit. */
5299 detect_partial_match(common, backtracks);
5300 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5301 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5302 read_char7_type(common, type == OP_NOT_WORDCHAR);
5303 else
5304 #endif
5305 read_char8_type(common, type == OP_NOT_WORDCHAR);
5306 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5307 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5308 return cc;
5309
5310 case OP_ANY:
/* Read one character and backtrack if it is a newline. */
5311 detect_partial_match(common, backtracks);
5312 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5313 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5314 {
/* newline > 255: the code below treats it as two units, (newline >> 8) & 0xff
   followed by newline & 0xff. We only backtrack when both units match;
   reaching the end of the subject after the first unit is not a newline. */
5315 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5316 end_list = NULL;
5317 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5318 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5319 else
5320 check_str_end(common, &end_list);
5321
5322 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5324 set_jumps(end_list, LABEL());
5325 JUMPHERE(jump[0]);
5326 }
5327 else
5328 check_newlinechar(common, common->nltype, backtracks, TRUE);
5329 return cc;
5330
5331 case OP_ALLANY:
/* Skip one character. In UTF mode the trailing code units of a multi-unit
   character are skipped as well. */
5332 detect_partial_match(common, backtracks);
5333 #ifdef SUPPORT_UTF
5334 if (common->utf)
5335 {
5336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5338 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5339 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no extra skip; otherwise the additional length is
   loaded from utf8_table4 (indexed by unit - 0xc0) and added to STR_PTR. */
5340 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5341 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5342 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5343 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no extra skip. Otherwise (unit & 0xfc00) == 0xd800
   yields 1, which is shifted left to 2 and added to STR_PTR. */
5344 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5345 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5347 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5348 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5349 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5350 #endif
5351 JUMPHERE(jump[0]);
5352 #endif /* COMPILE_PCRE[8|16] */
5353 return cc;
5354 }
5355 #endif
5356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5357 return cc;
5358
5359 case OP_ANYBYTE:
/* Advance by exactly one code unit; common->utf is not consulted here. */
5360 detect_partial_match(common, backtracks);
5361 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5362 return cc;
5363
5364 #ifdef SUPPORT_UTF
5365 #ifdef SUPPORT_UCP
5366 case OP_NOTPROP:
5367 case OP_PROP:
/* Wrap the property in a five-unit list (XCL_HASPROP, XCL_PROP or XCL_NOTPROP,
   the two operand units, XCL_END) and reuse the xclass compiler. */
5368 propdata[0] = XCL_HASPROP;
5369 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5370 propdata[2] = cc[0];
5371 propdata[3] = cc[1];
5372 propdata[4] = XCL_END;
5373 compile_xclass_matchingpath(common, propdata, backtracks);
5374 return cc + 2;
5375 #endif
5376 #endif
5377
5378 case OP_ANYNL:
/* CR optionally followed by NL is consumed as a unit; any other character is
   checked by check_newlinechar() using the bsr newline type. */
5379 detect_partial_match(common, backtracks);
5380 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5381 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5382 /* We don't need to handle soft partial matching case. */
5383 end_list = NULL;
5384 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5385 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5386 else
5387 check_str_end(common, &end_list);
5388 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5389 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5390 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5391 jump[2] = JUMP(SLJIT_JUMP);
5392 JUMPHERE(jump[0]);
5393 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5394 set_jumps(end_list, LABEL());
5395 JUMPHERE(jump[1]);
5396 JUMPHERE(jump[2]);
5397 return cc;
5398
5399 case OP_NOT_HSPACE:
5400 case OP_HSPACE:
/* Fast-call the routine collected in common->hspace and test its zero flag. */
5401 detect_partial_match(common, backtracks);
5402 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5403 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5404 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5405 return cc;
5406
5407 case OP_NOT_VSPACE:
5408 case OP_VSPACE:
/* Fast-call the routine collected in common->vspace and test its zero flag. */
5409 detect_partial_match(common, backtracks);
5410 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5411 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5412 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5413 return cc;
5414
5415 #ifdef SUPPORT_UCP
5416 case OP_EXTUNI:
/* Read the first character and keep its gbprop value in STACK_TOP; the real
   STACK_TOP is parked in LOCALS0 until the loop below has finished. */
5417 detect_partial_match(common, backtracks);
5418 read_char(common);
5419 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5421 /* Optimize register allocation: use a real register. */
5422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5423 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5424
/* Loop: read the next character (TMP3 remembers where it started), load the
   ucp_gbtable entry of the previous gbprop and continue while the bit selected
   by the new gbprop is set. */
5425 label = LABEL();
5426 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5427 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5428 read_char(common);
5429 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5430 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5431 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5432
5433 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5434 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5435 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5436 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5437 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5438 JUMPTO(SLJIT_C_NOT_ZERO, label);
5439
/* The last character read does not extend the sequence: un-read it. */
5440 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5441 JUMPHERE(jump[0]);
5442 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5443
5444 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5445 {
5446 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5447 /* Since we successfully read a char above, partial matching must occur. */
5448 check_partial(common, TRUE);
5449 JUMPHERE(jump[0]);
5450 }
5451 return cc;
5452 #endif
5453
5454 case OP_EODN:
5455 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; it lands
   just before the final check_partial() call. Otherwise what remains of the
   subject must be exactly one newline. */
5456 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5457 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5458 {
5459 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5461 if (common->mode == JIT_COMPILE)
5462 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5463 else
5464 {
5465 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5466 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5467 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5468 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5469 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5470 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5471 check_partial(common, TRUE);
5472 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5473 JUMPHERE(jump[1]);
5474 }
5475 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5476 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5477 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5478 }
5479 else if (common->nltype == NLTYPE_FIXED)
5480 {
5481 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5483 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5484 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5485 }
5486 else
5487 {
/* Non-fixed newline types: first handle a leading CR (jump[1] skips this part
   when the unit at STR_PTR is not CR), which must be followed by exactly NL. */
5488 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5489 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5490 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5491 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5492 jump[2] = JUMP(SLJIT_C_GREATER);
5493 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5494 /* Equal. */
5495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5496 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5497 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5498
5499 JUMPHERE(jump[1]);
5500 if (common->nltype == NLTYPE_ANYCRLF)
5501 {
5502 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5503 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5504 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5505 }
5506 else
5507 {
/* STR_PTR is saved in LOCALS1 and restored after the check, so the newline
   itself is not consumed. */
5508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5509 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5510 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5511 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5512 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5513 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5514 }
5515 JUMPHERE(jump[2]);
5516 JUMPHERE(jump[3]);
5517 }
5518 JUMPHERE(jump[0]);
5519 check_partial(common, FALSE);
5520 return cc;
5521
5522 case OP_EOD:
/* Backtrack while STR_PTR is still below STR_END. */
5523 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5524 check_partial(common, FALSE);
5525 return cc;
5526
5527 case OP_CIRC:
/* Backtrack if STR_PTR is past jit_arguments.begin or if notbol is non-zero. */
5528 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5530 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5531 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5532 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5533 return cc;
5534
5535 case OP_CIRCM:
/* At jit_arguments.begin this behaves like OP_CIRC (only notbol is checked).
   Elsewhere STR_PTR must be below STR_END and a newline must end right
   before STR_PTR. */
5536 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5537 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5538 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5539 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5540 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5541 jump[0] = JUMP(SLJIT_JUMP);
5542 JUMPHERE(jump[1]);
5543
5544 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5545 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5546 {
5547 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5548 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5549 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5550 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5551 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5552 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5553 }
5554 else
5555 {
5556 skip_char_back(common);
5557 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5558 check_newlinechar(common, common->nltype, backtracks, FALSE);
5559 }
5560 JUMPHERE(jump[0]);
5561 return cc;
5562
5563 case OP_DOLL:
/* Backtrack if noteol is set; then either reuse the OP_EODN code or, when
   common->endonly is set, require STR_PTR to have reached STR_END. */
5564 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5565 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5566 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5567
5568 if (!common->endonly)
5569 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5570 else
5571 {
5572 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5573 check_partial(common, FALSE);
5574 }
5575 return cc;
5576
5577 case OP_DOLLM:
/* At the end of the subject: match unless noteol is set. Otherwise (jump[1])
   a newline must start at STR_PTR. */
5578 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5579 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5580 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5581 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5582 check_partial(common, FALSE);
5583 jump[0] = JUMP(SLJIT_JUMP);
5584 JUMPHERE(jump[1]);
5585
5586 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5587 {
5588 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5589 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5590 if (common->mode == JIT_COMPILE)
5591 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5592 else
5593 {
5594 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5595 /* STR_PTR = STR_END - IN_UCHARS(1) */
5596 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5597 check_partial(common, TRUE);
5598 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5599 JUMPHERE(jump[1]);
5600 }
5601
5602 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5603 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5604 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5605 }
5606 else
5607 {
5608 peek_char(common, common->nlmax);
5609 check_newlinechar(common, common->nltype, backtracks, FALSE);
5610 }
5611 JUMPHERE(jump[0]);
5612 return cc;
5613
5614 case OP_CHAR:
5615 case OP_CHARI:
5616 length = 1;
5617 #ifdef SUPPORT_UTF
5618 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5619 #endif
/* Fast path, only when common->mode is JIT_COMPILE: advance STR_PTR first,
   check against STR_END once, and let byte_sequence_compare() compare the
   code units. Caseless characters qualify only if they have no other case or
   char_get_othercase_bit() returns non-zero. */
5620 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5621 {
5622 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5623 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5624
5625 context.length = IN_UCHARS(length);
5626 context.sourcereg = -1;
5627 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5628 context.ucharptr = 0;
5629 #endif
5630 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5631 }
5632
/* General path: read the whole character into TMP1 and compare its value. */
5633 detect_partial_match(common, backtracks);
5634 #ifdef SUPPORT_UTF
5635 if (common->utf)
5636 {
5637 GETCHAR(c, cc);
5638 }
5639 else
5640 #endif
5641 c = *cc;
5642
5643 if (type == OP_CHAR || !char_has_othercase(common, cc))
5644 {
5645 read_char_range(common, c, c, FALSE);
5646 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5647 return cc + length;
5648 }
5649 oc = char_othercase(common, c);
5650 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5651 bit = c ^ oc;
/* When the two cases differ in a single bit, OR that bit in and compare once;
   otherwise compare against each case separately. */
5652 if (is_powerof2(bit))
5653 {
5654 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5655 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5656 return cc + length;
5657 }
5658 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5660 JUMPHERE(jump[0]);
5661 return cc + length;
5662
5663 case OP_NOT:
5664 case OP_NOTI:
/* Match any single character other than c (or, for OP_NOTI, its other case). */
5665 detect_partial_match(common, backtracks);
5666 length = 1;
5667 #ifdef SUPPORT_UTF
5668 if (common->utf)
5669 {
5670 #ifdef COMPILE_PCRE8
5671 c = *cc;
5672 if (c < 128)
5673 {
/* c is a single unit below 128: compare the first subject unit directly,
   then skip over the whole subject character. */
5674 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5675 if (type == OP_NOT || !char_has_othercase(common, cc))
5676 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5677 else
5678 {
5679 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5680 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5681 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5682 }
5683 /* Skip the variable-length character. */
5684 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5685 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5688 JUMPHERE(jump[0]);
5689 return cc + 1;
5690 }
5691 else
5692 #endif /* COMPILE_PCRE8 */
5693 {
5694 GETCHARLEN(c, cc, length);
5695 }
5696 }
5697 else
5698 #endif /* SUPPORT_UTF */
5699 c = *cc;
5700
5701 if (type == OP_NOT || !char_has_othercase(common, cc))
5702 {
5703 read_char_range(common, c, c, TRUE);
5704 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5705 }
5706 else
5707 {
5708 oc = char_othercase(common, c);
5709 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5710 bit = c ^ oc;
5711 if (is_powerof2(bit))
5712 {
5713 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5714 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5715 }
5716 else
5717 {
5718 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5719 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5720 }
5721 }
5722 return cc + length;
5723
5724 case OP_CLASS:
5725 case OP_NCLASS:
/* cc points at a 32-byte bitmap. If check_class_ranges() cannot handle the
   class, bit (c & 7) of byte (c >> 3) is tested directly. */
5726 detect_partial_match(common, backtracks);
5727
5728 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5729 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5730 read_char_range(common, 0, bit, type == OP_NCLASS);
5731 #else
5732 read_char_range(common, 0, 255, type == OP_NCLASS);
5733 #endif
5734
5735 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5736 return cc + 32 / sizeof(pcre_uchar);
5737
/* Characters above the bitmap range fail for OP_CLASS; for OP_NCLASS jump[0]
   skips the bitmap test, so they match. */
5738 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5739 jump[0] = NULL;
5740 if (common->utf)
5741 {
5742 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5743 if (type == OP_CLASS)
5744 {
5745 add_jump(compiler, backtracks, jump[0]);
5746 jump[0] = NULL;
5747 }
5748 }
5749 #elif !defined COMPILE_PCRE8
5750 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5751 if (type == OP_CLASS)
5752 {
5753 add_jump(compiler, backtracks, jump[0]);
5754 jump[0] = NULL;
5755 }
5756 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5757
5758 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5764
5765 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5766 if (jump[0] != NULL)
5767 JUMPHERE(jump[0]);
5768 #endif
5769
5770 return cc + 32 / sizeof(pcre_uchar);
5771
5772 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5773 case OP_XCLASS:
/* The class data starts after the LINK_SIZE link field; GET(cc, 0) is used to
   compute the position of the next opcode. */
5774 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5775 return cc + GET(cc, 0) - 1;
5776 #endif
5777
5778 case OP_REVERSE:
/* Move STR_PTR back by 'length' characters, backtracking if that would pass
   jit_arguments.begin. In UTF mode the characters are skipped one at a time. */
5779 length = GET(cc, 0);
5780 if (length == 0)
5781 return cc + LINK_SIZE;
5782 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5783 #ifdef SUPPORT_UTF
5784 if (common->utf)
5785 {
5786 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5788 label = LABEL();
5789 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5790 skip_char_back(common);
5791 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5792 JUMPTO(SLJIT_C_NOT_ZERO, label);
5793 }
5794 else
5795 #endif
5796 {
5797 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5798 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5799 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5800 }
5801 check_start_used_ptr(common);
5802 return cc + LINK_SIZE;
5803 }
/* Reached only for an opcode that has no case above. */
5804 SLJIT_ASSERT_STOP();
5805 return cc;
5806 }
5807
5808 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5809 {
5810 /* This function consumes at least one input character. */
5811 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5812 DEFINE_COMPILER;
5813 pcre_uchar *ccbegin = cc;
5814 compare_context context;
5815 int size;
5816
5817 context.length = 0;
5818 do
5819 {
5820 if (cc >= ccend)
5821 break;
5822
5823 if (*cc == OP_CHAR)
5824 {
5825 size = 1;
5826 #ifdef SUPPORT_UTF
5827 if (common->utf && HAS_EXTRALEN(cc[1]))
5828 size += GET_EXTRALEN(cc[1]);
5829 #endif
5830 }
5831 else if (*cc == OP_CHARI)
5832 {
5833 size = 1;
5834 #ifdef SUPPORT_UTF
5835 if (common->utf)
5836 {
5837 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5838 size = 0;
5839 else if (HAS_EXTRALEN(cc[1]))
5840 size += GET_EXTRALEN(cc[1]);
5841 }
5842 else
5843 #endif
5844 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5845 size = 0;
5846 }
5847 else
5848 size = 0;
5849
5850 cc += 1 + size;
5851 context.length += IN_UCHARS(size);
5852 }
5853 while (size > 0 && context.length <= 128);
5854
5855 cc = ccbegin;
5856 if (context.length > 0)
5857 {
5858 /* We have a fixed-length byte sequence. */
5859 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5860 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5861
5862 context.sourcereg = -1;
5863 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5864 context.ucharptr = 0;
5865 #endif
5866 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5867 return cc;
5868 }
5869
5870 /* A non-fixed length character will be checked if length == 0. */
5871 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5872 }
5873
5874 /* Forward definitions. */
5875 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5876 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5877
/* Allocates a zero-filled backtrack record of 'size' bytes with
   sljit_alloc_memory(), stores 'ccstart' in its cc field and pushes it on top
   of the parent's list (backtrack->prev = parent->top; parent->top = backtrack).
   If the compiler reports an error after the allocation, the enclosing
   function returns 'error'.
   The expanding scope must provide the variables 'compiler', 'backtrack'
   and 'parent'. */
5878 #define PUSH_BACKTRACK(size, ccstart, error) \
5879 do \
5880 { \
5881 backtrack = sljit_alloc_memory(compiler, (size)); \
5882 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5883 return error; \
5884 memset(backtrack, 0, size); \
5885 backtrack->prev = parent->top; \
5886 backtrack->cc = (ccstart); \
5887 parent->top = backtrack; \
5888 } \
5889 while (0)
5890
/* Same as PUSH_BACKTRACK, for functions returning void: on a compiler error
   the enclosing function simply returns. */
5891 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5892 do \
5893 { \
5894 backtrack = sljit_alloc_memory(compiler, (size)); \
5895 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5896 return; \
5897 memset(backtrack, 0, size); \
5898 backtrack->prev = parent->top; \
5899 backtrack->cc = (ccstart); \
5900 parent->top = backtrack; \
5901 } \
5902 while (0)
5903
/* Views the current 'backtrack' pointer as a pointer to 'type'. */
5904 #define BACKTRACK_AS(type) ((type *)backtrack)
5905
5906 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5907 {
5908 /* The OVECTOR offset goes to TMP2. */
5909 DEFINE_COMPILER;
5910 int count = GET2(cc, 1 + IMM2_SIZE);
5911 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5912 unsigned int offset;
5913 jump_list *found = NULL;
5914
5915 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5916
5917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5918
5919 count--;
5920 while (count-- > 0)
5921 {
5922 offset = GET2(slot, 0) << 1;
5923 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5924 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5925 slot += common->name_entry_size;
5926 }
5927
5928 offset = GET2(slot, 0) << 1;
5929 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5930 if (backtracks != NULL && !common->jscript_compat)
5931 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5932
5933 set_jumps(found, LABEL());
5934 }
5935
5936 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5937 {
5938 DEFINE_COMPILER;
5939 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5940 int offset = 0;
5941 struct sljit_jump *jump = NULL;
5942 struct sljit_jump *partial;
5943 struct sljit_jump *nopartial;
5944
5945 if (ref)
5946 {
5947 offset = GET2(cc, 1) << 1;
5948 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5949 /* OVECTOR(1) contains the "string begin - 1" constant. */
5950 if (withchecks && !common->jscript_compat)
5951 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5952 }
5953 else
5954 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5955
5956 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5957 if (common->utf && *cc == OP_REFI)
5958 {
5959 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5960 if (ref)
5961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5962 else
5963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5964
5965 if (withchecks)
5966 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5967
5968 /* Needed to save important temporary registers. */
5969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5970 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5972 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5973 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5974 if (common->mode == JIT_COMPILE)
5975 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5976 else
5977 {
5978 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5979 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5980 check_partial(common, FALSE);
5981 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5982 JUMPHERE(nopartial);
5983 }
5984 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5985 }
5986 else
5987 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5988 {
5989 if (ref)
5990 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5991 else
5992 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5993
5994 if (withchecks)
5995 jump = JUMP(SLJIT_C_ZERO);
5996
5997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5998 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5999 if (common->mode == JIT_COMPILE)
6000 add_jump(compiler, backtracks, partial);
6001
6002 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6003 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6004
6005 if (common->mode != JIT_COMPILE)
6006 {
6007 nopartial = JUMP(SLJIT_JUMP);
6008 JUMPHERE(partial);
6009 /* TMP2 -= STR_END - STR_PTR */
6010 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6011 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6012 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6013 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6014 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6015 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6016 JUMPHERE(partial);
6017 check_partial(common, FALSE);
6018 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6019 JUMPHERE(nopartial);
6020 }
6021 }
6022
6023 if (jump != NULL)
6024 {
6025 if (emptyfail)
6026 add_jump(compiler, backtracks, jump);
6027 else
6028 JUMPHERE(jump);
6029 }
6030 }
6031
6032 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6033 {
6034 DEFINE_COMPILER;
6035 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6036 backtrack_common *backtrack;
6037 pcre_uchar type;
6038 int offset = 0;
6039 struct sljit_label *label;
6040 struct sljit_jump *zerolength;
6041 struct sljit_jump *jump = NULL;
6042 pcre_uchar *ccbegin = cc;
6043 int min = 0, max = 0;
6044 BOOL minimize;
6045
6046 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6047
6048 if (ref)
6049 offset = GET2(cc, 1) << 1;
6050 else
6051 cc += IMM2_SIZE;
6052 type = cc[1 + IMM2_SIZE];
6053
6054 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6055 minimize = (type & 0x1) != 0;
6056 switch(type)
6057 {
6058 case OP_CRSTAR:
6059 case OP_CRMINSTAR:
6060 min = 0;
6061 max = 0;
6062 cc += 1 + IMM2_SIZE + 1;
6063 break;
6064 case OP_CRPLUS:
6065 case OP_CRMINPLUS:
6066 min = 1;
6067 max = 0;
6068 cc += 1 + IMM2_SIZE + 1;
6069 break;
6070 case OP_CRQUERY:
6071 case OP_CRMINQUERY:
6072 min = 0;
6073 max = 1;
6074 cc += 1 + IMM2_SIZE + 1;
6075 break;
6076 case OP_CRRANGE:
6077 case OP_CRMINRANGE:
6078 min = GET2(cc, 1 + IMM2_SIZE + 1);
6079 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6080 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6081 break;
6082 default:
6083 SLJIT_ASSERT_STOP();
6084 break;
6085 }
6086
6087 if (!minimize)
6088 {
6089 if (min == 0)
6090 {
6091 allocate_stack(common, 2);
6092 if (ref)
6093 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6095 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6096 /* Temporary release of STR_PTR. */
6097 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6098 /* Handles both invalid and empty cases. Since the minimum repeat,
6099 is zero the invalid case is basically the same as an empty case. */
6100 if (ref)
6101 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6102 else
6103 {
6104 compile_dnref_search(common, ccbegin, NULL);
6105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6107 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6108 }
6109 /* Restore if not zero length. */
6110 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6111 }
6112 else
6113 {
6114 allocate_stack(common, 1);
6115 if (ref)
6116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6117 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6118 if (ref)
6119 {
6120 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6121 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6122 }
6123 else
6124 {
6125 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6128 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6129 }
6130 }
6131
6132 if (min > 1 || max > 1)
6133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6134
6135 label = LABEL();
6136 if (!ref)
6137 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6138 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6139
6140 if (min > 1 || max > 1)
6141 {
6142 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6143 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6145 if (min > 1)
6146 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
6147 if (max > 1)
6148 {
6149 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6150 allocate_stack(common, 1);
6151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6152 JUMPTO(SLJIT_JUMP, label);
6153 JUMPHERE(jump);
6154 }
6155 }
6156
6157 if (max == 0)
6158 {
6159 /* Includes min > 1 case as well. */
6160 allocate_stack(common, 1);
6161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6162 JUMPTO(SLJIT_JUMP, label);
6163 }
6164
6165 JUMPHERE(zerolength);
6166 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6167
6168 count_match(common);
6169 return cc;
6170 }
6171
6172 allocate_stack(common, ref ? 2 : 3);
6173 if (ref)
6174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6176 if (type != OP_CRMINSTAR)
6177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6178
6179 if (min == 0)
6180 {
6181 /* Handles both invalid and empty cases. Since the minimum repeat,
6182 is zero the invalid case is basically the same as an empty case. */
6183 if (ref)
6184 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6185 else
6186 {
6187 compile_dnref_search(common, ccbegin, NULL);
6188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6190 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6191 }
6192 /* Length is non-zero, we can match real repeats. */
6193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6194 jump = JUMP(SLJIT_JUMP);
6195 }
6196 else
6197 {
6198 if (ref)
6199 {
6200 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6201 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6202 }
6203 else
6204 {
6205 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6208 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6209 }
6210 }
6211
6212 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6213 if (max > 0)
6214 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6215
6216 if (!ref)
6217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6218 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6220
6221 if (min > 1)
6222 {
6223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6224 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6226 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6227 }
6228 else if (max > 0)
6229 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6230
6231 if (jump != NULL)
6232 JUMPHERE(jump);
6233 JUMPHERE(zerolength);
6234
6235 count_match(common);
6236 return cc;
6237 }
6238
6239 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6240 {
6241 DEFINE_COMPILER;
6242 backtrack_common *backtrack;
6243 recurse_entry *entry = common->entries;
6244 recurse_entry *prev = NULL;
6245 sljit_sw start = GET(cc, 1);
6246 pcre_uchar *start_cc;
6247 BOOL needs_control_head;
6248
6249 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6250
6251 /* Inlining simple patterns. */
6252 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6253 {
6254 start_cc = common->start + start;
6255 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6256 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6257 return cc + 1 + LINK_SIZE;
6258 }
6259
6260 while (entry != NULL)
6261 {
6262 if (entry->start == start)
6263 break;
6264 prev = entry;
6265 entry = entry->next;
6266 }
6267
6268 if (entry == NULL)
6269 {
6270 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6271 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6272 return NULL;
6273 entry->next = NULL;
6274 entry->entry = NULL;
6275 entry->calls = NULL;
6276 entry->start = start;
6277
6278 if (prev != NULL)
6279 prev->next = entry;
6280 else
6281 common->entries = entry;
6282 }
6283
6284 if (common->has_set_som && common->mark_ptr != 0)
6285 {
6286 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6287 allocate_stack(common, 2);
6288 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6291 }
6292 else if (common->has_set_som || common->mark_ptr != 0)
6293 {
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6295 allocate_stack(common, 1);
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6297 }
6298
6299 if (entry->entry == NULL)
6300 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6301 else
6302 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6303 /* Leave if the match is failed. */
6304 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6305 return cc + 1 + LINK_SIZE;
6306 }
6307
6308 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6309 {
6310 const pcre_uchar *begin = arguments->begin;
6311 int *offset_vector = arguments->offsets;
6312 int offset_count = arguments->offset_count;
6313 int i;
6314
6315 if (PUBL(callout) == NULL)
6316 return 0;
6317
6318 callout_block->version = 2;
6319 callout_block->callout_data = arguments->callout_data;
6320
6321 /* Offsets in subject. */
6322 callout_block->subject_length = arguments->end - arguments->begin;
6323 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6324 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6325 #if defined COMPILE_PCRE8
6326 callout_block->subject = (PCRE_SPTR)begin;
6327 #elif defined COMPILE_PCRE16
6328 callout_block->subject = (PCRE_SPTR16)begin;
6329 #elif defined COMPILE_PCRE32
6330 callout_block->subject = (PCRE_SPTR32)begin;
6331 #endif
6332
6333 /* Convert and copy the JIT offset vector to the offset_vector array. */
6334 callout_block->capture_top = 0;
6335 callout_block->offset_vector = offset_vector;
6336 for (i = 2; i < offset_count; i += 2)
6337 {
6338 offset_vector[i] = jit_ovector[i] - begin;
6339 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6340 if (jit_ovector[i] >= begin)
6341 callout_block->capture_top = i;
6342 }
6343
6344 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6345 if (offset_count > 0)
6346 offset_vector[0] = -1;
6347 if (offset_count > 1)
6348 offset_vector[1] = -1;
6349 return (*PUBL(callout))(callout_block);
6350 }
6351
6352 /* Aligning to 8 byte. */
6353 #define CALLOUT_ARG_SIZE \
6354 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6355
6356 #define CALLOUT_ARG_OFFSET(arg) \
6357 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6358
6359 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6360 {
6361 DEFINE_COMPILER;
6362 backtrack_common *backtrack;
6363
6364 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6365
6366 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6367
6368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6369 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6370 SLJIT_ASSERT(common->capture_last_ptr != 0);
6371 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6372 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6373
6374 /* These pointer sized fields temporarly stores internal variables. */
6375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6378
6379 if (common->mark_ptr != 0)
6380 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6381 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6382 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6384
6385 /* Needed to save important temporary registers. */
6386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6387 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6388 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6389 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6390 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6391 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6392 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6393
6394 /* Check return value. */
6395 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6396 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6397 if (common->forced_quit_label == NULL)
6398 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6399 else
6400 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6401 return cc + 2 + 2 * LINK_SIZE;
6402 }
6403
6404 #undef CALLOUT_ARG_SIZE
6405 #undef CALLOUT_ARG_OFFSET
6406
6407 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6408 {
6409 DEFINE_COMPILER;
6410 int framesize;
6411 int extrasize;
6412 BOOL needs_control_head;
6413 int private_data_ptr;
6414 backtrack_common altbacktrack;
6415 pcre_uchar *ccbegin;
6416 pcre_uchar opcode;
6417 pcre_uchar bra = OP_BRA;
6418 jump_list *tmp = NULL;
6419 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6420 jump_list **found;
6421 /* Saving previous accept variables. */
6422 BOOL save_local_exit = common->local_exit;
6423 BOOL save_positive_assert = common->positive_assert;
6424 then_trap_backtrack *save_then_trap = common->then_trap;
6425 struct sljit_label *save_quit_label = common->quit_label;
6426 struct sljit_label *save_accept_label = common->accept_label;
6427 jump_list *save_quit = common->quit;
6428 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6429 jump_list *save_accept = common->accept;
6430 struct sljit_jump *jump;
6431 struct sljit_jump *brajump = NULL;
6432
6433 /* Assert captures then. */
6434 common->then_trap = NULL;
6435
6436 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6437 {
6438 SLJIT_ASSERT(!conditional);
6439 bra = *cc;
6440 cc++;
6441 }
6442 private_data_ptr = PRIVATE_DATA(cc);
6443 SLJIT_ASSERT(private_data_ptr != 0);
6444 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6445 backtrack->framesize = framesize;
6446 backtrack->private_data_ptr = private_data_ptr;
6447 opcode = *cc;
6448 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6449 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6450 ccbegin = cc;
6451 cc += GET(cc, 1);
6452
6453 if (bra == OP_BRAMINZERO)
6454 {
6455 /* This is a braminzero backtrack path. */
6456 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6457 free_stack(common, 1);
6458 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6459 }
6460
6461 if (framesize < 0)
6462 {
6463 extrasize = needs_control_head ? 2 : 1;
6464 if (framesize == no_frame)
6465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6466 allocate_stack(common, extrasize);
6467 if (needs_control_head)
6468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6470 if (needs_control_head)
6471 {
6472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6474 }
6475 }
6476 else
6477 {
6478 extrasize = needs_control_head ? 3 : 2;
6479 allocate_stack(common, framesize + extrasize);
6480 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6481 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6483 if (needs_control_head)
6484 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6486 if (needs_control_head)
6487 {
6488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6491 }
6492 else
6493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6494 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6495 }
6496
6497 memset(&altbacktrack, 0, sizeof(backtrack_common));
6498 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6499 {
6500 /* Negative assert is stronger than positive assert. */
6501 common->local_exit = TRUE;
6502 common->quit_label = NULL;
6503 common->quit = NULL;
6504 common->positive_assert = FALSE;
6505 }
6506 else
6507 common->positive_assert = TRUE;
6508 common->positive_assert_quit = NULL;
6509
6510 while (1)
6511 {
6512 common->accept_label = NULL;
6513 common->accept = NULL;
6514 altbacktrack.top = NULL;
6515 altbacktrack.topbacktracks = NULL;
6516
6517 if (*ccbegin == OP_ALT)
6518 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6519
6520 altbacktrack.cc = ccbegin;
6521 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6522 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6523 {
6524 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6525 {
6526 common->local_exit = save_local_exit;
6527 common->quit_label = save_quit_label;
6528 common->quit = save_quit;
6529 }
6530 common->positive_assert = save_positive_assert;
6531 common->then_trap = save_then_trap;
6532 common->accept_label = save_accept_label;
6533 common->positive_assert_quit = save_positive_assert_quit;
6534 common->accept = save_accept;
6535 return NULL;
6536 }
6537 common->accept_label = LABEL();
6538 if (common->accept != NULL)
6539 set_jumps(common->accept, common->accept_label);
6540
6541 /* Reset stack. */
6542 if (framesize < 0)
6543 {
6544 if (framesize == no_frame)
6545 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6546 else
6547 free_stack(common, extrasize);
6548 if (needs_control_head)
6549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6550 }
6551 else
6552 {
6553 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6554 {
6555 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6556 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6557 if (needs_control_head)
6558 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6559 }
6560 else
6561 {
6562 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6563 if (needs_control_head)
6564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6565 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6566 }
6567 }
6568
6569 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6570 {
6571 /* We know that STR_PTR was stored on the top of the stack. */
6572 if (conditional)
6573 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6574 else if (bra == OP_BRAZERO)
6575 {
6576 if (framesize < 0)
6577 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6578 else
6579 {
6580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6581 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6583 }
6584 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6585 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6586 }
6587 else if (framesize >= 0)
6588 {
6589 /* For OP_BRA and OP_BRAMINZERO. */
6590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6591 }
6592 }
6593 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6594
6595 compile_backtrackingpath(common, altbacktrack.top);
6596 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6597 {
6598 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6599 {
6600 common->local_exit = save_local_exit;
6601 common->quit_label = save_quit_label;
6602 common->quit = save_quit;
6603 }
6604 common->positive_assert = save_positive_assert;
6605 common->then_trap = save_then_trap;
6606 common->accept_label = save_accept_label;
6607 common->positive_assert_quit = save_positive_assert_quit;
6608 common->accept = save_accept;
6609 return NULL;
6610 }
6611 set_jumps(altbacktrack.topbacktracks, LABEL());
6612
6613 if (*cc != OP_ALT)
6614 break;
6615
6616 ccbegin = cc;
6617 cc += GET(cc, 1);
6618 }
6619
6620 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6621 {
6622 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6623 /* Makes the check less complicated below. */
6624 common->positive_assert_quit = common->quit;
6625 }
6626
6627 /* None of them matched. */
6628 if (common->positive_assert_quit != NULL)
6629 {
6630 jump = JUMP(SLJIT_JUMP);
6631 set_jumps(common->positive_assert_quit, LABEL());
6632 SLJIT_ASSERT(framesize != no_stack);
6633 if (framesize < 0)
6634 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6635 else
6636 {
6637 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6638 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6639 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6640 }
6641 JUMPHERE(jump);
6642 }
6643
6644 if (needs_control_head)
6645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6646
6647 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6648 {
6649 /* Assert is failed. */
6650 if (conditional || bra == OP_BRAZERO)
6651 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6652
6653 if (framesize < 0)
6654 {
6655 /* The topmost item should be 0. */
6656 if (bra == OP_BRAZERO)
6657 {
6658 if (extrasize == 2)
6659 free_stack(common, 1);
6660 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6661 }
6662 else
6663 free_stack(common, extrasize);
6664 }
6665 else
6666 {
6667 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6668 /* The topmost item should be 0. */
6669 if (bra == OP_BRAZERO)
6670 {
6671 free_stack(common, framesize + extrasize - 1);
6672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6673 }
6674 else
6675 free_stack(common, framesize + extrasize);
6676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6677 }
6678 jump = JUMP(SLJIT_JUMP);
6679 if (bra != OP_BRAZERO)
6680 add_jump(compiler, target, jump);
6681
6682 /* Assert is successful. */
6683 set_jumps(tmp, LABEL());
6684 if (framesize < 0)
6685 {
6686 /* We know that STR_PTR was stored on the top of the stack. */
6687 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6688 /* Keep the STR_PTR on the top of the stack. */
6689 if (bra == OP_BRAZERO)
6690 {
6691 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6692 if (extrasize == 2)
6693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6694 }
6695 else if (bra == OP_BRAMINZERO)
6696 {
6697 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6699 }
6700 }
6701 else
6702 {
6703 if (bra == OP_BRA)
6704 {
6705 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6706 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6707 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6708 }
6709 else
6710 {
6711 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6712 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6713 if (extrasize == 2)
6714 {
6715 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6716 if (bra == OP_BRAMINZERO)
6717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6718 }
6719 else
6720 {
6721 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6723 }
6724 }
6725 }
6726
6727 if (bra == OP_BRAZERO)
6728 {
6729 backtrack->matchingpath = LABEL();
6730 SET_LABEL(jump, backtrack->matchingpath);
6731 }
6732 else if (bra == OP_BRAMINZERO)
6733 {
6734 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6735 JUMPHERE(brajump);
6736 if (framesize >= 0)
6737 {
6738 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6739 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6741 }
6742 set_jumps(backtrack->common.topbacktracks, LABEL());
6743 }
6744 }
6745 else
6746 {
6747 /* AssertNot is successful. */
6748 if (framesize < 0)
6749 {
6750 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6751 if (bra != OP_BRA)
6752 {
6753 if (extrasize == 2)
6754 free_stack(common, 1);
6755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6756 }
6757 else
6758 free_stack(common, extrasize);
6759 }
6760