/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1626 - (show annotations)
Mon Feb 8 09:29:08 2016 UTC (3 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 333741 byte(s)
The needs_start_ptr variable is removed from the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* sljit allocator hooks: both forward to the PUBL() allocator functions and
ignore their allocator_data argument. */
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
/* sljit configuration switches, defined before sljitLir.c is included.
With SLJIT_DEBUG set to 0 the DESTROY_REGISTERS check further down in this
file stays disabled. */
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the generated code: the jit_function pointer type
declared further down takes a pointer to one of these. The member order
(pointers, then integers, then byte-sized flags) is deliberate, see the
two ordering comments inside. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
/* NOTE(review): str / begin / end presumably delimit the subject string and
the current start position - confirm against the caller that fills them. */
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* One byte per flag. NOTE(review): presumably mirrors the matching exec
options of the same names - confirm. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping for compiled code. The three arrays have one entry per JIT
compile mode (JIT_NUMBER_OF_COMPILE_MODES): a code pointer, the head of the
read-only data belonging to it, and a size. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
/* User supplied callback and the opaque pointer stored next to it. */
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Singly linked list node recording one sljit jump; chained through next. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Singly linked list node pairing a jump (start) with a label (quit).
NOTE(review): presumably describes an out-of-line stub that is entered
through start and returns to quit - confirm at the use site. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Singly linked list node pairing a label with the address of an sljit_uw.
NOTE(review): presumably that word is later overwritten with the final
address of the label - confirm at the use site. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably used in place of a
frame size (the framesize members of the backtrack structures treat a
negative value as "no frame needed") - confirm against get_framesize. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Tags with the values 0 and 1. NOTE(review): presumably identify the kind
of an entry on the control verb chain (see control_head_ptr in
compiler_common) - confirm at the use site. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer to a function that uses the SLJIT_CALL calling convention, takes
a jit_arguments block and returns an int. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
/* The descendant structures in this file embed it as their first member,
named "common". */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Descendant of backtrack_common used for assertions (it is also reachable
through the "assert" member of the union in bracket_backtrack). */
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Descendant of backtrack_common used for brackets. Only one member of the
union u is meaningful at a time; which one depends on the bracket opcode,
as noted on each member. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Descendant of backtrack_common. NOTE(review): judging by the name it
serves the possessive bracket opcodes (OP_BRAPOS and friends) - confirm. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Descendant of backtrack_common that additionally remembers one label on
the matching path. NOTE(review): judging by the name it serves
OP_BRAMINZERO - confirm. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Descendant of backtrack_common for iterators: stores the label where the
next iteration starts. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Singly linked list node describing one recursion target; compiler_common
keeps the list in its "entries" member. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls made before the function is created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Descendant of backtrack_common carrying a single flag. NOTE(review):
judging by the names it belongs to OP_RECURSE and tells whether the called
pattern was inlined - confirm. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* Extra opcode value equal to OP_TABLE_LENGTH. NOTE(review): presumably the
first value past the real opcode table, used only inside this file -
confirm. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Descendant of backtrack_common; compiler_common points to the current one
through its then_trap member. */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by the code generator functions of this file; they receive
it as their "common" argument and several helper macros (DEFINE_COMPILER,
OVECTOR, PRIVATE_DATA, ...) expand to accesses of its members. */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 sljit_ub *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 sljit_ub *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 sljit_si cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 sljit_si ovector_start;
337 /* Points to the starting character of the current match. */
338 sljit_si start_ptr;
339 /* Last known position of the requested byte. */
340 sljit_si req_char_ptr;
341 /* Head of the last recursion. */
342 sljit_si recursive_head_ptr;
343 /* First inspected character for partial matching.
344 (Needed for avoiding zero length partial matches.) */
345 sljit_si start_used_ptr;
346 /* Starting pointer for partial soft matches. */
347 sljit_si hit_start;
348 /* End pointer of the first line. */
349 sljit_si first_line_end;
350 /* Points to the marked string. */
351 sljit_si mark_ptr;
352 /* Recursive control verb management chain. */
353 sljit_si control_head_ptr;
354 /* Points to the last matched capture block index. */
355 sljit_si capture_last_ptr;
356
357 /* Flipped and lower case tables. */
358 const sljit_ub *fcc;
359 sljit_sw lcc;
360 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
361 int mode;
362 /* TRUE, when minlength is greater than 0. */
/* NOTE(review): the comment above reads inverted relative to the member
name; check_opcode_types also sets this flag to TRUE when it meets
OP_SET_SOM. Presumably it is TRUE when the pattern may match an empty
string - confirm at the place where it is initialised. */
363 BOOL might_be_empty;
364 /* \K is found in the pattern. */
365 BOOL has_set_som;
366 /* (*SKIP:arg) is found in the pattern. */
367 BOOL has_skip_arg;
368 /* (*THEN) is found in the pattern. */
369 BOOL has_then;
370 /* Currently in recurse or negative assert. */
371 BOOL local_exit;
372 /* Currently in a positive assert. */
373 BOOL positive_assert;
374 /* Newline control. */
375 int nltype;
376 sljit_ui nlmax;
377 sljit_ui nlmin;
378 int newline;
379 int bsr_nltype;
380 sljit_ui bsr_nlmax;
381 sljit_ui bsr_nlmin;
382 /* Dollar endonly. */
383 int endonly;
384 /* Tables. */
385 sljit_sw ctypes;
386 /* Named capturing brackets. */
387 pcre_uchar *name_table;
388 sljit_sw name_count;
389 sljit_sw name_entry_size;
390
391 /* Labels and jump lists. */
392 struct sljit_label *partialmatchlabel;
393 struct sljit_label *quit_label;
394 struct sljit_label *forced_quit_label;
395 struct sljit_label *accept_label;
396 struct sljit_label *ff_newline_shortcut;
397 stub_list *stubs;
398 label_addr_list *label_addrs;
399 recurse_entry *entries;
400 recurse_entry *currententry;
401 jump_list *partialmatch;
402 jump_list *quit;
403 jump_list *positive_assert_quit;
404 jump_list *forced_quit;
405 jump_list *accept;
406 jump_list *calllimit;
407 jump_list *stackalloc;
408 jump_list *revertframes;
409 jump_list *wordboundary;
410 jump_list *anynewline;
411 jump_list *hspace;
412 jump_list *vspace;
413 jump_list *casefulcmp;
414 jump_list *caselesscmp;
415 jump_list *reset_match;
416 BOOL jscript_compat;
/* The remaining members exist only in builds with UTF / UCP support. */
417 #ifdef SUPPORT_UTF
418 BOOL utf;
419 #ifdef SUPPORT_UCP
420 BOOL use_ucp;
421 #endif
422 #ifdef COMPILE_PCRE8
423 jump_list *utfreadchar;
424 jump_list *utfreadchar16;
425 jump_list *utfreadtype8;
426 #endif
427 #endif /* SUPPORT_UTF */
428 #ifdef SUPPORT_UCP
429 jump_list *getucd;
430 #endif
431 } compiler_common;
432
433 /* For byte_sequence_compare. */
434
/* length and sourcereg are always present. When SLJIT_UNALIGNED is set the
structure also carries ucharptr and two unions (c and oc) that let the same
storage be viewed as an int, a 16 bit value, or an array of code units whose
element type and count depend on the COMPILE_PCRE8/16/32 build mode.
NOTE(review): presumably c holds the characters to compare and oc the
other-case variants - confirm in byte_sequence_compare. */
435 typedef struct compare_context {
436 int length;
437 int sourcereg;
438 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
439 int ucharptr;
440 union {
441 sljit_si asint;
442 sljit_uh asushort;
443 #if defined COMPILE_PCRE8
444 sljit_ub asbyte;
445 sljit_ub asuchars[4];
446 #elif defined COMPILE_PCRE16
447 sljit_uh asuchars[2];
448 #elif defined COMPILE_PCRE32
449 sljit_ui asuchars[1];
450 #endif
451 } c;
452 union {
453 sljit_si asint;
454 sljit_uh asushort;
455 #if defined COMPILE_PCRE8
456 sljit_ub asbyte;
457 sljit_ub asuchars[4];
458 #elif defined COMPILE_PCRE16
459 sljit_uh asuchars[2];
460 #elif defined COMPILE_PCRE32
461 sljit_ui asuchars[1];
462 #endif
463 } oc;
464 #endif
465 } compare_context;
466
467 /* Undefine sljit macros. */
/* CMP is redefined further down as a shortcut for sljit_emit_cmp. */
468 #undef CMP
469
470 /* Used for accessing the elements of the stack. */
/* STACK(i) evaluates to a negative byte offset: STACK(0) is
-sizeof(sljit_sw) and every higher index lies one sljit_sw lower. */
471 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
472
/* Symbolic names for the sljit registers used by the generated code.
NOTE(review): SLJIT_Rn / SLJIT_Sn are presumably sljit's scratch and saved
registers - confirm in the sljit documentation. */
473 #define TMP1 SLJIT_R0
474 #define TMP2 SLJIT_R2
475 #define TMP3 SLJIT_R3
476 #define STR_PTR SLJIT_S0
477 #define STR_END SLJIT_S1
478 #define STACK_TOP SLJIT_R1
479 #define STACK_LIMIT SLJIT_S2
480 #define COUNT_MATCH SLJIT_S3
481 #define ARGUMENTS SLJIT_S4
482 #define RETURN_ADDR SLJIT_R4
483
484 /* Local space layout. */
/* All values below are byte offsets; the first five slots are one sljit_sw
each. */
485 /* These two locals can be used by the current opcode. */
486 #define LOCALS0 (0 * sizeof(sljit_sw))
487 #define LOCALS1 (1 * sizeof(sljit_sw))
488 /* Two local variables for possessive quantifiers (char1 cannot use them). */
489 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
490 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
491 /* Max limit of recursions. */
492 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
493 /* The output vector is stored on the stack, and contains pointers
494 to characters. The vector data is divided into two groups: the first
495 group contains the start / end character pointers, and the second is
496 the start pointers when the end of the capturing group has not yet reached. */
/* The macros below expand to expressions that use a variable called
"common" (a compiler_common pointer), which must be in scope at the point
of use. OVECTOR(i) and OVECTOR_PRIV(i) add i words to ovector_start and
cbra_ptr respectively; PRIVATE_DATA(cc) looks up private_data_ptrs using
the distance of cc from common->start as the index. */
497 #define OVECTOR_START (common->ovector_start)
498 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
499 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
501
/* MOV_UCHAR / MOVU_UCHAR name the sljit move opcodes selected for the
current build mode (COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32). Any
other configuration stops the build with an error. */
502 #if defined COMPILE_PCRE8
503 #define MOV_UCHAR SLJIT_MOV_UB
504 #define MOVU_UCHAR SLJIT_MOVU_UB
505 #elif defined COMPILE_PCRE16
506 #define MOV_UCHAR SLJIT_MOV_UH
507 #define MOVU_UCHAR SLJIT_MOVU_UH
508 #elif defined COMPILE_PCRE32
509 #define MOV_UCHAR SLJIT_MOV_UI
510 #define MOVU_UCHAR SLJIT_MOVU_UI
511 #else
512 #error Unsupported compiling mode
513 #endif
514
515 /* Shortcuts. */
/* DEFINE_COMPILER declares a local "compiler" taken from common->compiler.
Every other shortcut passes that local to the sljit function it wraps, so
DEFINE_COMPILER (or an equivalent declaration) has to come first in any
function that uses them. */
516 #define DEFINE_COMPILER \
517 struct sljit_compiler *compiler = common->compiler
518 #define OP1(op, dst, dstw, src, srcw) \
519 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
520 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
521 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
522 #define LABEL() \
523 sljit_emit_label(compiler)
524 #define JUMP(type) \
525 sljit_emit_jump(compiler, (type))
/* JUMPTO emits a jump and binds it to an existing label; JUMPHERE binds an
existing jump to a label emitted at the current position. */
526 #define JUMPTO(type, label) \
527 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
528 #define JUMPHERE(jump) \
529 sljit_set_label((jump), sljit_emit_label(compiler))
530 #define SET_LABEL(jump, label) \
531 sljit_set_label((jump), (label))
/* CMP yields the jump created by sljit_emit_cmp; CMPTO also binds that jump
to the given label. */
532 #define CMP(type, src1, src1w, src2, src2w) \
533 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
534 #define CMPTO(type, src1, src1w, src2, src2w, label) \
535 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
536 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
537 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
538 #define GET_LOCAL_BASE(dst, dstw, offset) \
539 sljit_get_local_base(compiler, (dst), (dstw), (offset))
540
/* Largest positive 32 bit signed value. NOTE(review): presumably passed as
the "no upper bound" limit to the character readers - confirm. */
541 #define READ_CHAR_MAX 0x7fffffff
542
543 static pcre_uchar *bracketend(pcre_uchar *cc)
544 {
545 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
546 do cc += GET(cc, 1); while (*cc == OP_ALT);
547 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
548 cc += 1 + LINK_SIZE;
549 return cc;
550 }
551
552 static int no_alternatives(pcre_uchar *cc)
553 {
554 int count = 0;
555 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
556 do
557 {
558 cc += GET(cc, 1);
559 count++;
560 }
561 while (*cc == OP_ALT);
562 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
563 return count;
564 }
565
/* Lookup table: entry n (0..15) is the number of bits set in the four bit
value n. */
566 static int ones_in_half_byte[16] = {
567 /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
568 /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
569 };
570
571 /* Functions which might need modification for all new supported opcodes:
572 next_opcode
573 check_opcode_types
574 set_private_data_ptrs
575 get_framesize
576 init_frame
577 get_private_data_copy_length
578 copy_private_data
579 compile_matchingpath
580 compile_backtrackingpath
581 */
582
/* Returns a pointer to the item that follows the opcode at cc, or NULL when
the opcode cannot be handled (OP_ANYBYTE in UTF mode, or an opcode that is
not listed in the switch; the latter also triggers SLJIT_ASSERT_STOP).
common is only consulted for its utf flag, in builds with SUPPORT_UTF. */
583 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
584 {
585 SLJIT_UNUSED_ARG(common);
586 switch(*cc)
587 {
/* Opcodes whose full length is given by the PRIV(OP_lengths) table. */
588 case OP_SOD:
589 case OP_SOM:
590 case OP_SET_SOM:
591 case OP_NOT_WORD_BOUNDARY:
592 case OP_WORD_BOUNDARY:
593 case OP_NOT_DIGIT:
594 case OP_DIGIT:
595 case OP_NOT_WHITESPACE:
596 case OP_WHITESPACE:
597 case OP_NOT_WORDCHAR:
598 case OP_WORDCHAR:
599 case OP_ANY:
600 case OP_ALLANY:
601 case OP_NOTPROP:
602 case OP_PROP:
603 case OP_ANYNL:
604 case OP_NOT_HSPACE:
605 case OP_HSPACE:
606 case OP_NOT_VSPACE:
607 case OP_VSPACE:
608 case OP_EXTUNI:
609 case OP_EODN:
610 case OP_EOD:
611 case OP_CIRC:
612 case OP_CIRCM:
613 case OP_DOLL:
614 case OP_DOLLM:
615 case OP_CRSTAR:
616 case OP_CRMINSTAR:
617 case OP_CRPLUS:
618 case OP_CRMINPLUS:
619 case OP_CRQUERY:
620 case OP_CRMINQUERY:
621 case OP_CRRANGE:
622 case OP_CRMINRANGE:
623 case OP_CRPOSSTAR:
624 case OP_CRPOSPLUS:
625 case OP_CRPOSQUERY:
626 case OP_CRPOSRANGE:
627 case OP_CLASS:
628 case OP_NCLASS:
629 case OP_REF:
630 case OP_REFI:
631 case OP_DNREF:
632 case OP_DNREFI:
633 case OP_RECURSE:
634 case OP_CALLOUT:
635 case OP_ALT:
636 case OP_KET:
637 case OP_KETRMAX:
638 case OP_KETRMIN:
639 case OP_KETRPOS:
640 case OP_REVERSE:
641 case OP_ASSERT:
642 case OP_ASSERT_NOT:
643 case OP_ASSERTBACK:
644 case OP_ASSERTBACK_NOT:
645 case OP_ONCE:
646 case OP_ONCE_NC:
647 case OP_BRA:
648 case OP_BRAPOS:
649 case OP_CBRA:
650 case OP_CBRAPOS:
651 case OP_COND:
652 case OP_SBRA:
653 case OP_SBRAPOS:
654 case OP_SCBRA:
655 case OP_SCBRAPOS:
656 case OP_SCOND:
657 case OP_CREF:
658 case OP_DNCREF:
659 case OP_RREF:
660 case OP_DNRREF:
661 case OP_DEF:
662 case OP_BRAZERO:
663 case OP_BRAMINZERO:
664 case OP_BRAPOSZERO:
665 case OP_PRUNE:
666 case OP_SKIP:
667 case OP_THEN:
668 case OP_COMMIT:
669 case OP_FAIL:
670 case OP_ACCEPT:
671 case OP_ASSERT_ACCEPT:
672 case OP_CLOSE:
673 case OP_SKIPZERO:
674 return cc + PRIV(OP_lengths)[*cc];
675
/* Opcodes that end with a character: the table length is extended, in UTF
mode, by the extra code units announced by the last unit counted so far. */
676 case OP_CHAR:
677 case OP_CHARI:
678 case OP_NOT:
679 case OP_NOTI:
680 case OP_STAR:
681 case OP_MINSTAR:
682 case OP_PLUS:
683 case OP_MINPLUS:
684 case OP_QUERY:
685 case OP_MINQUERY:
686 case OP_UPTO:
687 case OP_MINUPTO:
688 case OP_EXACT:
689 case OP_POSSTAR:
690 case OP_POSPLUS:
691 case OP_POSQUERY:
692 case OP_POSUPTO:
693 case OP_STARI:
694 case OP_MINSTARI:
695 case OP_PLUSI:
696 case OP_MINPLUSI:
697 case OP_QUERYI:
698 case OP_MINQUERYI:
699 case OP_UPTOI:
700 case OP_MINUPTOI:
701 case OP_EXACTI:
702 case OP_POSSTARI:
703 case OP_POSPLUSI:
704 case OP_POSQUERYI:
705 case OP_POSUPTOI:
706 case OP_NOTSTAR:
707 case OP_NOTMINSTAR:
708 case OP_NOTPLUS:
709 case OP_NOTMINPLUS:
710 case OP_NOTQUERY:
711 case OP_NOTMINQUERY:
712 case OP_NOTUPTO:
713 case OP_NOTMINUPTO:
714 case OP_NOTEXACT:
715 case OP_NOTPOSSTAR:
716 case OP_NOTPOSPLUS:
717 case OP_NOTPOSQUERY:
718 case OP_NOTPOSUPTO:
719 case OP_NOTSTARI:
720 case OP_NOTMINSTARI:
721 case OP_NOTPLUSI:
722 case OP_NOTMINPLUSI:
723 case OP_NOTQUERYI:
724 case OP_NOTMINQUERYI:
725 case OP_NOTUPTOI:
726 case OP_NOTMINUPTOI:
727 case OP_NOTEXACTI:
728 case OP_NOTPOSSTARI:
729 case OP_NOTPOSPLUSI:
730 case OP_NOTPOSQUERYI:
731 case OP_NOTPOSUPTOI:
732 cc += PRIV(OP_lengths)[*cc];
733 #ifdef SUPPORT_UTF
734 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
735 #endif
736 return cc;
737
738 /* Special cases. */
/* Type iterators: the result is one code unit short of the table length.
NOTE(review): presumably this leaves the trailing type opcode to be visited
as an item of its own - confirm against the callers. */
739 case OP_TYPESTAR:
740 case OP_TYPEMINSTAR:
741 case OP_TYPEPLUS:
742 case OP_TYPEMINPLUS:
743 case OP_TYPEQUERY:
744 case OP_TYPEMINQUERY:
745 case OP_TYPEUPTO:
746 case OP_TYPEMINUPTO:
747 case OP_TYPEEXACT:
748 case OP_TYPEPOSSTAR:
749 case OP_TYPEPOSPLUS:
750 case OP_TYPEPOSQUERY:
751 case OP_TYPEPOSUPTO:
752 return cc + PRIV(OP_lengths)[*cc] - 1;
753
/* OP_ANYBYTE is rejected in UTF mode and is a single unit otherwise. */
754 case OP_ANYBYTE:
755 #ifdef SUPPORT_UTF
756 if (common->utf) return NULL;
757 #endif
758 return cc + 1;
759
/* OP_XCLASS stores its own total length right after the opcode. */
760 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
761 case OP_XCLASS:
762 return cc + GET(cc, 1);
763 #endif
764
/* Verbs with an argument: cc[1] holds a length; the item occupies that many
units plus three more (the opcode, the length unit and one further unit -
presumably a terminator, confirm). */
765 case OP_MARK:
766 case OP_PRUNE_ARG:
767 case OP_SKIP_ARG:
768 case OP_THEN_ARG:
769 return cc + 1 + 2 + cc[1];
770
771 default:
772 /* All opcodes are supported now! */
773 SLJIT_ASSERT_STOP();
774 return NULL;
775 }
776 }
777
/* Walks the byte code from cc up to (not including) ccend, records in
common which features the pattern uses, and checks that every opcode can be
handled.

Side effects on common:
- sets has_set_som, might_be_empty, has_then, has_skip_arg;
- clears optimized_cbracket[] for every capture group that is referenced by
  number or by name, or that is a possessive capturing bracket;
- the first OP_RECURSE, OP_CALLOUT or mark-carrying verb reserves one
  sljit_sw slot at the current ovector_start for recursive_head_ptr,
  capture_last_ptr or mark_ptr and advances ovector_start by one word;
- sets control_head_ptr to 1 for THEN and SKIP-with-argument verbs. Only the
  fact that it is non-zero is established here.

Returns FALSE when an OP_CALLOUT directly follows OP_COND / OP_SCOND or when
next_opcode rejects an opcode; TRUE otherwise. */
778 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
779 {
780 int count;
781 pcre_uchar *slot;
782
783 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
784 while (cc < ccend)
785 {
786 switch(*cc)
787 {
788 case OP_SET_SOM:
789 common->has_set_som = TRUE;
790 common->might_be_empty = TRUE;
791 cc += 1;
792 break;
793
/* A referenced group number switches the optimization of that capturing
bracket off. */
794 case OP_REF:
795 case OP_REFI:
796 common->optimized_cbracket[GET2(cc, 1)] = 0;
797 cc += 1 + IMM2_SIZE;
798 break;
799
800 case OP_CBRAPOS:
801 case OP_SCBRAPOS:
802 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
803 cc += 1 + LINK_SIZE + IMM2_SIZE;
804 break;
805
806 case OP_COND:
807 case OP_SCOND:
808 /* Only AUTO_CALLOUT can insert this opcode. We do
809 not intend to support this case. */
810 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
811 return FALSE;
812 cc += 1 + LINK_SIZE;
813 break;
814
815 case OP_CREF:
816 common->optimized_cbracket[GET2(cc, 1)] = 0;
817 cc += 1 + IMM2_SIZE;
818 break;
819
/* References by name: the opcode carries an index into the name table and a
count; the group number at the start of each of those entries is marked as
not optimized. */
820 case OP_DNREF:
821 case OP_DNREFI:
822 case OP_DNCREF:
823 count = GET2(cc, 1 + IMM2_SIZE);
824 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
825 while (count-- > 0)
826 {
827 common->optimized_cbracket[GET2(slot, 0)] = 0;
828 slot += common->name_entry_size;
829 }
830 cc += 1 + 2 * IMM2_SIZE;
831 break;
832
833 case OP_RECURSE:
834 /* Set its value only once. */
835 if (common->recursive_head_ptr == 0)
836 {
837 common->recursive_head_ptr = common->ovector_start;
838 common->ovector_start += sizeof(sljit_sw);
839 }
840 cc += 1 + LINK_SIZE;
841 break;
842
/* Same one-time slot reservation as for OP_RECURSE above. */
843 case OP_CALLOUT:
844 if (common->capture_last_ptr == 0)
845 {
846 common->capture_last_ptr = common->ovector_start;
847 common->ovector_start += sizeof(sljit_sw);
848 }
849 cc += 2 + 2 * LINK_SIZE;
850 break;
851
852 case OP_THEN_ARG:
853 common->has_then = TRUE;
854 common->control_head_ptr = 1;
855 /* Fall through. */
856
/* Verbs that carry a mark name reserve the mark slot once. */
857 case OP_PRUNE_ARG:
858 case OP_MARK:
859 if (common->mark_ptr == 0)
860 {
861 common->mark_ptr = common->ovector_start;
862 common->ovector_start += sizeof(sljit_sw);
863 }
864 cc += 1 + 2 + cc[1];
865 break;
866
867 case OP_THEN:
868 common->has_then = TRUE;
869 common->control_head_ptr = 1;
870 /* Fall through. */
871
872 case OP_PRUNE:
873 case OP_SKIP:
874 cc += 1;
875 break;
876
877 case OP_SKIP_ARG:
878 common->control_head_ptr = 1;
879 common->has_skip_arg = TRUE;
880 cc += 1 + 2 + cc[1];
881 break;
882
/* Everything else only has to be skipped; next_opcode returns NULL for an
opcode it cannot handle. */
883 default:
884 cc = next_opcode(common, cc);
885 if (cc == NULL)
886 return FALSE;
887 break;
888 }
889 }
890 return TRUE;
891 }
892
893 static int get_class_iterator_size(pcre_uchar *cc)
894 {
895 switch(*cc)
896 {
897 case OP_CRSTAR:
898 case OP_CRPLUS:
899 return 2;
900
901 case OP_CRMINSTAR:
902 case OP_CRMINPLUS:
903 case OP_CRQUERY:
904 case OP_CRMINQUERY:
905 return 1;
906
907 case OP_CRRANGE:
908 case OP_CRMINRANGE:
909 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
910 return 0;
911 return 2;
912
913 default:
914 return 0;
915 }
916 }
917
/* Checks whether the bracket starting at begin is followed by byte-identical
copies of itself and, if so, records the repetition in
common->private_data_ptrs. NOTE(review): such sequences are presumably what
the pattern compiler emits for a counted repeat of a group - confirm.

A record occupies three consecutive private_data_ptrs entries, the first one
being the entry of the code unit right after a closing OP_KET:
  [0] distance in code units from the end of that bracket to the end of the
      repeated sequence,
  [1] OP_EXACT, OP_UPTO or OP_MINUPTO,
  [2] the repeat count.

Returns TRUE when a record for this bracket already exists or has just been
stored at its closing OP_KET, FALSE otherwise. Note that FALSE can still be
returned after an UPTO record has been stored at a later copy (when only two
leading copies remain). */
918 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
919 {
920 pcre_uchar *end = bracketend(begin);
921 pcre_uchar *next;
922 pcre_uchar *next_end;
923 pcre_uchar *max_end;
924 pcre_uchar type;
925 sljit_sw length = end - begin;
926 int min, max, i;
927
928 /* Detect fixed iterations first. */
929 if (end[-(1 + LINK_SIZE)] != OP_KET)
930 return FALSE;
931
932 /* Already detected repeat. */
933 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
934 return TRUE;
935
/* Count the copies that directly follow the bracket. min starts at 1 for
the bracket itself; a copy must have the same length and the same bytes. */
936 next = end;
937 min = 1;
938 while (1)
939 {
940 if (*next != *begin)
941 break;
942 next_end = bracketend(next);
943 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
944 break;
945 next = next_end;
946 min++;
947 }
948
/* Exactly two copies are not converted. */
949 if (min == 2)
950 return FALSE;
951
/* Look for an optional tail of the form
  type OP_BRA <copy> type OP_BRA <copy> ... type <copy> OP_KET ... OP_KET
where type is OP_BRAZERO or OP_BRAMINZERO. max counts the copies that are
wrapped in an extra OP_BRA. */
952 max = 0;
953 max_end = next;
954 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
955 {
956 type = *next;
957 while (1)
958 {
959 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
960 break;
961 next_end = bracketend(next + 2 + LINK_SIZE);
962 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
963 break;
964 next = next_end;
965 max++;
966 }
967
/* The innermost item has no wrapping OP_BRA. It must be followed by one
closing OP_KET for each wrapper counted above. */
968 if (next[0] == type && next[1] == *begin && max >= 1)
969 {
970 next_end = bracketend(next + 1);
971 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
972 {
973 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
974 if (*next_end != OP_KET)
975 break;
976
977 if (i == max)
978 {
/* Store the UPTO / MINUPTO record at the last leading copy. */
979 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
980 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
981 /* +2 the original and the last. */
982 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
983 if (min == 1)
984 return TRUE;
/* The last leading copy now belongs to the UPTO record: take it out of the
exact count and move max_end back to where that copy starts, using the link
stored in its closing OP_KET. */
985 min--;
986 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
987 }
988 }
989 }
990 }
991
/* Three or more remaining leading copies become an OP_EXACT record stored
at the closing OP_KET of the first bracket. */
992 if (min >= 3)
993 {
994 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
995 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
996 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
997 return TRUE;
998 }
999
1000 return FALSE;
1001 }
1002
/* Groups of case labels, meant to be placed in front of a shared switch
body. In set_private_data_ptrs the groups ending in _1 are given space = 1
and those ending in _2A or _2B space = 2; for the single character
iterators the A group is followed by size = -2 and the B group, whose
opcodes carry an extra count, by size = -(2 + IMM2_SIZE). */

/* Single character iterators, one unit of space: minimizing star / plus and
both query forms, in plain, caseless, negated and negated caseless flavours. */
1003 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1004 case OP_MINSTAR: \
1005 case OP_MINPLUS: \
1006 case OP_QUERY: \
1007 case OP_MINQUERY: \
1008 case OP_MINSTARI: \
1009 case OP_MINPLUSI: \
1010 case OP_QUERYI: \
1011 case OP_MINQUERYI: \
1012 case OP_NOTMINSTAR: \
1013 case OP_NOTMINPLUS: \
1014 case OP_NOTQUERY: \
1015 case OP_NOTMINQUERY: \
1016 case OP_NOTMINSTARI: \
1017 case OP_NOTMINPLUSI: \
1018 case OP_NOTQUERYI: \
1019 case OP_NOTMINQUERYI:
1020
/* Single character iterators, two units of space: greedy star / plus. */
1021 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1022 case OP_STAR: \
1023 case OP_PLUS: \
1024 case OP_STARI: \
1025 case OP_PLUSI: \
1026 case OP_NOTSTAR: \
1027 case OP_NOTPLUS: \
1028 case OP_NOTSTARI: \
1029 case OP_NOTPLUSI:
1030
/* Single character iterators, two units of space: upto / minupto. */
1031 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1032 case OP_UPTO: \
1033 case OP_MINUPTO: \
1034 case OP_UPTOI: \
1035 case OP_MINUPTOI: \
1036 case OP_NOTUPTO: \
1037 case OP_NOTMINUPTO: \
1038 case OP_NOTUPTOI: \
1039 case OP_NOTMINUPTOI:
1040
/* The same three groups for the type iterators. */
1041 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1042 case OP_TYPEMINSTAR: \
1043 case OP_TYPEMINPLUS: \
1044 case OP_TYPEQUERY: \
1045 case OP_TYPEMINQUERY:
1046
1047 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1048 case OP_TYPESTAR: \
1049 case OP_TYPEPLUS:
1050
1051 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1052 case OP_TYPEUPTO: \
1053 case OP_TYPEMINUPTO:
1054
/* Walks the compiled pattern from common->start up to ccend and records, in
common->private_data_ptrs (indexed by the opcode's distance from
common->start), the private data offset given to each opcode that receives
one.

Arguments:
  common              compiler state; private_data_ptrs is written
  private_data_start  in: first free private data offset
                      out: first free offset after the assignments
  ccend               end of the range to scan

The scan stops early once the running offset exceeds SLJIT_MAX_LOCAL_SIZE. */
1055 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1056 {
1057 pcre_uchar *cc = common->start;
1058 pcre_uchar *alternative;
/* While cc < end, character iterators are not given a private slot (see the
"space > 0 && cc >= end" test below). */
1059 pcre_uchar *end = NULL;
1060 int private_data_ptr = *private_data_start;
/* Per-opcode results of the switch below:
  space       number of private words requested for a character iterator
  size        units to skip; a negative value means a character operand is
              the last skipped unit, so UTF extra units are skipped as well
  bracketlen  header length of a bracket opcode */
1061 int space, size, bracketlen;
/* Cleared for one iteration after a BRAZERO-type opcode, so the bracket that
follows it is not passed to detect_repeat(). */
1062 BOOL repeat_check = TRUE;
1063
1064 while (cc < ccend)
1065 {
1066 space = 0;
1067 size = 0;
1068 bracketlen = 0;
/* Give up once the running offset passes SLJIT_MAX_LOCAL_SIZE. */
1069 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1070 break;
1071
1072 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1073 {
1074 if (detect_repeat(common, cc))
1075 {
1076 /* These brackets are converted to repeats, so no global
1077 based single character repeat is allowed. */
1078 if (cc >= end)
1079 end = bracketend(cc);
1080 }
1081 }
1082 repeat_check = TRUE;
1083
1084 switch(*cc)
1085 {
1086 case OP_KET:
/* A non-zero entry at cc + 1 means this KET receives a slot; the value of
that entry is also added to cc as extra length to skip. */
1087 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1088 {
1089 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1090 private_data_ptr += sizeof(sljit_sw);
1091 cc += common->private_data_ptrs[cc + 1 - common->start];
1092 }
1093 cc += 1 + LINK_SIZE;
1094 break;
1095
/* Brackets that always receive one private word. */
1096 case OP_ASSERT:
1097 case OP_ASSERT_NOT:
1098 case OP_ASSERTBACK:
1099 case OP_ASSERTBACK_NOT:
1100 case OP_ONCE:
1101 case OP_ONCE_NC:
1102 case OP_BRAPOS:
1103 case OP_SBRA:
1104 case OP_SBRAPOS:
1105 case OP_SCOND:
1106 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1107 private_data_ptr += sizeof(sljit_sw);
1108 bracketlen = 1 + LINK_SIZE;
1109 break;
1110
/* Same as above, but the bracket header also carries an IMM2 value. */
1111 case OP_CBRAPOS:
1112 case OP_SCBRAPOS:
1113 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1114 private_data_ptr += sizeof(sljit_sw);
1115 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1116 break;
1117
1118 case OP_COND:
1119 /* Might be a hidden SCOND. */
1120 alternative = cc + GET(cc, 1);
1121 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1122 {
1123 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1124 private_data_ptr += sizeof(sljit_sw);
1125 }
1126 bracketlen = 1 + LINK_SIZE;
1127 break;
1128
/* Brackets without a private word of their own. */
1129 case OP_BRA:
1130 bracketlen = 1 + LINK_SIZE;
1131 break;
1132
1133 case OP_CBRA:
1134 case OP_SCBRA:
1135 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1136 break;
1137
1138 case OP_BRAZERO:
1139 case OP_BRAMINZERO:
1140 case OP_BRAPOSZERO:
1141 repeat_check = FALSE;
1142 size = 1;
1143 break;
1144
1145 CASE_ITERATOR_PRIVATE_DATA_1
1146 space = 1;
1147 size = -2;
1148 break;
1149
1150 CASE_ITERATOR_PRIVATE_DATA_2A
1151 space = 2;
1152 size = -2;
1153 break;
1154
1155 CASE_ITERATOR_PRIVATE_DATA_2B
1156 space = 2;
1157 size = -(2 + IMM2_SIZE);
1158 break;
1159
1160 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1161 space = 1;
1162 size = 1;
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
/* No private words are requested when the repeated type is OP_ANYNL or
OP_EXTUNI. */
1166 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1167 space = 2;
1168 size = 1;
1169 break;
1170
1171 case OP_TYPEUPTO:
1172 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1173 space = 2;
1174 size = 1 + IMM2_SIZE;
1175 break;
1176
1177 case OP_TYPEMINUPTO:
1178 space = 2;
1179 size = 1 + IMM2_SIZE;
1180 break;
1181
1182 case OP_CLASS:
1183 case OP_NCLASS:
/* size is 0 here, so this sets it to the class opcode length; the number of
words depends on the repeat opcode that follows the class data. */
1184 size += 1 + 32 / sizeof(pcre_uchar);
1185 space = get_class_iterator_size(cc + size);
1186 break;
1187
1188 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1189 case OP_XCLASS:
1190 size = GET(cc, 1);
1191 space = get_class_iterator_size(cc + size);
1192 break;
1193 #endif
1194
1195 default:
1196 cc = next_opcode(common, cc);
1197 SLJIT_ASSERT(cc != NULL);
1198 break;
1199 }
1200
1201 /* Character iterators, which are not inside a repeated bracket,
1202 get a private slot instead of allocating it on the stack. */
1203 if (space > 0 && cc >= end)
1204 {
1205 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1206 private_data_ptr += sizeof(sljit_sw) * space;
1207 }
1208
1209 if (size != 0)
1210 {
1211 if (size < 0)
1212 {
/* Skip the opcode and its character operand; in UTF mode also skip the
extra units belonging to that character. */
1213 cc += -size;
1214 #ifdef SUPPORT_UTF
1215 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1216 #endif
1217 }
1218 else
1219 cc += size;
1220 }
1221
1222 if (bracketlen > 0)
1223 {
/* Entering a bracket while not inside a tracked region: end becomes the end
of this bracket, unless the bracket is closed by a plain OP_KET, in which
case the tracking is cleared. */
1224 if (cc >= end)
1225 {
1226 end = bracketend(cc);
1227 if (end[-1 - LINK_SIZE] == OP_KET)
1228 end = NULL;
1229 }
1230 cc += bracketlen;
1231 }
1232 }
1233 *private_data_start = private_data_ptr;
1234 }
1235
1236 /* Returns with a frame_types (always < 0) if no need for frame. */
/* Computes the number of stack words needed by a frame for the code between
cc and ccend. When ccend is NULL, the range is the inside of the bracket
starting at cc.

Counting rules, as implemented below:
  - OP_SET_SOM adds 2 words the first time it is seen;
  - OP_MARK / OP_PRUNE_ARG / OP_THEN_ARG add 2 words the first time;
  - a capturing bracket adds 3 words, plus 2 words once for the last
    capture value when common->capture_last_ptr is non-zero;
  - OP_RECURSE adds each of the 2-word items above that the pattern uses
    and that has not been counted yet.
When recursive is TRUE the SOM and mark items are treated as already counted.

Returns length + 1 when at least one word is needed. Otherwise returns
no_frame if some opcode in the range set stack_restore, or no_stack if none
did. *needs_control_head is set to TRUE when a THEN or mark-carrying verb is
found and common->control_head_ptr is non-zero (or unconditionally when
DEBUG_FORCE_CONTROL_HEAD is enabled). */
1237 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1238 {
1239 int length = 0;
/* Words pre-counted for a possessive capturing bracket (see below). */
1240 int possessive = 0;
1241 BOOL stack_restore = FALSE;
1242 BOOL setsom_found = recursive;
1243 BOOL setmark_found = recursive;
1244 /* The last capture is a local variable even for recursions. */
1245 BOOL capture_last_found = FALSE;
1246
1247 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1248 SLJIT_ASSERT(common->control_head_ptr != 0);
1249 *needs_control_head = TRUE;
1250 #else
1251 *needs_control_head = FALSE;
1252 #endif
1253
1254 if (ccend == NULL)
1255 {
1256 ccend = bracketend(cc) - (1 + LINK_SIZE);
1257 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1258 {
/* The words of the bracket itself are counted up front; if nothing else is
added later, the "possessive == length" test below applies. */
1259 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1260 /* This is correct regardless of common->capture_last_ptr. */
1261 capture_last_found = TRUE;
1262 }
1263 cc = next_opcode(common, cc);
1264 }
1265
1266 SLJIT_ASSERT(cc != NULL);
1267 while (cc < ccend)
1268 switch(*cc)
1269 {
1270 case OP_SET_SOM:
1271 SLJIT_ASSERT(common->has_set_som);
1272 stack_restore = TRUE;
1273 if (!setsom_found)
1274 {
1275 length += 2;
1276 setsom_found = TRUE;
1277 }
1278 cc += 1;
1279 break;
1280
1281 case OP_MARK:
1282 case OP_PRUNE_ARG:
1283 case OP_THEN_ARG:
1284 SLJIT_ASSERT(common->mark_ptr != 0);
1285 stack_restore = TRUE;
1286 if (!setmark_found)
1287 {
1288 length += 2;
1289 setmark_found = TRUE;
1290 }
1291 if (common->control_head_ptr != 0)
1292 *needs_control_head = TRUE;
/* Skip the opcode, two further units and the cc[1] units of the argument. */
1293 cc += 1 + 2 + cc[1];
1294 break;
1295
1296 case OP_RECURSE:
1297 stack_restore = TRUE;
1298 if (common->has_set_som && !setsom_found)
1299 {
1300 length += 2;
1301 setsom_found = TRUE;
1302 }
1303 if (common->mark_ptr != 0 && !setmark_found)
1304 {
1305 length += 2;
1306 setmark_found = TRUE;
1307 }
1308 if (common->capture_last_ptr != 0 && !capture_last_found)
1309 {
1310 length += 2;
1311 capture_last_found = TRUE;
1312 }
1313 cc += 1 + LINK_SIZE;
1314 break;
1315
1316 case OP_CBRA:
1317 case OP_CBRAPOS:
1318 case OP_SCBRA:
1319 case OP_SCBRAPOS:
1320 stack_restore = TRUE;
1321 if (common->capture_last_ptr != 0 && !capture_last_found)
1322 {
1323 length += 2;
1324 capture_last_found = TRUE;
1325 }
1326 length += 3;
1327 cc += 1 + LINK_SIZE + IMM2_SIZE;
1328 break;
1329
1330 case OP_THEN:
1331 stack_restore = TRUE;
1332 if (common->control_head_ptr != 0)
1333 *needs_control_head = TRUE;
1334 cc ++;
1335 break;
1336
/* Every opcode not listed anywhere in this switch sets stack_restore; the
opcodes listed after this label only advance cc and leave it untouched. */
1337 default:
1338 stack_restore = TRUE;
1339 /* Fall through. */
1340
1341 case OP_NOT_WORD_BOUNDARY:
1342 case OP_WORD_BOUNDARY:
1343 case OP_NOT_DIGIT:
1344 case OP_DIGIT:
1345 case OP_NOT_WHITESPACE:
1346 case OP_WHITESPACE:
1347 case OP_NOT_WORDCHAR:
1348 case OP_WORDCHAR:
1349 case OP_ANY:
1350 case OP_ALLANY:
1351 case OP_ANYBYTE:
1352 case OP_NOTPROP:
1353 case OP_PROP:
1354 case OP_ANYNL:
1355 case OP_NOT_HSPACE:
1356 case OP_HSPACE:
1357 case OP_NOT_VSPACE:
1358 case OP_VSPACE:
1359 case OP_EXTUNI:
1360 case OP_EODN:
1361 case OP_EOD:
1362 case OP_CIRC:
1363 case OP_CIRCM:
1364 case OP_DOLL:
1365 case OP_DOLLM:
1366 case OP_CHAR:
1367 case OP_CHARI:
1368 case OP_NOT:
1369 case OP_NOTI:
1370
1371 case OP_EXACT:
1372 case OP_POSSTAR:
1373 case OP_POSPLUS:
1374 case OP_POSQUERY:
1375 case OP_POSUPTO:
1376
1377 case OP_EXACTI:
1378 case OP_POSSTARI:
1379 case OP_POSPLUSI:
1380 case OP_POSQUERYI:
1381 case OP_POSUPTOI:
1382
1383 case OP_NOTEXACT:
1384 case OP_NOTPOSSTAR:
1385 case OP_NOTPOSPLUS:
1386 case OP_NOTPOSQUERY:
1387 case OP_NOTPOSUPTO:
1388
1389 case OP_NOTEXACTI:
1390 case OP_NOTPOSSTARI:
1391 case OP_NOTPOSPLUSI:
1392 case OP_NOTPOSQUERYI:
1393 case OP_NOTPOSUPTOI:
1394
1395 case OP_TYPEEXACT:
1396 case OP_TYPEPOSSTAR:
1397 case OP_TYPEPOSPLUS:
1398 case OP_TYPEPOSQUERY:
1399 case OP_TYPEPOSUPTO:
1400
1401 case OP_CLASS:
1402 case OP_NCLASS:
1403 case OP_XCLASS:
1404 case OP_CALLOUT:
1405
1406 cc = next_opcode(common, cc);
1407 SLJIT_ASSERT(cc != NULL);
1408 break;
1409 }
1410
1411 /* Possessive quantifiers can use a special case. */
1412 if (SLJIT_UNLIKELY(possessive == length))
1413 return stack_restore ? no_frame : no_stack;
1414
1415 if (length > 0)
1416 return length + 1;
1417 return stack_restore ? no_frame : no_stack;
1418 }
1419
1420 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1421 {
1422 DEFINE_COMPILER;
1423 BOOL setsom_found = recursive;
1424 BOOL setmark_found = recursive;
1425 /* The last capture is a local variable even for recursions. */
1426 BOOL capture_last_found = FALSE;
1427 int offset;
1428
1429 /* >= 1 + shortest item size (2) */
1430 SLJIT_UNUSED_ARG(stacktop);
1431 SLJIT_ASSERT(stackpos >= stacktop + 2);
1432
1433 stackpos = STACK(stackpos);
1434 if (ccend == NULL)
1435 {
1436 ccend = bracketend(cc) - (1 + LINK_SIZE);
1437 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1438 cc = next_opcode(common, cc);
1439 }
1440
1441 SLJIT_ASSERT(cc != NULL);
1442 while (cc < ccend)
1443 switch(*cc)
1444 {
1445 case OP_SET_SOM:
1446 SLJIT_ASSERT(common->has_set_som);
1447 if (!setsom_found)
1448 {
1449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1451 stackpos += (int)sizeof(sljit_sw);
1452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1453 stackpos += (int)sizeof(sljit_sw);
1454 setsom_found = TRUE;
1455 }
1456 cc += 1;
1457 break;
1458
1459 case OP_MARK:
1460 case OP_PRUNE_ARG:
1461 case OP_THEN_ARG:
1462 SLJIT_ASSERT(common->mark_ptr != 0);
1463 if (!setmark_found)
1464 {
1465 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1467 stackpos += (int)sizeof(sljit_sw);
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469 stackpos += (int)sizeof(sljit_sw);
1470 setmark_found = TRUE;
1471 }
1472 cc += 1 + 2 + cc[1];
1473 break;
1474
1475 case OP_RECURSE:
1476 if (common->has_set_som && !setsom_found)
1477 {
1478 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1479 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1480 stackpos += (int)sizeof(sljit_sw);
1481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1482 stackpos += (int)sizeof(sljit_sw);
1483 setsom_found = TRUE;
1484 }
1485 if (common->mark_ptr != 0 && !setmark_found)
1486 {
1487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1489 stackpos += (int)sizeof(sljit_sw);
1490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1491 stackpos += (int)sizeof(sljit_sw);
1492 setmark_found = TRUE;
1493 }
1494 if (common->capture_last_ptr != 0 && !capture_last_found)
1495 {
1496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1497 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1498 stackpos += (int)sizeof(sljit_sw);
1499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1500 stackpos += (int)sizeof(sljit_sw);
1501 capture_last_found = TRUE;
1502 }
1503 cc += 1 + LINK_SIZE;
1504 break;
1505
1506 case OP_CBRA:
1507 case OP_CBRAPOS:
1508 case OP_SCBRA:
1509 case OP_SCBRAPOS:
1510 if (common->capture_last_ptr != 0 && !capture_last_found)
1511 {
1512 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1513 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1514 stackpos += (int)sizeof(sljit_sw);
1515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1516 stackpos += (int)sizeof(sljit_sw);
1517 capture_last_found = TRUE;
1518 }
1519 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1521 stackpos += (int)sizeof(sljit_sw);
1522 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1523 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1525 stackpos += (int)sizeof(sljit_sw);
1526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1527 stackpos += (int)sizeof(sljit_sw);
1528
1529 cc += 1 + LINK_SIZE + IMM2_SIZE;
1530 break;
1531
1532 default:
1533 cc = next_opcode(common, cc);
1534 SLJIT_ASSERT(cc != NULL);
1535 break;
1536 }
1537
1538 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1539 SLJIT_ASSERT(stackpos == STACK(stacktop));
1540 }
1541
/* Returns the number of machine words accounted for the private data of the
opcodes in the range [cc, ccend). The count starts at 2 (3 when
needs_control_head is TRUE) and grows by the number of private words of each
opcode. The opcode cases parallel those of copy_private_data(). The scan
must finish exactly at ccend. */
1542 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1543 {
1544 int private_data_length = needs_control_head ? 3 : 2;
1545 int size;
1546 pcre_uchar *alternative;
1547 /* Calculate the sum of the private machine words. */
1548 while (cc < ccend)
1549 {
1550 size = 0;
1551 switch(*cc)
1552 {
1553 case OP_KET:
/* Only a KET with a private slot is counted; the value stored for cc + 1 is
an extra length that is skipped together with the KET. */
1554 if (PRIVATE_DATA(cc) != 0)
1555 {
1556 private_data_length++;
1557 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1558 cc += PRIVATE_DATA(cc + 1);
1559 }
1560 cc += 1 + LINK_SIZE;
1561 break;
1562
1563 case OP_ASSERT:
1564 case OP_ASSERT_NOT:
1565 case OP_ASSERTBACK:
1566 case OP_ASSERTBACK_NOT:
1567 case OP_ONCE:
1568 case OP_ONCE_NC:
1569 case OP_BRAPOS:
1570 case OP_SBRA:
1571 case OP_SBRAPOS:
1572 case OP_SCOND:
1573 private_data_length++;
1574 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1575 cc += 1 + LINK_SIZE;
1576 break;
1577
1578 case OP_CBRA:
1579 case OP_SCBRA:
/* Counted only when the optimized_cbracket entry of this capture is zero. */
1580 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1581 private_data_length++;
1582 cc += 1 + LINK_SIZE + IMM2_SIZE;
1583 break;
1584
1585 case OP_CBRAPOS:
1586 case OP_SCBRAPOS:
1587 private_data_length += 2;
1588 cc += 1 + LINK_SIZE + IMM2_SIZE;
1589 break;
1590
1591 case OP_COND:
1592 /* Might be a hidden SCOND. */
1593 alternative = cc + GET(cc, 1);
1594 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1595 private_data_length++;
1596 cc += 1 + LINK_SIZE;
1597 break;
1598
/* Character iterators are counted only when a private slot was assigned to
them. In UTF mode the extra units of the character operand are skipped. */
1599 CASE_ITERATOR_PRIVATE_DATA_1
1600 if (PRIVATE_DATA(cc))
1601 private_data_length++;
1602 cc += 2;
1603 #ifdef SUPPORT_UTF
1604 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1605 #endif
1606 break;
1607
1608 CASE_ITERATOR_PRIVATE_DATA_2A
1609 if (PRIVATE_DATA(cc))
1610 private_data_length += 2;
1611 cc += 2;
1612 #ifdef SUPPORT_UTF
1613 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1614 #endif
1615 break;
1616
1617 CASE_ITERATOR_PRIVATE_DATA_2B
1618 if (PRIVATE_DATA(cc))
1619 private_data_length += 2;
1620 cc += 2 + IMM2_SIZE;
1621 #ifdef SUPPORT_UTF
1622 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1623 #endif
1624 break;
1625
1626 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1627 if (PRIVATE_DATA(cc))
1628 private_data_length++;
1629 cc += 1;
1630 break;
1631
1632 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1633 if (PRIVATE_DATA(cc))
1634 private_data_length += 2;
1635 cc += 1;
1636 break;
1637
1638 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1639 if (PRIVATE_DATA(cc))
1640 private_data_length += 2;
1641 cc += 1 + IMM2_SIZE;
1642 break;
1643
1644 case OP_CLASS:
1645 case OP_NCLASS:
1646 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1647 case OP_XCLASS:
1648 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1649 #else
1650 size = 1 + 32 / (int)sizeof(pcre_uchar);
1651 #endif
/* The word count of a class depends on the repeat opcode that follows it. */
1652 if (PRIVATE_DATA(cc))
1653 private_data_length += get_class_iterator_size(cc + size);
1654 cc += size;
1655 break;
1656
1657 default:
1658 cc = next_opcode(common, cc);
1659 SLJIT_ASSERT(cc != NULL);
1660 break;
1661 }
1662 }
1663 SLJIT_ASSERT(cc == ccend);
1664 return private_data_length;
1665 }
1666
/* Generates code that saves (save == TRUE) or restores (save == FALSE) the
private data words used by the opcodes in the range [cc, ccend), to or from
the stack words addressed relative to STACK_TOP.

Arguments:
  common              compiler state
  cc, ccend           range of opcodes whose private data is copied
  save                TRUE: locals -> stack, FALSE: stack -> locals
  stackptr, stacktop  stack indexes delimiting the copied area; after the
                      adjustments below the copy must end exactly at stacktop
  needs_control_head  TRUE when common->control_head_ptr is part of the area

When saving, common->recursive_head_ptr (and common->control_head_ptr if
needs_control_head) are stored before the per-opcode words. When restoring,
those leading words are skipped instead of being written back.

Values travel through TMP1 and TMP2 alternately: each transfer loads one
register while the other one still holds the previously loaded value. */
1667 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1668 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1669 {
1670 DEFINE_COMPILER;
/* Offsets of the locals collected for the current opcode (at most two). */
1671 int srcw[2];
1672 int count, size;
/* tmp1next selects the register used by the next transfer; the "empty" flags
tell whether a register currently holds a value that is not stored yet. */
1673 BOOL tmp1next = TRUE;
1674 BOOL tmp1empty = TRUE;
1675 BOOL tmp2empty = TRUE;
1676 pcre_uchar *alternative;
/* start: emit the head words (save only); loop: walk the opcodes;
end: the whole range has been processed. */
1677 enum {
1678 start,
1679 loop,
1680 end
1681 } status;
1682
1683 status = save ? start : loop;
1684 stackptr = STACK(stackptr - 2);
1685 stacktop = STACK(stacktop - 1);
1686
1687 if (!save)
1688 {
/* Restore: step over the head word(s), then preload up to two saved words
into TMP1 and TMP2. */
1689 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1690 if (stackptr < stacktop)
1691 {
1692 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1693 stackptr += sizeof(sljit_sw);
1694 tmp1empty = FALSE;
1695 }
1696 if (stackptr < stacktop)
1697 {
1698 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1699 stackptr += sizeof(sljit_sw);
1700 tmp2empty = FALSE;
1701 }
1702 /* The tmp1next must be TRUE in either way. */
1703 }
1704
1705 do
1706 {
1707 count = 0;
1708 switch(status)
1709 {
1710 case start:
1711 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1712 count = 1;
1713 srcw[0] = common->recursive_head_ptr;
1714 if (needs_control_head)
1715 {
1716 SLJIT_ASSERT(common->control_head_ptr != 0);
1717 count = 2;
1718 srcw[1] = common->control_head_ptr;
1719 }
1720 status = loop;
1721 break;
1722
1723 case loop:
1724 if (cc >= ccend)
1725 {
1726 status = end;
1727 break;
1728 }
1729
/* Collect the local offsets of the current opcode in srcw[] and advance cc.
The cases parallel those of get_private_data_copy_length(). */
1730 switch(*cc)
1731 {
1732 case OP_KET:
1733 if (PRIVATE_DATA(cc) != 0)
1734 {
1735 count = 1;
1736 srcw[0] = PRIVATE_DATA(cc);
1737 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1738 cc += PRIVATE_DATA(cc + 1);
1739 }
1740 cc += 1 + LINK_SIZE;
1741 break;
1742
1743 case OP_ASSERT:
1744 case OP_ASSERT_NOT:
1745 case OP_ASSERTBACK:
1746 case OP_ASSERTBACK_NOT:
1747 case OP_ONCE:
1748 case OP_ONCE_NC:
1749 case OP_BRAPOS:
1750 case OP_SBRA:
1751 case OP_SBRAPOS:
1752 case OP_SCOND:
1753 count = 1;
1754 srcw[0] = PRIVATE_DATA(cc);
1755 SLJIT_ASSERT(srcw[0] != 0);
1756 cc += 1 + LINK_SIZE;
1757 break;
1758
1759 case OP_CBRA:
1760 case OP_SCBRA:
1761 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1762 {
1763 count = 1;
1764 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1765 }
1766 cc += 1 + LINK_SIZE + IMM2_SIZE;
1767 break;
1768
1769 case OP_CBRAPOS:
1770 case OP_SCBRAPOS:
1771 count = 2;
1772 srcw[0] = PRIVATE_DATA(cc);
1773 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1774 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1775 cc += 1 + LINK_SIZE + IMM2_SIZE;
1776 break;
1777
1778 case OP_COND:
1779 /* Might be a hidden SCOND. */
1780 alternative = cc + GET(cc, 1);
1781 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1782 {
1783 count = 1;
1784 srcw[0] = PRIVATE_DATA(cc);
1785 SLJIT_ASSERT(srcw[0] != 0);
1786 }
1787 cc += 1 + LINK_SIZE;
1788 break;
1789
1790 CASE_ITERATOR_PRIVATE_DATA_1
1791 if (PRIVATE_DATA(cc))
1792 {
1793 count = 1;
1794 srcw[0] = PRIVATE_DATA(cc);
1795 }
1796 cc += 2;
1797 #ifdef SUPPORT_UTF
1798 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1799 #endif
1800 break;
1801
/* Two-word iterators: the second word directly follows the first one. */
1802 CASE_ITERATOR_PRIVATE_DATA_2A
1803 if (PRIVATE_DATA(cc))
1804 {
1805 count = 2;
1806 srcw[0] = PRIVATE_DATA(cc);
1807 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1808 }
1809 cc += 2;
1810 #ifdef SUPPORT_UTF
1811 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1812 #endif
1813 break;
1814
1815 CASE_ITERATOR_PRIVATE_DATA_2B
1816 if (PRIVATE_DATA(cc))
1817 {
1818 count = 2;
1819 srcw[0] = PRIVATE_DATA(cc);
1820 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1821 }
1822 cc += 2 + IMM2_SIZE;
1823 #ifdef SUPPORT_UTF
1824 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1825 #endif
1826 break;
1827
1828 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1829 if (PRIVATE_DATA(cc))
1830 {
1831 count = 1;
1832 srcw[0] = PRIVATE_DATA(cc);
1833 }
1834 cc += 1;
1835 break;
1836
1837 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1838 if (PRIVATE_DATA(cc))
1839 {
1840 count = 2;
1841 srcw[0] = PRIVATE_DATA(cc);
1842 srcw[1] = srcw[0] + sizeof(sljit_sw);
1843 }
1844 cc += 1;
1845 break;
1846
1847 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1848 if (PRIVATE_DATA(cc))
1849 {
1850 count = 2;
1851 srcw[0] = PRIVATE_DATA(cc);
1852 srcw[1] = srcw[0] + sizeof(sljit_sw);
1853 }
1854 cc += 1 + IMM2_SIZE;
1855 break;
1856
1857 case OP_CLASS:
1858 case OP_NCLASS:
1859 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1860 case OP_XCLASS:
1861 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1862 #else
1863 size = 1 + 32 / (int)sizeof(pcre_uchar);
1864 #endif
/* A class with a private slot is expected to use one or two words, depending
on the repeat opcode that follows it. */
1865 if (PRIVATE_DATA(cc))
1866 switch(get_class_iterator_size(cc + size))
1867 {
1868 case 1:
1869 count = 1;
1870 srcw[0] = PRIVATE_DATA(cc);
1871 break;
1872
1873 case 2:
1874 count = 2;
1875 srcw[0] = PRIVATE_DATA(cc);
1876 srcw[1] = srcw[0] + sizeof(sljit_sw);
1877 break;
1878
1879 default:
1880 SLJIT_ASSERT_STOP();
1881 break;
1882 }
1883 cc += size;
1884 break;
1885
1886 default:
1887 cc = next_opcode(common, cc);
1888 SLJIT_ASSERT(cc != NULL);
1889 break;
1890 }
1891 break;
1892
1893 case end:
1894 SLJIT_ASSERT_STOP();
1895 break;
1896 }
1897
/* Transfer the collected locals, highest srcw[] index first. */
1898 while (count > 0)
1899 {
1900 count--;
1901 if (save)
1902 {
/* Save: store the value loaded two transfers ago (if any), then load the
next local into the same register. */
1903 if (tmp1next)
1904 {
1905 if (!tmp1empty)
1906 {
1907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1908 stackptr += sizeof(sljit_sw);
1909 }
1910 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1911 tmp1empty = FALSE;
1912 tmp1next = FALSE;
1913 }
1914 else
1915 {
1916 if (!tmp2empty)
1917 {
1918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1919 stackptr += sizeof(sljit_sw);
1920 }
1921 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1922 tmp2empty = FALSE;
1923 tmp1next = TRUE;
1924 }
1925 }
1926 else
1927 {
/* Restore: write the preloaded value into the local, then refill the
register from the stack while saved words remain. */
1928 if (tmp1next)
1929 {
1930 SLJIT_ASSERT(!tmp1empty);
1931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1932 tmp1empty = stackptr >= stacktop;
1933 if (!tmp1empty)
1934 {
1935 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936 stackptr += sizeof(sljit_sw);
1937 }
1938 tmp1next = FALSE;
1939 }
1940 else
1941 {
1942 SLJIT_ASSERT(!tmp2empty);
1943 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1944 tmp2empty = stackptr >= stacktop;
1945 if (!tmp2empty)
1946 {
1947 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1948 stackptr += sizeof(sljit_sw);
1949 }
1950 tmp1next = TRUE;
1951 }
1952 }
1953 }
1954 }
1955 while (status != end);
1956
1957 if (save)
1958 {
/* Store the values still held in the registers; the register that would be
used next holds the older value, so it is stored first. */
1959 if (tmp1next)
1960 {
1961 if (!tmp1empty)
1962 {
1963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1964 stackptr += sizeof(sljit_sw);
1965 }
1966 if (!tmp2empty)
1967 {
1968 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1969 stackptr += sizeof(sljit_sw);
1970 }
1971 }
1972 else
1973 {
1974 if (!tmp2empty)
1975 {
1976 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1977 stackptr += sizeof(sljit_sw);
1978 }
1979 if (!tmp1empty)
1980 {
1981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1982 stackptr += sizeof(sljit_sw);
1983 }
1984 }
1985 }
1986 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1987 }
1988
1989 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1990 {
1991 pcre_uchar *end = bracketend(cc);
1992 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1993
1994 /* Assert captures then. */
1995 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1996 current_offset = NULL;
1997 /* Conditional block does not. */
1998 if (*cc == OP_COND || *cc == OP_SCOND)
1999 has_alternatives = FALSE;
2000
2001 cc = next_opcode(common, cc);
2002 if (has_alternatives)
2003 current_offset = common->then_offsets + (cc - common->start);
2004
2005 while (cc < end)
2006 {
2007 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2008 cc = set_then_offsets(common, cc, current_offset);
2009 else
2010 {
2011 if (*cc == OP_ALT && has_alternatives)
2012 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2013 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2014 *current_offset = 1;
2015 cc = next_opcode(common, cc);
2016 }
2017 }
2018
2019 return end;
2020 }
2021
/* The CASE_ITERATOR_* case label groups are only needed by the opcode
scanners above, so they are removed here. */
2022 #undef CASE_ITERATOR_PRIVATE_DATA_1
2023 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2024 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2025 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2026 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2027 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2028
2029 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2030 {
2031 return (value & (value - 1)) == 0;
2032 }
2033
2034 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2035 {
2036 while (list)
2037 {
2038 /* sljit_set_label is clever enough to do nothing
2039 if either the jump or the label is NULL. */
2040 SET_LABEL(list->jump, label);
2041 list = list->next;
2042 }
2043 }
2044
2045 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2046 {
2047 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2048 if (list_item)
2049 {
2050 list_item->next = *list;
2051 list_item->jump = jump;
2052 *list = list_item;
2053 }
2054 }
2055
2056 static void add_stub(compiler_common *common, struct sljit_jump *start)
2057 {
2058 DEFINE_COMPILER;
2059 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2060
2061 if (list_item)
2062 {
2063 list_item->start = start;
2064 list_item->quit = LABEL();
2065 list_item->next = common->stubs;
2066 common->stubs = list_item;
2067 }
2068 }
2069
2070 static void flush_stubs(compiler_common *common)
2071 {
2072 DEFINE_COMPILER;
2073 stub_list *list_item = common->stubs;
2074
2075 while (list_item)
2076 {
2077 JUMPHERE(list_item->start);
2078 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2079 JUMPTO(SLJIT_JUMP, list_item->quit);
2080 list_item = list_item->next;
2081 }
2082 common->stubs = NULL;
2083 }
2084
2085 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2086 {
2087 DEFINE_COMPILER;
2088 label_addr_list *label_addr;
2089
2090 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2091 if (label_addr == NULL)
2092 return;
2093 label_addr->label = LABEL();
2094 label_addr->update_addr = update_addr;
2095 label_addr->next = common->label_addrs;
2096 common->label_addrs = label_addr;
2097 }
2098
2099 static SLJIT_INLINE void count_match(compiler_common *common)
2100 {
2101 DEFINE_COMPILER;
2102
2103 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2104 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2105 }
2106
2107 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2108 {
2109 /* May destroy all locals and registers except TMP2. */
2110 DEFINE_COMPILER;
2111
2112 SLJIT_ASSERT(size > 0);
2113 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2114 #ifdef DESTROY_REGISTERS
2115 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2116 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2117 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2120 #endif
2121 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2122 }
2123
2124 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2125 {
2126 DEFINE_COMPILER;
2127
2128 SLJIT_ASSERT(size > 0);
2129 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2130 }
2131
2132 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2133 {
2134 DEFINE_COMPILER;
2135 sljit_uw *result;
2136
2137 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2138 return NULL;
2139
2140 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2141 if (SLJIT_UNLIKELY(result == NULL))
2142 {
2143 sljit_set_compiler_memory_error(compiler);
2144 return NULL;
2145 }
2146
2147 *(void**)result = common->read_only_data_head;
2148 common->read_only_data_head = (void *)result;
2149 return result + 1;
2150 }
2151
2152 static void free_read_only_data(void *current, void *allocator_data)
2153 {
2154 void *next;
2155
2156 SLJIT_UNUSED_ARG(allocator_data);
2157
2158 while (current != NULL)
2159 {
2160 next = *(void**)current;
2161 SLJIT_FREE(current, allocator_data);
2162 current = next;
2163 }
2164 }
2165
2166 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2167 {
2168 DEFINE_COMPILER;
2169 struct sljit_label *loop;
2170 int i;
2171
2172 /* At this point we can freely use all temporary registers. */
2173 SLJIT_ASSERT(length > 1);
2174 /* TMP1 returns with begin - 1. */
2175 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2176 if (length < 8)
2177 {
2178 for (i = 1; i < length; i++)
2179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2180 }
2181 else
2182 {
2183 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2184 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2185 loop = LABEL();
2186 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2187 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2188 JUMPTO(SLJIT_NOT_ZERO, loop);
2189 }
2190 }
2191
2192 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2193 {
2194 DEFINE_COMPILER;
2195 struct sljit_label *loop;
2196 int i;
2197
2198 SLJIT_ASSERT(length > 1);
2199 /* OVECTOR(1) contains the "string begin - 1" constant. */
2200 if (length > 2)
2201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2202 if (length < 8)
2203 {
2204 for (i = 2; i < length; i++)
2205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2206 }
2207 else
2208 {
2209 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2210 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2211 loop = LABEL();
2212 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2213 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2214 JUMPTO(SLJIT_NOT_ZERO, loop);
2215 }
2216
2217 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2218 if (common->mark_ptr != 0)
2219 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2220 if (common->control_head_ptr != 0)
2221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2222 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2224 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2225 }
2226
2227 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2228 {
2229 while (current != NULL)
2230 {
2231 switch (current[-2])
2232 {
2233 case type_then_trap:
2234 break;
2235
2236 case type_mark:
2237 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2238 return current[-4];
2239 break;
2240
2241 default:
2242 SLJIT_ASSERT_STOP();
2243 break;
2244 }
2245 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2246 current = (sljit_sw*)current[-1];
2247 }
2248 return -1;
2249 }
2250
2251 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2252 {
/* Emits the code that exports the match result: the values stored in the
local ovector area (starting at OVECTOR_START) are converted to offsets
relative to jit_arguments->begin and written as ints into
jit_arguments->offsets (offset_count entries). When common->mark_ptr is
non-zero, the locally stored mark is copied to jit_arguments->mark_ptr as
well. Finally the return value is placed in SLJIT_RETURN_REG. */
2253 DEFINE_COMPILER;
2254 struct sljit_label *loop;
2255 struct sljit_jump *early_quit;
2256
2257 /* At this point we can freely use all registers. */
/* S2 keeps the previous content of OVECTOR(1); it is the value the return
value loop below compares against. OVECTOR(1) then receives STR_PTR. */
2258 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2259 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2260
/* R0 = arguments, R1 = offset_count (loop counter), R2 = output cursor,
S0 = address of the first local ovector entry. The mark load and store are
interleaved with the other loads. */
2261 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2262 if (common->mark_ptr != 0)
2263 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2264 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2265 if (common->mark_ptr != 0)
2266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 starts one int before the output array: the store in the loop uses
offset sizeof(int) with the MOVU form. NOTE(review): this relies on
SLJIT_MOVU_SI advancing R2 by that offset on every store - R2 is not
modified by any other instruction in the loop. */
2267 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2268 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2269 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2270 /* Unlikely, but possible */
/* Nothing to copy when offset_count is zero. */
2271 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2272 loop = LABEL();
/* S1 = local ovector entry - begin; S0 steps to the next local entry. */
2273 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2274 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2275 /* Copy the integer value to the output buffer */
2276 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The difference is scaled down by UCHAR_SHIFT in the 16 and 32 bit
libraries before it is stored. */
2277 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2278 #endif
2279 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2281 JUMPTO(SLJIT_NOT_ZERO, loop);
2282 JUMPHERE(early_quit);
2283
2284 /* Calculate the return value, which is the maximum ovector value. */
2285 if (topbracket > 1)
2286 {
/* R0 starts just above the pair belonging to topbracket and R1 at
topbracket + 1. Each iteration loads the entry two words below R0 (again
through a MOVU form, which is presumably what moves R0 downwards) and
decrements R1; the loop repeats while the loaded value equals S2. */
2287 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2288 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2289
2290 /* OVECTOR(0) is never equal to SLJIT_S2. */
2291 loop = LABEL();
2292 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2293 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2294 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2295 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2296 }
2297 else
/* With at most one bracket the return value is the constant 1. */
2298 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2299 }
2300
2301 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2302 {
/* Emits the code that finishes a partial match: SLJIT_RETURN_REG is set to
PCRE_ERROR_PARTIAL and, when the caller supplied enough output slots
(real_offset_count), up to three ints are written to jit_arguments->offsets.
All stored values are relative to jit_arguments->begin. Control then
continues at 'quit'. */
2303 DEFINE_COMPILER;
2304 struct sljit_jump *jump;
2305
/* STR_END lives in S1, and S1 is overwritten below with the value stored
into offsets[1]. */
2306 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2307 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2308 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2309
2310 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2311 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2312 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: return without storing anything. */
2313 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2314
2315 /* Store match begin and end. */
/* S0 = subject begin, R1 = output array (R1 no longer holds ARGUMENTS). */
2316 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2317 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2318
/* The third slot is written only when at least three slots are available.
Its source is start_ptr in hard partial mode, otherwise the word that follows
hit_start in the local area. */
2319 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2320 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2321 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2322 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2323 #endif
2324 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2325 JUMPHERE(jump);
2326
/* R2 = start of the partial match: start_used_ptr in hard partial mode,
hit_start in soft partial mode. */
2327 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
/* offsets[1] = STR_END - begin. */
2328 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2329 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2330 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2331 #endif
2332 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2333
/* offsets[0] = R2 - begin. */
2334 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2335 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2336 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2337 #endif
2338 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2339
2340 JUMPTO(SLJIT_JUMP, quit);
2341 }
2342
2343 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2344 {
2345 /* May destroy TMP1. */
2346 DEFINE_COMPILER;
2347 struct sljit_jump *jump;
2348
2349 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2350 {
2351 /* The value of -1 must be kept for start_used_ptr! */
2352 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2353 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2354 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2355 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2357 JUMPHERE(jump);
2358 }
2359 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2360 {
2361 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 JUMPHERE(jump);
2364 }
2365 }
2366
2367 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2368 {
2369 /* Detects if the character has an othercase. */
2370 unsigned int c;
2371
2372 #ifdef SUPPORT_UTF
2373 if (common->utf)
2374 {
2375 GETCHAR(c, cc);
2376 if (c > 127)
2377 {
2378 #ifdef SUPPORT_UCP
2379 return c != UCD_OTHERCASE(c);
2380 #else
2381 return FALSE;
2382 #endif
2383 }
2384 #ifndef COMPILE_PCRE8
2385 return common->fcc[c] != c;
2386 #endif
2387 }
2388 else
2389 #endif
2390 c = *cc;
2391 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2392 }
2393
2394 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2395 {
2396 /* Returns with the othercase. */
2397 #ifdef SUPPORT_UTF
2398 if (common->utf && c > 127)
2399 {
2400 #ifdef SUPPORT_UCP
2401 return UCD_OTHERCASE(c);
2402 #else
2403 return c;
2404 #endif
2405 }
2406 #endif
2407 return TABLE_GET(c, common->fcc, c);
2408 }
2409
2410 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2411 {
2412 /* Detects if the character and its othercase has only 1 bit difference. */
2413 unsigned int c, oc, bit;
2414 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2415 int n;
2416 #endif
2417
2418 #ifdef SUPPORT_UTF
2419 if (common->utf)
2420 {
2421 GETCHAR(c, cc);
2422 if (c <= 127)
2423 oc = common->fcc[c];
2424 else
2425 {
2426 #ifdef SUPPORT_UCP
2427 oc = UCD_OTHERCASE(c);
2428 #else
2429 oc = c;
2430 #endif
2431 }
2432 }
2433 else
2434 {
2435 c = *cc;
2436 oc = TABLE_GET(c, common->fcc, c);
2437 }
2438 #else
2439 c = *cc;
2440 oc = TABLE_GET(c, common->fcc, c);
2441 #endif
2442
2443 SLJIT_ASSERT(c != oc);
2444
2445 bit = c ^ oc;
2446 /* Optimized for English alphabet. */
2447 if (c <= 127 && bit == 0x20)
2448 return (0 << 8) | 0x20;
2449
2450 /* Since c != oc, they must have at least 1 bit difference. */
2451 if (!is_powerof2(bit))
2452 return 0;
2453
2454 #if defined COMPILE_PCRE8
2455
2456 #ifdef SUPPORT_UTF
2457 if (common->utf && c > 127)
2458 {
2459 n = GET_EXTRALEN(*cc);
2460 while ((bit & 0x3f) == 0)
2461 {
2462 n--;
2463 bit >>= 6;
2464 }
2465 return (n << 8) | bit;
2466 }
2467 #endif /* SUPPORT_UTF */
2468 return (0 << 8) | bit;
2469
2470 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2471
2472 #ifdef SUPPORT_UTF
2473 if (common->utf && c > 65535)
2474 {
2475 if (bit >= (1 << 10))
2476 bit >>= 10;
2477 else
2478 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2479 }
2480 #endif /* SUPPORT_UTF */
2481 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2482
2483 #endif /* COMPILE_PCRE[8|16|32] */
2484 }
2485
2486 static void check_partial(compiler_common *common, BOOL force)
2487 {
2488 /* Checks whether a partial matching is occurred. Does not modify registers. */
2489 DEFINE_COMPILER;
2490 struct sljit_jump *jump = NULL;
2491
2492 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2493
2494 if (common->mode == JIT_COMPILE)
2495 return;
2496
2497 if (!force)
2498 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2499 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2500 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2501
2502 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2504 else
2505 {
2506 if (common->partialmatchlabel != NULL)
2507 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2508 else
2509 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2510 }
2511
2512 if (jump != NULL)
2513 JUMPHERE(jump);
2514 }
2515
2516 static void check_str_end(compiler_common *common, jump_list **end_reached)
2517 {
2518 /* Does not affect registers. Usually used in a tight spot. */
2519 DEFINE_COMPILER;
2520 struct sljit_jump *jump;
2521
2522 if (common->mode == JIT_COMPILE)
2523 {
2524 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2525 return;
2526 }
2527
2528 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2529 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2530 {
2531 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2532 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2533 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2534 }
2535 else
2536 {
2537 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2538 if (common->partialmatchlabel != NULL)
2539 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2540 else
2541 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2542 }
2543 JUMPHERE(jump);
2544 }
2545
2546 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2547 {
2548 DEFINE_COMPILER;
2549 struct sljit_jump *jump;
2550
2551 if (common->mode == JIT_COMPILE)
2552 {
2553 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2554 return;
2555 }
2556
2557 /* Partial matching mode. */
2558 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2559 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2560 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2561 {
2562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2563 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2564 }
2565 else
2566 {
2567 if (common->partialmatchlabel != NULL)
2568 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2569 else
2570 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2571 }
2572 JUMPHERE(jump);
2573 }
2574
2575 static void peek_char(compiler_common *common, pcre_uint32 max)
2576 {
2577 /* Reads the character into TMP1, keeps STR_PTR.
2578 Does not check STR_END. TMP2 Destroyed. */
2579 DEFINE_COMPILER;
2580 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2581 struct sljit_jump *jump;
2582 #endif
2583
2584 SLJIT_UNUSED_ARG(max);
2585
2586 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2587 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2588 if (common->utf)
2589 {
2590 if (max < 128) return;
2591
2592 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2593 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2594 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2595 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2596 JUMPHERE(jump);
2597 }
2598 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2599
2600 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2601 if (common->utf)
2602 {
2603 if (max < 0xd800) return;
2604
2605 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2606 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2607 /* TMP2 contains the high surrogate. */
2608 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2609 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2610 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2611 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2612 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2613 JUMPHERE(jump);
2614 }
2615 #endif
2616 }
2617
2618 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2619
2620 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2621 {
2622 /* Tells whether the character codes below 128 are enough
2623 to determine a match. */
2624 const sljit_ub value = nclass ? 0xff : 0;
2625 const sljit_ub *end = bitset + 32;
2626
2627 bitset += 16;
2628 do
2629 {
2630 if (*bitset++ != value)
2631 return FALSE;
2632 }
2633 while (bitset < end);
2634 return TRUE;
2635 }
2636
2637 static void read_char7_type(compiler_common *common, BOOL full_read)
2638 {
2639 /* Reads the precise character type of a character into TMP1, if the character
2640 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2641 full_read argument tells whether characters above max are accepted or not. */
2642 DEFINE_COMPILER;
2643 struct sljit_jump *jump;
2644
2645 SLJIT_ASSERT(common->utf);
2646
2647 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2648 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649
2650 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2651
2652 if (full_read)
2653 {
2654 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2655 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2657 JUMPHERE(jump);
2658 }
2659 }
2660
2661 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2662
2663 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2664 {
2665 /* Reads the precise value of a character into TMP1, if the character is
2666 between min and max (c >= min && c <= max). Otherwise it returns with a value
2667 outside the range. Does not check STR_END. */
/* The first code unit is always loaded and STR_PTR is always advanced past
it. In the UTF modes, update_str_ptr additionally requests that STR_PTR ends
up after the whole character even when its exact value is not decoded. TMP2
is used as scratch, and the UTF-8 paths with update_str_ptr also use
RETURN_ADDR as scratch. */
2668 DEFINE_COMPILER;
2669 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2670 struct sljit_jump *jump;
2671 #endif
2672 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2673 struct sljit_jump *jump2;
2674 #endif
2675
2676 SLJIT_UNUSED_ARG(update_str_ptr);
2677 SLJIT_UNUSED_ARG(min);
2678 SLJIT_UNUSED_ARG(max);
2679 SLJIT_ASSERT(min <= max);
2680
2681 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2683
2684 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2685 if (common->utf)
2686 {
/* Only single byte values are of interest and STR_PTR does not need to be
moved past a longer character: the first byte is enough. */
2687 if (max < 128 && !update_str_ptr) return;
2688
/* First bytes below 0xc0 need no further processing in any branch. */
2689 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2690 if (min >= 0x10000)
2691 {
/* Range lies entirely at or above 0x10000: only first bytes 0xf0..0xf7 are
decoded (inline, from three following bytes). For other first bytes jump2
skips the decoding. With update_str_ptr the amount to advance comes from
utf8_table4 (fetched before TMP1 is overwritten); otherwise STR_PTR is
advanced by three only on the decoding path. */
2692 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2693 if (update_str_ptr)
2694 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2695 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2696 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2697 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2700 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2701 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2702 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2703 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2704 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2705 if (!update_str_ptr)
2706 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2707 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2708 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2710 JUMPHERE(jump2);
2711 if (update_str_ptr)
2712 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2713 }
2714 else if (min >= 0x800 && max <= 0xffff)
2715 {
/* Range lies entirely within 0x800..0xffff: same scheme as above, but for
first bytes 0xe0..0xef and two following bytes. */
2716 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2717 if (update_str_ptr)
2718 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2719 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2720 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2721 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2722 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2723 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2724 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2725 if (!update_str_ptr)
2726 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2727 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2728 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2729 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2730 JUMPHERE(jump2);
2731 if (update_str_ptr)
2732 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2733 }
2734 else if (max >= 0x800)
/* Mixed lengths: use the out-of-line decoder. utfreadchar16 is selected
when max is below 0x10000, the full utfreadchar otherwise. */
2735 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2736 else if (max < 128)
2737 {
/* Reached only with update_str_ptr set (see the early return above): the
value is not decoded, STR_PTR is just advanced by the utf8_table4 entry of
the first byte. */
2738 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2739 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2740 }
2741 else
2742 {
/* max is within 128..0x7ff: combine the first byte and one following byte
as a two byte sequence. Without update_str_ptr STR_PTR moves by one more
byte; with it, the utf8_table4 entry of the first byte is used instead. */
2743 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2744 if (!update_str_ptr)
2745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2746 else
2747 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2748 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2749 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2750 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2751 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2752 if (update_str_ptr)
2753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2754 }
2755 JUMPHERE(jump);
2756 }
2757 #endif
2758
2759 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2760 if (common->utf)
2761 {
2762 if (max >= 0x10000)
2763 {
/* Characters above 0xffff are of interest: decode a surrogate pair. STR_PTR
already points at the second code unit here, hence the read at offset 0. */
2764 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2765 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2766 /* TMP2 contains the high surrogate. */
2767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2768 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2769 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2771 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2772 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2773 JUMPHERE(jump);
2774 return;
2775 }
2776
/* No surrogate can be in range and STR_PTR need not be corrected. */
2777 if (max < 0xd800 && !update_str_ptr) return;
2778
2779 /* Skip low surrogate if necessary. */
/* When the unit read was a high surrogate: optionally step over the second
unit, and, if max reaches into the surrogate area, replace TMP1 with 0x10000
so the value is above every max handled here (max < 0x10000). */
2780 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2781 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2782 if (update_str_ptr)
2783 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2784 if (max >= 0xd800)
2785 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2786 JUMPHERE(jump);
2787 }
2788 #endif
2789 }
2790
2791 static SLJIT_INLINE void read_char(compiler_common *common)
2792 {
2793 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2794 }
2795
2796 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2797 {
2798 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the common->ctypes table, which only covers values
0..255; TMP1 is set to 0 for larger values on the paths that test for them.
TMP2 is destroyed. In the UTF modes update_str_ptr selects whether STR_PTR
must end up after the complete character. */
2799 DEFINE_COMPILER;
2800 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2801 struct sljit_jump *jump;
2802 #endif
2803 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2804 struct sljit_jump *jump2;
2805 #endif
2806
2807 SLJIT_UNUSED_ARG(update_str_ptr);
2808
/* TMP2 = first code unit; STR_PTR moves past it. */
2809 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2810 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2811
2812 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2813 if (common->utf)
2814 {
2815 /* This can be an extra read in some situations, but hopefully
2816 it is needed in most cases. */
2817 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* First bytes below 0xc0 are finished: TMP1 already holds the type. */
2818 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2819 if (!update_str_ptr)
2820 {
/* Combine the first byte with exactly one following byte (two byte
sequence layout), advance STR_PTR by one, and look the result up when it
is at most 255; otherwise the type is 0. */
2821 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2822 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2823 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2824 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2825 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2826 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2827 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2828 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2829 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2830 JUMPHERE(jump2);
2831 }
2832 else
/* The out-of-line utfreadtype8 helper handles the multi byte case. */
2833 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2834 JUMPHERE(jump);
2835 return;
2836 }
2837 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2838
2839 #if !defined COMPILE_PCRE8
2840 /* The ctypes array contains only 256 values. */
2841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2842 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2843 #endif
2844 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2845 #if !defined COMPILE_PCRE8
2846 JUMPHERE(jump);
2847 #endif
2848
2849 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2850 if (common->utf && update_str_ptr)
2851 {
2852 /* Skip low surrogate if necessary. */
/* STR_PTR is advanced by one more unit when the unit read was within
0xd800..0xdbff. */
2853 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2854 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2855 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2856 JUMPHERE(jump);
2857 }
2858 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2859 }
2860
2861 static void skip_char_back(compiler_common *common)
2862 {
2863 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2864 DEFINE_COMPILER;
2865 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2866 #if defined COMPILE_PCRE8
2867 struct sljit_label *label;
2868
2869 if (common->utf)
2870 {
2871 label = LABEL();
2872 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2873 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2874 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2875 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2876 return;
2877 }
2878 #elif defined COMPILE_PCRE16
2879 if (common->utf)
2880 {
2881 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2882 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2883 /* Skip low surrogate if necessary. */
2884 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2885 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2886 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2887 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2888 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2889 return;
2890 }
2891 #endif /* COMPILE_PCRE[8|16] */
2892 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2893 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2894 }
2895
2896 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2897 {
2898 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2899 DEFINE_COMPILER;
2900 struct sljit_jump *jump;
2901
2902 if (nltype == NLTYPE_ANY)
2903 {
2904 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2905 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2906 }
2907 else if (nltype == NLTYPE_ANYCRLF)
2908 {
2909 if (jumpifmatch)
2910 {
2911 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2913 }
2914 else
2915 {
2916 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2917 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2918 JUMPHERE(jump);
2919 }
2920 }
2921 else
2922 {
2923 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2924 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2925 }
2926 }
2927
2928 #ifdef SUPPORT_UTF
2929
2930 #if defined COMPILE_PCRE8
2931 static void do_utfreadchar(compiler_common *common)
2932 {
2933 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2934 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emits an out-of-line helper (fast enter / fast return, return address
kept in RETURN_ADDR). On entry STR_PTR points at the byte following the first
byte; on return it points after the last byte of the sequence. The bytes
following the first one are combined without checking their form. */
2935 DEFINE_COMPILER;
2936 struct sljit_jump *jump;
2937
2938 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = (first & 0x3f) << 6 | (second & 0x3f). The bits of the first byte
that encode the sequence length are kept and tested below. */
2939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2940 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2942 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2943 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2944
2945 /* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 0x20 of the first byte: clear for 110xxxxx. */
2946 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2947 jump = JUMP(SLJIT_NOT_ZERO);
2948 /* Two byte sequence. */
2949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2950 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2951 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2952
2953 JUMPHERE(jump);
/* Clear the length marker bit and append the third byte. */
2954 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2955 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2956 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2957 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2958 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2959
/* After the shift, bit 0x10000 of TMP1 is bit 0x10 of the first byte: clear
for 1110xxxx. */
2960 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2961 jump = JUMP(SLJIT_NOT_ZERO);
2962 /* Three byte sequence. */
2963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2964 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2965 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2966
2967 /* Four byte sequence. */
/* Everything else is treated as a four byte sequence; no further length bit
is tested. */
2968 JUMPHERE(jump);
2969 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2970 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2971 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2972 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2973 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2974 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2975 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2976 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2977 }
2978
2979 static void do_utfreadchar16(compiler_common *common)
2980 {
2981 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2982 of the character (>= 0xc0). Return value in TMP1. */
/* Emits an out-of-line helper. read_char_range selects it instead of
utfreadchar when max < 0x10000; only two and three byte sequences are decoded
here. Unlike do_utfreadchar, no length is returned in TMP2 (TMP2 is only
scratch). On entry STR_PTR points at the byte following the first byte. */
2983 DEFINE_COMPILER;
2984 struct sljit_jump *jump;
2985
2986 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = (first & 0x3f) << 6 | (second & 0x3f). */
2987 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2988 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2989 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2990 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2991 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2992
2993 /* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 0x20 of the first byte. */
2994 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2995 jump = JUMP(SLJIT_NOT_ZERO);
2996 /* Two byte sequence. */
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999
3000 JUMPHERE(jump);
/* Bit 0x400 of TMP1 is bit 0x10 of the first byte, which is set for a four
byte lead. In that case TMP2 becomes 1 and STR_PTR is advanced by one extra
byte before the three byte decoding below runs. NOTE(review): the value
produced for such input is presumably meant to fall outside the caller's
range rather than to be exact - confirm. */
3001 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3002 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3003 /* This code runs only in 8 bit mode. No need to shift the value. */
3004 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3005 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3006 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3007 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3008 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3009 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3010 /* Three byte sequence. */
3011 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3012 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3013 }
3014
3015 static void do_utfreadtype8(compiler_common *common)
3016 {
3017 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3018 of the character (>= 0xc0). Return value in TMP1. */
/* Emits an out-of-line helper. On entry STR_PTR points at the byte following
the first byte; on return it has been moved past the remaining bytes. The
result is the common->ctypes entry for characters below 256 and 0 for every
other character. */
3019 DEFINE_COMPILER;
3020 struct sljit_jump *jump;
3021 struct sljit_jump *compare;
3022
3023 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3024
/* Bit 0x20 set in the first byte: the sequence is longer than two bytes. */
3025 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3026 jump = JUMP(SLJIT_NOT_ZERO);
3027 /* Two byte sequence. */
3028 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3029 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3030 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3031 /* The upper 5 bits are known at this point. */
/* A payload above 3 in the first byte means the character is 256 or more. */
3032 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3033 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3034 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3035 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3036 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3037 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3038
3039 JUMPHERE(compare);
3040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3041 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3042
3043 /* We only have types for characters less than 256. */
/* Longer sequences: type 0, and STR_PTR is advanced by the utf8_table4 entry
of the first byte. */
3044 JUMPHERE(jump);
3045 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3047 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3048 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3049 }
3050
3051 #endif /* COMPILE_PCRE8 */
3052
3053 #endif /* SUPPORT_UTF */
3054
3055 #ifdef SUPPORT_UCP
3056
3057 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* The mask and the shift split a character into the index inside its block
(low 7 bits) and its block number (remaining bits). */
3058 #define UCD_BLOCK_MASK 127
3059 #define UCD_BLOCK_SHIFT 7
3060
3061 static void do_getucd(compiler_common *common)
3062 {
3063 /* Search the UCD record for the character comes in TMP1.
3064 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emits an out-of-line helper that performs the two stage table lookup:
ucd_stage1 is indexed by the block number, and the value found there,
combined with the index inside the block, selects an entry of ucd_stage2,
which in turn selects the record in ucd_records. */
3065 DEFINE_COMPILER;
3066
/* The shift counts used below depend on these sizes. */
3067 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3068
3069 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[c >> 7] */
3070 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3071 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (c & 127) + (TMP2 << 7): the index into ucd_stage2. */
3072 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3073 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3074 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = 16 bit ucd_stage2 entry. NOTE(review): the trailing 1 is presumably
the shift applied to the index register (index * 2) - confirm against the
sljit SLJIT_MEM2 documentation. */
3075 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3076 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of record TMP2; the trailing 3 presumably scales the
record index by 8, the record size asserted above. */
3077 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3078 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3079 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3080 }
3081 #endif
3082
3083 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3084 {
/* Emits the code around the main matching loop and returns the 'mainloop'
label. Code that jumps to the returned label advances STR_PTR by one
character (treating a two unit newline as one step when newlinecheck is
active) and then falls through to whatever is emitted after this function.
The initial entry skips the advancing code through the 'start' jump. With
'firstline' set, the end of the first line is located first and stored in
the common->first_line_end local. */
3085 DEFINE_COMPILER;
3086 struct sljit_label *mainloop;
3087 struct sljit_label *newlinelabel = NULL;
3088 struct sljit_jump *start;
3089 struct sljit_jump *end = NULL;
3090 struct sljit_jump *nl = NULL;
3091 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3092 struct sljit_jump *singlechar;
3093 #endif
3094 jump_list *newline = NULL;
3095 BOOL newlinecheck = FALSE;
3096 BOOL readuchar = FALSE;
3097
/* The extra newline handling is emitted only when neither hascrorlf nor
firstline is set and the newline convention is ANY, ANYCRLF, or a two unit
fixed newline (common->newline > 255). */
3098 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3099 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3100 newlinecheck = TRUE;
3101
3102 if (firstline)
3103 {
3104 /* Search for the end of the first line. */
3105 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is used for the scan; TMP3 keeps its value so it can be restored. */
3106 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3107
3108 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3109 {
/* Two unit fixed newline: advance one unit at a time and compare the unit
before and the unit at STR_PTR with the two halves of common->newline.
'mainloop' is only a temporary loop label here; it is reassigned below. */
3110 mainloop = LABEL();
3111 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3112 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3114 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3115 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3116 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3117 JUMPHERE(end);
/* Reached both when the newline was found and when STR_END was hit. */
3118 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 }
3120 else
3121 {
/* Other newline types: before each character is read its position is stored
as the candidate line end; a newline character leaves the loop through the
'newline' jump list with that stored value intact. */
3122 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3123 mainloop = LABEL();
3124 /* Continual stores does not cause data dependency. */
3125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3126 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3127 check_newlinechar(common, common->nltype, &newline, TRUE);
3128 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3129 JUMPHERE(end);
/* No newline was found: the line ends where the scan stopped. */
3130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3131 set_jumps(newline, LABEL());
3132 }
3133
3134 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3135 }
3136
/* The first entry must not advance STR_PTR: jump over the code below. */
3137 start = JUMP(SLJIT_JUMP);
3138
3139 if (newlinecheck)
3140 {
/* Entered from the main loop when the current unit equals the first half of
the newline: step over it and, unless STR_END was reached, step over the
next unit too when it equals the second half (the 0/1 comparison result is
scaled to a code unit size and added to STR_PTR). */
3141 newlinelabel = LABEL();
3142 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3143 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3144 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3145 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3146 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3147 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3148 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3149 #endif
3150 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3151 nl = JUMP(SLJIT_JUMP);
3152 }
3153
/* This is the label handed back to the caller. */
3154 mainloop = LABEL();
3155
3156 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit has to be loaded when its value is needed: for skipping
the rest of a UTF character or for the newline test. */
3157 #ifdef SUPPORT_UTF
3158 if (common->utf) readuchar = TRUE;
3159 #endif
3160 if (newlinecheck) readuchar = TRUE;
3161
3162 if (readuchar)
3163 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3164
3165 if (newlinecheck)
3166 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3167
3168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3169 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3170 #if defined COMPILE_PCRE8
3171 if (common->utf)
3172 {
/* UTF-8: for first bytes of 0xc0 and above, add the utf8_table4 entry of
that byte to STR_PTR. */
3173 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3174 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3175 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3176 JUMPHERE(singlechar);
3177 }
3178 #elif defined COMPILE_PCRE16
3179 if (common->utf)
3180 {
/* UTF-16: when the unit is a high surrogate (0xd800..0xdbff), advance by one
more unit; the 0/1 flag is shifted left by one before it is added. */
3181 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3182 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3183 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3184 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3185 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3186 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3187 JUMPHERE(singlechar);
3188 }
3189 #endif /* COMPILE_PCRE[8|16] */
3190 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The initial entry and both exits of the newline skipping code join here. */
3191 JUMPHERE(start);
3192
3193 if (newlinecheck)
3194 {
3195 JUMPHERE(end);
3196 JUMPHERE(nl);
3197 }
3198
3199 return mainloop;
3200 }
3201
3202 #define MAX_N_CHARS 16
3203 #define MAX_N_BYTES 8
3204
3205 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3206 {
3207 pcre_uint8 len = bytes[0];
3208 int i;
3209
3210 if (len == 255)
3211 return;
3212
3213 if (len == 0)
3214 {
3215 bytes[0] = 1;
3216 bytes[1] = byte;
3217 return;
3218 }
3219
3220 for (i = len; i > 0; i--)
3221 if (bytes[i] == byte)
3222 return;
3223
3224 if (len >= MAX_N_BYTES - 1)
3225 {
3226 bytes[0] = 255;
3227 return;
3228 }
3229
3230 len++;
3231 bytes[len] = byte;
3232 bytes[0] = len;
3233 }
3234
3235 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3236 {
3237 /* Recursive function, which scans prefix literals. */
3238 BOOL last, any, caseless;
3239 int len, repeat, len_save, consumed = 0;
3240 pcre_uint32 chr, mask;
3241 pcre_uchar *alternative, *cc_save, *oc;
3242 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3243 pcre_uchar othercase[8];
3244 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3245 pcre_uchar othercase[2];
3246 #else
3247 pcre_uchar othercase[1];
3248 #endif
3249
3250 repeat = 1;
3251 while (TRUE)
3252 {
3253 if (*rec_count == 0)
3254 return 0;
3255 (*rec_count)--;
3256
3257 last = TRUE;
3258 any = FALSE;
3259 caseless = FALSE;
3260
3261 switch (*cc)
3262 {
3263 case OP_CHARI:
3264 caseless = TRUE;
3265 case OP_CHAR:
3266 last = FALSE;
3267 cc++;
3268 break;
3269
3270 case OP_SOD:
3271 case OP_SOM:
3272 case OP_SET_SOM:
3273 case OP_NOT_WORD_BOUNDARY:
3274 case OP_WORD_BOUNDARY:
3275 case OP_EODN:
3276 case OP_EOD:
3277 case OP_CIRC:
3278 case OP_CIRCM:
3279 case OP_DOLL:
3280 case OP_DOLLM:
3281 /* Zero width assertions. */
3282 cc++;
3283 continue;
3284
3285 case OP_ASSERT:
3286 case OP_ASSERT_NOT:
3287 case OP_ASSERTBACK:
3288 case OP_ASSERTBACK_NOT:
3289 cc = bracketend(cc);
3290 continue;
3291
3292 case OP_PLUSI:
3293 case OP_MINPLUSI:
3294 case OP_POSPLUSI:
3295 caseless = TRUE;
3296 case OP_PLUS:
3297 case OP_MINPLUS:
3298 case OP_POSPLUS:
3299 cc++;
3300 break;
3301
3302 case OP_EXACTI:
3303 caseless = TRUE;
3304 case OP_EXACT:
3305 repeat = GET2(cc, 1);
3306 last = FALSE;
3307 cc += 1 + IMM2_SIZE;
3308 break;
3309
3310 case OP_QUERYI:
3311 case OP_MINQUERYI:
3312 case OP_POSQUERYI:
3313 caseless = TRUE;
3314 case OP_QUERY:
3315 case OP_MINQUERY:
3316 case OP_POSQUERY:
3317 len = 1;
3318 cc++;
3319 #ifdef SUPPORT_UTF
3320 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3321 #endif
3322 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3323 if (max_chars == 0)
3324 return consumed;
3325 last = FALSE;
3326 break;
3327
3328 case OP_KET:
3329 cc += 1 + LINK_SIZE;
3330 continue;
3331
3332 case OP_ALT:
3333 cc += GET(cc, 1);
3334 continue;
3335
3336 case OP_ONCE:
3337 case OP_ONCE_NC:
3338 case OP_BRA:
3339 case OP_BRAPOS:
3340 case OP_CBRA:
3341 case OP_CBRAPOS:
3342 alternative = cc + GET(cc, 1);
3343 while (*alternative == OP_ALT)
3344 {
3345 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3346 if (max_chars == 0)
3347 return consumed;
3348 alternative += GET(alternative, 1);
3349 }
3350
3351 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3352 cc += IMM2_SIZE;
3353 cc += 1 + LINK_SIZE;
3354 continue;
3355
3356 case OP_CLASS:
3357 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3358 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3359 #endif
3360 any = TRUE;
3361 cc += 1 + 32 / sizeof(pcre_uchar);
3362 break;
3363
3364 case OP_NCLASS:
3365 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3366 if (common->utf) return consumed;
3367 #endif
3368 any = TRUE;
3369 cc += 1 + 32 / sizeof(pcre_uchar);
3370 break;
3371
3372 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3373 case OP_XCLASS:
3374 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375 if (common->utf) return consumed;
3376 #endif
3377 any = TRUE;
3378 cc += GET(cc, 1);
3379 break;
3380 #endif
3381
3382 case OP_DIGIT:
3383 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3384 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3385 return consumed;
3386 #endif
3387 any = TRUE;
3388 cc++;
3389 break;
3390
3391 case OP_WHITESPACE:
3392 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3393 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3394 return consumed;
3395 #endif
3396 any = TRUE;
3397 cc++;
3398 break;
3399
3400 case OP_WORDCHAR:
3401 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3402 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3403 return consumed;
3404 #endif
3405 any = TRUE;
3406 cc++;
3407 break;
3408
3409 case OP_NOT:
3410 case OP_NOTI:
3411 cc++;
3412 /* Fall through. */
3413 case OP_NOT_DIGIT:
3414 case OP_NOT_WHITESPACE:
3415 case OP_NOT_WORDCHAR:
3416 case OP_ANY:
3417 case OP_ALLANY:
3418 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3419 if (common->utf) return consumed;
3420 #endif
3421 any = TRUE;
3422 cc++;
3423 break;
3424
3425 #ifdef SUPPORT_UCP
3426 case OP_NOTPROP:
3427 case OP_PROP:
3428 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3429 if (common->utf) return consumed;
3430 #endif
3431 any = TRUE;
3432 cc += 1 + 2;
3433 break;
3434 #endif
3435
3436 case OP_TYPEEXACT:
3437 repeat = GET2(cc, 1);
3438 cc += 1 + IMM2_SIZE;
3439 continue;
3440
3441 case OP_NOTEXACT:
3442 case OP_NOTEXACTI:
3443 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3444 if (common->utf) return consumed;
3445 #endif
3446 any = TRUE;
3447 repeat = GET2(cc, 1);
3448 cc += 1 + IMM2_SIZE + 1;
3449 break;
3450
3451 default:
3452 return consumed;
3453 }
3454
3455 if (any)
3456 {
3457 #if defined COMPILE_PCRE8
3458 mask = 0xff;
3459 #elif defined COMPILE_PCRE16
3460 mask = 0xffff;
3461 #elif defined COMPILE_PCRE32
3462 mask = 0xffffffff;
3463 #else
3464 SLJIT_ASSERT_STOP();
3465 #endif
3466
3467 do
3468 {
3469 chars[0] = mask;
3470 chars[1] = mask;
3471 bytes[0] = 255;
3472
3473 consumed++;
3474 if (--max_chars == 0)
3475 return consumed;
3476 chars += 2;
3477 bytes += MAX_N_BYTES;
3478 }
3479 while (--repeat > 0);
3480
3481 repeat = 1;
3482 continue;
3483 }
3484
3485 len = 1;
3486 #ifdef SUPPORT_UTF
3487 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3488 #endif
3489
3490 if (caseless && char_has_othercase(common, cc))
3491 {
3492 #ifdef SUPPORT_UTF
3493 if (common->utf)
3494 {
3495 GETCHAR(chr, cc);
3496 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3497 return consumed;
3498 }
3499 else
3500 #endif
3501 {
3502 chr = *cc;
3503 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3504 }
3505 }
3506 else
3507 caseless = FALSE;
3508
3509 len_save = len;
3510 cc_save = cc;
3511 while (TRUE)
3512 {
3513 oc = othercase;
3514 do
3515 {
3516 chr = *cc;
3517 #ifdef COMPILE_PCRE32
3518 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3519 return consumed;
3520 #endif
3521 add_prefix_byte((pcre_uint8)chr, bytes);
3522
3523 mask = 0;
3524 if (caseless)
3525 {
3526 add_prefix_byte((pcre_uint8)*oc, bytes);
3527 mask = *cc ^ *oc;
3528 chr |= mask;
3529 }
3530
3531 #ifdef COMPILE_PCRE32
3532 if (chars[0] == NOTACHAR && chars[1] == 0)
3533 #else
3534 if (chars[0] == NOTACHAR)
3535 #endif
3536 {
3537 chars[0] = chr;
3538 chars[1] = mask;
3539 }
3540 else
3541 {
3542 mask |= chars[0] ^ chr;
3543 chr |= mask;
3544 chars[0] = chr;
3545 chars[1] |= mask;
3546 }
3547
3548 len--;
3549 consumed++;
3550 if (--max_chars == 0)
3551 return consumed;
3552 chars += 2;
3553 bytes += MAX_N_BYTES;
3554 cc++;
3555 oc++;
3556 }
3557 while (len > 0);
3558
3559 if (--repeat == 0)
3560 break;
3561
3562 len = len_save;
3563 cc = cc_save;
3564 }
3565
3566 repeat = 1;
3567 if (last)
3568 return consumed;
3569 }
3570 }
3571
3572 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3573 {
3574 DEFINE_COMPILER;
3575 struct sljit_label *start;
3576 struct sljit_jump *quit;
3577 pcre_uint32 chars[MAX_N_CHARS * 2];
3578 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3579 pcre_uint8 ones[MAX_N_CHARS];
3580 int offsets[3];
3581 pcre_uint32 mask;
3582 pcre_uint8 *byte_set, *byte_set_end;
3583 int i, max, from;
3584 int range_right = -1, range_len = 3 - 1;
3585 sljit_ub *update_table = NULL;
3586 BOOL in_range;
3587 pcre_uint32 rec_count;
3588
3589 for (i = 0; i < MAX_N_CHARS; i++)
3590 {
3591 chars[i << 1] = NOTACHAR;
3592 chars[(i << 1) + 1] = 0;
3593 bytes[i * MAX_N_BYTES] = 0;
3594 }
3595
3596 rec_count = 10000;
3597 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3598
3599 if (max <= 1)
3600 return FALSE;
3601
3602 for (i = 0; i < max; i++)
3603 {
3604 mask = chars[(i << 1) + 1];
3605 ones[i] = ones_in_half_byte[mask & 0xf];
3606 mask >>= 4;
3607 while (mask != 0)
3608 {
3609 ones[i] += ones_in_half_byte[mask & 0xf];
3610 mask >>= 4;
3611 }
3612 }
3613
3614 in_range = FALSE;
3615 from = 0; /* Prevent compiler "uninitialized" warning */
3616 for (i = 0; i <= max; i++)
3617 {
3618 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3619 {
3620 range_len = i - from;
3621 range_right = i - 1;
3622 }
3623
3624 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3625 {
3626 if (!in_range)
3627 {
3628 in_range = TRUE;
3629 from = i;
3630 }
3631 }
3632 else if (in_range)
3633 in_range = FALSE;
3634 }
3635
3636 if (range_right >= 0)
3637 {
3638 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3639 if (update_table == NULL)
3640 return TRUE;
3641 memset(update_table, IN_UCHARS(range_len), 256);
3642
3643 for (i = 0; i < range_len; i++)
3644 {
3645 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3646 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3647 byte_set_end = byte_set + byte_set[0];
3648 byte_set++;
3649 while (byte_set <= byte_set_end)
3650 {
3651 if (update_table[*byte_set] > IN_UCHARS(i))
3652 update_table[*byte_set] = IN_UCHARS(i);
3653 byte_set++;
3654 }
3655 }
3656 }
3657
3658 offsets[0] = -1;
3659 /* Scan forward. */
3660 for (i = 0; i < max; i++)
3661 if (ones[i] <= 2) {
3662 offsets[0] = i;
3663 break;
3664 }
3665
3666 if (offsets[0] < 0 && range_right < 0)
3667 return FALSE;
3668
3669 if (offsets[0] >= 0)
3670 {
3671 /* Scan backward. */
3672 offsets[1] = -1;
3673 for (i = max - 1; i > offsets[0]; i--)
3674 if (ones[i] <= 2 && i != range_right)
3675 {
3676 offsets[1] = i;
3677 break;
3678 }
3679
3680 /* This case is handled better by fast_forward_first_char. */
3681 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3682 return FALSE;
3683
3684 offsets[2] = -1;
3685 /* We only search for a middle character if there is no range check. */
3686 if (offsets[1] >= 0 && range_right == -1)
3687 {
3688 /* Scan from middle. */
3689 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3690 if (ones[i] <= 2)
3691 {
3692 offsets[2] = i;
3693 break;
3694 }
3695
3696 if (offsets[2] == -1)
3697 {
3698 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3699 if (ones[i] <= 2)
3700 {
3701 offsets[2] = i;
3702 break;
3703 }
3704 }
3705 }
3706
3707 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3708 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3709
3710 chars[0] = chars[offsets[0] << 1];
3711 chars[1] = chars[(offsets[0] << 1) + 1];
3712 if (offsets[2] >= 0)
3713 {
3714 chars[2] = chars[offsets[2] << 1];
3715 chars[3] = chars[(offsets[2] << 1) + 1];
3716 }
3717 if (offsets[1] >= 0)
3718 {
3719 chars[4] = chars[offsets[1] << 1];
3720 chars[5] = chars[(offsets[1] << 1) + 1];
3721 }
3722 }
3723
3724 max -= 1;
3725 if (firstline)
3726 {
3727 SLJIT_ASSERT(common->first_line_end != 0);
3728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3729 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3730 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3731 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3732 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3733 JUMPHERE(quit);
3734 }
3735 else
3736 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3737
3738 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3739 if (range_right >= 0)
3740 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3741 #endif
3742
3743 start = LABEL();
3744 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3745
3746 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3747
3748 if (range_right >= 0)
3749 {
3750 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3751 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3752 #else
3753 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3754 #endif
3755
3756 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3757 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3758 #else
3759 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3760 #endif
3761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3762 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3763 }
3764
3765 if (offsets[0] >= 0)
3766 {
3767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3768 if (offsets[1] >= 0)
3769 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3771
3772 if (chars[1] != 0)
3773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3774 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3775 if (offsets[2] >= 0)
3776 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3777
3778 if (offsets[1] >= 0)
3779 {
3780 if (chars[5] != 0)
3781 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3782 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3783 }
3784
3785 if (offsets[2] >= 0)
3786 {
3787 if (chars[3] != 0)
3788 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3789 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3790 }
3791 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3792 }
3793
3794 JUMPHERE(quit);
3795
3796 if (firstline)
3797 {
3798 if (range_right >= 0)
3799 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3800 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3801 if (range_right >= 0)
3802 {
3803 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3804 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3805 JUMPHERE(quit);
3806 }
3807 }
3808 else
3809 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3810 return TRUE;
3811 }
3812
3813 #undef MAX_N_CHARS
3814 #undef MAX_N_BYTES
3815
3816 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3817 {
3818 DEFINE_COMPILER;
3819 struct sljit_label *start;
3820 struct sljit_jump *quit;
3821 struct sljit_jump *found;
3822 pcre_uchar oc, bit;
3823
3824 if (firstline)
3825 {
3826 SLJIT_ASSERT(common->first_line_end != 0);
3827 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3828 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3829 }
3830
3831 start = LABEL();
3832 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3833 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3834
3835 oc = first_char;
3836 if (caseless)
3837 {
3838 oc = TABLE_GET(first_char, common->fcc, first_char);
3839 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3840 if (first_char > 127 && common->utf)
3841 oc = UCD_OTHERCASE(first_char);
3842 #endif
3843 }
3844 if (first_char == oc)
3845 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3846 else
3847 {
3848 bit = first_char ^ oc;
3849 if (is_powerof2(bit))
3850 {
3851 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3852 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3853 }
3854 else
3855 {
3856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3857 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3858 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3859 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3860 found = JUMP(SLJIT_NOT_ZERO);
3861 }
3862 }
3863
3864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3865 JUMPTO(SLJIT_JUMP, start);
3866 JUMPHERE(found);
3867 JUMPHERE(quit);
3868
3869 if (firstline)
3870 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3871 }
3872
3873 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3874 {
3875 DEFINE_COMPILER;
3876 struct sljit_label *loop;
3877 struct sljit_jump *lastchar;
3878 struct sljit_jump *firstchar;
3879 struct sljit_jump *quit;
3880 struct sljit_jump *foundcr = NULL;
3881 struct sljit_jump *notfoundnl;
3882 jump_list *newline = NULL;
3883
3884 if (firstline)
3885 {
3886 SLJIT_ASSERT(common->first_line_end != 0);
3887 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3888 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3889 }
3890
3891 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3892 {
3893 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3897 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3898
3899 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3900 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3901 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3902 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3903 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3904 #endif
3905 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3906
3907 loop = LABEL();
3908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3909 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3910 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3911 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3912 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3913 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3914
3915 JUMPHERE(quit);
3916 JUMPHERE(firstchar);
3917 JUMPHERE(lastchar);
3918
3919 if (firstline)
3920 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3921 return;
3922 }
3923
3924 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3926 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3927 skip_char_back(common);
3928
3929 loop = LABEL();
3930 common->ff_newline_shortcut = loop;
3931
3932 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3933 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3934 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3935 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3936 check_newlinechar(common, common->nltype, &newline, FALSE);
3937 set_jumps(newline, loop);
3938
3939 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3940 {
3941 quit = JUMP(SLJIT_JUMP);
3942 JUMPHERE(foundcr);
3943 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3944 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3945 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3946 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3947 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3948 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3949 #endif
3950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3951 JUMPHERE(notfoundnl);
3952 JUMPHERE(quit);
3953 }
3954 JUMPHERE(lastchar);
3955 JUMPHERE(firstchar);
3956
3957 if (firstline)
3958 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3959 }
3960
3961 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3962
3963 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3964 {
3965 DEFINE_COMPILER;
3966 struct sljit_label *start;
3967 struct sljit_jump *quit;
3968 struct sljit_jump *found = NULL;
3969 jump_list *matches = NULL;
3970 #ifndef COMPILE_PCRE8
3971 struct sljit_jump *jump;
3972 #endif
3973
3974 if (firstline)
3975 {
3976 SLJIT_ASSERT(common->first_line_end != 0);
3977 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3978 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3979 }
3980
3981 start = LABEL();
3982 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3983 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3984 #ifdef SUPPORT_UTF
3985 if (common->utf)
3986 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3987 #endif
3988
3989 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3990 {
3991 #ifndef COMPILE_PCRE8
3992 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3993 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3994 JUMPHERE(jump);
3995 #endif
3996 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3997 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3998 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3999 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4000 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4001 found = JUMP(SLJIT_NOT_ZERO);
4002 }
4003
4004 #ifdef SUPPORT_UTF
4005 if (common->utf)
4006 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4007 #endif
4008 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4009 #ifdef SUPPORT_UTF
4010 #if defined COMPILE_PCRE8
4011 if (common->utf)
4012 {
4013 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4014 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4015 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4016 }
4017 #elif defined COMPILE_PCRE16
4018 if (common->utf)
4019 {
4020 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4021 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4023 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4024 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4026 }
4027 #endif /* COMPILE_PCRE[8|16] */
4028 #endif /* SUPPORT_UTF */
4029 JUMPTO(SLJIT_JUMP, start);
4030 if (found != NULL)
4031 JUMPHERE(found);
4032 if (matches != NULL)
4033 set_jumps(matches, LABEL());
4034 JUMPHERE(quit);
4035
4036 if (firstline)
4037 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4038 }
4039
4040 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4041 {
4042 DEFINE_COMPILER;
4043 struct sljit_label *loop;
4044 struct sljit_jump *toolong;
4045 struct sljit_jump *alreadyfound;
4046 struct sljit_jump *found;
4047 struct sljit_jump *foundoc = NULL;
4048 struct sljit_jump *notfound;
4049 pcre_uint32 oc, bit;
4050
4051 SLJIT_ASSERT(common->req_char_ptr != 0);
4052 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4053 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4054 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4055 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4056
4057 if (has_firstchar)
4058 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4059 else
4060 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4061
4062 loop = LABEL();
4063 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4064
4065 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4066 oc = req_char;
4067 if (caseless)
4068 {
4069 oc = TABLE_GET(req_char, common->fcc, req_char);
4070 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4071 if (req_char > 127 && common->utf)
4072 oc = UCD_OTHERCASE(req_char);
4073 #endif
4074 }
4075 if (req_char == oc)
4076 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4077 else
4078 {
4079 bit = req_char ^ oc;
4080 if (is_powerof2(bit))
4081 {
4082 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4083 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4084 }
4085 else
4086 {
4087 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4088 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4089 }
4090 }
4091 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4092 JUMPTO(SLJIT_JUMP, loop);
4093
4094 JUMPHERE(found);
4095 if (foundoc)
4096 JUMPHERE(foundoc);
4097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4098 JUMPHERE(alreadyfound);
4099 JUMPHERE(toolong);
4100 return notfound;
4101 }
4102
4103 static void do_revertframes(compiler_common *common)
4104 {
4105 DEFINE_COMPILER;
4106 struct sljit_jump *jump;
4107 struct sljit_label *mainloop;
4108
4109 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4110 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4111 GET_LOCAL_BASE(TMP3, 0, 0);
4112
4113 /* Drop frames until we reach STACK_TOP. */
4114 mainloop = LABEL();
4115 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4116 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4117 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4118
4119 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4120 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4121 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4122 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4123 JUMPTO(SLJIT_JUMP, mainloop);
4124
4125 JUMPHERE(jump);
4126 jump = JUMP(SLJIT_SIG_LESS);
4127 /* End of dropping frames. */
4128 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4129
4130 JUMPHERE(jump);
4131 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4132 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4133 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4134 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4135 JUMPTO(SLJIT_JUMP, mainloop);
4136 }
4137
4138 static void check_wordboundary(compiler_common *common)
4139 {
4140 DEFINE_COMPILER;
4141 struct sljit_jump *skipread;
4142 jump_list *skipread_list = NULL;
4143 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4144 struct sljit_jump *jump;
4145 #endif
4146
4147 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4148
4149 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4150 /* Get type of the previous char, and put it to LOCALS1. */
4151 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4154 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4155 skip_char_back(common);
4156 check_start_used_ptr(common);
4157 read_char(common);
4158
4159 /* Testing char type. */
4160 #ifdef SUPPORT_UCP
4161 if (common->use_ucp)
4162 {
4163 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4164 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4165 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4166 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4167 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4168 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4169 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4170 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4171 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4172 JUMPHERE(jump);
4173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4174 }
4175 else
4176 #endif
4177 {
4178 #ifndef COMPILE_PCRE8
4179 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4180 #elif defined SUPPORT_UTF
4181 /* Here LOCALS1 has already been zeroed. */
4182 jump = NULL;
4183 if (common->utf)
4184 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4185 #endif /* COMPILE_PCRE8 */
4186 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4187 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4188 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4190 #ifndef COMPILE_PCRE8
4191 JUMPHERE(jump);
4192 #elif defined SUPPORT_UTF
4193 if (jump != NULL)
4194 JUMPHERE(jump);
4195 #endif /* COMPILE_PCRE8 */
4196 }
4197 JUMPHERE(skipread);
4198
4199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4200 check_str_end(common, &skipread_list);
4201 peek_char(common, READ_CHAR_MAX);
4202
4203 /* Testing char type. This is a code duplication. */
4204 #ifdef SUPPORT_UCP
4205 if (common->use_ucp)
4206 {
4207 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4208 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4209 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4210 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4212 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4213 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4214 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4215 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4216 JUMPHERE(jump);
4217 }
4218 else
4219 #endif
4220 {
4221 #ifndef COMPILE_PCRE8
4222 /* TMP2 may be destroyed by peek_char. */
4223 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4224 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4225 #elif defined SUPPORT_UTF
4226 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4227 jump = NULL;
4228 if (common->utf)
4229 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4230 #endif
4231 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4232 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4233 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4234 #ifndef COMPILE_PCRE8
4235 JUMPHERE(jump);
4236 #elif defined SUPPORT_UTF
4237 if (jump != NULL)
4238 JUMPHERE(jump);
4239 #endif /* COMPILE_PCRE8 */
4240 }
4241 set_jumps(skipread_list, LABEL());
4242
4243 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4244 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4245 }
4246
/* Attempts to replace a class bitmap lookup with a few compares on TMP1.

The 256-bit map in bits is scanned and every position where the bit value
changes is recorded in ranges[]. When at most four such boundaries exist,
compare-and-jump code is emitted and TRUE is returned. Otherwise nothing is
emitted and FALSE is returned, so the caller has to test the bitmap itself.

Arguments:
  common      compiler state (provides the sljit compiler)
  bits        the class bitmap, one bit per character value 0..255
  nclass      together with the last bit value, decides whether a closing
              boundary at 256 is appended (see the test after the scan)
  invert      when TRUE, the meaning of the bitmap bits is flipped
  backtracks  jump list that receives the emitted jumps

The emitted jumps are taken for characters whose (possibly inverted) bit is
zero. TMP1 may be modified by the emitted SUB/OR instructions.
NOTE(review): the interval tests assume SLJIT_LESS / SLJIT_GREATER_EQUAL are
unsigned comparisons - confirm against the sljit documentation. */
4247 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4248 {
4249 DEFINE_COMPILER;
4250 int ranges[MAX_RANGE_SIZE];
4251 pcre_uint8 bit, cbit, all;
4252 int i, byte, length = 0;
4253
4254 bit = bits[0] & 0x1;
4255 /* All bits will be zero or one (since bit is zero or one). */
4256 all = -bit;
4257
/* Bytes that contain only the current bit value are skipped as a whole.
Other bytes are examined bit by bit and each change is recorded. */
4258 for (i = 0; i < 256; )
4259 {
4260 byte = i >> 3;
4261 if ((i & 0x7) == 0 && bits[byte] == all)
4262 i += 8;
4263 else
4264 {
4265 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4266 if (cbit != bit)
4267 {
4268 if (length >= MAX_RANGE_SIZE)
4269 return FALSE;
4270 ranges[length] = i;
4271 length++;
4272 bit = cbit;
4273 all = -cbit;
4274 }
4275 i++;
4276 }
4277 }
4278
/* Here bit is the value of the last bitmap bit. Depending on nclass, the
value 256 is appended as a final boundary. */
4279 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4280 {
4281 if (length >= MAX_RANGE_SIZE)
4282 return FALSE;
4283 ranges[length] = 256;
4284 length++;
4285 }
4286
/* The code generator below handles at most four boundaries. */
4287 if (length < 0 || length > 4)
4288 return FALSE;
4289
/* From now on bit is the (possibly inverted) value for character 0. */
4290 bit = bits[0] & 0x1;
4291 if (invert) bit ^= 0x1;
4292
4293 /* No character is accepted. */
4294 if (length == 0 && bit == 0)
4295 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4296
4297 switch(length)
4298 {
4299 case 0:
4300 /* When bit != 0, all characters are accepted. */
4301 return TRUE;
4302
4303 case 1:
/* A single boundary: one compare against ranges[0]. */
4304 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4305 return TRUE;
4306
4307 case 2:
/* One interval [ranges[0], ranges[1]). An interval of length one is tested
with an equality compare, which leaves TMP1 unchanged. */
4308 if (ranges[0] + 1 != ranges[1])
4309 {
4310 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4311 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4312 }
4313 else
4314 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4315 return TRUE;
4316
4317 case 3:
/* Three boundaries: one open-ended test plus one interval test. */
4318 if (bit != 0)
4319 {
4320 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4321 if (ranges[0] + 1 != ranges[1])
4322 {
4323 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4324 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4325 }
4326 else
4327 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4328 return TRUE;
4329 }
4330
4331 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4332 if (ranges[1] + 1 != ranges[2])
4333 {
4334 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4335 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4336 }
4337 else
4338 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4339 return TRUE;
4340
4341 case 4:
/* Special case: two intervals of the same size whose start values differ in
exactly one bit. That bit is set in TMP1 and only one interval is tested. */
4342 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4343 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4344 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4345 && is_powerof2(ranges[2] - ranges[0]))
4346 {
4347 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4348 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4349 if (ranges[2] + 1 != ranges[3])
4350 {
4351 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4352 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4353 }
4354 else
4355 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4356 return TRUE;
4357 }
4358
4359 if (bit != 0)
4360 {
/* i records how much has already been subtracted from TMP1, so that the
second test can compensate for it. */
4361 i = 0;
4362 if (ranges[0] + 1 != ranges[1])
4363 {
4364 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4365 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4366 i = ranges[0];
4367 }
4368 else
4369 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4370
4371 if (ranges[2] + 1 != ranges[3])
4372 {
4373 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4374 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4375 }
4376 else
4377 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4378 return TRUE;
4379 }
4380
/* bit == 0: first test the span from ranges[0] up to ranges[3], then the gap
[ranges[1], ranges[2]) inside it. */
4381 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4382 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4383 if (ranges[1] + 1 != ranges[2])
4384 {
4385 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4386 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4387 }
4388 else
4389 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4390 return TRUE;
4391
4392 default:
/* Unreachable: length was limited to 0..4 above. */
4393 SLJIT_ASSERT_STOP();
4394 return FALSE;
4395 }
4396 }
4397
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) that tests the character in TMP1
against 0x0a..0x0d and 0x85, and additionally against 0x2028 and 0x2029 when
compiled for 16/32 bit or when common->utf is set in the 8 bit library.
The per-test results are OR-ed together in TMP2; the last OP_FLAGS is emitted
with SLJIT_SET_E. */
4398 static void check_anynewline(compiler_common *common)
4399 {
4400 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 is modified as well: 0x0a is subtracted from it below. */
4401 DEFINE_COMPILER;
4402
4403 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4404
/* All later constants are relative to 0x0a because of this subtraction. */
4405 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4406 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4407 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4409 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4410 #ifdef COMPILE_PCRE8
4411 if (common->utf)
4412 {
4413 #endif
4414 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps both 0x2028 - 0x0a and 0x2029 - 0x0a to
0x2029 - 0x0a, so one compare covers both characters. */
4415 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4416 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4417 #ifdef COMPILE_PCRE8
4418 }
4419 #endif
4420 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last compare emitted above. */
4421 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4422 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4423 }
4424
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) that tests the character in TMP1
against 0x09, 0x20 and 0xa0, and additionally against 0x1680, 0x180e,
0x2000..0x200A, 0x202f, 0x205f and 0x3000 when compiled for 16/32 bit or when
common->utf is set in the 8 bit library. The per-test results are OR-ed
together in TMP2; the last OP_FLAGS is emitted with SLJIT_SET_E. */
4425 static void check_hspace(compiler_common *common)
4426 {
4427 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
/* In the wide character path TMP1 is modified as well (0x2000 is subtracted). */
4428 DEFINE_COMPILER;
4429
4430 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4431
4432 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4433 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4434 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4435 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4436 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4437 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4438 #ifdef COMPILE_PCRE8
4439 if (common->utf)
4440 {
4441 #endif
4442 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4444 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4445 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4446 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* From here on TMP1 holds the character minus 0x2000, hence the relative
constants in the remaining compares. */
4447 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4448 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4449 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4451 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4452 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4453 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4454 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4455 #ifdef COMPILE_PCRE8
4456 }
4457 #endif
4458 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last compare emitted above (0xa0 or 0x3000). */
4459 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4460
4461 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4462 }
4463
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) that tests the character in TMP1
against 0x0a..0x0d and 0x85, and additionally against 0x2028 and 0x2029 when
compiled for 16/32 bit or when common->utf is set in the 8 bit library.
The per-test results are OR-ed together in TMP2; the last OP_FLAGS is emitted
with SLJIT_SET_E. */
4464 static void check_vspace(compiler_common *common)
4465 {
4466 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* TMP1 is modified as well: 0x0a is subtracted from it below. */
4467 DEFINE_COMPILER;
4468
4469 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4470
/* All later constants are relative to 0x0a because of this subtraction. */
4471 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4472 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4473 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4474 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4475 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4476 #ifdef COMPILE_PCRE8
4477 if (common->utf)
4478 {
4479 #endif
4480 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps both 0x2028 - 0x0a and 0x2029 - 0x0a to
0x2029 - 0x0a, so one compare covers both characters. */
4481 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4482 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4483 #ifdef COMPILE_PCRE8
4484 }
4485 #endif
4486 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last compare emitted above. */
4487 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4488
4489 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4490 }
4491
/* CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP. The compare helpers
below borrow these registers to hold characters; they save the original
values on entry and restore them before returning. */
4492 #define CHAR1 STR_END
4493 #define CHAR2 STACK_TOP
4494
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) that compares two character
sequences for exact equality. TMP1 addresses one sequence, STR_PTR (after it
has been moved back by TMP2) the other, and TMP2 is the remaining length,
reduced by IN_UCHARS(1) per iteration. The loop ends when TMP2 reaches zero
or as soon as two characters differ; in the latter case TMP2 is still
non-zero. TMP3 and LOCALS0 are used as save slots. */
4495 static void do_casefulcmp(compiler_common *common)
4496 {
4497 DEFINE_COMPILER;
4498 struct sljit_jump *jump;
4499 struct sljit_label *label;
4500
4501 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4502 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers that are borrowed as CHAR1 / CHAR2. */
4503 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character before the loop.
NOTE(review): presumably because MOVU_UCHAR updates the base register by the
given offset before loading - confirm against the macro definition. */
4505 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4506 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507
4508 label = LABEL();
4509 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4510 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop on the first difference. */
4511 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4512 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4513 JUMPTO(SLJIT_NOT_ZERO, label);
4514
4515 JUMPHERE(jump);
/* Undo the initial one character adjustment of STR_PTR and restore the
borrowed registers. */
4516 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4517 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4518 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4519 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4520 }
4521
/* LCC_TABLE is an alias of STACK_LIMIT; do_caselesscmp borrows this register
to hold the address of the common->lcc table and restores it on exit. */
4522 #define LCC_TABLE STACK_LIMIT
4523
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) that compares two character
sequences after mapping each character through the common->lcc table.
Register usage mirrors do_casefulcmp: TMP1 addresses one sequence, STR_PTR
(after it has been moved back by TMP2) the other, and TMP2 is the remaining
length. When not compiled for 8 bit, characters above 255 bypass the table
lookup and are compared unchanged. TMP3, LOCALS0 and LOCALS1 are used as
save slots. */
4524 static void do_caselesscmp(compiler_common *common)
4525 {
4526 DEFINE_COMPILER;
4527 struct sljit_jump *jump;
4528 struct sljit_label *label;
4529
4530 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4531 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4532
/* Save the three borrowed registers, then load the table address. */
4533 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4536 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character before the loop.
NOTE(review): presumably because MOVU_UCHAR updates the base register by the
given offset before loading - confirm against the macro definition. */
4537 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4538 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4539
4540 label = LABEL();
4541 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4542 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for characters above 255 (16/32 bit only). */
4543 #ifndef COMPILE_PCRE8
4544 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4545 #endif
4546 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4547 #ifndef COMPILE_PCRE8
4548 JUMPHERE(jump);
4549 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4550 #endif
4551 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4552 #ifndef COMPILE_PCRE8
4553 JUMPHERE(jump);
4554 #endif
/* Leave the loop on the first difference. */
4555 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4556 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4557 JUMPTO(SLJIT_NOT_ZERO, label);
4558
4559 JUMPHERE(jump);
/* Undo the initial one character adjustment of STR_PTR and restore the
borrowed registers. */
4560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4561 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4562 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4563 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4564 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4565 }
4566
/* The register aliases are local to the two compare helpers above. */
4567 #undef LCC_TABLE
4568 #undef CHAR1
4569 #undef CHAR2
4570
4571 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4572
4573 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4574 {
4575 /* This function would be ineffective to do in JIT level. */
4576 pcre_uint32 c1, c2;
4577 const pcre_uchar *src2 = args->uchar_ptr;
4578 const pcre_uchar *end2 = args->end;
4579 const ucd_record *ur;
4580 const pcre_uint32 *pp;
4581
4582 while (src1 < end1)
4583 {
4584 if (src2 >= end2)
4585 return (pcre_uchar*)1;
4586 GETCHARINC(c1, src1);
4587 GETCHARINC(c2, src2);
4588 ur = GET_UCD(c2);
4589 if (c1 != c2 && c1 != c2 + ur->other_case)
4590 {
4591 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4592 for (;;)
4593 {
4594 if (c1 < *pp) return NULL;
4595 if (c1 == *pp++) break;
4596 }
4597 }
4598 }
4599 return src2;
4600 }
4601
4602 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4603
/* Emits the compare code for one pattern character (one or more code units
in UTF mode) located at cc. The subject data is read relative to STR_PTR at
offset -context->length, and context->length is reduced by IN_UCHARS(1) for
every code unit processed. A jump to backtracks is added for each emitted
"not equal" test.

Arguments:
  common      compiler state
  caseless    TRUE when the character is matched caselessly; this only has
              an effect when char_has_othercase() reports an other case
  cc          the pattern character to compare
  context     running state shared by consecutive calls: remaining length,
              the register that currently holds loaded subject data
              (sourcereg, -1 before the first call) and, when unaligned
              reads are used, the constants collected so far
  backtracks  jump list that receives the mismatch jumps

Returns: cc advanced past the code units that were processed. */
4604 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4605 compare_context *context, jump_list **backtracks)
4606 {
4607 DEFINE_COMPILER;
4608 unsigned int othercasebit = 0;
4609 pcre_uchar *othercasechar = NULL;
4610 #ifdef SUPPORT_UTF
4611 int utflength;
4612 #endif
4613
4614 if (caseless && char_has_othercase(common, cc))
4615 {
4616 othercasebit = char_get_othercase_bit(common, cc);
4617 SLJIT_ASSERT(othercasebit);
4618 /* Extracting bit difference info. */
/* The value is split into the code unit that carries the differing bit
(othercasechar) and the bit mask itself (othercasebit). */
4619 #if defined COMPILE_PCRE8
4620 othercasechar = cc + (othercasebit >> 8);
4621 othercasebit &= 0xff;
4622 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4623 /* Note that this code only handles characters in the BMP. If there
4624 ever are characters outside the BMP whose othercase differs in only one
4625 bit from itself (there currently are none), this code will need to be
4626 revised for COMPILE_PCRE32. */
4627 othercasechar = cc + (othercasebit >> 9);
4628 if ((othercasebit & 0x100) != 0)
4629 othercasebit = (othercasebit & 0xff) << 8;
4630 else
4631 othercasebit &= 0xff;
4632 #endif /* COMPILE_PCRE[8|16|32] */
4633 }
4634
/* First call for this sequence: load the first piece of subject data into
TMP1 and make TMP2 the target of the next load. */
4635 if (context->sourcereg == -1)
4636 {
4637 #if defined COMPILE_PCRE8
4638 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4639 if (context->length >= 4)
4640 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4641 else if (context->length >= 2)
4642 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4643 else
4644 #endif
4645 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4646 #elif defined COMPILE_PCRE16
4647 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4648 if (context->length >= 4)
4649 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4650 else
4651 #endif
4652 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4653 #elif defined COMPILE_PCRE32
4654 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4655 #endif /* COMPILE_PCRE[8|16|32] */
4656 context->sourcereg = TMP2;
4657 }
4658
/* In UTF mode the loop below runs once per code unit of the character. */
4659 #ifdef SUPPORT_UTF
4660 utflength = 1;
4661 if (common->utf && HAS_EXTRALEN(*cc))
4662 utflength += GET_EXTRALEN(*cc);
4663
4664 do
4665 {
4666 #endif
4667
4668 context->length -= IN_UCHARS(1);
4669 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4670
4671 /* Unaligned read is supported. */
/* Collect the expected code unit (with the other case bit set) in
context->c and the other case mask in context->oc. */
4672 if (othercasebit != 0 && othercasechar == cc)
4673 {
4674 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4675 context->oc.asuchars[context->ucharptr] = othercasebit;
4676 }
4677 else
4678 {
4679 context->c.asuchars[context->ucharptr] = *cc;
4680 context->oc.asuchars[context->ucharptr] = 0;
4681 }
4682 context->ucharptr++;
4683
/* Emit a compare once enough code units were collected to match the size of
the data loaded earlier, or when the sequence ends. */
4684 #if defined COMPILE_PCRE8
4685 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4686 #else
4687 if (context->ucharptr >= 2 || context->length == 0)
4688 #endif
4689 {
/* The load of the next piece is emitted first, into the register that is
not being compared; then the two registers swap roles. */
4690 if (context->length >= 4)
4691 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4692 else if (context->length >= 2)
4693 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4694 #if defined COMPILE_PCRE8
4695 else if (context->length >= 1)
4696 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4697 #endif /* COMPILE_PCRE8 */
4698 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4699
/* ucharptr tells how many code units were collected, which selects the
width of the constant to compare against. */
4700 switch(context->ucharptr)
4701 {
4702 case 4 / sizeof(pcre_uchar):
4703 if (context->oc.asint != 0)
4704 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4705 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4706 break;
4707
4708 case 2 / sizeof(pcre_uchar):
4709 if (context->oc.asushort != 0)
4710 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4711 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4712 break;
4713
4714 #ifdef COMPILE_PCRE8
4715 case 1:
4716 if (context->oc.asbyte != 0)
4717 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4718 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4719 break;
4720 #endif
4721
4722 default:
4723 SLJIT_ASSERT_STOP();
4724 break;
4725 }
4726 context->ucharptr = 0;
4727 }
4728
4729 #else
4730
4731 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per step: emit the load of the next unit into the free
register, swap the registers, then compare the unit loaded earlier. */
4732 if (context->length >= 1)
4733 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4734
4735 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4736
4737 if (othercasebit != 0 && othercasechar == cc)
4738 {
4739 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4740 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4741 }
4742 else
4743 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4744
4745 #endif
4746
4747 cc++;
4748 #ifdef SUPPORT_UTF
4749 utflength--;
4750 }
4751 while (utflength > 0);
4752 #endif
4753
4754 return cc;
4755 }
4756
4757 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4758
/* Emits an ADD or SUB on typereg that accounts for the difference between
the current typeoffset and value, then records value as the new typeoffset.
Nothing is emitted when the offset is unchanged. The macro relies on the
locals typereg and typeoffset of the expanding function, evaluates its
argument several times, and expands to more than one statement, so it must
not be used as the unbraced body of an if/else or loop. */
4759 #define SET_TYPE_OFFSET(value) \
4760 if ((value) != typeoffset) \
4761 { \
4762 if ((value) < typeoffset) \
4763 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4764 else \
4765 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4766 } \
4767 typeoffset = (value);
4768
/* Same as SET_TYPE_OFFSET, but for the character in TMP1 and the local
variable charoffset. The same usage restrictions apply: the argument is
evaluated several times and the expansion is more than one statement. */
4769 #define SET_CHAR_OFFSET(value) \
4770 if ((value) != charoffset) \
4771 { \
4772 if ((value) < charoffset) \
4773 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4774 else \
4775 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4776 } \
4777 charoffset = (value);
4778
4779 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4780 {
4781 DEFINE_COMPILER;
4782 jump_list *found = NULL;
4783 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4784 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4785 struct sljit_jump *jump = NULL;
4786 pcre_uchar *ccbegin;
4787 int compares, invertcmp, numberofcmps;
4788 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4789 BOOL utf = common->utf;
4790 #endif
4791
4792 #ifdef SUPPORT_UCP
4793 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4794 BOOL charsaved = FALSE;
4795 int typereg = TMP1, scriptreg = TMP1;
4796 const pcre_uint32 *other_cases;
4797 sljit_uw typeoffset;
4798 #endif
4799
4800 /* Scanning the necessary info. */
4801 cc++;
4802 ccbegin = cc;
4803 compares = 0;
4804 if (cc[-1] & XCL_MAP)
4805 {
4806 min = 0;
4807 cc += 32 / sizeof(pcre_uchar);
4808 }
4809
4810 while (*cc != XCL_END)
4811 {
4812 compares++;
4813 if (*cc == XCL_SINGLE)
4814 {
4815 cc ++;
4816 GETCHARINCTEST(c, cc);
4817 if (c > max) max = c;
4818 if (c < min) min = c;
4819 #ifdef SUPPORT_UCP
4820 needschar = TRUE;
4821 #endif
4822 }
4823 else if (*cc == XCL_RANGE)
4824 {
4825 cc ++;
4826 GETCHARINCTEST(c, cc);
4827 if (c < min) min = c;
4828 GETCHARINCTEST(c, cc);
4829 if (c > max) max = c;
4830 #ifdef SUPPORT_UCP
4831 needschar = TRUE;
4832 #endif
4833 }
4834 #ifdef SUPPORT_UCP
4835 else
4836 {
4837 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4838 cc++;
4839 if (*cc == PT_CLIST)
4840 {
4841 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4842 while (*other_cases != NOTACHAR)
4843 {
4844 if (*other_cases > max) max = *other_cases;
4845 if (*other_cases < min) min = *other_cases;
4846 other_cases++;
4847 }
4848 }
4849 else
4850 {
4851 max = READ_CHAR_MAX;
4852 min = 0;
4853 }
4854
4855 switch(*cc)
4856 {
4857 case PT_ANY:
4858 break;
4859
4860 case PT_LAMP:
4861 case PT_GC:
4862 case PT_PC:
4863 case PT_ALNUM:
4864 needstype = TRUE;
4865 break;
4866
4867 case PT_SC:
4868 needsscript = TRUE;
4869 break;
4870
4871 case PT_SPACE:
4872 case PT_PXSPACE:
4873 case PT_WORD:
4874 case PT_PXGRAPH:
4875 case PT_PXPRINT:
4876 case PT_PXPUNCT:
4877 needstype = TRUE;
4878 needschar = TRUE;
4879 break;
4880
4881 case PT_CLIST:
4882 case PT_UCNC:
4883 needschar = TRUE;
4884 break;
4885
4886 default:
4887 SLJIT_ASSERT_STOP();
4888 break;
4889 }
4890 cc += 2;
4891 }
4892 #endif
4893 }
4894
4895 /* We are not necessary in utf mode even in 8 bit mode. */
4896 cc = ccbegin;
4897 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4898
4899 if ((cc[-1] & XCL_HASPROP) == 0)
4900 {
4901 if ((cc[-1] & XCL_MAP) != 0)
4902 {
4903 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4904 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
4905 {
4906 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4907 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4908 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4909 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4910 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4911 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4912 }
4913
4914 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4915 JUMPHERE(jump);
4916
4917 cc += 32 / sizeof(pcre_uchar);
4918 }
4919 else
4920 {
4921 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4922 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4923 }
4924 }
4925 else if ((cc[-1] & XCL_MAP) != 0)
4926 {
4927 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4928 #ifdef SUPPORT_UCP
4929 charsaved = TRUE;
4930 #endif
4931 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4932 {
4933 #ifdef COMPILE_PCRE8
4934 jump = NULL;
4935 if (common->utf)
4936 #endif
4937 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4938
4939 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4940 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4941 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4942 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4943 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4944 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4945
4946 #ifdef COMPILE_PCRE8
4947 if (common->utf)
4948 #endif
4949 JUMPHERE(jump);
4950 }
4951
4952 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4953 cc += 32 / sizeof(pcre_uchar);
4954 }
4955
4956 #ifdef SUPPORT_UCP
4957 /* Simple register allocation. TMP1 is preferred if possible. */
4958 if (needstype || needsscript)
4959 {
4960 if (needschar && !charsaved)
4961 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4962 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4963 if (needschar)
4964 {
4965 if (needstype)
4966 {
4967 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4968 typereg = RETURN_ADDR;
4969 }
4970
4971 if (needsscript)
4972 scriptreg = TMP3;
4973 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4974 }
4975 else if (needstype && needsscript)
4976 scriptreg = TMP3;
4977 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4978
4979 if (needsscript)
4980 {
4981 if (scriptreg == TMP1)
4982 {
4983 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4984 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4985 }
4986 else
4987 {
4988 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4989 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4990 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4991 }
4992 }
4993 }
4994 #endif
4995
4996 /* Generating code. */
4997 charoffset = 0;
4998 numberofcmps = 0;
4999 #ifdef SUPPORT_UCP
5000 typeoffset = 0;
5001 #endif
5002
5003 while (*cc != XCL_END)
5004 {
5005 compares--;
5006 invertcmp = (compares == 0 && list != backtracks);
5007 jump = NULL;
5008
5009 if (*cc == XCL_SINGLE)
5010 {
5011 cc ++;
5012 GETCHARINCTEST(c, cc);
5013
5014 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5015 {
5016 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5017 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5018 numberofcmps++;
5019 }
5020 else if (numberofcmps > 0)
5021 {
5022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5023 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5024 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5025 numberofcmps = 0;
5026 }
5027 else
5028 {
5029 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5030 numberofcmps = 0;
5031 }
5032 }
5033 else if (*cc == XCL_RANGE)
5034 {
5035 cc ++;
5036 GETCHARINCTEST(c, cc);
5037 SET_CHAR_OFFSET(c);
5038 GETCHARINCTEST(c, cc);
5039
5040 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5041 {
5042 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5043 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5044 numberofcmps++;
5045 }
5046 else if (numberofcmps > 0)
5047 {
5048 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5049 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5050 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5051 numberofcmps = 0;
5052 }
5053 else
5054 {
5055 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5056 numberofcmps = 0;
5057 }
5058 }
5059 #ifdef SUPPORT_UCP
5060 else
5061 {
5062 if (*cc == XCL_NOTPROP)
5063 invertcmp ^= 0x1;
5064 cc++;
5065 switch(*cc)
5066 {
5067 case PT_ANY:
5068 if (list != backtracks)
5069 {
5070 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5071 continue;
5072 }
5073 else if (cc[-1] == XCL_NOTPROP)
5074 continue;
5075 jump = JUMP(SLJIT_JUMP);
5076 break;
5077
5078 case PT_LAMP:
5079 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5080 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5082 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5083 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5084 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5085 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5086 break;
5087
5088 case PT_GC:
5089 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5090 SET_TYPE_OFFSET(c);
5091 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5092 break;
5093
5094 case PT_PC:
5095 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5096 break;
5097
5098 case PT_SC:
5099 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5100 break;
5101
5102 case PT_SPACE:
5103 case PT_PXSPACE:
5104 SET_CHAR_OFFSET(9);
5105 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5106 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5107
5108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5109 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5110
5111 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5112 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5113
5114 SET_TYPE_OFFSET(ucp_Zl);
5115 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5116 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5117 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5118 break;
5119
5120 case PT_WORD:
5121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5122 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5123 /* Fall through. */
5124
5125 case PT_ALNUM:
5126 SET_TYPE_OFFSET(ucp_Ll);
5127 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5128 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5129 SET_TYPE_OFFSET(ucp_Nd);
5130 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5131 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5132 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5133 break;
5134
5135 case PT_CLIST:
5136 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5137
5138 /* At least three characters are required.
5139 Otherwise this case would be handled by the normal code path. */
5140 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5141 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5142
5143 /* Optimizing character pairs, if their difference is power of 2. */
5144 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5145 {
5146 if (charoffset == 0)
5147 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5148 else
5149 {
5150 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5151 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5152 }
5153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5154 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5155 other_cases += 2;
5156 }
5157 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5158 {
5159 if (charoffset == 0)
5160 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5161 else
5162 {
5163 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5164 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5165 }
5166 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5168
5169 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5170 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5171
5172 other_cases += 3;
5173 }
5174 else
5175 {
5176 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5177 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5178 }
5179
5180 while (*other_cases != NOTACHAR)
5181 {
5182 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5183 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5184 }
5185 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5186 break;
5187
5188 case PT_UCNC:
5189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5190 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5192 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5194 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5195
5196 SET_CHAR_OFFSET(0xa0);
5197 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5198 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5199 SET_CHAR_OFFSET(0);
5200 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5201 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5202 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5203 break;
5204
5205 case PT_PXGRAPH:
5206 /* C and Z groups are the farthest two groups. */
5207 SET_TYPE_OFFSET(ucp_Ll);
5208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5210
5211 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5212
5213 /* In case of ucp_Cf, we overwrite the result. */
5214 SET_CHAR_OFFSET(0x2066);
5215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5216 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5217
5218 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5219 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5220
5221 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5222 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5223
5224 JUMPHERE(jump);
5225 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5226 break;
5227
5228 case PT_PXPRINT:
5229 /* C and Z groups are the farthest two groups. */
5230 SET_TYPE_OFFSET(ucp_Ll);
5231 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5232 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5233
5234 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5235 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5236
5237 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5238
5239 /* In case of ucp_Cf, we overwrite the result. */
5240 SET_CHAR_OFFSET(0x2066);
5241 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5242 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5243
5244 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5245 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5246
5247 JUMPHERE(jump);
5248 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5249 break;
5250
5251 case PT_PXPUNCT:
5252 SET_TYPE_OFFSET(ucp_Sc);
5253 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5254 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5255
5256 SET_CHAR_OFFSET(0);
5257 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5258 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5259
5260 SET_TYPE_OFFSET(ucp_Pc);
5261 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5262 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5263 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5264 break;
5265 }
5266 cc += 2;
5267 }
5268 #endif
5269
5270 if (jump != NULL)
5271 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5272 }
5273
5274 if (found != NULL)
5275 set_jumps(found, LABEL());
5276 }
5277
5278 #undef SET_TYPE_OFFSET
5279 #undef SET_CHAR_OFFSET
5280
5281 #endif
5282
/* Emits the matching-path code for the position-test opcodes handled by the
switch below: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY, OP_EODN,
OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE.

Arguments:
  common      compiler state; newline settings, mode and the shared routine
              lists (wordboundary, anynewline) are read from it
  type        the opcode to compile
  cc          code pointer; only OP_REVERSE reads data from it (presumably it
              points just past the opcode - confirm against the callers)
  backtracks  list that receives every jump taken when the test fails

Returns: cc unchanged, except for OP_REVERSE which returns cc + LINK_SIZE.
Any other type reaches SLJIT_ASSERT_STOP(). */
5283 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5284 {
5285 DEFINE_COMPILER;
5286 int length;
5287 struct sljit_jump *jump[4];
5288 #ifdef SUPPORT_UTF
5289 struct sljit_label *label;
5290 #endif /* SUPPORT_UTF */
5291
5292 switch(type)
5293 {
5294 case OP_SOD:
/* Fail unless STR_PTR equals jit_arguments->begin. */
5295 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5296 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5297 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5298 return cc;
5299
5300 case OP_SOM:
/* Fail unless STR_PTR equals jit_arguments->str. */
5301 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5302 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5303 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5304 return cc;
5305
5306 case OP_NOT_WORD_BOUNDARY:
5307 case OP_WORD_BOUNDARY:
/* Fast-call the shared word boundary routine, then fail on the flag state that
contradicts the requested opcode. The flag is presumably left by that routine
(it is not set anywhere in this function). */
5308 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5309 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5310 return cc;
5311
5312 case OP_EODN:
5313 /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at (or past) STR_END, so all newline checks are
skipped. */
5314 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5315 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5316 {
/* Fixed newline made of two code units: TMP2 = STR_PTR + 2 units, TMP1 = the
unit at STR_PTR. */
5317 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5318 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5319 if (common->mode == JIT_COMPILE)
5320 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5321 else
5322 {
/* Other modes: jump[1] is taken when exactly two units remain. Otherwise fail
when more than two remain or when the current unit is not the first newline
unit. What is left is a single remaining unit equal to the first newline unit:
check_partial(common, TRUE) is emitted and the path then fails. */
5323 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5324 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5325 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5327 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5328 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5329 check_partial(common, TRUE);
5330 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5331 JUMPHERE(jump[1]);
5332 }
/* Exactly two units remain: both must match the two newline units. */
5333 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5334 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5335 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5336 }
5337 else if (common->nltype == NLTYPE_FIXED)
5338 {
/* Fixed single-unit newline: it must be the last unit of the subject. */
5339 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5340 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5341 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5342 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5343 }
5344 else
5345 {
/* Non-fixed newline types. First handle a CR at STR_PTR: jump[2] succeeds when
the CR is the last unit, more than two remaining units fail, and with exactly
two remaining units the second one must be NL (jump[3]). */
5346 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5347 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5348 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5349 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5350 jump[2] = JUMP(SLJIT_GREATER);
5351 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5352 /* Equal. */
5353 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5354 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5355 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5356
/* The unit at STR_PTR is not CR. */
5357 JUMPHERE(jump[1]);
5358 if (common->nltype == NLTYPE_ANYCRLF)
5359 {
/* It must be the last unit and equal to NL. */
5360 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5361 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5362 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5363 }
5364 else
5365 {
/* Save STR_PTR in LOCALS1, read one character, require that this reaches
STR_END, test the character with the shared anynewline routine, then restore
STR_PTR. */
5366 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5367 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5368 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5369 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5370 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5371 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5372 }
5373 JUMPHERE(jump[2]);
5374 JUMPHERE(jump[3]);
5375 }
5376 JUMPHERE(jump[0]);
5377 check_partial(common, FALSE);
5378 return cc;
5379
5380 case OP_EOD:
/* Fail while any subject unit remains. */
5381 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5382 check_partial(common, FALSE);
5383 return cc;
5384
5385 case OP_DOLL:
/* Fail when jit_arguments->noteol is non-zero. */
5386 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5387 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5388 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5389
/* Without endonly the rest is the OP_EODN test (its return value is ignored);
with endonly it is the same code as OP_EOD. */
5390 if (!common->endonly)
5391 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
5392 else
5393 {
5394 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5395 check_partial(common, FALSE);
5396 }
5397 return cc;
5398
5399 case OP_DOLLM:
/* jump[1]: subject units remain, so a newline must follow (checked below).
Otherwise STR_PTR is at the end: fail when noteol is set, else succeed through
jump[0]. */
5400 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5401 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5402 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5403 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5404 check_partial(common, FALSE);
5405 jump[0] = JUMP(SLJIT_JUMP);
5406 JUMPHERE(jump[1]);
5407
5408 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5409 {
/* Fixed two-unit newline: TMP2 = STR_PTR + 2 units, TMP1 = the unit at
STR_PTR. */
5410 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5411 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5412 if (common->mode == JIT_COMPILE)
5413 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5414 else
5415 {
/* jump[1] is taken when at least two units remain. With a single remaining
unit, fail unless it is the first newline unit; if it is, check_partial(common,
TRUE) is emitted and the path then fails. */
5416 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5417 /* STR_PTR = STR_END - IN_UCHARS(1) */
5418 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5419 check_partial(common, TRUE);
5420 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5421 JUMPHERE(jump[1]);
5422 }
5423
/* Both newline units must match. */
5424 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5425 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5426 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5427 }
5428 else
5429 {
5430 peek_char(common, common->nlmax);
5431 check_newlinechar(common, common->nltype, backtracks, FALSE);
5432 }
5433 JUMPHERE(jump[0]);
5434 return cc;
5435
5436 case OP_CIRC:
/* Fail when STR_PTR is past jit_arguments->begin or when notbol is non-zero. */
5437 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5439 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5440 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5441 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5442 return cc;
5443
5444 case OP_CIRCM:
/* jump[1]: STR_PTR is past begin, so the preceding text must be a newline
(checked below). Otherwise fail when notbol is set, else succeed through
jump[0]. TMP1 keeps the begin pointer for the bounds check below. */
5445 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5447 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5448 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5449 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5450 jump[0] = JUMP(SLJIT_JUMP);
5451 JUMPHERE(jump[1]);
5452
/* Past begin: the test fails when STR_PTR has reached STR_END. */
5453 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5454 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5455 {
/* Fixed two-unit newline: the two units before STR_PTR must exist (not before
begin) and match the newline units. */
5456 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5457 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5458 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5459 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5460 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5461 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5462 }
5463 else
5464 {
/* Step back one character, read it again and test it as a newline. */
5465 skip_char_back(common);
5466 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5467 check_newlinechar(common, common->nltype, backtracks, FALSE);
5468 }
5469 JUMPHERE(jump[0]);
5470 return cc;
5471
5472 case OP_REVERSE:
/* Moves STR_PTR back by the count stored at cc; a zero count emits nothing. */
5473 length = GET(cc, 0);
5474 if (length == 0)
5475 return cc + LINK_SIZE;
5476 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5477 #ifdef SUPPORT_UTF
5478 if (common->utf)
5479 {
/* UTF: step back one character at a time, TMP2 counting down from length;
fail as soon as STR_PTR is not past begin (kept in TMP3). */
5480 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5481 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5482 label = LABEL();
5483 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5484 skip_char_back(common);
5485 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5486 JUMPTO(SLJIT_NOT_ZERO, label);
5487 }
5488 else
5489 #endif
5490 {
/* Non-UTF: subtract length units at once and fail if that passes begin. */
5491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5492 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5493 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5494 }
5495 check_start_used_ptr(common);
5496 return cc + LINK_SIZE;
5497 }
5498 SLJIT_ASSERT_STOP();
5499 return cc;
5500 }
5501
/* Emits the matching-path code for one character-consuming opcode.

Arguments:
  common         compiler state (utf flag, mode, newline settings, ctypes,
                 shared routine lists)
  type           the opcode to compile (one of the cases of the switch below)
  cc             code pointer to the data of the opcode, if it has any
  backtracks     list that receives every jump taken when the item fails
  check_str_ptr  when TRUE, detect_partial_match(common, backtracks) is emitted
                 before the character is read (the OP_CHAR/OP_CHARI fast path
                 emits its own STR_END comparison instead)

Returns the code pointer after the data used by the opcode: cc for the opcodes
without data, cc + 2 for OP_PROP/OP_NOTPROP, cc + length for the literal
character opcodes, cc + 32 / sizeof(pcre_uchar) for OP_CLASS/OP_NCLASS and
cc + GET(cc, 0) - 1 for OP_XCLASS. Any other type reaches SLJIT_ASSERT_STOP(). */
5502 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
5503 {
5504 DEFINE_COMPILER;
5505 int length;
5506 unsigned int c, oc, bit;
5507 compare_context context;
5508 struct sljit_jump *jump[3];
5509 jump_list *end_list;
5510 #ifdef SUPPORT_UTF
5511 struct sljit_label *label;
5512 #ifdef SUPPORT_UCP
5513 pcre_uchar propdata[5];
5514 #endif
5515 #endif /* SUPPORT_UTF */
5516
5517 switch(type)
5518 {
5519 case OP_NOT_DIGIT:
5520 case OP_DIGIT:
5521 /* Digits are usually 0-9, so it is worth to optimize them. */
5522 if (check_str_ptr)
5523 detect_partial_match(common, backtracks);
5524 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5525 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5526 read_char7_type(common, type == OP_NOT_DIGIT);
5527 else
5528 #endif
5529 read_char8_type(common, type == OP_NOT_DIGIT);
5530 /* Flip the starting bit in the negative case. */
/* TMP1 is tested against ctype_digit; presumably the read_char*_type call
above leaves the character type bits in TMP1 - confirm in those helpers. */
5531 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5532 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5533 return cc;
5534
5535 case OP_NOT_WHITESPACE:
5536 case OP_WHITESPACE:
/* Same scheme as the digit case, using cbit_space / ctype_space. */
5537 if (check_str_ptr)
5538 detect_partial_match(common, backtracks);
5539 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5540 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5541 read_char7_type(common, type == OP_NOT_WHITESPACE);
5542 else
5543 #endif
5544 read_char8_type(common, type == OP_NOT_WHITESPACE);
5545 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5546 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5547 return cc;
5548
5549 case OP_NOT_WORDCHAR:
5550 case OP_WORDCHAR:
/* Same scheme as the digit case, using cbit_word / ctype_word. */
5551 if (check_str_ptr)
5552 detect_partial_match(common, backtracks);
5553 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5554 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5555 read_char7_type(common, type == OP_NOT_WORDCHAR);
5556 else
5557 #endif
5558 read_char8_type(common, type == OP_NOT_WORDCHAR);
5559 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5560 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5561 return cc;
5562
5563 case OP_ANY:
5564 if (check_str_ptr)
5565 detect_partial_match(common, backtracks);
5566 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5567 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5568 {
/* Fixed two-unit newline: jump[0] accepts any character that is not the first
newline unit. Otherwise the item fails only when the next unit (looked at
without advancing STR_PTR) is the second newline unit; the jumps collected in
end_list skip that second comparison. */
5569 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5570 end_list = NULL;
5571 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5572 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5573 else
5574 check_str_end(common, &end_list);
5575
5576 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5577 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5578 set_jumps(end_list, LABEL());
5579 JUMPHERE(jump[0]);
5580 }
5581 else
5582 check_newlinechar(common, common->nltype, backtracks, TRUE);
5583 return cc;
5584
5585 case OP_ALLANY:
5586 if (check_str_ptr)
5587 detect_partial_match(common, backtracks);
5588 #ifdef SUPPORT_UTF
5589 if (common->utf)
5590 {
/* UTF: load the first code unit, advance by one unit, then skip the additional
units of a multi-unit character (nothing more is skipped when neither
COMPILE_PCRE8 nor COMPILE_PCRE16 is defined). */
5591 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5592 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5593 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5594 #if defined COMPILE_PCRE8
/* 8-bit: units below 0xc0 need no extra skip; otherwise the extra length is
the byte at utf8_table4[unit - 0xc0]. */
5595 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5596 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5597 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5598 #elif defined COMPILE_PCRE16
/* 16-bit: units below 0xd800 need no extra skip; otherwise TMP1 becomes 1 when
(unit & 0xfc00) == 0xd800 and 0 if not, and is shifted left by one before it is
added to STR_PTR. */
5599 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5600 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5601 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5602 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5603 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5604 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5605 #endif
5606 JUMPHERE(jump[0]);
5607 #endif /* COMPILE_PCRE[8|16] */
5608 return cc;
5609 }
5610 #endif
/* Non-UTF: every character is exactly one code unit. */
5611 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5612 return cc;
5613
5614 case OP_ANYBYTE:
/* Always advances by exactly one code unit. */
5615 if (check_str_ptr)
5616 detect_partial_match(common, backtracks);
5617 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5618 return cc;
5619
5620 #ifdef SUPPORT_UTF
5621 #ifdef SUPPORT_UCP
5622 case OP_NOTPROP:
5623 case OP_PROP:
/* Build a five-element extended-class description holding just this property
(its two data units are copied from cc) and compile it with the xclass
compiler. */
5624 propdata[0] = XCL_HASPROP;
5625 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5626 propdata[2] = cc[0];
5627 propdata[3] = cc[1];
5628 propdata[4] = XCL_END;
5629 if (check_str_ptr)
5630 detect_partial_match(common, backtracks);
5631 compile_xclass_matchingpath(common, propdata, backtracks);
5632 return cc + 2;
5633 #endif
5634 #endif
5635
5636 case OP_ANYNL:
5637 if (check_str_ptr)
5638 detect_partial_match(common, backtracks);
5639 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
/* jump[0]: the character is not CR and goes to the generic newline test. After
a CR, a following NL is consumed as well (jump[2]); reaching the end of the
subject (end_list) or a different unit (jump[1]) leaves STR_PTR after the CR. */
5640 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5641 /* We don't need to handle soft partial matching case. */
5642 end_list = NULL;
5643 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5644 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5645 else
5646 check_str_end(common, &end_list);
5647 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5648 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5650 jump[2] = JUMP(SLJIT_JUMP);
5651 JUMPHERE(jump[0]);
5652 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5653 set_jumps(end_list, LABEL());
5654 JUMPHERE(jump[1]);
5655 JUMPHERE(jump[2]);
5656 return cc;
5657
5658 case OP_NOT_HSPACE:
5659 case OP_HSPACE:
/* Read the character and test it with the shared hspace routine; the flag
tested afterwards is presumably produced by that routine. */
5660 if (check_str_ptr)
5661 detect_partial_match(common, backtracks);
5662 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5663 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5664 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5665 return cc;
5666
5667 case OP_NOT_VSPACE:
5668 case OP_VSPACE:
/* Same scheme as the hspace case, using the shared vspace routine. */
5669 if (check_str_ptr)
5670 detect_partial_match(common, backtracks);
5671 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5672 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5673 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5674 return cc;
5675
5676 #ifdef SUPPORT_UCP
5677 case OP_EXTUNI:
5678 if (check_str_ptr)
5679 detect_partial_match(common, backtracks);
/* Read the first character and load the gbprop field of its ucd record into
STACK_TOP; the real STACK_TOP value is parked in LOCALS0 meanwhile. TMP2 is
presumably the record index left by the getucd routine (scaled by the shift of
3 in the SLJIT_MEM2 access) - confirm in that routine. */
5680 read_char(common);
5681 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5683 /* Optimize register allocation: use a real register. */
5684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5685 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5686
/* Loop: stop at the end of the subject (jump[0]); otherwise remember STR_PTR
in TMP3, read the next character and fetch its gbprop into TMP2. */
5687 label = LABEL();
5688 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5689 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5690 read_char(common);
5691 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5692 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5693 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5694
/* TMP1 = the ucp_gbtable entry selected by the previous gbprop (index * 4);
the new gbprop becomes the "previous" one in STACK_TOP; the loop continues
while bit (1 << new gbprop) is set in that table entry. */
5695 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5696 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5697 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5698 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5699 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5700 JUMPTO(SLJIT_NOT_ZERO, label);
5701
/* The last character read does not continue the sequence: move STR_PTR back
to where it started (TMP3), then restore the real STACK_TOP. */
5702 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5703 JUMPHERE(jump[0]);
5704 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5705
5706 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5707 {
5708 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5709 /* Since we successfully read a char above, partial matching must occur. */
5710 check_partial(common, TRUE);
5711 JUMPHERE(jump[0]);
5712 }
5713 return cc;
5714 #endif
5715
5716 case OP_CHAR:
5717 case OP_CHARI:
/* length is the number of code units the literal occupies at cc. */
5718 length = 1;
5719 #ifdef SUPPORT_UTF
5720 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5721 #endif
/* Fast path, JIT_COMPILE mode only: advance STR_PTR by the whole literal, fail
if that passes STR_END, and compare through byte_sequence_compare. It is used
for OP_CHAR, for characters without another case, and when
char_get_othercase_bit() returns non-zero. check_str_ptr is not consulted
here. */
5722 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5723 {
5724 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5725 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5726
5727 context.length = IN_UCHARS(length);
5728 context.sourcereg = -1;
5729 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5730 context.ucharptr = 0;
5731 #endif
5732 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5733 }
5734
/* General path: decode the pattern character into c, read one subject
character into TMP1 and compare the values. */
5735 if (check_str_ptr)
5736 detect_partial_match(common, backtracks);
5737 #ifdef SUPPORT_UTF
5738 if (common->utf)
5739 {
5740 GETCHAR(c, cc);
5741 }
5742 else
5743 #endif
5744 c = *cc;
5745
5746 if (type == OP_CHAR || !char_has_othercase(common, cc))
5747 {
5748 read_char_range(common, c, c, FALSE);
5749 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5750 return cc + length;
5751 }
5752 oc = char_othercase(common, c);
5753 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5754 bit = c ^ oc;
5755 if (is_powerof2(bit))
5756 {
/* The two cases differ in a single bit: force that bit on and compare once. */
5757 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5758 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5759 return cc + length;
5760 }
/* Otherwise accept c (jump[0]) or oc and fail on anything else. */
5761 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5762 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5763 JUMPHERE(jump[0]);
5764 return cc + length;
5765
5766 case OP_NOT:
5767 case OP_NOTI:
5768 if (check_str_ptr)
5769 detect_partial_match(common, backtracks);
5770 length = 1;
5771 #ifdef SUPPORT_UTF
5772 if (common->utf)
5773 {
5774 #ifdef COMPILE_PCRE8
5775 c = *cc;
5776 if (c < 128)
5777 {
/* UTF-8 with a pattern character below 128: only the first subject byte has to
be compared; the subject character is then skipped whatever its length. */
5778 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5779 if (type == OP_NOT || !char_has_othercase(common, cc))
5780 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5781 else
5782 {
5783 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5784 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5785 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5786 }
5787 /* Skip the variable-length character. */
5788 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5789 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5790 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5791 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5792 JUMPHERE(jump[0]);
5793 return cc + 1;
5794 }
5795 else
5796 #endif /* COMPILE_PCRE8 */
5797 {
5798 GETCHARLEN(c, cc, length);
5799 }
5800 }
5801 else
5802 #endif /* SUPPORT_UTF */
5803 c = *cc;
5804
/* General path: read one subject character and fail when it equals c (or its
other case for a caseless OP_NOTI). */
5805 if (type == OP_NOT || !char_has_othercase(common, cc))
5806 {
5807 read_char_range(common, c, c, TRUE);
5808 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5809 }
5810 else
5811 {
5812 oc = char_othercase(common, c);
5813 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5814 bit = c ^ oc;
5815 if (is_powerof2(bit))
5816 {
5817 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5818 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5819 }
5820 else
5821 {
5822 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5823 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5824 }
5825 }
5826 return cc + length;
5827
5828 case OP_CLASS:
5829 case OP_NCLASS:
5830 if (check_str_ptr)
5831 detect_partial_match(common, backtracks);
5832
/* In the UTF-8 build "bit" is reused as the highest character value covered by
the bitmap test: 127 when is_char7_bitset() accepts the class, otherwise 255. */
5833 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5834 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
5835 read_char_range(common, 0, bit, type == OP_NCLASS);
5836 #else
5837 read_char_range(common, 0, 255, type == OP_NCLASS);
5838 #endif
5839
/* Nothing more to emit when check_class_ranges() reports that it handled the
class. */
5840 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
5841 return cc + 32 / sizeof(pcre_uchar);
5842
/* Characters above the bitmap limit: OP_CLASS fails for them, OP_NCLASS skips
the bitmap test through jump[0]. */
5843 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5844 jump[0] = NULL;
5845 if (common->utf)
5846 {
5847 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5848 if (type == OP_CLASS)
5849 {
5850 add_jump(compiler, backtracks, jump[0]);
5851 jump[0] = NULL;
5852 }
5853 }
5854 #elif !defined COMPILE_PCRE8
5855 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5856 if (type == OP_CLASS)
5857 {
5858 add_jump(compiler, backtracks, jump[0]);
5859 jump[0] = NULL;
5860 }
5861 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5862
/* Bitmap test: load the byte at cc + (c >> 3) and fail when bit (c & 7) of it
is clear. */
5863 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5864 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5865 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5866 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5867 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5868 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5869
5870 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5871 if (jump[0] != NULL)
5872 JUMPHERE(jump[0]);
5873 #endif
5874 return cc + 32 / sizeof(pcre_uchar);
5875
5876 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5877 case OP_XCLASS:
/* The class data starts LINK_SIZE units after cc; GET(cc, 0) supplies the
value used to compute the returned pointer. */
5878 if (check_str_ptr)
5879 detect_partial_match(common, backtracks);
5880 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5881 return cc + GET(cc, 0) - 1;
5882 #endif
5883 }
5884 SLJIT_ASSERT_STOP();
5885 return cc;
5886 }
5887
5888 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5889 {
5890 /* This function consumes at least one input character. */
5891 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5892 DEFINE_COMPILER;
5893 pcre_uchar *ccbegin = cc;
5894 compare_context context;
5895 int size;
5896
5897 context.length = 0;
5898 do
5899 {
5900 if (cc >= ccend)
5901 break;
5902
5903 if (*cc == OP_CHAR)
5904 {
5905 size = 1;
5906 #ifdef SUPPORT_UTF
5907 if (common->utf && HAS_EXTRALEN(cc[1]))
5908 size += GET_EXTRALEN(cc[1]);
5909 #endif
5910 }
5911 else if (*cc == OP_CHARI)
5912 {
5913 size = 1;
5914 #ifdef SUPPORT_UTF
5915 if (common->utf)
5916 {
5917 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5918 size = 0;
5919 else if (HAS_EXTRALEN(cc[1]))
5920 size += GET_EXTRALEN(cc[1]);
5921 }
5922 else
5923 #endif
5924 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5925 size = 0;
5926 }
5927 else
5928 size = 0;
5929
5930 cc += 1 + size;
5931 context.length += IN_UCHARS(size);
5932 }
5933 while (size > 0 && context.length <= 128);
5934
5935 cc = ccbegin;
5936 if (context.length > 0)
5937 {
5938 /* We have a fixed-length byte sequence. */
5939 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5940 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5941
5942 context.sourcereg = -1;
5943 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5944 context.ucharptr = 0;
5945 #endif
5946 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5947 return cc;
5948 }
5949
5950 /* A non-fixed length character will be checked if length == 0. */
5951 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
5952 }
5953
5954 /* Forward declarations of functions that are used before their definitions are visible. */
5955 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5956 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5957
/* Allocates a new backtrack record of "size" bytes with sljit_alloc_memory and
pushes it on the parent's list: the record is zero-filled, its prev field takes
the old parent->top, its cc field takes "ccstart", and parent->top is set to
the new record. If the compiler reports an error after the allocation, the
enclosing function returns "error" immediately.

The macro uses the names "compiler", "backtrack" and "parent" from the scope
in which it is expanded. */
5958 #define PUSH_BACKTRACK(size, ccstart, error) \
5959 do \
5960 { \
5961 backtrack = sljit_alloc_memory(compiler, (size)); \
5962 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5963 return error; \
5964 memset(backtrack, 0, size); \
5965 backtrack->prev = parent->top; \
5966 backtrack->cc = (ccstart); \
5967 parent->top = backtrack; \
5968 } \
5969 while (0)
5970
/* Variant of the backtrack push for functions that return no value: allocates
"size" bytes with sljit_alloc_memory, zero-fills the record, stores the old
parent->top in its prev field and "ccstart" in its cc field, and makes it the
new parent->top. If the compiler reports an error after the allocation, the
enclosing function returns immediately.

The macro uses the names "compiler", "backtrack" and "parent" from the scope
in which it is expanded. */
5971 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5972 do \
5973 { \
5974 backtrack = sljit_alloc_memory(compiler, (size)); \
5975 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5976 return; \
5977 memset(backtrack, 0, size); \
5978 backtrack->prev = parent->top; \
5979 backtrack->cc = (ccstart); \
5980 parent->top = backtrack; \
5981 } \
5982 while (0)
5983
/* Views the variable "backtrack" of the expanding scope as a pointer to the
given structure type. */
5984 #define BACKTRACK_AS(type) ((type *)backtrack)
5985
5986 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5987 {
5988 /* The OVECTOR offset goes to TMP2. */
5989 DEFINE_COMPILER;
5990 int count = GET2(cc, 1 + IMM2_SIZE);
5991 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5992 unsigned int offset;
5993 jump_list *found = NULL;
5994
5995 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5996
5997 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5998
5999 count--;
6000 while (count-- > 0)
6001 {
6002 offset = GET2(slot, 0) << 1;
6003 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6004 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6005 slot += common->name_entry_size;
6006 }
6007
6008 offset = GET2(slot, 0) << 1;
6009 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6010 if (backtracks != NULL && !common->jscript_compat)
6011 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6012
6013 set_jumps(found, LABEL());
6014 }
6015
6016 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6017 {
6018 DEFINE_COMPILER;
6019 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6020 int offset = 0;
6021 struct sljit_jump *jump = NULL;
6022 struct sljit_jump *partial;
6023 struct sljit_jump *nopartial;
6024
6025 if (ref)
6026 {
6027 offset = GET2(cc, 1) << 1;
6028 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6029 /* OVECTOR(1) contains the "string begin - 1" constant. */
6030 if (withchecks && !common->jscript_compat)
6031 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6032 }
6033 else
6034 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6035
6036 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6037 if (common->utf && *cc == OP_REFI)
6038 {
6039 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6040 if (ref)
6041 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6042 else
6043 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6044
6045 if (withchecks)
6046 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6047
6048 /* Needed to save important temporary registers. */
6049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6050 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6052 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6053 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6054 if (common->mode == JIT_COMPILE)
6055 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6056 else
6057 {
6058 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6059 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6060 check_partial(common, FALSE);
6061 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6062 JUMPHERE(nopartial);
6063 }
6064 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6065 }
6066 else
6067 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6068 {
6069 if (ref)
6070 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6071 else
6072 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6073
6074 if (withchecks)
6075 jump = JUMP(SLJIT_ZERO);
6076
6077 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6078 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6079 if (common->mode == JIT_COMPILE)
6080 add_jump(compiler, backtracks, partial);
6081
6082 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6083 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6084
6085 if (common->mode != JIT_COMPILE)
6086 {
6087 nopartial = JUMP(SLJIT_JUMP);
6088 JUMPHERE(partial);
6089 /* TMP2 -= STR_END - STR_PTR */
6090 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6091 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6092 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6093 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6094 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6095 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6096 JUMPHERE(partial);
6097 check_partial(common, FALSE);
6098 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6099 JUMPHERE(nopartial);
6100 }
6101 }
6102
6103 if (jump != NULL)
6104 {
6105 if (emptyfail)
6106 add_jump(compiler, backtracks, jump);
6107 else
6108 JUMPHERE(jump);
6109 }
6110 }
6111
/* Emits the matching path of a back reference which is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE). Both the numbered forms (OP_REF,
OP_REFI) and the duplicate-name forms (handled through
compile_dnref_search()) are supported.

Arguments:
  common  compiler state
  cc      points to the back reference opcode
  parent  parent backtrack record (not referenced directly in this body;
          presumably consumed by the PUSH_BACKTRACK macro, which is defined
          elsewhere - TODO confirm)

Returns the address of the first opcode after the repeat.

The decoded repeat is kept in min / max, where max == 0 stands for "no upper
limit". Greedy repeats are compiled by the first half of the function and
return early; the code after that handles the minimizing repeats. */
6112 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6113 {
6114 DEFINE_COMPILER;
6115 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6116 backtrack_common *backtrack;
6117 pcre_uchar type;
6118 int offset = 0;
6119 struct sljit_label *label;
6120 struct sljit_jump *zerolength;
6121 struct sljit_jump *jump = NULL;
/* The original opcode address is kept because cc is advanced below, while
the helper functions need to see the back reference opcode itself. */
6122 pcre_uchar *ccbegin = cc;
6123 int min = 0, max = 0;
6124 BOOL minimize;
6125
6126 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6127
/* The non-numbered form is skipped by one extra IMM2 field here, so the
repeat type can be read with the same index for both forms. */
6128 if (ref)
6129 offset = GET2(cc, 1) << 1;
6130 else
6131 cc += IMM2_SIZE;
6132 type = cc[1 + IMM2_SIZE];
6133
/* The minimizing variant of every repeat opcode is the odd one. */
6134 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6135 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and move cc past the repeat opcode. */
6136 switch(type)
6137 {
6138 case OP_CRSTAR:
6139 case OP_CRMINSTAR:
6140 min = 0;
6141 max = 0;
6142 cc += 1 + IMM2_SIZE + 1;
6143 break;
6144 case OP_CRPLUS:
6145 case OP_CRMINPLUS:
6146 min = 1;
6147 max = 0;
6148 cc += 1 + IMM2_SIZE + 1;
6149 break;
6150 case OP_CRQUERY:
6151 case OP_CRMINQUERY:
6152 min = 0;
6153 max = 1;
6154 cc += 1 + IMM2_SIZE + 1;
6155 break;
6156 case OP_CRRANGE:
6157 case OP_CRMINRANGE:
6158 min = GET2(cc, 1 + IMM2_SIZE + 1);
6159 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6160 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6161 break;
6162 default:
6163 SLJIT_ASSERT_STOP();
6164 break;
6165 }
6166
/* Greedy repeats. */
6167 if (!minimize)
6168 {
6169 if (min == 0)
6170 {
/* Two stack items: the current STR_PTR and a zero. */
6171 allocate_stack(common, 2);
6172 if (ref)
6173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6176 /* Temporary release of STR_PTR. */
6177 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6178 /* Handles both invalid and empty cases. Since the minimum repeat,
6179 is zero the invalid case is basically the same as an empty case. */
6180 if (ref)
6181 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6182 else
6183 {
/* No backtrack list is passed: with min == 0 a failed lookup is treated like
an empty reference. The address of the selected OVECTOR pair is saved in
POSSESSIVE1 because TMP2 is reloaded from there at the loop head below. */
6184 compile_dnref_search(common, ccbegin, NULL);
6185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6187 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6188 }
6189 /* Restore if not zero length. */
6190 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6191 }
6192 else
6193 {
/* At least one repeat is required: a single zero item is pushed and the
group is checked against OVECTOR(1) (directly, or through the backtrack list
passed to compile_dnref_search()). */
6194 allocate_stack(common, 1);
6195 if (ref)
6196 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6198 if (ref)
6199 {
6200 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6201 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6202 }
6203 else
6204 {
6205 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6207 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6208 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6209 }
6210 }
6211
/* POSSESSIVE0 serves as the iteration counter whenever a count is needed. */
6212 if (min > 1 || max > 1)
6213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6214
/* Head of the repeat loop. */
6215 label = LABEL();
6216 if (!ref)
6217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6218 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6219
6220 if (min > 1 || max > 1)
6221 {
6222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6223 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without pushing a backtrack position. */
6225 if (min > 1)
6226 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6227 if (max > 1)
6228 {
/* Stop at the maximum, otherwise push STR_PTR and try another repeat. */
6229 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6230 allocate_stack(common, 1);
6231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6232 JUMPTO(SLJIT_JUMP, label);
6233 JUMPHERE(jump);
6234 }
6235 }
6236
6237 if (max == 0)
6238 {
6239 /* Includes min > 1 case as well. */
6240 allocate_stack(common, 1);
6241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6242 JUMPTO(SLJIT_JUMP, label);
6243 }
6244
6245 JUMPHERE(zerolength);
6246 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6247
6248 count_match(common);
6249 return cc;
6250 }
6251
/* Minimizing repeats. STACK(0) starts as zero and later holds STR_PTR,
STACK(1) is the repeat counter (not initialized for OP_CRMINSTAR), and the
non-numbered form uses a third item, STACK(2), for the address of the selected
OVECTOR pair. */
6252 allocate_stack(common, ref ? 2 : 3);
6253 if (ref)
6254 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6256 if (type != OP_CRMINSTAR)
6257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6258
6259 if (min == 0)
6260 {
6261 /* Handles both invalid and empty cases. Since the minimum repeat,
6262 is zero the invalid case is basically the same as an empty case. */
6263 if (ref)
6264 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6265 else
6266 {
6267 compile_dnref_search(common, ccbegin, NULL);
6268 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6270 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6271 }
6272 /* Length is non-zero, we can match real repeats. */
6273 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass skips the matching code emitted below. */
6274 jump = JUMP(SLJIT_JUMP);
6275 }
6276 else
6277 {
6278 if (ref)
6279 {
6280 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6281 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6282 }
6283 else
6284 {
6285 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6286 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6287 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6288 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6289 }
6290 }
6291
/* The label recorded here is where one more repeat is attempted; it is also
the target of the min > 1 loop below. */
6292 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Give up once the counter in STACK(1) has reached the upper limit. */
6293 if (max > 0)
6294 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6295
6296 if (!ref)
6297 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6298 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6300
6301 if (min > 1)
6302 {
/* Keep repeating until the minimum count is reached. */
6303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6304 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6305 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6306 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6307 }
6308 else if (max > 0)
6309 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6310
6311 if (jump != NULL)
6312 JUMPHERE(jump);
6313 JUMPHERE(zerolength);
6314
6315 count_match(common);
6316 return cc;
6317 }
6318
/* Emits the matching path of a recursion / subroutine call opcode.

Arguments:
  common  compiler state; the list of recursion targets (common->entries) may
          be extended here
  cc      points to the opcode; the link field at cc + 1 is the offset of the
          called group, relative to common->start
  parent  parent backtrack record (not referenced directly in this body;
          presumably consumed by the PUSH_BACKTRACK macro, which is defined
          elsewhere - TODO confirm)

Returns the address of the next opcode (cc + 1 + LINK_SIZE), or NULL when the
compiler reports an error after allocating a new recurse_entry.

Targets for which get_framesize() reports no_stack are compiled inline.
Every other target gets one shared recurse_entry, and a fast call to that
entry is emitted here. */
6319 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6320 {
6321 DEFINE_COMPILER;
6322 backtrack_common *backtrack;
6323 recurse_entry *entry = common->entries;
6324 recurse_entry *prev = NULL;
6325 sljit_sw start = GET(cc, 1);
6326 pcre_uchar *start_cc;
6327 BOOL needs_control_head;
6328
6329 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6330
6331 /* Inlining simple patterns. */
6332 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6333 {
6334 start_cc = common->start + start;
/* Compile the called group in place; the end address passed excludes the
last 1 + LINK_SIZE code units of the group. The backtrack record is flagged
as inlined. */
6335 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6336 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6337 return cc + 1 + LINK_SIZE;
6338 }
6339
/* Look for an existing entry of this target; prev ends up as the last list
item when nothing is found. */
6340 while (entry != NULL)
6341 {
6342 if (entry->start == start)
6343 break;
6344 prev = entry;
6345 entry = entry->next;
6346 }
6347
6348 if (entry == NULL)
6349 {
/* First call of this target: append a new entry to the list. */
6350 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6351 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6352 return NULL;
6353 entry->next = NULL;
6354 entry->entry = NULL;
6355 entry->calls = NULL;
6356 entry->start = start;
6357
6358 if (prev != NULL)
6359 prev->next = entry;
6360 else
6361 common->entries = entry;
6362 }
6363
/* Push OVECTOR(0) and / or the mark value before the call, depending on
which of them the pattern uses: two items when both are in use, one item when
only one of them is. */
6364 if (common->has_set_som && common->mark_ptr != 0)
6365 {
6366 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6367 allocate_stack(common, 2);
6368 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6371 }
6372 else if (common->has_set_som || common->mark_ptr != 0)
6373 {
6374 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6375 allocate_stack(common, 1);
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6377 }
6378
/* The entry label is NULL until the target has been generated; in that case
the call is queued on entry->calls. */
6379 if (entry->entry == NULL)
6380 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6381 else
6382 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6383 /* Leave if the match is failed. */
6384 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6385 return cc + 1 + LINK_SIZE;
6386 }
6387
6388 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6389 {
6390 const pcre_uchar *begin = arguments->begin;
6391 int *offset_vector = arguments->offsets;
6392 int offset_count = arguments->offset_count;
6393 int i;
6394
6395 if (PUBL(callout) == NULL)
6396 return 0;
6397
6398 callout_block->version = 2;
6399 callout_block->callout_data = arguments->callout_data;
6400
6401 /* Offsets in subject. */
6402 callout_block->subject_length = arguments->end - arguments->begin;
6403 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6404 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6405 #if defined COMPILE_PCRE8
6406 callout_block->subject = (PCRE_SPTR)begin;
6407 #elif defined COMPILE_PCRE16
6408 callout_block->subject = (PCRE_SPTR16)begin;
6409 #elif defined COMPILE_PCRE32
6410 callout_block->subject = (PCRE_SPTR32)begin;
6411 #endif
6412
6413 /* Convert and copy the JIT offset vector to the offset_vector array. */
6414 callout_block->capture_top = 0;
6415 callout_block->offset_vector = offset_vector;
6416 for (i = 2; i < offset_count; i += 2)
6417 {
6418 offset_vector[i] = jit_ovector[i] - begin;
6419 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6420 if (jit_ovector[i] >= begin)
6421 callout_block->capture_top = i;
6422 }
6423
6424 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6425 if (offset_count > 0)
6426 offset_vector[0] = -1;
6427 if (offset_count > 1)
6428 offset_vector[1] = -1;
6429 return (*PUBL(callout))(callout_block);
6430 }
6431
6432 /* Size of the callout block in bytes, rounded up to a multiple of 8. */
6433 #define CALLOUT_ARG_SIZE \
6434 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6435
/* Displacement of the field "arg" of the callout block, measured from the
address CALLOUT_ARG_SIZE bytes above the start of the block (hence the
negative base). */
6436 #define CALLOUT_ARG_OFFSET(arg) \
6437 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6438
/* Emits the matching path of a callout opcode: a callout block is built on
the backtrack stack, do_callout() is called with it, and the generated code
then acts on the returned value.

Arguments:
  common  compiler state; capture_last_ptr must be non-zero (asserted)
  cc      points to the callout opcode; cc[1] is the callout number, followed
          by two link sized fields (pattern position, next item length)
  parent  parent backtrack record (not referenced directly in this body;
          presumably consumed by the PUSH_BACKTRACK macro, which is defined
          elsewhere - TODO confirm)

Returns the address of the next opcode (cc + 2 + 2 * LINK_SIZE).

Handling of the do_callout() result in the generated code: a positive value
backtracks, a negative value jumps to the forced quit path, zero continues on
the matching path. */
6439 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6440 {
6441 DEFINE_COMPILER;
6442 backtrack_common *backtrack;
6443
6444 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6445
/* Reserve room for the callout block, counted in machine words. */
6446 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6447
6448 SLJIT_ASSERT(common->capture_last_ptr != 0);
6449 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6450 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6451 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6452 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6453
6454 /* These pointer sized fields temporarily store internal variables. */
/* offset_vector carries STR_PTR and subject carries the value of OVECTOR(0);
do_callout() converts both to offsets before setting the real field values. */
6455 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6458
/* The mark field is copied from the argument block when the pattern uses
marks, otherwise it is set to zero. */
6459 if (common->mark_ptr != 0)
6460 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6461 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6462 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6464
6465 /* Needed to save important temporary registers. */
6466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* R1 = address of the callout block, R2 = address of the capture pointers.
TMP1 still holds ARGUMENTS; presumably it is the first argument register
(TODO confirm the TMP1 / SLJIT_R0 mapping). */
6467 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6468 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6469 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int, which is moved with the int sized move before
it is tested as a full register. */
6470 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6471 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6472 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6473
6474 /* Check return value. */
6475 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6476 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
/* The forced quit label may not exist yet; the jump is queued in that case. */
6477 if (common->forced_quit_label == NULL)
6478 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6479 else
6480 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6481 return cc + 2 + 2 * LINK_SIZE;
6482 }
6483
/* The helper macros are only meant for the function above. */
6484 #undef CALLOUT_ARG_SIZE
6485 #undef CALLOUT_ARG_OFFSET
6486
6487 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
6488 {
6489 while (TRUE)
6490 {
6491 switch (*cc)
6492 {
6493 case OP_NOT_WORD_BOUNDARY:
6494 case OP_WORD_BOUNDARY:
6495 case OP_CIRC:
6496 case OP_CIRCM:
6497 case OP_DOLL:
6498 case OP_DOLLM:
6499 case OP_CALLOUT:
6500 case OP_ALT:
6501 cc += PRIV(OP_lengths)[*cc];
6502 break;
6503
6504 case OP_KET:
6505 return FALSE;
6506
6507 default:
6508 return TRUE;
6509 }
6510 }
6511 }
6512
6513 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6514 {
6515 DEFINE_COMPILER;
6516 int framesize;
6517 int extrasize;
6518 BOOL needs_control_head;
6519 int private_data_ptr;
6520 backtrack_common altbacktrack;
6521 pcre_uchar *ccbegin;
6522 pcre_uchar opcode;
6523 pcre_uchar bra = OP_BRA;
6524 jump_list *tmp = NULL;
6525 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6526 jump_list **found;
6527 /* Saving previous accept variables. */
6528 BOOL save_local_exit = common->local_exit;
6529 BOOL save_positive_assert = common->positive_assert;
6530 then_trap_backtrack *save_then_trap = common->then_trap;
6531 struct sljit_label *save_quit_label = common->quit_label;
6532 struct sljit_label *save_accept_label = common->accept_label;
6533 jump_list *save_quit = common->quit;
6534 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6535 jump_list *save_accept = common->accept;
6536 struct sljit_jump *jump;
6537 struct sljit_jump *brajump = NULL;
6538
6539 /* Assert captures then. */
6540 common->then_trap = NULL;
6541
6542 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6543 {
6544 SLJIT_ASSERT(!conditional);
6545 bra = *cc;
6546 cc++;
6547 }
6548 private_data_ptr = PRIVATE_DATA(cc);
6549 SLJIT_ASSERT(private_data_ptr != 0);
6550 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6551 backtrack->framesize = framesize;
6552 backtrack->private_data_ptr = private_data_ptr;
6553 opcode = *cc;
6554 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6555 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6556 ccbegin = cc;
6557 cc += GET(cc, 1);
6558
6559 if (bra == OP_BRAMINZERO)
6560 {
6561 /* This is a braminzero backtrack path. */
6562 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6563 free_stack(common, 1);
6564 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6565 }
6566
6567 if (framesize < 0)
6568 {
6569 extrasize = 1;
6570 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
6571 extrasize = 0;
6572
6573 if (needs_control_head)
6574 extrasize++;
6575
6576 if (framesize == no_frame)
6577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6578
6579 if (extrasize > 0)
6580 allocate_stack(common, extrasize);
6581
6582 if (needs_control_head)
6583 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6584
6585 if (extrasize > 0)
6586 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6587
6588 if (needs_control_head)
6589 {
6590 SLJIT_ASSERT(extrasize == 2);
6591 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6593 }
6594 }
6595 else
6596 {
6597 extrasize = needs_control_head ? 3 : 2;
6598 allocate_stack(common, framesize + extrasize);
6599
6600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6601 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6602 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6603 if (needs_control_head)
6604 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6606 if (needs_control_head)
6607 {
6608 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6609 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6610 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6611 }
6612 else
6613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6614
6615 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6616 }
6617
6618 memset(&altbacktrack, 0, sizeof(backtrack_common));
6619 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6620 {
6621 /* Negative assert is stronger than positive assert. */
6622 common->local_exit = TRUE;
6623 common->quit_label = NULL;
6624 common->quit = NULL;
6625 common->positive_assert = FALSE;
6626 }
6627 else
6628 common->positive_assert = TRUE;
6629 common->positive_assert_quit = NULL;
6630
6631 while (1)
6632 {
6633 common->accept_label = NULL;
6634 common->accept = NULL;
6635 altbacktrack.top = NULL;
6636 altbacktrack.topbacktracks = NULL;
6637
6638 if (*ccbegin == OP_ALT && extrasize > 0)
6639 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6640
6641 altbacktrack.cc = ccbegin;
6642 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6643 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6644 {
6645 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6646 {
6647 common->local_exit = save_local_exit;
6648 common->quit_label = save_quit_label;
6649 common->quit = save_quit;
6650 }
6651 common->positive_assert = save_positive_assert;
6652 common->then_trap = save_then_trap;
6653 common->accept_label = save_accept_label;
6654 common->positive_assert_quit = save_positive_assert_quit;
6655 common->accept = save_accept;
6656 return NULL;
6657 }
6658 common->accept_label = LABEL();
6659 if (common->accept != NULL)
6660 set_jumps(common->accept, common->accept_label);
6661
6662 /* Reset stack. */
6663 if (framesize < 0)
6664 {
6665 if (framesize == no_frame)
6666 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6667 else if (extrasize > 0)
6668 free_stack(common, extrasize);
6669
6670 if (needs_control_head)
6671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6672 }
6673 else
6674 {
6675 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6676 {
6677 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6678 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6679 if (needs_control_head)
6680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6681 }
6682 else
6683 {
6684 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6685 if (needs_control_head)
6686 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6687 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6688 }
6689 }
6690
6691 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6692 {
6693 /* We know that STR_PTR was stored on the top of the stack. */
6694 if (conditional)
6695 {
6696 if (extrasize > 0)
6697 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6698 }
6699 else if (bra == OP_BRAZERO)
6700 {
6701 if (framesize < 0)
6702 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6703 else
6704 {
6705 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6708 }
6709 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6710 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6711 }
6712 else if (framesize >= 0)
6713 {
6714 /* For OP_BRA and OP_BRAMINZERO. */
6715 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6716 }
6717 }
6718 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6719
6720 compile_backtrackingpath(common, altbacktrack.top);
6721 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6722 {
6723 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6724 {
6725 common->local_exit = save_local_exit;
6726 common->quit_label = save_quit_label;
6727 common->quit = save_quit;
6728 }
6729 common->positive_assert = save_positive_assert;
6730 common->then_trap = save_then_trap;
6731 common->accept_label = save_accept_label;
6732 common->positive_assert_quit = save_positive_assert_quit;
6733 common->accept = save_accept;
6734 return NULL;
6735 }
6736 set_jumps(altbacktrack.topbacktracks, LABEL());
6737
6738 if (*cc != OP_ALT)
6739 break;
6740
6741 ccbegin = cc;
6742 cc += GET(cc, 1);
6743 }
6744
6745 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6746 {
6747 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6748 /* Makes the check less complicated below. */
6749 common->positive_assert_quit = common->quit;
6750 }
6751
6752 /* None of them matched. */
6753 if (common->positive_assert_quit != NULL)
6754 {
6755 jump = JUMP(SLJIT_JUMP);
6756 set_jumps(common->positive_assert_quit, LABEL());
6757 SLJIT_ASSERT(framesize != no_stack);
6758 if (framesize < 0)
6759 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6760 else
6761 {
6762 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6763 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6764 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6765 }
6766 JUMPHERE(jump);
6767 }
6768
6769 if (needs_control_head)
6770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6771
6772 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6773 {
6774 /* Assert is failed. */
6775 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
6776 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6777
6778 if (framesize < 0)
6779 {
6780 /* The topmost item should be 0. */
6781 if (bra == OP_BRAZERO)
6782 {
6783 if (extrasize == 2)
6784 free_stack(common, 1);
6785 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6786 }