/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1627 - (show annotations)
Tue Feb 9 08:55:32 2016 UTC (3 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 334783 byte(s)
Error occurred while calculating annotation data.
JIT compiler does not use TMP3 for matching xclass anymore.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node. */
} jump_list;
194
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;    /* Following node. */
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably stored in the
framesize members of the backtrack structures - confirm at the users. */
enum frame_types { no_frame = -1, no_stack = -2 };
211
/* Tags for the two kinds of control entries (mark and then-trap). */
enum control_types { type_mark = 0, type_then_trap = 1 };
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct char_iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } char_iterator_backtrack;
285
286 typedef struct ref_iterator_backtrack {
287 backtrack_common common;
288 /* Next iteration. */
289 struct sljit_label *matchingpath;
290 } ref_iterator_backtrack;
291
292 typedef struct recurse_entry {
293 struct recurse_entry *next;
294 /* Contains the function entry. */
295 struct sljit_label *entry;
296 /* Collects the calls until the function is not created. */
297 jump_list *calls;
298 /* Points to the starting opcode. */
299 sljit_sw start;
300 } recurse_entry;
301
302 typedef struct recurse_backtrack {
303 backtrack_common common;
304 BOOL inlined_pattern;
305 } recurse_backtrack;
306
307 #define OP_THEN_TRAP OP_TABLE_LENGTH
308
309 typedef struct then_trap_backtrack {
310 backtrack_common common;
311 /* If then_trap is not NULL, this structure contains the real
312 then_trap for the backtracking path. */
313 struct then_trap_backtrack *then_trap;
314 /* Points to the starting opcode. */
315 sljit_sw start;
316 /* Exit point for the then opcodes of this alternative. */
317 jump_list *quit;
318 /* Frame size of the current alternative. */
319 int framesize;
320 } then_trap_backtrack;
321
322 #define MAX_RANGE_SIZE 4
323
324 typedef struct compiler_common {
325 /* The sljit ceneric compiler. */
326 struct sljit_compiler *compiler;
327 /* First byte code. */
328 pcre_uchar *start;
329 /* Maps private data offset to each opcode. */
330 sljit_si *private_data_ptrs;
331 /* Chain list of read-only data ptrs. */
332 void *read_only_data_head;
333 /* Tells whether the capturing bracket is optimized. */
334 sljit_ub *optimized_cbracket;
335 /* Tells whether the starting offset is a target of then. */
336 sljit_ub *then_offsets;
337 /* Current position where a THEN must jump. */
338 then_trap_backtrack *then_trap;
339 /* Starting offset of private data for capturing brackets. */
340 sljit_si cbra_ptr;
341 /* Output vector starting point. Must be divisible by 2. */
342 sljit_si ovector_start;
343 /* Points to the starting character of the current match. */
344 sljit_si start_ptr;
345 /* Last known position of the requested byte. */
346 sljit_si req_char_ptr;
347 /* Head of the last recursion. */
348 sljit_si recursive_head_ptr;
349 /* First inspected character for partial matching.
350 (Needed for avoiding zero length partial matches.) */
351 sljit_si start_used_ptr;
352 /* Starting pointer for partial soft matches. */
353 sljit_si hit_start;
354 /* End pointer of the first line. */
355 sljit_si first_line_end;
356 /* Points to the marked string. */
357 sljit_si mark_ptr;
358 /* Recursive control verb management chain. */
359 sljit_si control_head_ptr;
360 /* Points to the last matched capture block index. */
361 sljit_si capture_last_ptr;
362
363 /* Flipped and lower case tables. */
364 const sljit_ub *fcc;
365 sljit_sw lcc;
366 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
367 int mode;
368 /* TRUE, when minlength is greater than 0. */
369 BOOL might_be_empty;
370 /* \K is found in the pattern. */
371 BOOL has_set_som;
372 /* (*SKIP:arg) is found in the pattern. */
373 BOOL has_skip_arg;
374 /* (*THEN) is found in the pattern. */
375 BOOL has_then;
376 /* Currently in recurse or negative assert. */
377 BOOL local_exit;
378 /* Currently in a positive assert. */
379 BOOL positive_assert;
380 /* Newline control. */
381 int nltype;
382 sljit_ui nlmax;
383 sljit_ui nlmin;
384 int newline;
385 int bsr_nltype;
386 sljit_ui bsr_nlmax;
387 sljit_ui bsr_nlmin;
388 /* Dollar endonly. */
389 int endonly;
390 /* Tables. */
391 sljit_sw ctypes;
392 /* Named capturing brackets. */
393 pcre_uchar *name_table;
394 sljit_sw name_count;
395 sljit_sw name_entry_size;
396
397 /* Labels and jump lists. */
398 struct sljit_label *partialmatchlabel;
399 struct sljit_label *quit_label;
400 struct sljit_label *forced_quit_label;
401 struct sljit_label *accept_label;
402 struct sljit_label *ff_newline_shortcut;
403 stub_list *stubs;
404 label_addr_list *label_addrs;
405 recurse_entry *entries;
406 recurse_entry *currententry;
407 jump_list *partialmatch;
408 jump_list *quit;
409 jump_list *positive_assert_quit;
410 jump_list *forced_quit;
411 jump_list *accept;
412 jump_list *calllimit;
413 jump_list *stackalloc;
414 jump_list *revertframes;
415 jump_list *wordboundary;
416 jump_list *anynewline;
417 jump_list *hspace;
418 jump_list *vspace;
419 jump_list *casefulcmp;
420 jump_list *caselesscmp;
421 jump_list *reset_match;
422 BOOL jscript_compat;
423 #ifdef SUPPORT_UTF
424 BOOL utf;
425 #ifdef SUPPORT_UCP
426 BOOL use_ucp;
427 #endif
428 #ifdef COMPILE_PCRE8
429 jump_list *utfreadchar;
430 jump_list *utfreadchar16;
431 jump_list *utfreadtype8;
432 #endif
433 #endif /* SUPPORT_UTF */
434 #ifdef SUPPORT_UCP
435 jump_list *getucd;
436 #endif
437 } compiler_common;
438
/* For byte_sequence_compare. When SLJIT_UNALIGNED is enabled, the members
c and oc are two overlays of the same shape, so they are declared with a
single union specifier. */

typedef struct compare_context {
  int length, sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c, oc;
#endif
} compare_context;
472
473 /* Undefine sljit macros. */
474 #undef CMP
475
476 /* Used for accessing the elements of the stack. */
477 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
478
479 #define TMP1 SLJIT_R0
480 #define TMP2 SLJIT_R2
481 #define TMP3 SLJIT_R3
482 #define STR_PTR SLJIT_S0
483 #define STR_END SLJIT_S1
484 #define STACK_TOP SLJIT_R1
485 #define STACK_LIMIT SLJIT_S2
486 #define COUNT_MATCH SLJIT_S3
487 #define ARGUMENTS SLJIT_S4
488 #define RETURN_ADDR SLJIT_R4
489
490 /* Local space layout. */
491 /* These two locals can be used by the current opcode. */
492 #define LOCALS0 (0 * sizeof(sljit_sw))
493 #define LOCALS1 (1 * sizeof(sljit_sw))
494 /* Two local variables for possessive quantifiers (char1 cannot use them). */
495 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
496 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
497 /* Max limit of recursions. */
498 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
499 /* The output vector is stored on the stack, and contains pointers
500 to characters. The vector data is divided into two groups: the first
501 group contains the start / end character pointers, and the second is
502 the start pointers when the end of the capturing group has not yet reached. */
503 #define OVECTOR_START (common->ovector_start)
504 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
505 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
506 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
507
508 #if defined COMPILE_PCRE8
509 #define MOV_UCHAR SLJIT_MOV_UB
510 #define MOVU_UCHAR SLJIT_MOVU_UB
511 #elif defined COMPILE_PCRE16
512 #define MOV_UCHAR SLJIT_MOV_UH
513 #define MOVU_UCHAR SLJIT_MOVU_UH
514 #elif defined COMPILE_PCRE32
515 #define MOV_UCHAR SLJIT_MOV_UI
516 #define MOVU_UCHAR SLJIT_MOVU_UI
517 #else
518 #error Unsupported compiling mode
519 #endif
520
/* Shortcuts. DEFINE_COMPILER declares a local `compiler` taken from
`common`; every other macro below expects that local to be in scope and
forwards to the sljit call of the matching name. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Instructions with one and with two source operands. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. JUMPTO / CMPTO bind the new jump to an existing label,
JUMPHERE binds an existing jump to a label emitted at this point. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Flag materialisation and local frame address. */
#define OP_FLAGS(op, dst, dstw, src, srcw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))
546
547 #define READ_CHAR_MAX 0x7fffffff
548
549 static pcre_uchar *bracketend(pcre_uchar *cc)
550 {
551 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
552 do cc += GET(cc, 1); while (*cc == OP_ALT);
553 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
554 cc += 1 + LINK_SIZE;
555 return cc;
556 }
557
558 static int no_alternatives(pcre_uchar *cc)
559 {
560 int count = 0;
561 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
562 do
563 {
564 cc += GET(cc, 1);
565 count++;
566 }
567 while (*cc == OP_ALT);
568 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
569 return count;
570 }
571
/* Lookup table: entry v (0..15) is the number of set bits in the 4-bit
value v. Declared const so that an accidental write is rejected at compile
time and the table can be placed in read-only storage. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
576
577 /* Functions which might need modification for every newly supported opcode:
578 next_opcode
579 check_opcode_types
580 set_private_data_ptrs
581 get_framesize
582 init_frame
583 get_private_data_copy_length
584 copy_private_data
585 compile_matchingpath
586 compile_backtrackingpath
587 */
588
589 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
590 {
591 SLJIT_UNUSED_ARG(common);
592 switch(*cc)
593 {
594 case OP_SOD:
595 case OP_SOM:
596 case OP_SET_SOM:
597 case OP_NOT_WORD_BOUNDARY:
598 case OP_WORD_BOUNDARY:
599 case OP_NOT_DIGIT:
600 case OP_DIGIT:
601 case OP_NOT_WHITESPACE:
602 case OP_WHITESPACE:
603 case OP_NOT_WORDCHAR:
604 case OP_WORDCHAR:
605 case OP_ANY:
606 case OP_ALLANY:
607 case OP_NOTPROP:
608 case OP_PROP:
609 case OP_ANYNL:
610 case OP_NOT_HSPACE:
611 case OP_HSPACE:
612 case OP_NOT_VSPACE:
613 case OP_VSPACE:
614 case OP_EXTUNI:
615 case OP_EODN:
616 case OP_EOD:
617 case OP_CIRC:
618 case OP_CIRCM:
619 case OP_DOLL:
620 case OP_DOLLM:
621 case OP_CRSTAR:
622 case OP_CRMINSTAR:
623 case OP_CRPLUS:
624 case OP_CRMINPLUS:
625 case OP_CRQUERY:
626 case OP_CRMINQUERY:
627 case OP_CRRANGE:
628 case OP_CRMINRANGE:
629 case OP_CRPOSSTAR:
630 case OP_CRPOSPLUS:
631 case OP_CRPOSQUERY:
632 case OP_CRPOSRANGE:
633 case OP_CLASS:
634 case OP_NCLASS:
635 case OP_REF:
636 case OP_REFI:
637 case OP_DNREF:
638 case OP_DNREFI:
639 case OP_RECURSE:
640 case OP_CALLOUT:
641 case OP_ALT:
642 case OP_KET:
643 case OP_KETRMAX:
644 case OP_KETRMIN:
645 case OP_KETRPOS:
646 case OP_REVERSE:
647 case OP_ASSERT:
648 case OP_ASSERT_NOT:
649 case OP_ASSERTBACK:
650 case OP_ASSERTBACK_NOT:
651 case OP_ONCE:
652 case OP_ONCE_NC:
653 case OP_BRA:
654 case OP_BRAPOS:
655 case OP_CBRA:
656 case OP_CBRAPOS:
657 case OP_COND:
658 case OP_SBRA:
659 case OP_SBRAPOS:
660 case OP_SCBRA:
661 case OP_SCBRAPOS:
662 case OP_SCOND:
663 case OP_CREF:
664 case OP_DNCREF:
665 case OP_RREF:
666 case OP_DNRREF:
667 case OP_DEF:
668 case OP_BRAZERO:
669 case OP_BRAMINZERO:
670 case OP_BRAPOSZERO:
671 case OP_PRUNE:
672 case OP_SKIP:
673 case OP_THEN:
674 case OP_COMMIT:
675 case OP_FAIL:
676 case OP_ACCEPT:
677 case OP_ASSERT_ACCEPT:
678 case OP_CLOSE:
679 case OP_SKIPZERO:
680 return cc + PRIV(OP_lengths)[*cc];
681
682 case OP_CHAR:
683 case OP_CHARI:
684 case OP_NOT:
685 case OP_NOTI:
686 case OP_STAR:
687 case OP_MINSTAR:
688 case OP_PLUS:
689 case OP_MINPLUS:
690 case OP_QUERY:
691 case OP_MINQUERY:
692 case OP_UPTO:
693 case OP_MINUPTO:
694 case OP_EXACT:
695 case OP_POSSTAR:
696 case OP_POSPLUS:
697 case OP_POSQUERY:
698 case OP_POSUPTO:
699 case OP_STARI:
700 case OP_MINSTARI:
701 case OP_PLUSI:
702 case OP_MINPLUSI:
703 case OP_QUERYI:
704 case OP_MINQUERYI:
705 case OP_UPTOI:
706 case OP_MINUPTOI:
707 case OP_EXACTI:
708 case OP_POSSTARI:
709 case OP_POSPLUSI:
710 case OP_POSQUERYI:
711 case OP_POSUPTOI:
712 case OP_NOTSTAR:
713 case OP_NOTMINSTAR:
714 case OP_NOTPLUS:
715 case OP_NOTMINPLUS:
716 case OP_NOTQUERY:
717 case OP_NOTMINQUERY:
718 case OP_NOTUPTO:
719 case OP_NOTMINUPTO:
720 case OP_NOTEXACT:
721 case OP_NOTPOSSTAR:
722 case OP_NOTPOSPLUS:
723 case OP_NOTPOSQUERY:
724 case OP_NOTPOSUPTO:
725 case OP_NOTSTARI:
726 case OP_NOTMINSTARI:
727 case OP_NOTPLUSI:
728 case OP_NOTMINPLUSI:
729 case OP_NOTQUERYI:
730 case OP_NOTMINQUERYI:
731 case OP_NOTUPTOI:
732 case OP_NOTMINUPTOI:
733 case OP_NOTEXACTI:
734 case OP_NOTPOSSTARI:
735 case OP_NOTPOSPLUSI:
736 case OP_NOTPOSQUERYI:
737 case OP_NOTPOSUPTOI:
738 cc += PRIV(OP_lengths)[*cc];
739 #ifdef SUPPORT_UTF
740 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
741 #endif
742 return cc;
743
744 /* Special cases. */
745 case OP_TYPESTAR:
746 case OP_TYPEMINSTAR:
747 case OP_TYPEPLUS:
748 case OP_TYPEMINPLUS:
749 case OP_TYPEQUERY:
750 case OP_TYPEMINQUERY:
751 case OP_TYPEUPTO:
752 case OP_TYPEMINUPTO:
753 case OP_TYPEEXACT:
754 case OP_TYPEPOSSTAR:
755 case OP_TYPEPOSPLUS:
756 case OP_TYPEPOSQUERY:
757 case OP_TYPEPOSUPTO:
758 return cc + PRIV(OP_lengths)[*cc] - 1;
759
760 case OP_ANYBYTE:
761 #ifdef SUPPORT_UTF
762 if (common->utf) return NULL;
763 #endif
764 return cc + 1;
765
766 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
767 case OP_XCLASS:
768 return cc + GET(cc, 1);
769 #endif
770
771 case OP_MARK:
772 case OP_PRUNE_ARG:
773 case OP_SKIP_ARG:
774 case OP_THEN_ARG:
775 return cc + 1 + 2 + cc[1];
776
777 default:
778 /* All opcodes are supported now! */
779 SLJIT_ASSERT_STOP();
780 return NULL;
781 }
782 }
783
784 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
785 {
786 int count;
787 pcre_uchar *slot;
788
789 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
790 while (cc < ccend)
791 {
792 switch(*cc)
793 {
794 case OP_SET_SOM:
795 common->has_set_som = TRUE;
796 common->might_be_empty = TRUE;
797 cc += 1;
798 break;
799
800 case OP_REF:
801 case OP_REFI:
802 common->optimized_cbracket[GET2(cc, 1)] = 0;
803 cc += 1 + IMM2_SIZE;
804 break;
805
806 case OP_CBRAPOS:
807 case OP_SCBRAPOS:
808 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
809 cc += 1 + LINK_SIZE + IMM2_SIZE;
810 break;
811
812 case OP_COND:
813 case OP_SCOND:
814 /* Only AUTO_CALLOUT can insert this opcode. We do
815 not intend to support this case. */
816 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
817 return FALSE;
818 cc += 1 + LINK_SIZE;
819 break;
820
821 case OP_CREF:
822 common->optimized_cbracket[GET2(cc, 1)] = 0;
823 cc += 1 + IMM2_SIZE;
824 break;
825
826 case OP_DNREF:
827 case OP_DNREFI:
828 case OP_DNCREF:
829 count = GET2(cc, 1 + IMM2_SIZE);
830 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
831 while (count-- > 0)
832 {
833 common->optimized_cbracket[GET2(slot, 0)] = 0;
834 slot += common->name_entry_size;
835 }
836 cc += 1 + 2 * IMM2_SIZE;
837 break;
838
839 case OP_RECURSE:
840 /* Set its value only once. */
841 if (common->recursive_head_ptr == 0)
842 {
843 common->recursive_head_ptr = common->ovector_start;
844 common->ovector_start += sizeof(sljit_sw);
845 }
846 cc += 1 + LINK_SIZE;
847 break;
848
849 case OP_CALLOUT:
850 if (common->capture_last_ptr == 0)
851 {
852 common->capture_last_ptr = common->ovector_start;
853 common->ovector_start += sizeof(sljit_sw);
854 }
855 cc += 2 + 2 * LINK_SIZE;
856 break;
857
858 case OP_THEN_ARG:
859 common->has_then = TRUE;
860 common->control_head_ptr = 1;
861 /* Fall through. */
862
863 case OP_PRUNE_ARG:
864 case OP_MARK:
865 if (common->mark_ptr == 0)
866 {
867 common->mark_ptr = common->ovector_start;
868 common->ovector_start += sizeof(sljit_sw);
869 }
870 cc += 1 + 2 + cc[1];
871 break;
872
873 case OP_THEN:
874 common->has_then = TRUE;
875 common->control_head_ptr = 1;
876 /* Fall through. */
877
878 case OP_PRUNE:
879 case OP_SKIP:
880 cc += 1;
881 break;
882
883 case OP_SKIP_ARG:
884 common->control_head_ptr = 1;
885 common->has_skip_arg = TRUE;
886 cc += 1 + 2 + cc[1];
887 break;
888
889 default:
890 cc = next_opcode(common, cc);
891 if (cc == NULL)
892 return FALSE;
893 break;
894 }
895 }
896 return TRUE;
897 }
898
899 static int get_class_iterator_size(pcre_uchar *cc)
900 {
901 switch(*cc)
902 {
903 case OP_CRSTAR:
904 case OP_CRPLUS:
905 return 2;
906
907 case OP_CRMINSTAR:
908 case OP_CRMINPLUS:
909 case OP_CRQUERY:
910 case OP_CRMINQUERY:
911 return 1;
912
913 case OP_CRRANGE:
914 case OP_CRMINRANGE:
915 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
916 return 0;
917 return 2;
918
919 default:
920 return 0;
921 }
922 }
923
924 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
925 {
926 pcre_uchar *end = bracketend(begin);
927 pcre_uchar *next;
928 pcre_uchar *next_end;
929 pcre_uchar *max_end;
930 pcre_uchar type;
931 sljit_sw length = end - begin;
932 int min, max, i;
933
934 /* Detect fixed iterations first. */
935 if (end[-(1 + LINK_SIZE)] != OP_KET)
936 return FALSE;
937
938 /* Already detected repeat. */
939 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
940 return TRUE;
941
942 next = end;
943 min = 1;
944 while (1)
945 {
946 if (*next != *begin)
947 break;
948 next_end = bracketend(next);
949 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
950 break;
951 next = next_end;
952 min++;
953 }
954
955 if (min == 2)
956 return FALSE;
957
958 max = 0;
959 max_end = next;
960 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
961 {
962 type = *next;
963 while (1)
964 {
965 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
966 break;
967 next_end = bracketend(next + 2 + LINK_SIZE);
968 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
969 break;
970 next = next_end;
971 max++;
972 }
973
974 if (next[0] == type && next[1] == *begin && max >= 1)
975 {
976 next_end = bracketend(next + 1);
977 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
978 {
979 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
980 if (*next_end != OP_KET)
981 break;
982
983 if (i == max)
984 {
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
986 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
987 /* +2 the original and the last. */
988 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
989 if (min == 1)
990 return TRUE;
991 min--;
992 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
993 }
994 }
995 }
996 }
997
998 if (min >= 3)
999 {
1000 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1002 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1003 return TRUE;
1004 }
1005
1006 return FALSE;
1007 }
1008
/* Case label groups for the single character iterators. Each macro expands
to a run of `case` labels and is meant to be placed directly in front of
the statements of a switch branch. The trailing number is the `space`
value which set_private_data_ptrs assigns for the group. */

/* Character iterators, space 1. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
  case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
  case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators, space 2, without a stored count. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: case OP_PLUS: case OP_STARI: case OP_PLUSI: \
  case OP_NOTSTAR: case OP_NOTPLUS: case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators, space 2, with a stored count. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: case OP_MINUPTO: case OP_UPTOI: case OP_MINUPTOI: \
  case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators; same grouping as above. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1060
1061 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1062 {
1063 pcre_uchar *cc = common->start;
1064 pcre_uchar *alternative;
1065 pcre_uchar *end = NULL;
1066 int private_data_ptr = *private_data_start;
1067 int space, size, bracketlen;
1068 BOOL repeat_check = TRUE;
1069
1070 while (cc < ccend)
1071 {
1072 space = 0;
1073 size = 0;
1074 bracketlen = 0;
1075 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1076 break;
1077
1078 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1079 {
1080 if (detect_repeat(common, cc))
1081 {
1082 /* These brackets are converted to repeats, so no global
1083 based single character repeat is allowed. */
1084 if (cc >= end)
1085 end = bracketend(cc);
1086 }
1087 }
1088 repeat_check = TRUE;
1089
1090 switch(*cc)
1091 {
1092 case OP_KET:
1093 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1094 {
1095 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1096 private_data_ptr += sizeof(sljit_sw);
1097 cc += common->private_data_ptrs[cc + 1 - common->start];
1098 }
1099 cc += 1 + LINK_SIZE;
1100 break;
1101
1102 case OP_ASSERT:
1103 case OP_ASSERT_NOT:
1104 case OP_ASSERTBACK:
1105 case OP_ASSERTBACK_NOT:
1106 case OP_ONCE:
1107 case OP_ONCE_NC:
1108 case OP_BRAPOS:
1109 case OP_SBRA:
1110 case OP_SBRAPOS:
1111 case OP_SCOND:
1112 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1113 private_data_ptr += sizeof(sljit_sw);
1114 bracketlen = 1 + LINK_SIZE;
1115 break;
1116
1117 case OP_CBRAPOS:
1118 case OP_SCBRAPOS:
1119 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1120 private_data_ptr += sizeof(sljit_sw);
1121 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1122 break;
1123
1124 case OP_COND:
1125 /* Might be a hidden SCOND. */
1126 alternative = cc + GET(cc, 1);
1127 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1128 {
1129 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1130 private_data_ptr += sizeof(sljit_sw);
1131 }
1132 bracketlen = 1 + LINK_SIZE;
1133 break;
1134
1135 case OP_BRA:
1136 bracketlen = 1 + LINK_SIZE;
1137 break;
1138
1139 case OP_CBRA:
1140 case OP_SCBRA:
1141 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1142 break;
1143
1144 case OP_BRAZERO:
1145 case OP_BRAMINZERO:
1146 case OP_BRAPOSZERO:
1147 repeat_check = FALSE;
1148 size = 1;
1149 break;
1150
1151 CASE_ITERATOR_PRIVATE_DATA_1
1152 space = 1;
1153 size = -2;
1154 break;
1155
1156 CASE_ITERATOR_PRIVATE_DATA_2A
1157 space = 2;
1158 size = -2;
1159 break;
1160
1161 CASE_ITERATOR_PRIVATE_DATA_2B
1162 space = 2;
1163 size = -(2 + IMM2_SIZE);
1164 break;
1165
1166 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1167 space = 1;
1168 size = 1;
1169 break;
1170
1171 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1172 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1173 space = 2;
1174 size = 1;
1175 break;
1176
1177 case OP_TYPEUPTO:
1178 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1179 space = 2;
1180 size = 1 + IMM2_SIZE;
1181 break;
1182
1183 case OP_TYPEMINUPTO:
1184 space = 2;
1185 size = 1 + IMM2_SIZE;
1186 break;
1187
1188 case OP_CLASS:
1189 case OP_NCLASS:
1190 size += 1 + 32 / sizeof(pcre_uchar);
1191 space = get_class_iterator_size(cc + size);
1192 break;
1193
1194 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1195 case OP_XCLASS:
1196 size = GET(cc, 1);
1197 space = get_class_iterator_size(cc + size);
1198 break;
1199 #endif
1200
1201 default:
1202 cc = next_opcode(common, cc);
1203 SLJIT_ASSERT(cc != NULL);
1204 break;
1205 }
1206
1207 /* Character iterators, which are not inside a repeated bracket,
1208 gets a private slot instead of allocating it on the stack. */
1209 if (space > 0 && cc >= end)
1210 {
1211 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1212 private_data_ptr += sizeof(sljit_sw) * space;
1213 }
1214
1215 if (size != 0)
1216 {
1217 if (size < 0)
1218 {
1219 cc += -size;
1220 #ifdef SUPPORT_UTF
1221 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1222 #endif
1223 }
1224 else
1225 cc += size;
1226 }
1227
1228 if (bracketlen > 0)
1229 {
1230 if (cc >= end)
1231 {
1232 end = bracketend(cc);
1233 if (end[-1 - LINK_SIZE] == OP_KET)
1234 end = NULL;
1235 }
1236 cc += bracketlen;
1237 }
1238 }
1239 *private_data_start = private_data_ptr;
1240 }
1241
1242 /* Returns with a frame_types (always < 0) if no need for frame. */
1243 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1244 {
1245 int length = 0;
1246 int possessive = 0;
1247 BOOL stack_restore = FALSE;
1248 BOOL setsom_found = recursive;
1249 BOOL setmark_found = recursive;
1250 /* The last capture is a local variable even for recursions. */
1251 BOOL capture_last_found = FALSE;
1252
1253 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1254 SLJIT_ASSERT(common->control_head_ptr != 0);
1255 *needs_control_head = TRUE;
1256 #else
1257 *needs_control_head = FALSE;
1258 #endif
1259
1260 if (ccend == NULL)
1261 {
1262 ccend = bracketend(cc) - (1 + LINK_SIZE);
1263 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1264 {
1265 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1266 /* This is correct regardless of common->capture_last_ptr. */
1267 capture_last_found = TRUE;
1268 }
1269 cc = next_opcode(common, cc);
1270 }
1271
1272 SLJIT_ASSERT(cc != NULL);
1273 while (cc < ccend)
1274 switch(*cc)
1275 {
1276 case OP_SET_SOM:
1277 SLJIT_ASSERT(common->has_set_som);
1278 stack_restore = TRUE;
1279 if (!setsom_found)
1280 {
1281 length += 2;
1282 setsom_found = TRUE;
1283 }
1284 cc += 1;
1285 break;
1286
1287 case OP_MARK:
1288 case OP_PRUNE_ARG:
1289 case OP_THEN_ARG:
1290 SLJIT_ASSERT(common->mark_ptr != 0);
1291 stack_restore = TRUE;
1292 if (!setmark_found)
1293 {
1294 length += 2;
1295 setmark_found = TRUE;
1296 }
1297 if (common->control_head_ptr != 0)
1298 *needs_control_head = TRUE;
1299 cc += 1 + 2 + cc[1];
1300 break;
1301
1302 case OP_RECURSE:
1303 stack_restore = TRUE;
1304 if (common->has_set_som && !setsom_found)
1305 {
1306 length += 2;
1307 setsom_found = TRUE;
1308 }
1309 if (common->mark_ptr != 0 && !setmark_found)
1310 {
1311 length += 2;
1312 setmark_found = TRUE;
1313 }
1314 if (common->capture_last_ptr != 0 && !capture_last_found)
1315 {
1316 length += 2;
1317 capture_last_found = TRUE;
1318 }
1319 cc += 1 + LINK_SIZE;
1320 break;
1321
1322 case OP_CBRA:
1323 case OP_CBRAPOS:
1324 case OP_SCBRA:
1325 case OP_SCBRAPOS:
1326 stack_restore = TRUE;
1327 if (common->capture_last_ptr != 0 && !capture_last_found)
1328 {
1329 length += 2;
1330 capture_last_found = TRUE;
1331 }
1332 length += 3;
1333 cc += 1 + LINK_SIZE + IMM2_SIZE;
1334 break;
1335
1336 case OP_THEN:
1337 stack_restore = TRUE;
1338 if (common->control_head_ptr != 0)
1339 *needs_control_head = TRUE;
1340 cc ++;
1341 break;
1342
1343 default:
1344 stack_restore = TRUE;
1345 /* Fall through. */
1346
1347 case OP_NOT_WORD_BOUNDARY:
1348 case OP_WORD_BOUNDARY:
1349 case OP_NOT_DIGIT:
1350 case OP_DIGIT:
1351 case OP_NOT_WHITESPACE:
1352 case OP_WHITESPACE:
1353 case OP_NOT_WORDCHAR:
1354 case OP_WORDCHAR:
1355 case OP_ANY:
1356 case OP_ALLANY:
1357 case OP_ANYBYTE:
1358 case OP_NOTPROP:
1359 case OP_PROP:
1360 case OP_ANYNL:
1361 case OP_NOT_HSPACE:
1362 case OP_HSPACE:
1363 case OP_NOT_VSPACE:
1364 case OP_VSPACE:
1365 case OP_EXTUNI:
1366 case OP_EODN:
1367 case OP_EOD:
1368 case OP_CIRC:
1369 case OP_CIRCM:
1370 case OP_DOLL:
1371 case OP_DOLLM:
1372 case OP_CHAR:
1373 case OP_CHARI:
1374 case OP_NOT:
1375 case OP_NOTI:
1376
1377 case OP_EXACT:
1378 case OP_POSSTAR:
1379 case OP_POSPLUS:
1380 case OP_POSQUERY:
1381 case OP_POSUPTO:
1382
1383 case OP_EXACTI:
1384 case OP_POSSTARI:
1385 case OP_POSPLUSI:
1386 case OP_POSQUERYI:
1387 case OP_POSUPTOI:
1388
1389 case OP_NOTEXACT:
1390 case OP_NOTPOSSTAR:
1391 case OP_NOTPOSPLUS:
1392 case OP_NOTPOSQUERY:
1393 case OP_NOTPOSUPTO:
1394
1395 case OP_NOTEXACTI:
1396 case OP_NOTPOSSTARI:
1397 case OP_NOTPOSPLUSI:
1398 case OP_NOTPOSQUERYI:
1399 case OP_NOTPOSUPTOI:
1400
1401 case OP_TYPEEXACT:
1402 case OP_TYPEPOSSTAR:
1403 case OP_TYPEPOSPLUS:
1404 case OP_TYPEPOSQUERY:
1405 case OP_TYPEPOSUPTO:
1406
1407 case OP_CLASS:
1408 case OP_NCLASS:
1409 case OP_XCLASS:
1410 case OP_CALLOUT:
1411
1412 cc = next_opcode(common, cc);
1413 SLJIT_ASSERT(cc != NULL);
1414 break;
1415 }
1416
1417 /* Possessive quantifiers can use a special case. */
1418 if (SLJIT_UNLIKELY(possessive == length))
1419 return stack_restore ? no_frame : no_stack;
1420
1421 if (length > 0)
1422 return length + 1;
1423 return stack_restore ? no_frame : no_stack;
1424 }
1425
1426 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1427 {
1428 DEFINE_COMPILER;
1429 BOOL setsom_found = recursive;
1430 BOOL setmark_found = recursive;
1431 /* The last capture is a local variable even for recursions. */
1432 BOOL capture_last_found = FALSE;
1433 int offset;
1434
1435 /* >= 1 + shortest item size (2) */
1436 SLJIT_UNUSED_ARG(stacktop);
1437 SLJIT_ASSERT(stackpos >= stacktop + 2);
1438
1439 stackpos = STACK(stackpos);
1440 if (ccend == NULL)
1441 {
1442 ccend = bracketend(cc) - (1 + LINK_SIZE);
1443 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1444 cc = next_opcode(common, cc);
1445 }
1446
1447 SLJIT_ASSERT(cc != NULL);
1448 while (cc < ccend)
1449 switch(*cc)
1450 {
1451 case OP_SET_SOM:
1452 SLJIT_ASSERT(common->has_set_som);
1453 if (!setsom_found)
1454 {
1455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1457 stackpos += (int)sizeof(sljit_sw);
1458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1459 stackpos += (int)sizeof(sljit_sw);
1460 setsom_found = TRUE;
1461 }
1462 cc += 1;
1463 break;
1464
1465 case OP_MARK:
1466 case OP_PRUNE_ARG:
1467 case OP_THEN_ARG:
1468 SLJIT_ASSERT(common->mark_ptr != 0);
1469 if (!setmark_found)
1470 {
1471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1473 stackpos += (int)sizeof(sljit_sw);
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1475 stackpos += (int)sizeof(sljit_sw);
1476 setmark_found = TRUE;
1477 }
1478 cc += 1 + 2 + cc[1];
1479 break;
1480
1481 case OP_RECURSE:
1482 if (common->has_set_som && !setsom_found)
1483 {
1484 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1486 stackpos += (int)sizeof(sljit_sw);
1487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1488 stackpos += (int)sizeof(sljit_sw);
1489 setsom_found = TRUE;
1490 }
1491 if (common->mark_ptr != 0 && !setmark_found)
1492 {
1493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1495 stackpos += (int)sizeof(sljit_sw);
1496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1497 stackpos += (int)sizeof(sljit_sw);
1498 setmark_found = TRUE;
1499 }
1500 if (common->capture_last_ptr != 0 && !capture_last_found)
1501 {
1502 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1504 stackpos += (int)sizeof(sljit_sw);
1505 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1506 stackpos += (int)sizeof(sljit_sw);
1507 capture_last_found = TRUE;
1508 }
1509 cc += 1 + LINK_SIZE;
1510 break;
1511
1512 case OP_CBRA:
1513 case OP_CBRAPOS:
1514 case OP_SCBRA:
1515 case OP_SCBRAPOS:
1516 if (common->capture_last_ptr != 0 && !capture_last_found)
1517 {
1518 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1520 stackpos += (int)sizeof(sljit_sw);
1521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1522 stackpos += (int)sizeof(sljit_sw);
1523 capture_last_found = TRUE;
1524 }
1525 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1527 stackpos += (int)sizeof(sljit_sw);
1528 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1529 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1531 stackpos += (int)sizeof(sljit_sw);
1532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1533 stackpos += (int)sizeof(sljit_sw);
1534
1535 cc += 1 + LINK_SIZE + IMM2_SIZE;
1536 break;
1537
1538 default:
1539 cc = next_opcode(common, cc);
1540 SLJIT_ASSERT(cc != NULL);
1541 break;
1542 }
1543
1544 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1545 SLJIT_ASSERT(stackpos == STACK(stacktop));
1546 }
1547
1548 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1549 {
1550 int private_data_length = needs_control_head ? 3 : 2;
1551 int size;
1552 pcre_uchar *alternative;
1553 /* Calculate the sum of the private machine words. */
1554 while (cc < ccend)
1555 {
1556 size = 0;
1557 switch(*cc)
1558 {
1559 case OP_KET:
1560 if (PRIVATE_DATA(cc) != 0)
1561 {
1562 private_data_length++;
1563 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1564 cc += PRIVATE_DATA(cc + 1);
1565 }
1566 cc += 1 + LINK_SIZE;
1567 break;
1568
1569 case OP_ASSERT:
1570 case OP_ASSERT_NOT:
1571 case OP_ASSERTBACK:
1572 case OP_ASSERTBACK_NOT:
1573 case OP_ONCE:
1574 case OP_ONCE_NC:
1575 case OP_BRAPOS:
1576 case OP_SBRA:
1577 case OP_SBRAPOS:
1578 case OP_SCOND:
1579 private_data_length++;
1580 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1581 cc += 1 + LINK_SIZE;
1582 break;
1583
1584 case OP_CBRA:
1585 case OP_SCBRA:
1586 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1587 private_data_length++;
1588 cc += 1 + LINK_SIZE + IMM2_SIZE;
1589 break;
1590
1591 case OP_CBRAPOS:
1592 case OP_SCBRAPOS:
1593 private_data_length += 2;
1594 cc += 1 + LINK_SIZE + IMM2_SIZE;
1595 break;
1596
1597 case OP_COND:
1598 /* Might be a hidden SCOND. */
1599 alternative = cc + GET(cc, 1);
1600 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1601 private_data_length++;
1602 cc += 1 + LINK_SIZE;
1603 break;
1604
1605 CASE_ITERATOR_PRIVATE_DATA_1
1606 if (PRIVATE_DATA(cc))
1607 private_data_length++;
1608 cc += 2;
1609 #ifdef SUPPORT_UTF
1610 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1611 #endif
1612 break;
1613
1614 CASE_ITERATOR_PRIVATE_DATA_2A
1615 if (PRIVATE_DATA(cc))
1616 private_data_length += 2;
1617 cc += 2;
1618 #ifdef SUPPORT_UTF
1619 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1620 #endif
1621 break;
1622
1623 CASE_ITERATOR_PRIVATE_DATA_2B
1624 if (PRIVATE_DATA(cc))
1625 private_data_length += 2;
1626 cc += 2 + IMM2_SIZE;
1627 #ifdef SUPPORT_UTF
1628 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1629 #endif
1630 break;
1631
1632 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1633 if (PRIVATE_DATA(cc))
1634 private_data_length++;
1635 cc += 1;
1636 break;
1637
1638 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1639 if (PRIVATE_DATA(cc))
1640 private_data_length += 2;
1641 cc += 1;
1642 break;
1643
1644 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1645 if (PRIVATE_DATA(cc))
1646 private_data_length += 2;
1647 cc += 1 + IMM2_SIZE;
1648 break;
1649
1650 case OP_CLASS:
1651 case OP_NCLASS:
1652 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1653 case OP_XCLASS:
1654 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1655 #else
1656 size = 1 + 32 / (int)sizeof(pcre_uchar);
1657 #endif
1658 if (PRIVATE_DATA(cc))
1659 private_data_length += get_class_iterator_size(cc + size);
1660 cc += size;
1661 break;
1662
1663 default:
1664 cc = next_opcode(common, cc);
1665 SLJIT_ASSERT(cc != NULL);
1666 break;
1667 }
1668 }
1669 SLJIT_ASSERT(cc == ccend);
1670 return private_data_length;
1671 }
1672
1673 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1674 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1675 {
1676 DEFINE_COMPILER;
1677 int srcw[2];
1678 int count, size;
1679 BOOL tmp1next = TRUE;
1680 BOOL tmp1empty = TRUE;
1681 BOOL tmp2empty = TRUE;
1682 pcre_uchar *alternative;
1683 enum {
1684 start,
1685 loop,
1686 end
1687 } status;
1688
1689 status = save ? start : loop;
1690 stackptr = STACK(stackptr - 2);
1691 stacktop = STACK(stacktop - 1);
1692
1693 if (!save)
1694 {
1695 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1696 if (stackptr < stacktop)
1697 {
1698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1699 stackptr += sizeof(sljit_sw);
1700 tmp1empty = FALSE;
1701 }
1702 if (stackptr < stacktop)
1703 {
1704 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1705 stackptr += sizeof(sljit_sw);
1706 tmp2empty = FALSE;
1707 }
1708 /* The tmp1next must be TRUE in either way. */
1709 }
1710
1711 do
1712 {
1713 count = 0;
1714 switch(status)
1715 {
1716 case start:
1717 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1718 count = 1;
1719 srcw[0] = common->recursive_head_ptr;
1720 if (needs_control_head)
1721 {
1722 SLJIT_ASSERT(common->control_head_ptr != 0);
1723 count = 2;
1724 srcw[1] = common->control_head_ptr;
1725 }
1726 status = loop;
1727 break;
1728
1729 case loop:
1730 if (cc >= ccend)
1731 {
1732 status = end;
1733 break;
1734 }
1735
1736 switch(*cc)
1737 {
1738 case OP_KET:
1739 if (PRIVATE_DATA(cc) != 0)
1740 {
1741 count = 1;
1742 srcw[0] = PRIVATE_DATA(cc);
1743 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1744 cc += PRIVATE_DATA(cc + 1);
1745 }
1746 cc += 1 + LINK_SIZE;
1747 break;
1748
1749 case OP_ASSERT:
1750 case OP_ASSERT_NOT:
1751 case OP_ASSERTBACK:
1752 case OP_ASSERTBACK_NOT:
1753 case OP_ONCE:
1754 case OP_ONCE_NC:
1755 case OP_BRAPOS:
1756 case OP_SBRA:
1757 case OP_SBRAPOS:
1758 case OP_SCOND:
1759 count = 1;
1760 srcw[0] = PRIVATE_DATA(cc);
1761 SLJIT_ASSERT(srcw[0] != 0);
1762 cc += 1 + LINK_SIZE;
1763 break;
1764
1765 case OP_CBRA:
1766 case OP_SCBRA:
1767 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1768 {
1769 count = 1;
1770 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1771 }
1772 cc += 1 + LINK_SIZE + IMM2_SIZE;
1773 break;
1774
1775 case OP_CBRAPOS:
1776 case OP_SCBRAPOS:
1777 count = 2;
1778 srcw[0] = PRIVATE_DATA(cc);
1779 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1780 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1781 cc += 1 + LINK_SIZE + IMM2_SIZE;
1782 break;
1783
1784 case OP_COND:
1785 /* Might be a hidden SCOND. */
1786 alternative = cc + GET(cc, 1);
1787 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1788 {
1789 count = 1;
1790 srcw[0] = PRIVATE_DATA(cc);
1791 SLJIT_ASSERT(srcw[0] != 0);
1792 }
1793 cc += 1 + LINK_SIZE;
1794 break;
1795
1796 CASE_ITERATOR_PRIVATE_DATA_1
1797 if (PRIVATE_DATA(cc))
1798 {
1799 count = 1;
1800 srcw[0] = PRIVATE_DATA(cc);
1801 }
1802 cc += 2;
1803 #ifdef SUPPORT_UTF
1804 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1805 #endif
1806 break;
1807
1808 CASE_ITERATOR_PRIVATE_DATA_2A
1809 if (PRIVATE_DATA(cc))
1810 {
1811 count = 2;
1812 srcw[0] = PRIVATE_DATA(cc);
1813 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1814 }
1815 cc += 2;
1816 #ifdef SUPPORT_UTF
1817 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1818 #endif
1819 break;
1820
1821 CASE_ITERATOR_PRIVATE_DATA_2B
1822 if (PRIVATE_DATA(cc))
1823 {
1824 count = 2;
1825 srcw[0] = PRIVATE_DATA(cc);
1826 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1827 }
1828 cc += 2 + IMM2_SIZE;
1829 #ifdef SUPPORT_UTF
1830 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1831 #endif
1832 break;
1833
1834 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1835 if (PRIVATE_DATA(cc))
1836 {
1837 count = 1;
1838 srcw[0] = PRIVATE_DATA(cc);
1839 }
1840 cc += 1;
1841 break;
1842
1843 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1844 if (PRIVATE_DATA(cc))
1845 {
1846 count = 2;
1847 srcw[0] = PRIVATE_DATA(cc);
1848 srcw[1] = srcw[0] + sizeof(sljit_sw);
1849 }
1850 cc += 1;
1851 break;
1852
1853 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1854 if (PRIVATE_DATA(cc))
1855 {
1856 count = 2;
1857 srcw[0] = PRIVATE_DATA(cc);
1858 srcw[1] = srcw[0] + sizeof(sljit_sw);
1859 }
1860 cc += 1 + IMM2_SIZE;
1861 break;
1862
1863 case OP_CLASS:
1864 case OP_NCLASS:
1865 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1866 case OP_XCLASS:
1867 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1868 #else
1869 size = 1 + 32 / (int)sizeof(pcre_uchar);
1870 #endif
1871 if (PRIVATE_DATA(cc))
1872 switch(get_class_iterator_size(cc + size))
1873 {
1874 case 1:
1875 count = 1;
1876 srcw[0] = PRIVATE_DATA(cc);
1877 break;
1878
1879 case 2:
1880 count = 2;
1881 srcw[0] = PRIVATE_DATA(cc);
1882 srcw[1] = srcw[0] + sizeof(sljit_sw);
1883 break;
1884
1885 default:
1886 SLJIT_ASSERT_STOP();
1887 break;
1888 }
1889 cc += size;
1890 break;
1891
1892 default:
1893 cc = next_opcode(common, cc);
1894 SLJIT_ASSERT(cc != NULL);
1895 break;
1896 }
1897 break;
1898
1899 case end:
1900 SLJIT_ASSERT_STOP();
1901 break;
1902 }
1903
1904 while (count > 0)
1905 {
1906 count--;
1907 if (save)
1908 {
1909 if (tmp1next)
1910 {
1911 if (!tmp1empty)
1912 {
1913 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1914 stackptr += sizeof(sljit_sw);
1915 }
1916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1917 tmp1empty = FALSE;
1918 tmp1next = FALSE;
1919 }
1920 else
1921 {
1922 if (!tmp2empty)
1923 {
1924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1925 stackptr += sizeof(sljit_sw);
1926 }
1927 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1928 tmp2empty = FALSE;
1929 tmp1next = TRUE;
1930 }
1931 }
1932 else
1933 {
1934 if (tmp1next)
1935 {
1936 SLJIT_ASSERT(!tmp1empty);
1937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1938 tmp1empty = stackptr >= stacktop;
1939 if (!tmp1empty)
1940 {
1941 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1942 stackptr += sizeof(sljit_sw);
1943 }
1944 tmp1next = FALSE;
1945 }
1946 else
1947 {
1948 SLJIT_ASSERT(!tmp2empty);
1949 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1950 tmp2empty = stackptr >= stacktop;
1951 if (!tmp2empty)
1952 {
1953 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1954 stackptr += sizeof(sljit_sw);
1955 }
1956 tmp1next = TRUE;
1957 }
1958 }
1959 }
1960 }
1961 while (status != end);
1962
1963 if (save)
1964 {
1965 if (tmp1next)
1966 {
1967 if (!tmp1empty)
1968 {
1969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1970 stackptr += sizeof(sljit_sw);
1971 }
1972 if (!tmp2empty)
1973 {
1974 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1975 stackptr += sizeof(sljit_sw);
1976 }
1977 }
1978 else
1979 {
1980 if (!tmp2empty)
1981 {
1982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1983 stackptr += sizeof(sljit_sw);
1984 }
1985 if (!tmp1empty)
1986 {
1987 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1988 stackptr += sizeof(sljit_sw);
1989 }
1990 }
1991 }
1992 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1993 }
1994
1995 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1996 {
1997 pcre_uchar *end = bracketend(cc);
1998 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1999
2000 /* Assert captures then. */
2001 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2002 current_offset = NULL;
2003 /* Conditional block does not. */
2004 if (*cc == OP_COND || *cc == OP_SCOND)
2005 has_alternatives = FALSE;
2006
2007 cc = next_opcode(common, cc);
2008 if (has_alternatives)
2009 current_offset = common->then_offsets + (cc - common->start);
2010
2011 while (cc < end)
2012 {
2013 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2014 cc = set_then_offsets(common, cc, current_offset);
2015 else
2016 {
2017 if (*cc == OP_ALT && has_alternatives)
2018 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2019 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2020 *current_offset = 1;
2021 cc = next_opcode(common, cc);
2022 }
2023 }
2024
2025 return end;
2026 }
2027
/* The case label groups are not needed beyond this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2034
2035 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2036 {
2037 return (value & (value - 1)) == 0;
2038 }
2039
2040 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2041 {
2042 while (list)
2043 {
2044 /* sljit_set_label is clever enough to do nothing
2045 if either the jump or the label is NULL. */
2046 SET_LABEL(list->jump, label);
2047 list = list->next;
2048 }
2049 }
2050
2051 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2052 {
2053 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2054 if (list_item)
2055 {
2056 list_item->next = *list;
2057 list_item->jump = jump;
2058 *list = list_item;
2059 }
2060 }
2061
2062 static void add_stub(compiler_common *common, struct sljit_jump *start)
2063 {
2064 DEFINE_COMPILER;
2065 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2066
2067 if (list_item)
2068 {
2069 list_item->start = start;
2070 list_item->quit = LABEL();
2071 list_item->next = common->stubs;
2072 common->stubs = list_item;
2073 }
2074 }
2075
2076 static void flush_stubs(compiler_common *common)
2077 {
2078 DEFINE_COMPILER;
2079 stub_list *list_item = common->stubs;
2080
2081 while (list_item)
2082 {
2083 JUMPHERE(list_item->start);
2084 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2085 JUMPTO(SLJIT_JUMP, list_item->quit);
2086 list_item = list_item->next;
2087 }
2088 common->stubs = NULL;
2089 }
2090
2091 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2092 {
2093 DEFINE_COMPILER;
2094 label_addr_list *label_addr;
2095
2096 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2097 if (label_addr == NULL)
2098 return;
2099 label_addr->label = LABEL();
2100 label_addr->update_addr = update_addr;
2101 label_addr->next = common->label_addrs;
2102 common->label_addrs = label_addr;
2103 }
2104
2105 static SLJIT_INLINE void count_match(compiler_common *common)
2106 {
2107 DEFINE_COMPILER;
2108
2109 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2110 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2111 }
2112
2113 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2114 {
2115 /* May destroy all locals and registers except TMP2. */
2116 DEFINE_COMPILER;
2117
2118 SLJIT_ASSERT(size > 0);
2119 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2120 #ifdef DESTROY_REGISTERS
2121 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2122 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2123 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2124 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2126 #endif
2127 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2128 }
2129
2130 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2131 {
2132 DEFINE_COMPILER;
2133
2134 SLJIT_ASSERT(size > 0);
2135 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2136 }
2137
2138 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2139 {
2140 DEFINE_COMPILER;
2141 sljit_uw *result;
2142
2143 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2144 return NULL;
2145
2146 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2147 if (SLJIT_UNLIKELY(result == NULL))
2148 {
2149 sljit_set_compiler_memory_error(compiler);
2150 return NULL;
2151 }
2152
2153 *(void**)result = common->read_only_data_head;
2154 common->read_only_data_head = (void *)result;
2155 return result + 1;
2156 }
2157
2158 static void free_read_only_data(void *current, void *allocator_data)
2159 {
2160 void *next;
2161
2162 SLJIT_UNUSED_ARG(allocator_data);
2163
2164 while (current != NULL)
2165 {
2166 next = *(void**)current;
2167 SLJIT_FREE(current, allocator_data);
2168 current = next;
2169 }
2170 }
2171
2172 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2173 {
2174 DEFINE_COMPILER;
2175 struct sljit_label *loop;
2176 int i;
2177
2178 /* At this point we can freely use all temporary registers. */
2179 SLJIT_ASSERT(length > 1);
2180 /* TMP1 returns with begin - 1. */
2181 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2182 if (length < 8)
2183 {
2184 for (i = 1; i < length; i++)
2185 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2186 }
2187 else
2188 {
2189 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2190 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2191 loop = LABEL();
2192 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2194 JUMPTO(SLJIT_NOT_ZERO, loop);
2195 }
2196 }
2197
2198 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2199 {
2200 DEFINE_COMPILER;
2201 struct sljit_label *loop;
2202 int i;
2203
2204 SLJIT_ASSERT(length > 1);
2205 /* OVECTOR(1) contains the "string begin - 1" constant. */
2206 if (length > 2)
2207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2208 if (length < 8)
2209 {
2210 for (i = 2; i < length; i++)
2211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2212 }
2213 else
2214 {
2215 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2216 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2217 loop = LABEL();
2218 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2219 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2220 JUMPTO(SLJIT_NOT_ZERO, loop);
2221 }
2222
2223 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2224 if (common->mark_ptr != 0)
2225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2226 if (common->control_head_ptr != 0)
2227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2228 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2229 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2230 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2231 }
2232
2233 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2234 {
2235 while (current != NULL)
2236 {
2237 switch (current[-2])
2238 {
2239 case type_then_trap:
2240 break;
2241
2242 case type_mark:
2243 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2244 return current[-4];
2245 break;
2246
2247 default:
2248 SLJIT_ASSERT_STOP();
2249 break;
2250 }
2251 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2252 current = (sljit_sw*)current[-1];
2253 }
2254 return -1;
2255 }
2256
/* Emits the code that exports the result of a match:
   - STR_PTR is stored into OVECTOR(1); the previous OVECTOR(1) value is kept in S2.
   - If the pattern has a mark slot, its value is copied to jit_arguments.mark_ptr.
   - offset_count words of the local ovector (starting at OVECTOR_START) are
     converted to offsets relative to jit_arguments.begin and written to
     jit_arguments.offsets as ints.
   - The return value is left in SLJIT_RETURN_REG.
   topbracket is only used when the return value is computed. */
2257 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2258 {
2259 DEFINE_COMPILER;
2260 struct sljit_label *loop;
2261 struct sljit_jump *early_quit;
2262
2263 /* At this point we can freely use all registers. */
/* S2 = old OVECTOR(1); it is compared against capture slots further below. */
2264 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2266
/* R0 = jit_arguments, R1 = offset_count (loop counter). R2 is used briefly
   to carry the mark value, then becomes the output cursor. */
2267 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2268 if (common->mark_ptr != 0)
2269 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2270 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2271 if (common->mark_ptr != 0)
2272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 = offsets - sizeof(int): the store in the loop addresses R2 + sizeof(int).
   NOTE(review): this relies on SLJIT_MOVU_SI updating R2 on every store - confirm
   against the sljit documentation. */
2273 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* R0 is reused for the subject start, S0 is the read cursor in the local ovector. */
2274 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2275 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2276 /* Unlikely, but possible */
2277 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2278 loop = LABEL();
/* S1 = (pointer stored in the ovector) - begin, i.e. a byte offset. */
2279 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2280 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2281 /* Copy the integer value to the output buffer */
2282 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte offset to a code unit offset. */
2283 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2284 #endif
2285 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2286 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2287 JUMPTO(SLJIT_NOT_ZERO, loop);
2288 JUMPHERE(early_quit);
2289
2290 /* Calculate the return value, which is the maximum ovector value. */
2291 if (topbracket > 1)
2292 {
/* R0 starts two words past the first slot of bracket (topbracket - 1), R1 at
   topbracket + 1. Each iteration loads the word two slots below R0 and
   decrements R1; the loop repeats while the loaded word equals S2.
   NOTE(review): the scan only walks downwards if SLJIT_MOVU moves R0 back by
   two words per load - confirm against the sljit documentation. */
2293 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2294 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2295
2296 /* OVECTOR(0) is never equal to SLJIT_S2. */
2297 loop = LABEL();
2298 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2299 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2300 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2301 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2302 }
2303 else
/* With no capturing bracket to scan the return value is always 1. */
2304 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2305 }
2306
/* Emits the code that returns PCRE_ERROR_PARTIAL. The return register is set
   first; if real_offset_count is below 2 nothing is written and control goes
   straight to quit. Otherwise offsets[0] and offsets[1] are filled (and
   offsets[2] when real_offset_count is at least 3) with values relative to
   jit_arguments.begin, then the code jumps to quit.
   The local slots that are read depend on the partial matching mode. */
2307 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2308 {
2309 DEFINE_COMPILER;
2310 struct sljit_jump *jump;
2311
/* STR_END is overwritten below through its S1 alias, hence the compile time check. */
2312 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2313 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2314 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2315
2316 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2317 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2318 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Not enough room for a begin/end pair: return without storing anything. */
2319 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2320
2321 /* Store match begin and end. */
/* S0 = subject begin, R1 = output vector. */
2322 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2323 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2324
/* offsets[2] is written only when at least three ints are available. Its value
   comes from start_ptr in hard mode, or from the word following hit_start in
   soft mode. */
2325 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2326 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2327 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2328 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2329 #endif
2330 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2331 JUMPHERE(jump);
2332
/* R2 = start of the partial match: start_used_ptr in hard mode, hit_start in
   soft mode. */
2333 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
/* offsets[1] = STR_END - begin (byte distance, shifted to code units for 16/32 bit). */
2334 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2335 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2336 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2337 #endif
2338 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2339
/* offsets[0] = R2 - begin. */
2340 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2341 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2342 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2343 #endif
2344 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2345
2346 JUMPTO(SLJIT_JUMP, quit);
2347 }
2348
2349 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2350 {
2351 /* May destroy TMP1. */
2352 DEFINE_COMPILER;
2353 struct sljit_jump *jump;
2354
2355 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2356 {
2357 /* The value of -1 must be kept for start_used_ptr! */
2358 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2359 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2360 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2361 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 JUMPHERE(jump);
2364 }
2365 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2366 {
2367 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2369 JUMPHERE(jump);
2370 }
2371 }
2372
2373 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2374 {
2375 /* Detects if the character has an othercase. */
2376 unsigned int c;
2377
2378 #ifdef SUPPORT_UTF
2379 if (common->utf)
2380 {
2381 GETCHAR(c, cc);
2382 if (c > 127)
2383 {
2384 #ifdef SUPPORT_UCP
2385 return c != UCD_OTHERCASE(c);
2386 #else
2387 return FALSE;
2388 #endif
2389 }
2390 #ifndef COMPILE_PCRE8
2391 return common->fcc[c] != c;
2392 #endif
2393 }
2394 else
2395 #endif
2396 c = *cc;
2397 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2398 }
2399
2400 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2401 {
2402 /* Returns with the othercase. */
2403 #ifdef SUPPORT_UTF
2404 if (common->utf && c > 127)
2405 {
2406 #ifdef SUPPORT_UCP
2407 return UCD_OTHERCASE(c);
2408 #else
2409 return c;
2410 #endif
2411 }
2412 #endif
2413 return TABLE_GET(c, common->fcc, c);
2414 }
2415
2416 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2417 {
2418 /* Detects if the character and its othercase has only 1 bit difference. */
2419 unsigned int c, oc, bit;
2420 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2421 int n;
2422 #endif
2423
2424 #ifdef SUPPORT_UTF
2425 if (common->utf)
2426 {
2427 GETCHAR(c, cc);
2428 if (c <= 127)
2429 oc = common->fcc[c];
2430 else
2431 {
2432 #ifdef SUPPORT_UCP
2433 oc = UCD_OTHERCASE(c);
2434 #else
2435 oc = c;
2436 #endif
2437 }
2438 }
2439 else
2440 {
2441 c = *cc;
2442 oc = TABLE_GET(c, common->fcc, c);
2443 }
2444 #else
2445 c = *cc;
2446 oc = TABLE_GET(c, common->fcc, c);
2447 #endif
2448
2449 SLJIT_ASSERT(c != oc);
2450
2451 bit = c ^ oc;
2452 /* Optimized for English alphabet. */
2453 if (c <= 127 && bit == 0x20)
2454 return (0 << 8) | 0x20;
2455
2456 /* Since c != oc, they must have at least 1 bit difference. */
2457 if (!is_powerof2(bit))
2458 return 0;
2459
2460 #if defined COMPILE_PCRE8
2461
2462 #ifdef SUPPORT_UTF
2463 if (common->utf && c > 127)
2464 {
2465 n = GET_EXTRALEN(*cc);
2466 while ((bit & 0x3f) == 0)
2467 {
2468 n--;
2469 bit >>= 6;
2470 }
2471 return (n << 8) | bit;
2472 }
2473 #endif /* SUPPORT_UTF */
2474 return (0 << 8) | bit;
2475
2476 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2477
2478 #ifdef SUPPORT_UTF
2479 if (common->utf && c > 65535)
2480 {
2481 if (bit >= (1 << 10))
2482 bit >>= 10;
2483 else
2484 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2485 }
2486 #endif /* SUPPORT_UTF */
2487 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2488
2489 #endif /* COMPILE_PCRE[8|16|32] */
2490 }
2491
2492 static void check_partial(compiler_common *common, BOOL force)
2493 {
2494 /* Checks whether a partial matching is occurred. Does not modify registers. */
2495 DEFINE_COMPILER;
2496 struct sljit_jump *jump = NULL;
2497
2498 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2499
2500 if (common->mode == JIT_COMPILE)
2501 return;
2502
2503 if (!force)
2504 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2505 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2506 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2507
2508 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2510 else
2511 {
2512 if (common->partialmatchlabel != NULL)
2513 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2514 else
2515 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2516 }
2517
2518 if (jump != NULL)
2519 JUMPHERE(jump);
2520 }
2521
2522 static void check_str_end(compiler_common *common, jump_list **end_reached)
2523 {
2524 /* Does not affect registers. Usually used in a tight spot. */
2525 DEFINE_COMPILER;
2526 struct sljit_jump *jump;
2527
2528 if (common->mode == JIT_COMPILE)
2529 {
2530 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2531 return;
2532 }
2533
2534 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2535 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2536 {
2537 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2539 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2540 }
2541 else
2542 {
2543 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2544 if (common->partialmatchlabel != NULL)
2545 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2546 else
2547 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2548 }
2549 JUMPHERE(jump);
2550 }
2551
2552 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2553 {
2554 DEFINE_COMPILER;
2555 struct sljit_jump *jump;
2556
2557 if (common->mode == JIT_COMPILE)
2558 {
2559 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2560 return;
2561 }
2562
2563 /* Partial matching mode. */
2564 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2565 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2566 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2567 {
2568 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2569 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2570 }
2571 else
2572 {
2573 if (common->partialmatchlabel != NULL)
2574 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2575 else
2576 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2577 }
2578 JUMPHERE(jump);
2579 }
2580
2581 static void peek_char(compiler_common *common, pcre_uint32 max)
2582 {
2583 /* Reads the character into TMP1, keeps STR_PTR.
2584 Does not check STR_END. TMP2 Destroyed. */
2585 DEFINE_COMPILER;
2586 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2587 struct sljit_jump *jump;
2588 #endif
2589
2590 SLJIT_UNUSED_ARG(max);
2591
2592 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2593 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2594 if (common->utf)
2595 {
2596 if (max < 128) return;
2597
2598 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2599 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2601 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2602 JUMPHERE(jump);
2603 }
2604 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2605
2606 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2607 if (common->utf)
2608 {
2609 if (max < 0xd800) return;
2610
2611 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2612 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2613 /* TMP2 contains the high surrogate. */
2614 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2615 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2616 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2617 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2618 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2619 JUMPHERE(jump);
2620 }
2621 #endif
2622 }
2623
2624 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2625
2626 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2627 {
2628 /* Tells whether the character codes below 128 are enough
2629 to determine a match. */
2630 const sljit_ub value = nclass ? 0xff : 0;
2631 const sljit_ub *end = bitset + 32;
2632
2633 bitset += 16;
2634 do
2635 {
2636 if (*bitset++ != value)
2637 return FALSE;
2638 }
2639 while (bitset < end);
2640 return TRUE;
2641 }
2642
2643 static void read_char7_type(compiler_common *common, BOOL full_read)
2644 {
2645 /* Reads the precise character type of a character into TMP1, if the character
2646 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2647 full_read argument tells whether characters above max are accepted or not. */
2648 DEFINE_COMPILER;
2649 struct sljit_jump *jump;
2650
2651 SLJIT_ASSERT(common->utf);
2652
2653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2654 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2655
2656 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2657
2658 if (full_read)
2659 {
2660 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2661 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2663 JUMPHERE(jump);
2664 }
2665 }
2666
2667 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2668
/* Emits code which reads the character at STR_PTR into TMP1 and advances
   STR_PTR. Only characters in [min, max] have to be decoded precisely; the
   emitted code is specialised on these compile time bounds.
   update_str_ptr: when set, STR_PTR must end up after the whole character even
   if the character is outside the range; when clear, STR_PTR is only
   guaranteed to be correct for characters inside the range.
   TMP2 is used as scratch; RETURN_ADDR is used as scratch on some UTF-8 paths. */
2669 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2670 {
2671 /* Reads the precise value of a character into TMP1, if the character is
2672 between min and max (c >= min && c <= max). Otherwise it returns with a value
2673 outside the range. Does not check STR_END. */
2674 DEFINE_COMPILER;
2675 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2676 struct sljit_jump *jump;
2677 #endif
2678 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2679 struct sljit_jump *jump2;
2680 #endif
2681
2682 SLJIT_UNUSED_ARG(update_str_ptr);
2683 SLJIT_UNUSED_ARG(min);
2684 SLJIT_UNUSED_ARG(max);
2685 SLJIT_ASSERT(min <= max);
2686
/* Load the first code unit and step over it. All offsets used below are
   therefore relative to the second code unit. */
2687 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2688 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2689
2690 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2691 if (common->utf)
2692 {
/* Only single byte characters are of interest and STR_PTR need not be exact. */
2693 if (max < 128 && !update_str_ptr) return;
2694
/* First bytes below 0xc0 need no further decoding. */
2695 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2696 if (min >= 0x10000)
2697 {
/* Only four byte sequences can be in range. TMP2 = first byte - 0xf0; a value
   above 7 means another lead byte, in which case TMP1 is left holding the
   second byte. With update_str_ptr the extra length is fetched from
   utf8_table4 before TMP1 is overwritten and added to STR_PTR at the end. */
2698 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2699 if (update_str_ptr)
2700 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2701 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2702 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2703 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2704 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2705 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2706 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2707 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2708 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2710 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2711 if (!update_str_ptr)
2712 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2713 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2714 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2715 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2716 JUMPHERE(jump2);
2717 if (update_str_ptr)
2718 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2719 }
2720 else if (min >= 0x800 && max <= 0xffff)
2721 {
/* Only three byte sequences can be in range. Same scheme as above with lead
   bytes 0xe0..0xef. */
2722 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2723 if (update_str_ptr)
2724 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2725 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2726 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2727 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2728 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2729 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2730 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2731 if (!update_str_ptr)
2732 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2733 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2734 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2735 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2736 JUMPHERE(jump2);
2737 if (update_str_ptr)
2738 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2739 }
2740 else if (max >= 0x800)
/* General case: call the shared decoder. The cheaper utfreadchar16 variant is
   chosen when nothing above 0xffff is of interest. */
2741 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2742 else if (max < 128)
2743 {
/* Reached only with update_str_ptr set (see the early return above): just
   skip the rest of the sequence. */
2744 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2746 }
2747 else
2748 {
/* 128 <= max < 0x800: decode as a two byte sequence. */
2749 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2750 if (!update_str_ptr)
2751 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2752 else
2753 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2754 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2755 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2756 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2757 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2758 if (update_str_ptr)
2759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2760 }
2761 JUMPHERE(jump);
2762 }
2763 #endif
2764
2765 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2766 if (common->utf)
2767 {
2768 if (max >= 0x10000)
2769 {
/* Characters above 0xffff are of interest: decode surrogate pairs.
   STR_PTR already points to the low surrogate here. */
2770 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2771 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2772 /* TMP2 contains the high surrogate. */
2773 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2774 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2775 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2776 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2777 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2778 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2779 JUMPHERE(jump);
2780 return;
2781 }
2782
/* Nothing else to do when surrogates are out of range and STR_PTR need not
   be exact. */
2783 if (max < 0xd800 && !update_str_ptr) return;
2784
2785 /* Skip low surrogate if necessary. */
2786 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2787 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2788 if (update_str_ptr)
2789 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* A high surrogate could be mistaken for an in-range value when
   max >= 0xd800, so it is replaced by 0x10000, which is above max here. */
2790 if (max >= 0xd800)
2791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2792 JUMPHERE(jump);
2793 }
2794 #endif
2795 }
2796
2797 static SLJIT_INLINE void read_char(compiler_common *common)
2798 {
2799 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2800 }
2801
/* Emits code which loads the ctypes entry of the current character into TMP1
   and advances STR_PTR. The ctypes table is only indexed with values up to
   255; larger characters get type 0. TMP2 is used as scratch.
   update_str_ptr: when set, STR_PTR is moved past the whole character even if
   it needs more than one code unit (UTF-8: via the utfreadtype8 helper,
   UTF-16: by skipping the low surrogate). */
2802 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2803 {
2804 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2805 DEFINE_COMPILER;
2806 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2807 struct sljit_jump *jump;
2808 #endif
2809 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2810 struct sljit_jump *jump2;
2811 #endif
2812
2813 SLJIT_UNUSED_ARG(update_str_ptr);
2814
/* TMP2 = first code unit; STR_PTR now points to the following one. */
2815 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2816 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2817
2818 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2819 if (common->utf)
2820 {
2821 /* This can be an extra read in some situations, but hopefully
2822 it is needed in most cases. */
2823 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* First bytes below 0xc0: the type loaded above is the result. */
2824 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2825 if (!update_str_ptr)
2826 {
/* Inline decoding of exactly two bytes. The combined value is used as a table
   index only when it is at most 255; otherwise TMP1 stays 0. */
2827 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2828 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2829 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2830 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2831 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2832 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2833 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2834 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2835 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2836 JUMPHERE(jump2);
2837 }
2838 else
/* The shared helper also skips the remaining bytes of longer sequences. */
2839 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2840 JUMPHERE(jump);
2841 return;
2842 }
2843 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2844
2845 #if !defined COMPILE_PCRE8
2846 /* The ctypes array contains only 256 values. */
2847 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2848 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2849 #endif
2850 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2851 #if !defined COMPILE_PCRE8
2852 JUMPHERE(jump);
2853 #endif
2854
2855 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2856 if (common->utf && update_str_ptr)
2857 {
2858 /* Skip low surrogate if necessary. */
/* TMP2 still holds the first code unit; it is a high surrogate when
   (TMP2 - 0xd800) is at most 0x3ff. */
2859 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2860 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2861 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2862 JUMPHERE(jump);
2863 }
2864 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2865 }
2866
2867 static void skip_char_back(compiler_common *common)
2868 {
2869 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2870 DEFINE_COMPILER;
2871 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2872 #if defined COMPILE_PCRE8
2873 struct sljit_label *label;
2874
2875 if (common->utf)
2876 {
2877 label = LABEL();
2878 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2879 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2880 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2881 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2882 return;
2883 }
2884 #elif defined COMPILE_PCRE16
2885 if (common->utf)
2886 {
2887 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2888 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2889 /* Skip low surrogate if necessary. */
2890 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2891 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2892 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2893 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2894 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2895 return;
2896 }
2897 #endif /* COMPILE_PCRE[8|16] */
2898 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2899 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 }
2901
2902 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2903 {
2904 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2905 DEFINE_COMPILER;
2906 struct sljit_jump *jump;
2907
2908 if (nltype == NLTYPE_ANY)
2909 {
2910 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2911 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2912 }
2913 else if (nltype == NLTYPE_ANYCRLF)
2914 {
2915 if (jumpifmatch)
2916 {
2917 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2918 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2919 }
2920 else
2921 {
2922 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2923 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2924 JUMPHERE(jump);
2925 }
2926 }
2927 else
2928 {
2929 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2930 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2931 }
2932 }
2933
2934 #ifdef SUPPORT_UTF
2935
2936 #if defined COMPILE_PCRE8
/* Emits the shared UTF-8 decoder (entered with a fast call, return address
   kept in RETURN_ADDR).
   On entry: TMP1 = first byte (>= 0xc0), STR_PTR points to the second byte.
   On exit:  TMP1 = decoded value, TMP2 = sequence length (IN_UCHARS(2..4)),
             STR_PTR points after the sequence.
   The length is detected from the lead byte bits which are still present in
   the partially assembled value. */
2937 static void do_utfreadchar(compiler_common *common)
2938 {
2939 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2940 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2941 DEFINE_COMPILER;
2942 struct sljit_jump *jump;
2943
2944 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (second byte & 0x3f). */
2945 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2946 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2947 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2948 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2949 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2950
2951 /* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte after the shift: clear for two byte
   sequences. */
2952 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2953 jump = JUMP(SLJIT_NOT_ZERO);
2954 /* Two byte sequence. */
2955 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2956 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2957 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2958
2959 JUMPHERE(jump);
/* Remove the marker bit and append the payload of the third byte. */
2960 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2961 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2962 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2963 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2964 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2965
/* 0x10000 is bit 0x10 of the first byte after two shifts: clear for three
   byte sequences. */
2966 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2967 jump = JUMP(SLJIT_NOT_ZERO);
2968 /* Three byte sequence. */
2969 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2970 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2971 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2972
2973 /* Four byte sequence. */
2974 JUMPHERE(jump);
2975 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2976 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2977 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2979 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2980 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2981 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2982 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2983 }
2984
/* Emits a reduced UTF-8 decoder (entered with a fast call, return address kept
   in RETURN_ADDR). Unlike do_utfreadchar it does not return a length in TMP2
   and has no separate four byte path.
   On entry: TMP1 = first byte (>= 0xc0), STR_PTR points to the second byte.
   On exit:  TMP1 = decoded value for two and three byte sequences; STR_PTR has
             been advanced by 1, 2 or 3 bytes. */
2985 static void do_utfreadchar16(compiler_common *common)
2986 {
2987 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2988 of the character (>= 0xc0). Return value in TMP1. */
2989 DEFINE_COMPILER;
2990 struct sljit_jump *jump;
2991
2992 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (second byte & 0x3f). */
2993 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2994 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2995 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2996 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2997 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2998
2999 /* Searching for the first zero. */
3000 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3001 jump = JUMP(SLJIT_NOT_ZERO);
3002 /* Two byte sequence. */
3003 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3004 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3005
3006 JUMPHERE(jump);
/* 0x400 is bit 0x10 of the first byte, i.e. set for four byte lead bytes. In
   that case STR_PTR is moved one extra byte forward. The bit is not cleared,
   so after the shift below the result has bit 0x10000 set and is therefore
   at least 0x10000. */
3007 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3008 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3009 /* This code runs only in 8 bit mode. No need to shift the value. */
3010 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3011 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3012 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3013 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3014 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3015 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3016 /* Three byte sequence. */
3017 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3018 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3019 }
3020
/* Emits the shared helper which fetches the ctypes entry of a multi byte UTF-8
   character (entered with a fast call, return address kept in RETURN_ADDR).
   On entry: TMP2 = first byte (>= 0xc0), STR_PTR points to the second byte.
   On exit:  TMP1 = ctypes entry when the character is below 256, 0 otherwise;
             STR_PTR points after the sequence. TMP2 is destroyed. */
3021 static void do_utfreadtype8(compiler_common *common)
3022 {
3023 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3024 of the character (>= 0xc0). Return value in TMP1. */
3025 DEFINE_COMPILER;
3026 struct sljit_jump *jump;
3027 struct sljit_jump *compare;
3028
3029 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3030
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
3031 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3032 jump = JUMP(SLJIT_NOT_ZERO);
3033 /* Two byte sequence. */
3034 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3035 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3036 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3037 /* The upper 5 bits are known at this point. */
/* A value above 3 would give a character of 256 or more after the shift. */
3038 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3039 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3040 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3041 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3042 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3043 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3044
/* Two byte character which is not below 256: no type information. */
3045 JUMPHERE(compare);
3046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3047 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3048
3049 /* We only have types for characters less than 256. */
/* Longer sequences: skip the remaining bytes using the length stored in
   utf8_table4 and return type 0. */
3050 JUMPHERE(jump);
3051 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3052 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3053 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3054 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3055 }
3056
3057 #endif /* COMPILE_PCRE8 */
3058
3059 #endif /* SUPPORT_UTF */
3060
3061 #ifdef SUPPORT_UCP
3062
3063 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift which split a character code into the stage1 block number
   (upper bits) and the position inside that block (lower 7 bits). */
3064 #define UCD_BLOCK_MASK 127
3065 #define UCD_BLOCK_SHIFT 7
3066
/* Emits the shared helper which performs the two stage UCD table lookup
   (entered with a fast call, return address kept in RETURN_ADDR).
   On entry: TMP1 = character code.
   On exit:  TMP1 = chartype field of the character's ucd_record,
             TMP2 = the value read from ucd_stage2 (used as the record index). */
3067 static void do_getucd(compiler_common *common)
3068 {
3069 /* Search the UCD record for the character comes in TMP1.
3070 Returns chartype in TMP1 and UCD offset in TMP2. */
3071 DEFINE_COMPILER;
3072
/* The shifts used below depend on these sizes. */
3073 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3074
3075 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[c >> 7], the block selected for this character. */
3076 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3077 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = block * 128 + (c & 127), the index into ucd_stage2. */
3078 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3079 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3080 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Unsigned half word load from ucd_stage2.
   NOTE(review): the trailing 1 is presumably the index shift of SLJIT_MEM2
   (2 byte entries) - confirm against the sljit documentation. */
3081 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3082 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Byte load of the chartype field; the shift of 3 matches the asserted
   8 byte record size. */
3083 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3084 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3085 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3086 }
3087 #endif
3088
3089 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3090 {
3091 DEFINE_COMPILER;
3092 struct sljit_label *mainloop;
3093 struct sljit_label *newlinelabel = NULL;
3094 struct sljit_jump *start;
3095 struct sljit_jump *end = NULL;
3096 struct sljit_jump *nl = NULL;
3097 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3098 struct sljit_jump *singlechar;
3099 #endif
3100 jump_list *newline = NULL;
3101 BOOL newlinecheck = FALSE;
3102 BOOL readuchar = FALSE;
3103
3104 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3105 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3106 newlinecheck = TRUE;
3107
3108 if (firstline)
3109 {
3110 /* Search for the end of the first line. */
3111 SLJIT_ASSERT(common->first_line_end != 0);
3112 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3113
3114 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3115 {
3116 mainloop = LABEL();
3117 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3118 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3119 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3120 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3121 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3122 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3123 JUMPHERE(end);
3124 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3125 }
3126 else
3127 {
3128 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3129 mainloop = LABEL();
3130 /* Continual stores does not cause data dependency. */
3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3132 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3133 check_newlinechar(common, common->nltype, &newline, TRUE);
3134 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3135 JUMPHERE(end);
3136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3137 set_jumps(newline, LABEL());
3138 }
3139
3140 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3141 }
3142
3143 start = JUMP(SLJIT_JUMP);
3144
3145 if (newlinecheck)
3146 {
3147 newlinelabel = LABEL();
3148 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3149 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3150 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3152 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3153 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3154 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3155 #endif
3156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3157 nl = JUMP(SLJIT_JUMP);
3158 }
3159
3160 mainloop = LABEL();
3161
3162 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3163 #ifdef SUPPORT_UTF
3164 if (common->utf) readuchar = TRUE;
3165 #endif
3166 if (newlinecheck) readuchar = TRUE;
3167
3168 if (readuchar)
3169 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3170
3171 if (newlinecheck)
3172 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3173
3174 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3175 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3176 #if defined COMPILE_PCRE8
3177 if (common->utf)
3178 {
3179 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3180 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3181 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3182 JUMPHERE(singlechar);
3183 }
3184 #elif defined COMPILE_PCRE16
3185 if (common->utf)
3186 {
3187 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3188 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3190 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3191 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3192 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3193 JUMPHERE(singlechar);
3194 }
3195 #endif /* COMPILE_PCRE[8|16] */
3196 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3197 JUMPHERE(start);
3198
3199 if (newlinecheck)
3200 {
3201 JUMPHERE(end);
3202 JUMPHERE(nl);
3203 }
3204
3205 return mainloop;
3206 }
3207
3208 #define MAX_N_CHARS 16
3209 #define MAX_N_BYTES 8
3210
3211 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3212 {
3213 pcre_uint8 len = bytes[0];
3214 int i;
3215
3216 if (len == 255)
3217 return;
3218
3219 if (len == 0)
3220 {
3221 bytes[0] = 1;
3222 bytes[1] = byte;
3223 return;
3224 }
3225
3226 for (i = len; i > 0; i--)
3227 if (bytes[i] == byte)
3228 return;
3229
3230 if (len >= MAX_N_BYTES - 1)
3231 {
3232 bytes[0] = 255;
3233 return;
3234 }
3235
3236 len++;
3237 bytes[len] = byte;
3238 bytes[0] = len;
3239 }
3240
/* scan_prefix: walks the compiled pattern starting at "cc" and records which
   code units can appear at each of the first "max_chars" subject positions.

   chars     - two entries per position. For a literal position chars[0] is
               the character value with all "may differ" bits set and
               chars[1] is the mask of those bits. A position that accepts
               anything gets both entries set to an all-ones mask. The
               non-"any" path treats chars[0] == NOTACHAR as "not written
               yet" and merges into any other value already present.
   bytes     - MAX_N_BYTES entries per position, maintained through
               add_prefix_byte(); bytes[0] is set to 255 for an "any"
               position.
   max_chars - number of positions still available.
   rec_count - shared work budget, decremented once per loop iteration.

   Returns the number of positions consumed by this invocation. When the
   budget in *rec_count is exhausted the function returns 0 instead. The
   recursive calls below assign the returned value to max_chars, so the
   result of scanning a sub-path caps how many positions the remaining
   paths may fill. */
3241 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3242 {
3243 /* Recursive function, which scans prefix literals. */
3244 BOOL last, any, caseless;
3245 int len, repeat, len_save, consumed = 0;
3246 pcre_uint32 chr, mask;
3247 pcre_uchar *alternative, *cc_save, *oc;
3248 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3249 pcre_uchar othercase[8];
3250 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3251 pcre_uchar othercase[2];
3252 #else
3253 pcre_uchar othercase[1];
3254 #endif
3255
3256 repeat = 1;
3257 while (TRUE)
3258 {
/* Every iteration (including those that only skip an opcode) costs one
   unit of the shared budget. */
3259 if (*rec_count == 0)
3260 return 0;
3261 (*rec_count)--;
3262
/* Per-item flags: "last" stops the scan after this item, "any" marks a
   position that is not a single literal, "caseless" requests other-case
   handling for a literal. */
3263 last = TRUE;
3264 any = FALSE;
3265 caseless = FALSE;
3266
3267 switch (*cc)
3268 {
3269 case OP_CHARI:
3270 caseless = TRUE;
/* Fall through. */
3271 case OP_CHAR:
3272 last = FALSE;
3273 cc++;
3274 break;
3275
3276 case OP_SOD:
3277 case OP_SOM:
3278 case OP_SET_SOM:
3279 case OP_NOT_WORD_BOUNDARY:
3280 case OP_WORD_BOUNDARY:
3281 case OP_EODN:
3282 case OP_EOD:
3283 case OP_CIRC:
3284 case OP_CIRCM:
3285 case OP_DOLL:
3286 case OP_DOLLM:
3287 /* Zero width assertions. */
3288 cc++;
3289 continue;
3290
/* Assertion groups are stepped over as a whole via bracketend(). */
3291 case OP_ASSERT:
3292 case OP_ASSERT_NOT:
3293 case OP_ASSERTBACK:
3294 case OP_ASSERTBACK_NOT:
3295 cc = bracketend(cc);
3296 continue;
3297
3298 case OP_PLUSI:
3299 case OP_MINPLUSI:
3300 case OP_POSPLUSI:
3301 caseless = TRUE;
/* Fall through. */
/* "last" stays TRUE here, so only the first occurrence of the repeated
   character is recorded and the scan ends afterwards. */
3302 case OP_PLUS:
3303 case OP_MINPLUS:
3304 case OP_POSPLUS:
3305 cc++;
3306 break;
3307
3308 case OP_EXACTI:
3309 caseless = TRUE;
/* Fall through. */
3310 case OP_EXACT:
3311 repeat = GET2(cc, 1);
3312 last = FALSE;
3313 cc += 1 + IMM2_SIZE;
3314 break;
3315
3316 case OP_QUERYI:
3317 case OP_MINQUERYI:
3318 case OP_POSQUERYI:
3319 caseless = TRUE;
/* Fall through. */
/* Optional character: the code following the character is scanned first by
   a recursive call that writes into the same positions; the character
   itself is then merged into those positions by the code after the
   switch. */
3320 case OP_QUERY:
3321 case OP_MINQUERY:
3322 case OP_POSQUERY:
3323 len = 1;
3324 cc++;
3325 #ifdef SUPPORT_UTF
3326 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3327 #endif
3328 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3329 if (max_chars == 0)
3330 return consumed;
3331 last = FALSE;
3332 break;
3333
3334 case OP_KET:
3335 cc += 1 + LINK_SIZE;
3336 continue;
3337
/* Reaching OP_ALT while scanning inline: follow its link value. */
3338 case OP_ALT:
3339 cc += GET(cc, 1);
3340 continue;
3341
/* Groups: every alternative introduced by OP_ALT is scanned recursively
   into the same positions, then the scan continues inline with the first
   branch of the group. */
3342 case OP_ONCE:
3343 case OP_ONCE_NC:
3344 case OP_BRA:
3345 case OP_BRAPOS:
3346 case OP_CBRA:
3347 case OP_CBRAPOS:
3348 alternative = cc + GET(cc, 1);
3349 while (*alternative == OP_ALT)
3350 {
3351 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3352 if (max_chars == 0)
3353 return consumed;
3354 alternative += GET(alternative, 1);
3355 }
3356
3357 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3358 cc += IMM2_SIZE;
3359 cc += 1 + LINK_SIZE;
3360 continue;
3361
/* Character classes and character types are all recorded as "any"
   positions. In the configurations guarded below the scan gives up
   instead. */
3362 case OP_CLASS:
3363 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3364 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3365 #endif
3366 any = TRUE;
3367 cc += 1 + 32 / sizeof(pcre_uchar);
3368 break;
3369
3370 case OP_NCLASS:
3371 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3372 if (common->utf) return consumed;
3373 #endif
3374 any = TRUE;
3375 cc += 1 + 32 / sizeof(pcre_uchar);
3376 break;
3377
3378 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3379 case OP_XCLASS:
3380 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3381 if (common->utf) return consumed;
3382 #endif
3383 any = TRUE;
3384 cc += GET(cc, 1);
3385 break;
3386 #endif
3387
3388 case OP_DIGIT:
3389 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3390 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3391 return consumed;
3392 #endif
3393 any = TRUE;
3394 cc++;
3395 break;
3396
3397 case OP_WHITESPACE:
3398 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3399 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3400 return consumed;
3401 #endif
3402 any = TRUE;
3403 cc++;
3404 break;
3405
3406 case OP_WORDCHAR:
3407 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3408 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3409 return consumed;
3410 #endif
3411 any = TRUE;
3412 cc++;
3413 break;
3414
/* OP_NOT/OP_NOTI advance cc twice in total (once here, once below). */
3415 case OP_NOT:
3416 case OP_NOTI:
3417 cc++;
3418 /* Fall through. */
3419 case OP_NOT_DIGIT:
3420 case OP_NOT_WHITESPACE:
3421 case OP_NOT_WORDCHAR:
3422 case OP_ANY:
3423 case OP_ALLANY:
3424 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3425 if (common->utf) return consumed;
3426 #endif
3427 any = TRUE;
3428 cc++;
3429 break;
3430
3431 #ifdef SUPPORT_UCP
3432 case OP_NOTPROP:
3433 case OP_PROP:
3434 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3435 if (common->utf) return consumed;
3436 #endif
3437 any = TRUE;
3438 cc += 1 + 2;
3439 break;
3440 #endif
3441
/* Only the repeat count is taken here; the following opcode is handled by
   the next loop iteration with "repeat" still set. */
3442 case OP_TYPEEXACT:
3443 repeat = GET2(cc, 1);
3444 cc += 1 + IMM2_SIZE;
3445 continue;
3446
3447 case OP_NOTEXACT:
3448 case OP_NOTEXACTI:
3449 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3450 if (common->utf) return consumed;
3451 #endif
3452 any = TRUE;
3453 repeat = GET2(cc, 1);
3454 cc += 1 + IMM2_SIZE + 1;
3455 break;
3456
/* Any other opcode ends the prefix. */
3457 default:
3458 return consumed;
3459 }
3460
/* "Any" position: mark "repeat" consecutive positions as unconstrained
   (all-ones value and mask, byte set 255). */
3461 if (any)
3462 {
3463 #if defined COMPILE_PCRE8
3464 mask = 0xff;
3465 #elif defined COMPILE_PCRE16
3466 mask = 0xffff;
3467 #elif defined COMPILE_PCRE32
3468 mask = 0xffffffff;
3469 #else
3470 SLJIT_ASSERT_STOP();
3471 #endif
3472
3473 do
3474 {
3475 chars[0] = mask;
3476 chars[1] = mask;
3477 bytes[0] = 255;
3478
3479 consumed++;
3480 if (--max_chars == 0)
3481 return consumed;
3482 chars += 2;
3483 bytes += MAX_N_BYTES;
3484 }
3485 while (--repeat > 0);
3486
3487 repeat = 1;
3488 continue;
3489 }
3490
/* Literal character: cc now points at its first code unit; "len" is the
   number of code units it occupies. */
3491 len = 1;
3492 #ifdef SUPPORT_UTF
3493 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3494 #endif
3495
/* Prepare the other-case encoding in othercase[]. In UTF mode the scan
   stops if the other case is encoded with a different number of code
   units. */
3496 if (caseless && char_has_othercase(common, cc))
3497 {
3498 #ifdef SUPPORT_UTF
3499 if (common->utf)
3500 {
3501 GETCHAR(chr, cc);
3502 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3503 return consumed;
3504 }
3505 else
3506 #endif
3507 {
3508 chr = *cc;
3509 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3510 }
3511 }
3512 else
3513 caseless = FALSE;
3514
/* Outer loop: one pass per repetition, rewinding cc/len each time.
   Inner loop: one position per code unit of the character. */
3515 len_save = len;
3516 cc_save = cc;
3517 while (TRUE)
3518 {
3519 oc = othercase;
3520 do
3521 {
3522 chr = *cc;
3523 #ifdef COMPILE_PCRE32
3524 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3525 return consumed;
3526 #endif
3527 add_prefix_byte((pcre_uint8)chr, bytes);
3528
3529 mask = 0;
3530 if (caseless)
3531 {
3532 add_prefix_byte((pcre_uint8)*oc, bytes);
3533 mask = *cc ^ *oc;
3534 chr |= mask;
3535 }
3536
/* First write to this position stores the value/mask pair directly; later
   writes widen the mask with every bit that differs from the stored
   value. */
3537 #ifdef COMPILE_PCRE32
3538 if (chars[0] == NOTACHAR && chars[1] == 0)
3539 #else
3540 if (chars[0] == NOTACHAR)
3541 #endif
3542 {
3543 chars[0] = chr;
3544 chars[1] = mask;
3545 }
3546 else
3547 {
3548 mask |= chars[0] ^ chr;
3549 chr |= mask;
3550 chars[0] = chr;
3551 chars[1] |= mask;
3552 }
3553
3554 len--;
3555 consumed++;
3556 if (--max_chars == 0)
3557 return consumed;
3558 chars += 2;
3559 bytes += MAX_N_BYTES;
3560 cc++;
3561 oc++;
3562 }
3563 while (len > 0);
3564
3565 if (--repeat == 0)
3566 break;
3567
3568 len = len_save;
3569 cc = cc_save;
3570 }
3571
3572 repeat = 1;
3573 if (last)
3574 return consumed;
3575 }
3576 }
3577
/* fast_forward_first_n_chars: uses the prefix information gathered by
   scan_prefix() to emit a start-of-match search loop that checks up to three
   selected prefix positions and, when a suitable run of positions exists,
   a 256-entry skip table indexed by one subject byte.

   Returns FALSE when nothing was emitted (prefix of at most one position,
   or no usable position/range). Returns TRUE after emitting the loop.
   NOTE(review): TRUE is also returned when allocate_read_only_data() fails,
   before any code is emitted - presumably the allocation failure is
   reported to the caller by other means; confirm. */
3578 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3579 {
3580 DEFINE_COMPILER;
3581 struct sljit_label *start;
3582 struct sljit_jump *quit;
3583 pcre_uint32 chars[MAX_N_CHARS * 2];
3584 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3585 pcre_uint8 ones[MAX_N_CHARS];
3586 int offsets[3];
3587 pcre_uint32 mask;
3588 pcre_uint8 *byte_set, *byte_set_end;
3589 int i, max, from;
3590 int range_right = -1, range_len = 3 - 1;
3591 sljit_ub *update_table = NULL;
3592 BOOL in_range;
3593 pcre_uint32 rec_count;
3594
/* Mark every position as "not written yet" with an empty byte set. */
3595 for (i = 0; i < MAX_N_CHARS; i++)
3596 {
3597 chars[i << 1] = NOTACHAR;
3598 chars[(i << 1) + 1] = 0;
3599 bytes[i * MAX_N_BYTES] = 0;
3600 }
3601
3602 rec_count = 10000;
3603 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3604
3605 if (max <= 1)
3606 return FALSE;
3607
/* ones[i] accumulates ones_in_half_byte[] over every 4-bit group of the
   position's mask (presumably a per-nibble bit count, i.e. the number of
   set bits in the mask - the table is defined elsewhere). */
3608 for (i = 0; i < max; i++)
3609 {
3610 mask = chars[(i << 1) + 1];
3611 ones[i] = ones_in_half_byte[mask & 0xf];
3612 mask >>= 4;
3613 while (mask != 0)
3614 {
3615 ones[i] += ones_in_half_byte[mask & 0xf];
3616 mask >>= 4;
3617 }
3618 }
3619
/* Look for a run of consecutive positions whose byte set is tracked (not
   255). A run [from, i - 1] is accepted when it is longer than the best
   found so far (initially 2) and its last position has at most 4 bytes in
   its set. range_right is the last position of the accepted run and
   range_len its length. The loop runs to i == max so that a run reaching
   the end is also considered. */
3620 in_range = FALSE;
3621 from = 0; /* Prevent compiler "uninitialized" warning */
3622 for (i = 0; i <= max; i++)
3623 {
3624 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3625 {
3626 range_len = i - from;
3627 range_right = i - 1;
3628 }
3629
3630 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3631 {
3632 if (!in_range)
3633 {
3634 in_range = TRUE;
3635 from = i;
3636 }
3637 }
3638 else if (in_range)
3639 in_range = FALSE;
3640 }
3641
/* Build the skip table: every entry starts at IN_UCHARS(range_len); for the
   position i steps to the left of range_right, each byte in that
   position's set gets its entry lowered to IN_UCHARS(i). */
3642 if (range_right >= 0)
3643 {
3644 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3645 if (update_table == NULL)
3646 return TRUE;
3647 memset(update_table, IN_UCHARS(range_len), 256);
3648
3649 for (i = 0; i < range_len; i++)
3650 {
3651 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3652 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3653 byte_set_end = byte_set + byte_set[0];
3654 byte_set++;
3655 while (byte_set <= byte_set_end)
3656 {
3657 if (update_table[*byte_set] > IN_UCHARS(i))
3658 update_table[*byte_set] = IN_UCHARS(i);
3659 byte_set++;
3660 }
3661 }
3662 }
3663
/* offsets[0]: first position whose ones[] value is at most 2. */
3664 offsets[0] = -1;
3665 /* Scan forward. */
3666 for (i = 0; i < max; i++)
3667 if (ones[i] <= 2) {
3668 offsets[0] = i;
3669 break;
3670 }
3671
3672 if (offsets[0] < 0 && range_right < 0)
3673 return FALSE;
3674
3675 if (offsets[0] >= 0)
3676 {
/* offsets[1]: last such position after offsets[0], excluding range_right. */
3677 /* Scan backward. */
3678 offsets[1] = -1;
3679 for (i = max - 1; i > offsets[0]; i--)
3680 if (ones[i] <= 2 && i != range_right)
3681 {
3682 offsets[1] = i;
3683 break;
3684 }
3685
3686 /* This case is handled better by fast_forward_first_char. */
3687 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3688 return FALSE;
3689
3690 offsets[2] = -1;
3691 /* We only search for a middle character if there is no range check. */
3692 if (offsets[1] >= 0 && range_right == -1)
3693 {
3694 /* Scan from middle. */
3695 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3696 if (ones[i] <= 2)
3697 {
3698 offsets[2] = i;
3699 break;
3700 }
3701
3702 if (offsets[2] == -1)
3703 {
3704 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3705 if (ones[i] <= 2)
3706 {
3707 offsets[2] = i;
3708 break;
3709 }
3710 }
3711 }
3712
3713 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3714 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3715
/* The first six entries of chars[] are reused to hold the value/mask pairs
   of the selected positions: [0,1] for offsets[0], [2,3] for offsets[2],
   [4,5] for offsets[1]. */
3716 chars[0] = chars[offsets[0] << 1];
3717 chars[1] = chars[(offsets[0] << 1) + 1];
3718 if (offsets[2] >= 0)
3719 {
3720 chars[2] = chars[offsets[2] << 1];
3721 chars[3] = chars[(offsets[2] << 1) + 1];
3722 }
3723 if (offsets[1] >= 0)
3724 {
3725 chars[4] = chars[offsets[1] << 1];
3726 chars[5] = chars[(offsets[1] << 1) + 1];
3727 }
3728 }
3729
/* Code emission starts here. STR_END is temporarily lowered by max - 1
   characters. With firstline the original STR_END is kept in TMP3 and the
   lowered value is additionally capped at the saved first_line_end. */
3730 max -= 1;
3731 if (firstline)
3732 {
3733 SLJIT_ASSERT(common->first_line_end != 0);
3734 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3735 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3736 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3737 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3738 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3739 JUMPHERE(quit);
3740 }
3741 else
3742 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3743
/* Except on x86-32, the table address is kept in RETURN_ADDR for the
   duration of the loop. */
3744 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3745 if (range_right >= 0)
3746 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3747 #endif
3748
3749 start = LABEL();
3750 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3751
3752 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3753
/* Range check: load one byte of the character at position range_right (the
   byte offset within the character depends on the build, see the two
   branches), look up the skip distance, advance STR_PTR by it and restart
   the loop while the distance is non-zero. */
3754 if (range_right >= 0)
3755 {
3756 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3757 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3758 #else
3759 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3760 #endif
3761
3762 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3763 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3764 #else
3765 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3766 #endif
3767 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3768 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3769 }
3770
/* Character checks: STR_PTR is advanced by one character right after the
   first loads, so a mismatch can jump straight back to "start". That is
   why the middle character is loaded at offsets[2] - 1 and why STR_PTR is
   moved back by one character once every check has passed. */
3771 if (offsets[0] >= 0)
3772 {
3773 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3774 if (offsets[1] >= 0)
3775 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3776 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3777
3778 if (chars[1] != 0)
3779 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3780 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3781 if (offsets[2] >= 0)
3782 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3783
3784 if (offsets[1] >= 0)
3785 {
3786 if (chars[5] != 0)
3787 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3788 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3789 }
3790
3791 if (offsets[2] >= 0)
3792 {
3793 if (chars[3] != 0)
3794 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3795 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3796 }
3797 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3798 }
3799
3800 JUMPHERE(quit);
3801
/* Restore STR_END. With firstline and a range check, STR_PTR is also capped
   at first_line_end. */
3802 if (firstline)
3803 {
3804 if (range_right >= 0)
3805 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3806 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3807 if (range_right >= 0)
3808 {
3809 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3810 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3811 JUMPHERE(quit);
3812 }
3813 }
3814 else
3815 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3816 return TRUE;
3817 }
3818
3819 #undef MAX_N_CHARS
3820 #undef MAX_N_BYTES
3821
3822 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3823 {
3824 DEFINE_COMPILER;
3825 struct sljit_label *start;
3826 struct sljit_jump *quit;
3827 struct sljit_jump *found;
3828 pcre_uchar oc, bit;
3829
3830 if (firstline)
3831 {
3832 SLJIT_ASSERT(common->first_line_end != 0);
3833 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3834 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3835 }
3836
3837 start = LABEL();
3838 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3839 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3840
3841 oc = first_char;
3842 if (caseless)
3843 {
3844 oc = TABLE_GET(first_char, common->fcc, first_char);
3845 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3846 if (first_char > 127 && common->utf)
3847 oc = UCD_OTHERCASE(first_char);
3848 #endif
3849 }
3850 if (first_char == oc)
3851 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3852 else
3853 {
3854 bit = first_char ^ oc;
3855 if (is_powerof2(bit))
3856 {
3857 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3858 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3859 }
3860 else
3861 {
3862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3863 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3864 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3865 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3866 found = JUMP(SLJIT_NOT_ZERO);
3867 }
3868 }
3869
3870 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3871 JUMPTO(SLJIT_JUMP, start);
3872 JUMPHERE(found);
3873 JUMPHERE(quit);
3874
3875 if (firstline)
3876 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3877 }
3878
3879 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3880 {
3881 DEFINE_COMPILER;
3882 struct sljit_label *loop;
3883 struct sljit_jump *lastchar;
3884 struct sljit_jump *firstchar;
3885 struct sljit_jump *quit;
3886 struct sljit_jump *foundcr = NULL;
3887 struct sljit_jump *notfoundnl;
3888 jump_list *newline = NULL;
3889
3890 if (firstline)
3891 {
3892 SLJIT_ASSERT(common->first_line_end != 0);
3893 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3894 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3895 }
3896
3897 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3898 {
3899 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3900 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3901 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3903 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3904
3905 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3906 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3907 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3908 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3909 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3910 #endif
3911 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3912
3913 loop = LABEL();
3914 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3915 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3916 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3917 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3918 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3919 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3920
3921 JUMPHERE(quit);
3922 JUMPHERE(firstchar);
3923 JUMPHERE(lastchar);
3924
3925 if (firstline)
3926 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3927 return;
3928 }
3929
3930 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3932 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3933 skip_char_back(common);
3934
3935 loop = LABEL();
3936 common->ff_newline_shortcut = loop;
3937
3938 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3939 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3940 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3941 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3942 check_newlinechar(common, common->nltype, &newline, FALSE);
3943 set_jumps(newline, loop);
3944
3945 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3946 {
3947 quit = JUMP(SLJIT_JUMP);
3948 JUMPHERE(foundcr);
3949 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3950 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3951 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3952 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3953 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3954 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3955 #endif
3956 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3957 JUMPHERE(notfoundnl);
3958 JUMPHERE(quit);
3959 }
3960 JUMPHERE(lastchar);
3961 JUMPHERE(firstchar);
3962
3963 if (firstline)
3964 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3965 }
3966
3967 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3968
3969 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3970 {
3971 DEFINE_COMPILER;
3972 struct sljit_label *start;
3973 struct sljit_jump *quit;
3974 struct sljit_jump *found = NULL;
3975 jump_list *matches = NULL;
3976 #ifndef COMPILE_PCRE8
3977 struct sljit_jump *jump;
3978 #endif
3979
3980 if (firstline)
3981 {
3982 SLJIT_ASSERT(common->first_line_end != 0);
3983 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3984 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3985 }
3986
3987 start = LABEL();
3988 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3989 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3990 #ifdef SUPPORT_UTF
3991 if (common->utf)
3992 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3993 #endif
3994
3995 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3996 {
3997 #ifndef COMPILE_PCRE8
3998 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4000 JUMPHERE(jump);
4001 #endif
4002 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4003 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4004 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4005 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4006 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4007 found = JUMP(SLJIT_NOT_ZERO);
4008 }
4009
4010 #ifdef SUPPORT_UTF
4011 if (common->utf)
4012 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4013 #endif
4014 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4015 #ifdef SUPPORT_UTF
4016 #if defined COMPILE_PCRE8
4017 if (common->utf)
4018 {
4019 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4020 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4021 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4022 }
4023 #elif defined COMPILE_PCRE16
4024 if (common->utf)
4025 {
4026 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4027 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4028 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4029 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4030 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4031 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4032 }
4033 #endif /* COMPILE_PCRE[8|16] */
4034 #endif /* SUPPORT_UTF */
4035 JUMPTO(SLJIT_JUMP, start);
4036 if (found != NULL)
4037 JUMPHERE(found);
4038 if (matches != NULL)
4039 set_jumps(matches, LABEL());
4040 JUMPHERE(quit);
4041
4042 if (firstline)
4043 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4044 }
4045
4046 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4047 {
4048 DEFINE_COMPILER;
4049 struct sljit_label *loop;
4050 struct sljit_jump *toolong;
4051 struct sljit_jump *alreadyfound;
4052 struct sljit_jump *found;
4053 struct sljit_jump *foundoc = NULL;
4054 struct sljit_jump *notfound;
4055 pcre_uint32 oc, bit;
4056
4057 SLJIT_ASSERT(common->req_char_ptr != 0);
4058 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4059 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4060 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4061 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4062
4063 if (has_firstchar)
4064 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4065 else
4066 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4067
4068 loop = LABEL();
4069 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4070
4071 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4072 oc = req_char;
4073 if (caseless)
4074 {
4075 oc = TABLE_GET(req_char, common->fcc, req_char);
4076 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4077 if (req_char > 127 && common->utf)
4078 oc = UCD_OTHERCASE(req_char);
4079 #endif
4080 }
4081 if (req_char == oc)
4082 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4083 else
4084 {
4085 bit = req_char ^ oc;
4086 if (is_powerof2(bit))
4087 {
4088 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4089 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4090 }
4091 else
4092 {
4093 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4094 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4095 }
4096 }
4097 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4098 JUMPTO(SLJIT_JUMP, loop);
4099
4100 JUMPHERE(found);
4101 if (foundoc)
4102 JUMPHERE(foundoc);
4103 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4104 JUMPHERE(alreadyfound);
4105 JUMPHERE(toolong);
4106 return notfound;
4107 }
4108
4109 static void do_revertframes(compiler_common *common)
4110 {
4111 DEFINE_COMPILER;
4112 struct sljit_jump *jump;
4113 struct sljit_label *mainloop;
4114
4115 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4116 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4117 GET_LOCAL_BASE(TMP3, 0, 0);
4118
4119 /* Drop frames until we reach STACK_TOP. */
4120 mainloop = LABEL();
4121 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4122 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4123 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4124
4125 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4126 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4127 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4128 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4129 JUMPTO(SLJIT_JUMP, mainloop);
4130
4131 JUMPHERE(jump);
4132 jump = JUMP(SLJIT_SIG_LESS);
4133 /* End of dropping frames. */
4134 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4135
4136 JUMPHERE(jump);
4137 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4138 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4139 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4140 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4141 JUMPTO(SLJIT_JUMP, mainloop);
4142 }
4143
4144 static void check_wordboundary(compiler_common *common)
4145 {
4146 DEFINE_COMPILER;
4147 struct sljit_jump *skipread;
4148 jump_list *skipread_list = NULL;
4149 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4150 struct sljit_jump *jump;
4151 #endif
4152
4153 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4154
4155 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4156 /* Get type of the previous char, and put it to LOCALS1. */
4157 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4159 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4160 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4161 skip_char_back(common);
4162 check_start_used_ptr(common);
4163 read_char(common);
4164
4165 /* Testing char type. */
4166 #ifdef SUPPORT_UCP
4167 if (common->use_ucp)
4168 {
4169 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4170 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4171 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4172 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4173 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4174 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4175 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4176 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4177 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4178 JUMPHERE(jump);
4179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4180 }
4181 else
4182 #endif
4183 {
4184 #ifndef COMPILE_PCRE8
4185 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4186 #elif defined SUPPORT_UTF
4187 /* Here LOCALS1 has already been zeroed. */
4188 jump = NULL;
4189 if (common->utf)
4190 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4191 #endif /* COMPILE_PCRE8 */
4192 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4193 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4194 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4196 #ifndef COMPILE_PCRE8
4197 JUMPHERE(jump);
4198 #elif defined SUPPORT_UTF
4199 if (jump != NULL)
4200 JUMPHERE(jump);
4201 #endif /* COMPILE_PCRE8 */
4202 }
4203 JUMPHERE(skipread);
4204
4205 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4206 check_str_end(common, &skipread_list);
4207 peek_char(common, READ_CHAR_MAX);
4208
4209 /* Testing char type. This is a code duplication. */
4210 #ifdef SUPPORT_UCP
4211 if (common->use_ucp)
4212 {
4213 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4214 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4215 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4216 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4217 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4218 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4219 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4220 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4221 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4222 JUMPHERE(jump);
4223 }
4224 else
4225 #endif
4226 {
4227 #ifndef COMPILE_PCRE8
4228 /* TMP2 may be destroyed by peek_char. */
4229 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4230 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4231 #elif defined SUPPORT_UTF
4232 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4233 jump = NULL;
4234 if (common->utf)
4235 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4236 #endif
4237 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4238 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4239 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4240 #ifndef COMPILE_PCRE8
4241 JUMPHERE(jump);
4242 #elif defined SUPPORT_UTF
4243 if (jump != NULL)
4244 JUMPHERE(jump);
4245 #endif /* COMPILE_PCRE8 */
4246 }
4247 set_jumps(skipread_list, LABEL());
4248
4249 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4250 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4251 }
4252
4253 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4254 {
4255 DEFINE_COMPILER;
4256 int ranges[MAX_RANGE_SIZE];
4257 pcre_uint8 bit, cbit, all;
4258 int i, byte, length = 0;
4259
4260 bit = bits[0] & 0x1;
4261 /* All bits will be zero or one (since bit is zero or one). */
4262 all = -bit;
4263
4264 for (i = 0; i < 256; )
4265 {
4266 byte = i >> 3;
4267 if ((i & 0x7) == 0 && bits[byte] == all)
4268 i += 8;
4269 else
4270 {
4271 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4272 if (cbit != bit)
4273 {
4274 if (length >= MAX_RANGE_SIZE)
4275 return FALSE;
4276 ranges[length] = i;
4277 length++;
4278 bit = cbit;
4279 all = -cbit;
4280 }
4281 i++;
4282 }
4283 }
4284
4285 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4286 {
4287 if (length >= MAX_RANGE_SIZE)
4288 return FALSE;
4289 ranges[length] = 256;
4290 length++;
4291 }
4292
4293 if (length < 0 || length > 4)
4294 return FALSE;
4295
4296 bit = bits[0] & 0x1;
4297 if (invert) bit ^= 0x1;
4298
4299 /* No character is accepted. */
4300 if (length == 0 && bit == 0)
4301 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4302
4303 switch(length)
4304 {
4305 case 0:
4306 /* When bit != 0, all characters are accepted. */
4307 return TRUE;
4308
4309 case 1:
4310 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4311 return TRUE;
4312
4313 case 2:
4314 if (ranges[0] + 1 != ranges[1])
4315 {
4316 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4317 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4318 }
4319 else
4320 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4321 return TRUE;
4322
4323 case 3:
4324 if (bit != 0)
4325 {
4326 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4327 if (ranges[0] + 1 != ranges[1])
4328 {
4329 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4330 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4331 }
4332 else
4333 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4334 return TRUE;
4335 }
4336
4337 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4338 if (ranges[1] + 1 != ranges[2])
4339 {
4340 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4341 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4342 }
4343 else
4344 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4345 return TRUE;
4346
4347 case 4:
4348 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4349 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4350 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4351 && is_powerof2(ranges[2] - ranges[0]))
4352 {
4353 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4354 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4355 if (ranges[2] + 1 != ranges[3])
4356 {
4357 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4358 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4359 }
4360 else
4361 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4362 return TRUE;
4363 }
4364
4365 if (bit != 0)
4366 {
4367 i = 0;
4368 if (ranges[0] + 1 != ranges[1])
4369 {
4370 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4371 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4372 i = ranges[0];
4373 }
4374 else
4375 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4376
4377 if (ranges[2] + 1 != ranges[3])
4378 {
4379 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4380 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4381 }
4382 else
4383 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4384 return TRUE;
4385 }
4386
4387 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4388 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4389 if (ranges[1] + 1 != ranges[2])
4390 {
4391 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4392 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4393 }
4394 else
4395 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4396 return TRUE;
4397
4398 default:
4399 SLJIT_ASSERT_STOP();
4400 return FALSE;
4401 }
4402 }
4403
4404 static void check_anynewline(compiler_common *common)
4405 {
4406 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4407 DEFINE_COMPILER;
4408
4409 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4410
4411 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4412 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4413 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4414 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4415 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4416 #ifdef COMPILE_PCRE8
4417 if (common->utf)
4418 {
4419 #endif
4420 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4421 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4422 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4423 #ifdef COMPILE_PCRE8
4424 }
4425 #endif
4426 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4427 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4428 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4429 }
4430
4431 static void check_hspace(compiler_common *common)
4432 {
4433 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4434 DEFINE_COMPILER;
4435
4436 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4437
4438 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4439 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4440 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4441 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4443 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4444 #ifdef COMPILE_PCRE8
4445 if (common->utf)
4446 {
4447 #endif
4448 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4449 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4452 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4453 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4454 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4455 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4456 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4457 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4459 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4461 #ifdef COMPILE_PCRE8
4462 }
4463 #endif
4464 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4465 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4466
4467 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4468 }
4469
4470 static void check_vspace(compiler_common *common)
4471 {
4472 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4473 DEFINE_COMPILER;
4474
4475 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4476
4477 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4478 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4479 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4480 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4481 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4482 #ifdef COMPILE_PCRE8
4483 if (common->utf)
4484 {
4485 #endif
4486 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4487 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4488 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4489 #ifdef COMPILE_PCRE8
4490 }
4491 #endif
4492 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4493 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4494
4495 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4496 }
4497
4498 #define CHAR1 STR_END
4499 #define CHAR2 STACK_TOP
4500
4501 static void do_casefulcmp(compiler_common *common)
4502 {
4503 DEFINE_COMPILER;
4504 struct sljit_jump *jump;
4505 struct sljit_label *label;
4506
4507 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4508 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4509 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4511 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4512 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4513
4514 label = LABEL();
4515 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4516 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4517 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4518 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4519 JUMPTO(SLJIT_NOT_ZERO, label);
4520
4521 JUMPHERE(jump);
4522 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4523 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4524 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4525 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4526 }
4527
4528 #define LCC_TABLE STACK_LIMIT
4529
4530 static void do_caselesscmp(compiler_common *common)
4531 {
4532 DEFINE_COMPILER;
4533 struct sljit_jump *jump;
4534 struct sljit_label *label;
4535
4536 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4537 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4538
4539 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4542 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4543 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4544 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4545
4546 label = LABEL();
4547 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4548 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4549 #ifndef COMPILE_PCRE8
4550 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4551 #endif
4552 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4553 #ifndef COMPILE_PCRE8
4554 JUMPHERE(jump);
4555 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4556 #endif
4557 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4558 #ifndef COMPILE_PCRE8
4559 JUMPHERE(jump);
4560 #endif
4561 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4562 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4563 JUMPTO(SLJIT_NOT_ZERO, label);
4564
4565 JUMPHERE(jump);
4566 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4567 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4568 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4569 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4570 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4571 }
4572
4573 #undef LCC_TABLE
4574 #undef CHAR1
4575 #undef CHAR2
4576
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode tables.
This is an ordinary C function, since generating the same code with the JIT
compiler would be ineffective.

  src1   start of the first sequence
  args   supplies the second sequence: it starts at args->uchar_ptr and must
         not be read at or beyond args->end
  end1   end of the first sequence

Returns:
  (pcre_uchar*)1  the second sequence ran out before the end of the first
                  one was reached
  NULL            a character pair does not match
  otherwise       pointer to the position reached in the second sequence
*/

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *other = args->uchar_ptr;
const pcre_uchar *other_end = args->end;
const ucd_record *record;
const pcre_uint32 *caseset;
pcre_uint32 left, right;

for (; src1 < end1; )
  {
  if (other >= other_end)
    return (pcre_uchar*)1;

  GETCHARINC(left, src1);
  GETCHARINC(right, other);
  record = GET_UCD(right);

  /* Same character, or the other case of it. */
  if (left == right || left == right + record->other_case)
    continue;

  /* The last chance is the caseless set of the character. The set is
  scanned until an entry which is not smaller than "left" is found. */
  caseset = PRIV(ucd_caseless_sets) + record->caseset;
  while (left != *caseset)
    {
    if (left < *caseset) return NULL;
    caseset++;
    }
  }

return other;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4609
4610 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4611 compare_context *context, jump_list **backtracks)
4612 {
4613 DEFINE_COMPILER;
4614 unsigned int othercasebit = 0;
4615 pcre_uchar *othercasechar = NULL;
4616 #ifdef SUPPORT_UTF
4617 int utflength;
4618 #endif
4619
4620 if (caseless && char_has_othercase(common, cc))
4621 {
4622 othercasebit = char_get_othercase_bit(common, cc);
4623 SLJIT_ASSERT(othercasebit);
4624 /* Extracting bit difference info. */
4625 #if defined COMPILE_PCRE8
4626 othercasechar = cc + (othercasebit >> 8);
4627 othercasebit &= 0xff;
4628 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4629 /* Note that this code only handles characters in the BMP. If there
4630 ever are characters outside the BMP whose othercase differs in only one
4631 bit from itself (there currently are none), this code will need to be
4632 revised for COMPILE_PCRE32. */
4633 othercasechar = cc + (othercasebit >> 9);
4634 if ((othercasebit & 0x100) != 0)
4635 othercasebit = (othercasebit & 0xff) << 8;
4636 else
4637 othercasebit &= 0xff;
4638 #endif /* COMPILE_PCRE[8|16|32] */
4639 }
4640
4641 if (context->sourcereg == -1)
4642 {
4643 #if defined COMPILE_PCRE8
4644 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4645 if (context->length >= 4)
4646 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4647 else if (context->length >= 2)
4648 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4649 else
4650 #endif
4651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4652 #elif defined COMPILE_PCRE16
4653 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4654 if (context->length >= 4)
4655 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4656 else
4657 #endif
4658 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4659 #elif defined COMPILE_PCRE32
4660 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4661 #endif /* COMPILE_PCRE[8|16|32] */
4662 context->sourcereg = TMP2;
4663 }
4664
4665 #ifdef SUPPORT_UTF
4666 utflength = 1;
4667 if (common->utf && HAS_EXTRALEN(*cc))
4668 utflength += GET_EXTRALEN(*cc);
4669
4670 do
4671 {
4672 #endif
4673
4674 context->length -= IN_UCHARS(1);
4675 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4676
4677 /* Unaligned read is supported. */
4678 if (othercasebit != 0 && othercasechar == cc)
4679 {
4680 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4681 context->oc.asuchars[context->ucharptr] = othercasebit;
4682 }
4683 else
4684 {
4685 context->c.asuchars[context->ucharptr] = *cc;
4686 context->oc.asuchars[context->ucharptr] = 0;
4687 }
4688 context->ucharptr++;
4689
4690 #if defined COMPILE_PCRE8
4691 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4692 #else
4693 if (context->ucharptr >= 2 || context->length == 0)
4694 #endif
4695 {
4696 if (context->length >= 4)
4697 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4698 else if (context->length >= 2)
4699 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4700 #if defined COMPILE_PCRE8
4701 else if (context->length >= 1)
4702 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4703 #endif /* COMPILE_PCRE8 */
4704 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4705
4706 switch(context->ucharptr)
4707 {
4708 case 4 / sizeof(pcre_uchar):
4709 if (context->oc.asint != 0)
4710 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4711 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4712 break;
4713
4714 case 2 / sizeof(pcre_uchar):
4715 if (context->oc.asushort != 0)
4716 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4717 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4718 break;
4719
4720 #ifdef COMPILE_PCRE8
4721 case 1:
4722 if (context->oc.asbyte != 0)
4723 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4724 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4725 break;
4726 #endif
4727
4728 default:
4729 SLJIT_ASSERT_STOP();
4730 break;
4731 }
4732 context->ucharptr = 0;
4733 }
4734
4735 #else
4736
4737 /* Unaligned read is unsupported or in 32 bit mode. */
4738 if (context->length >= 1)
4739 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4740
4741 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4742
4743 if (othercasebit != 0 && othercasechar == cc)
4744 {
4745 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4746 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4747 }
4748 else
4749 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4750
4751 #endif
4752
4753 cc++;
4754 #ifdef SUPPORT_UTF
4755 utflength--;
4756 }
4757 while (utflength > 0);
4758 #endif
4759
4760 return cc;
4761 }
4762
4763 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4764
/* The following two macros keep track of a constant which has been subtracted
from a register, so compares can be done against small immediate values. They
expand to code which uses the local variables of the surrounding function:
typereg / typeoffset for SET_TYPE_OFFSET, and TMP1 / charoffset for
SET_CHAR_OFFSET. When the requested offset differs from the current one, a
single ADD or SUB is emitted which adjusts the register by the difference,
and the new offset is recorded.

Note that (value) is evaluated more than once, so it must not have side
effects. The macros expand to a single statement, and the invocation must be
followed by a semicolon. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4784
4785 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
4786
4787 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4788 {
4789 DEFINE_COMPILER;
4790 jump_list *found = NULL;
4791 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4792 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4793 struct sljit_jump *jump = NULL;
4794 pcre_uchar *ccbegin;
4795 int compares, invertcmp, numberofcmps;
4796 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4797 BOOL utf = common->utf;
4798 #endif
4799
4800 #ifdef SUPPORT_UCP
4801 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4802 BOOL charsaved = FALSE;
4803 int typereg = TMP1;
4804 const sljit_ui *other_cases;
4805 sljit_uw typeoffset;
4806 #endif
4807
4808 /* Scanning the necessary info. */
4809 cc++;
4810 ccbegin = cc;
4811 compares = 0;
4812 if (cc[-1] & XCL_MAP)
4813 {
4814 min = 0;
4815 cc += 32 / sizeof(pcre_uchar);
4816 }
4817
4818 while (*cc != XCL_END)
4819 {
4820 compares++;
4821 if (*cc == XCL_SINGLE)
4822 {
4823 cc ++;
4824 GETCHARINCTEST(c, cc);
4825 if (c > max) max = c;
4826 if (c < min) min = c;
4827 #ifdef SUPPORT_UCP
4828 needschar = TRUE;
4829 #endif
4830 }
4831 else if (*cc == XCL_RANGE)
4832 {
4833 cc ++;
4834 GETCHARINCTEST(c, cc);
4835 if (c < min) min = c;
4836 GETCHARINCTEST(c, cc);
4837 if (c > max) max = c;
4838 #ifdef SUPPORT_UCP
4839 needschar = TRUE;
4840 #endif
4841 }
4842 #ifdef SUPPORT_UCP
4843 else
4844 {
4845 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4846 cc++;
4847 if (*cc == PT_CLIST)
4848 {
4849 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4850 while (*other_cases != NOTACHAR)
4851 {
4852 if (*other_cases > max) max = *other_cases;
4853 if (*other_cases < min) min = *other_cases;
4854 other_cases++;
4855 }
4856 }
4857 else
4858 {
4859 max = READ_CHAR_MAX;
4860 min = 0;
4861 }
4862
4863 switch(*cc)
4864 {
4865 case PT_ANY:
4866 /* Any either accepts everything or ignored. */
4867 if (cc[-1] == XCL_PROP)
4868 {
4869 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
4870 if (list == backtracks)
4871 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4872 return;
4873 }
4874 break;
4875
4876 case PT_LAMP:
4877 case PT_GC:
4878 case PT_PC:
4879 case PT_ALNUM:
4880 needstype = TRUE;
4881 break;
4882
4883 case PT_SC:
4884 needsscript = TRUE;
4885 break;
4886
4887 case PT_SPACE:
4888 case PT_PXSPACE:
4889 case PT_WORD:
4890 case PT_PXGRAPH:
4891 case PT_PXPRINT:
4892 case PT_PXPUNCT:
4893 needstype = TRUE;
4894 needschar = TRUE;
4895 break;
4896
4897 case PT_CLIST:
4898 case PT_UCNC:
4899 needschar = TRUE;
4900 break;
4901
4902 default:
4903 SLJIT_ASSERT_STOP();
4904 break;
4905 }
4906 cc += 2;
4907 }
4908 #endif
4909 }
4910 SLJIT_ASSERT(compares > 0);
4911
4912 /* We are not necessary in utf mode even in 8 bit mode. */
4913 cc = ccbegin;
4914 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4915
4916 if ((cc[-1] & XCL_HASPROP) == 0)
4917 {
4918 if ((cc[-1] & XCL_MAP) != 0)
4919 {
4920 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4921 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
4922 {
4923 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4924 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4925 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4926 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4927 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4928 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4929 }
4930
4931 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4932 JUMPHERE(jump);
4933
4934 cc += 32 / sizeof(pcre_uchar);
4935 }
4936 else
4937 {
4938 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4939 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4940 }
4941 }
4942 else if ((cc[-1] & XCL_MAP) != 0)
4943 {
4944 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4945 #ifdef SUPPORT_UCP
4946 charsaved = TRUE;
4947 #endif
4948 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4949 {
4950 #ifdef COMPILE_PCRE8
4951 jump = NULL;
4952 if (common->utf)
4953 #endif
4954 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4955
4956 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4957 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4958 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4959 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4960 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4961 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4962
4963 #ifdef COMPILE_PCRE8
4964 if (common->utf)
4965 #endif
4966 JUMPHERE(jump);
4967 }
4968
4969 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
4970 cc += 32 / sizeof(pcre_uchar);
4971 }
4972
4973 #ifdef SUPPORT_UCP
4974 if (needstype || needsscript)
4975 {
4976 if (needschar && !charsaved)
4977 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4978
4979 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
4980 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
4981 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
4982 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
4983 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4984 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
4985 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
4986
4987 /* Before anything else, we deal with scripts. */
4988 if (needsscript)
4989 {
4990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4991 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4992
4993 ccbegin = cc;
4994
4995 while (*cc != XCL_END)
4996 {
4997 if (*cc == XCL_SINGLE)
4998 {
4999 cc ++;
5000 GETCHARINCTEST(c, cc);
5001 }
5002 else if (*cc == XCL_RANGE)
5003 {
5004 cc ++;
5005 GETCHARINCTEST(c, cc);
5006 GETCHARINCTEST(c, cc);
5007 }
5008 else
5009 {
5010 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5011 cc++;
5012 if (*cc == PT_SC)
5013 {
5014 compares--;
5015 invertcmp = (compares == 0 && list != backtracks);
5016 if (cc[-1] == XCL_NOTPROP)
5017 invertcmp ^= 0x1;
5018 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5019 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5020 }
5021 cc += 2;
5022 }
5023 }
5024
5025 cc = ccbegin;
5026 }
5027
5028 if (needschar)
5029 {
5030 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5031 }
5032
5033 if (needstype)
5034 {
5035 if (!needschar)
5036 {
5037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5038 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5039 }
5040 else
5041 {
5042 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5043 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5044 typereg = RETURN_ADDR;
5045 }
5046 }
5047 }
5048 #endif
5049
5050 /* Generating code. */
5051 charoffset = 0;
5052 numberofcmps = 0;
5053 #ifdef SUPPORT_UCP
5054 typeoffset = 0;
5055 #endif
5056
5057 while (*cc != XCL_END)
5058 {
5059 compares--;
5060 invertcmp = (compares == 0 && list != backtracks);
5061 jump = NULL;
5062
5063 if (*cc == XCL_SINGLE)
5064 {
5065 cc ++;
5066 GETCHARINCTEST(c, cc);
5067
5068 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5069 {
5070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5071 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5072 numberofcmps++;
5073 }
5074 else if (numberofcmps > 0)
5075 {
5076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5077 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5078 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5079 numberofcmps = 0;
5080 }
5081 else
5082 {
5083 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5084 numberofcmps = 0;
5085 }
5086 }
5087 else if (*cc == XCL_RANGE)
5088 {
5089 cc ++;
5090 GETCHARINCTEST(c, cc);
5091 SET_CHAR_OFFSET(c);
5092 GETCHARINCTEST(c, cc);
5093
5094 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5095 {
5096 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5097 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5098 numberofcmps++;
5099 }
5100 else if (numberofcmps > 0)
5101 {
5102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5103 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5104 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5105 numberofcmps = 0;
5106 }
5107 else
5108 {
5109 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5110 numberofcmps = 0;
5111 }
5112 }
5113 #ifdef SUPPORT_UCP
5114 else
5115 {
5116 if (*cc == XCL_NOTPROP)
5117 invertcmp ^= 0x1;
5118 cc++;
5119 switch(*cc)
5120 {
5121 case PT_ANY:
5122 if (!invertcmp)
5123 jump = JUMP(SLJIT_JUMP);
5124 break;
5125
5126 case PT_LAMP:
5127 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5128 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5129 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5130 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5131 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5132 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5133 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5134 break;
5135
5136 case PT_GC:
5137 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5138 SET_TYPE_OFFSET(c);
5139 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5140 break;
5141
5142 case PT_PC:
5143 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5144 break;
5145
5146 case PT_SC:
5147 compares++;
5148 /* Do nothing. */
5149 break;
5150
5151 case PT_SPACE:
5152 case PT_PXSPACE:
5153 SET_CHAR_OFFSET(9);
5154 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5155 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5156
5157 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5158 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5159
5160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5161 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5162
5163 SET_TYPE_OFFSET(ucp_Zl);
5164 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5165 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5166 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5167 break;
5168
5169 case PT_WORD:
5170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5171 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5172 /* Fall through. */
5173
5174 case PT_ALNUM:
5175 SET_TYPE_OFFSET(ucp_Ll);
5176 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5177 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5178 SET_TYPE_OFFSET(ucp_Nd);
5179 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5180 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5181 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5182 break;
5183
5184 case PT_CLIST:
5185 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5186
5187 /* At least three characters are required.
5188 Otherwise this case would be handled by the normal code path. */
5189 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5190 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5191
5192 /* Optimizing character pairs, if their difference is power of 2. */
5193 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5194 {
5195 if (charoffset == 0)
5196 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5197 else
5198 {
5199 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5200 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5201 }
5202 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5203 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5204 other_cases += 2;
5205 }
5206 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5207 {
5208 if (charoffset == 0)
5209 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5210 else
5211 {
5212 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5213 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5214 }
5215 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5216 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5217
5218 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5219 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5220
5221 other_cases += 3;
5222 }
5223 else
5224 {
5225 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5226 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5227 }
5228
5229 while (*other_cases != NOTACHAR)
5230 {
5231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5232 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5233 }
5234 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5235 break;
5236
5237 case PT_UCNC:
5238 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5239 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5240 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5241 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5242 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5243 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5244
5245 SET_CHAR_OFFSET(0xa0);
5246 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5247 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5248 SET_CHAR_OFFSET(0);
5249 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5250 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5251 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5252 break;
5253
5254 case PT_PXGRAPH:
5255 /* C and Z groups are the farthest two groups. */
5256 SET_TYPE_OFFSET(ucp_Ll);
5257 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5258 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5259
5260 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5261
5262 /* In case of ucp_Cf, we overwrite the result. */
5263 SET_CHAR_OFFSET(0x2066);
5264 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5265 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5266
5267 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5268 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5269
5270 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5271 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5272
5273 JUMPHERE(jump);
5274 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5275 break;
5276
5277 case PT_PXPRINT:
5278 /* C and Z groups are the farthest two groups. */
5279 SET_TYPE_OFFSET(ucp_Ll);
5280 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5281 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5282
5283 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5284 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5285
5286 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5287
5288 /* In case of ucp_Cf, we overwrite the result. */
5289 SET_CHAR_OFFSET(0x2066);
5290 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5291 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5292
5293 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5294 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5295
5296 JUMPHERE(jump);
5297 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5298 break;
5299
5300 case PT_PXPUNCT:
5301 SET_TYPE_OFFSET(ucp_Sc);
5302 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5303 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5304
5305 SET_CHAR_OFFSET(0);
5306 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5307 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5308
5309 SET_TYPE_OFFSET(ucp_Pc);
5310 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5311 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5312 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5313 break;
5314
5315 default:
5316 SLJIT_ASSERT_STOP();
5317 break;
5318 }
5319 cc += 2;
5320 }
5321 #endif
5322
5323 if (jump != NULL)
5324 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5325 }
5326
5327 if (found != NULL)
5328 set_jumps(found, LABEL());
5329 }
5330
5331 #undef SET_TYPE_OFFSET
5332 #undef SET_CHAR_OFFSET
5333
5334 #endif
5335
/* Generates the matching-path code for one zero-width assertion opcode:
   OP_SOD, OP_SOM, OP_[NOT_]WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM,
   OP_CIRC, OP_CIRCM or OP_REVERSE.

   common     - compiler state (newline settings, mode, shared routine lists).
   type       - the opcode to compile.
   cc         - pattern position; OP_REVERSE reads its length argument at cc,
                so cc presumably points just past the opcode.
   backtracks - jump list that receives every "assertion failed" jump.

   Returns cc unchanged, except for OP_REVERSE which returns cc + LINK_SIZE.
   Any other opcode reaches SLJIT_ASSERT_STOP(). */
5336 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5337 {
5338 DEFINE_COMPILER;
5339 int length;
/* Forward jumps created in one place and resolved later with JUMPHERE(). */
5340 struct sljit_jump *jump[4];
5341 #ifdef SUPPORT_UTF
5342 struct sljit_label *label;
5343 #endif /* SUPPORT_UTF */
5344
5345 switch(type)
5346 {
/* Fail unless STR_PTR equals the `begin` field of jit_arguments. */
5347 case OP_SOD:
5348 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5349 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5350 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5351 return cc;
5352
/* Fail unless STR_PTR equals the `str` field of jit_arguments. */
5353 case OP_SOM:
5354 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5356 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5357 return cc;
5358
/* Calls the shared wordboundary routine, then backtracks on a non-zero result
   for OP_NOT_WORD_BOUNDARY and on a zero result for OP_WORD_BOUNDARY. */
5359 case OP_NOT_WORD_BOUNDARY:
5360 case OP_WORD_BOUNDARY:
5361 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5362 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5363 return cc;
5364
5365 case OP_EODN:
5366 /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at or past STR_END, so the newline tests are
   skipped and control continues at check_partial() below. */
5367 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5368 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5369 {
/* Fixed newline compared as two code units: (newline >> 8) & 0xff, then
   newline & 0xff. TMP2 = STR_PTR + 2 units, TMP1 = unit at STR_PTR. */
5370 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5371 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5372 if (common->mode == JIT_COMPILE)
5373 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5374 else
5375 {
/* Other modes: when exactly two units remain (jump[1]) go on to compare them.
   Otherwise backtrack if more than two units remain or the current unit is
   not the first newline unit; in the remaining case only the first newline
   unit is present, so check_partial() is invoked before backtracking. */
5376 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5377 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5378 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5379 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5380 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5381 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5382 check_partial(common, TRUE);
5383 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5384 JUMPHERE(jump[1]);
5385 }
5386 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5387 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5388 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5389 }
5390 else if (common->nltype == NLTYPE_FIXED)
5391 {
/* Single-unit fixed newline: it must be the last unit of the subject. */
5392 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5393 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5394 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5395 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5396 }
5397 else
5398 {
/* Non-fixed newline types. A CR is handled first: if it is the last unit
   (jump[2]) it is accepted; if more than two units remain, backtrack; with
   exactly two units left the second one must be NL (jump[3]). */
5399 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5400 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5401 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5402 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5403 jump[2] = JUMP(SLJIT_GREATER);
5404 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5405 /* Equal. */
5406 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5407 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5408 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5409
/* The current unit is not CR. */
5410 JUMPHERE(jump[1]);
5411 if (common->nltype == NLTYPE_ANYCRLF)
5412 {
/* It must be an NL that is the last unit of the subject. */
5413 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5414 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5415 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5416 }
5417 else
5418 {
/* STR_PTR is saved in LOCALS1, one character is read, and the match fails
   unless that character ends exactly at STR_END and the shared anynewline
   routine returns non-zero. STR_PTR is restored on success. */
5419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5420 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5421 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5422 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5423 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5424 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5425 }
5426 JUMPHERE(jump[2]);
5427 JUMPHERE(jump[3]);
5428 }
5429 JUMPHERE(jump[0]);
5430 check_partial(common, FALSE);
5431 return cc;
5432
/* Fail while STR_PTR is still before STR_END. */
5433 case OP_EOD:
5434 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5435 check_partial(common, FALSE);
5436 return cc;
5437
/* Fails when the noteol flag of jit_arguments is non-zero. Otherwise behaves
   like OP_EODN, or like OP_EOD when common->endonly is set. */
5438 case OP_DOLL:
5439 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5440 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5441 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5442
5443 if (!common->endonly)
5444 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
5445 else
5446 {
5447 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5448 check_partial(common, FALSE);
5449 }
5450 return cc;
5451
5452 case OP_DOLLM:
/* jump[1]: not at the end of the subject, so a newline must follow (tested
   after JUMPHERE(jump[1])). At the end, only the noteol flag is checked and
   jump[0] skips the newline test. */
5453 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5454 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5455 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5456 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5457 check_partial(common, FALSE);
5458 jump[0] = JUMP(SLJIT_JUMP);
5459 JUMPHERE(jump[1]);
5460
5461 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5462 {
/* Two-unit fixed newline: both units are compared without moving STR_PTR. */
5463 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5464 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5465 if (common->mode == JIT_COMPILE)
5466 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5467 else
5468 {
/* Only one unit is left: if it is the first newline unit, check_partial()
   is invoked; the match attempt backtracks either way. */
5469 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5470 /* STR_PTR = STR_END - IN_UCHARS(1) */
5471 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5472 check_partial(common, TRUE);
5473 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5474 JUMPHERE(jump[1]);
5475 }
5476
5477 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5478 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5479 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5480 }
5481 else
5482 {
5483 peek_char(common, common->nlmax);
5484 check_newlinechar(common, common->nltype, backtracks, FALSE);
5485 }
5486 JUMPHERE(jump[0]);
5487 return cc;
5488
/* Fails if STR_PTR is after jit_arguments.begin or the notbol flag is non-zero. */
5489 case OP_CIRC:
5490 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5492 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5493 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5494 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5495 return cc;
5496
5497 case OP_CIRCM:
/* At jit_arguments.begin only the notbol flag is checked (jump[0] then skips
   the rest). After begin (jump[1]) STR_PTR must be before STR_END and the
   preceding character(s) must form a newline. TMP1 keeps `begin`. */
5498 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5500 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5501 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5502 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5503 jump[0] = JUMP(SLJIT_JUMP);
5504 JUMPHERE(jump[1]);
5505
5506 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5507 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5508 {
/* Two-unit fixed newline: fail if fewer than two units precede STR_PTR,
   then compare the units at offsets -2 and -1. */
5509 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5510 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5511 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5512 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5513 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5514 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5515 }
5516 else
5517 {
/* Step back one character, read it again and test it as a newline. */
5518 skip_char_back(common);
5519 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5520 check_newlinechar(common, common->nltype, backtracks, FALSE);
5521 }
5522 JUMPHERE(jump[0]);
5523 return cc;
5524
5525 case OP_REVERSE:
/* Moves STR_PTR back by `length` characters, failing if that would go
   before jit_arguments.begin. A zero length emits no code. */
5526 length = GET(cc, 0);
5527 if (length == 0)
5528 return cc + LINK_SIZE;
5529 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5530 #ifdef SUPPORT_UTF
5531 if (common->utf)
5532 {
/* UTF mode: loop `length` times (counter in TMP2), stepping back one
   character per iteration; TMP3 holds `begin`. */
5533 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5534 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5535 label = LABEL();
5536 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5537 skip_char_back(common);
5538 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5539 JUMPTO(SLJIT_NOT_ZERO, label);
5540 }
5541 else
5542 #endif
5543 {
/* Fixed-width units: a single subtraction followed by a bounds check. */
5544 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5545 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5546 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5547 }
5548 check_start_used_ptr(common);
5549 return cc + LINK_SIZE;
5550 }
/* Reached only for an opcode that is not handled above. */
5551 SLJIT_ASSERT_STOP();
5552 return cc;
5553 }
5554
/* Generates the matching-path code for one single-character opcode: the
   character-type opcodes, OP_ANY, OP_ALLANY, OP_ANYBYTE, OP_[NOT]PROP,
   OP_ANYNL, OP_[NOT_]HSPACE, OP_[NOT_]VSPACE, OP_EXTUNI, OP_CHAR[I],
   OP_NOT[I], OP_[N]CLASS and OP_XCLASS.

   common        - compiler state.
   type          - the opcode to compile.
   cc            - points at the opcode's arguments (compile_charn_matchingpath
                   passes the address just after the opcode).
   backtracks    - jump list that receives the "no match" jumps.
   check_str_ptr - when TRUE, detect_partial_match() is invoked before the
                   character is read. The OP_CHAR/OP_CHARI fast path does not
                   consult it and emits its own comparison against STR_END.

   Returns cc advanced past the opcode's arguments. Any opcode not listed
   reaches SLJIT_ASSERT_STOP(). */
5555 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
5556 {
5557 DEFINE_COMPILER;
5558 int length;
5559 unsigned int c, oc, bit;
5560 compare_context context;
/* Forward jumps created in one place and resolved later with JUMPHERE(). */
5561 struct sljit_jump *jump[3];
5562 jump_list *end_list;
5563 #ifdef SUPPORT_UTF
5564 struct sljit_label *label;
5565 #ifdef SUPPORT_UCP
/* Scratch XCLASS descriptor used to route OP_PROP/OP_NOTPROP through
   compile_xclass_matchingpath(). */
5566 pcre_uchar propdata[5];
5567 #endif
5568 #endif /* SUPPORT_UTF */
5569
5570 switch(type)
5571 {
/* The three character-type pairs below share one pattern: a type reader is
   called (the 7-bit variant in UTF-8 mode when is_char7_bitset() accepts the
   class bitmap), TMP1 is tested against the ctype_* mask, and the jump
   condition is inverted for the negated opcode. */
5572 case OP_NOT_DIGIT:
5573 case OP_DIGIT:
5574 /* Digits are usually 0-9, so it is worth to optimize them. */
5575 if (check_str_ptr)
5576 detect_partial_match(common, backtracks);
5577 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5578 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5579 read_char7_type(common, type == OP_NOT_DIGIT);
5580 else
5581 #endif
5582 read_char8_type(common, type == OP_NOT_DIGIT);
5583 /* Flip the starting bit in the negative case. */
5584 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5585 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5586 return cc;
5587
5588 case OP_NOT_WHITESPACE:
5589 case OP_WHITESPACE:
5590 if (check_str_ptr)
5591 detect_partial_match(common, backtracks);
5592 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5593 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5594 read_char7_type(common, type == OP_NOT_WHITESPACE);
5595 else
5596 #endif
5597 read_char8_type(common, type == OP_NOT_WHITESPACE);
5598 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5599 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5600 return cc;
5601
5602 case OP_NOT_WORDCHAR:
5603 case OP_WORDCHAR:
5604 if (check_str_ptr)
5605 detect_partial_match(common, backtracks);
5606 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5607 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5608 read_char7_type(common, type == OP_NOT_WORDCHAR);
5609 else
5610 #endif
5611 read_char8_type(common, type == OP_NOT_WORDCHAR);
5612 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5613 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5614 return cc;
5615
/* Reads one character and backtracks if it is a newline. */
5616 case OP_ANY:
5617 if (check_str_ptr)
5618 detect_partial_match(common, backtracks);
5619 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5620 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5621 {
/* Two-unit fixed newline: the character is rejected only when it equals the
   first newline unit and the next unit, if there is one, equals the second.
   Jumps collected in end_list skip the second comparison. */
5622 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5623 end_list = NULL;
5624 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5625 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5626 else
5627 check_str_end(common, &end_list);
5628
5629 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5630 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5631 set_jumps(end_list, LABEL());
5632 JUMPHERE(jump[0]);
5633 }
5634 else
5635 check_newlinechar(common, common->nltype, backtracks, TRUE);
5636 return cc;
5637
/* Skips one character without testing its value. */
5638 case OP_ALLANY:
5639 if (check_str_ptr)
5640 detect_partial_match(common, backtracks);
5641 #ifdef SUPPORT_UTF
5642 if (common->utf)
5643 {
5644 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5646 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5647 #if defined COMPILE_PCRE8
/* 8-bit: for a first unit >= 0xc0, the value found in utf8_table4 is added
   to STR_PTR. */
5648 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5649 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5650 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5651 #elif defined COMPILE_PCRE16
/* 16-bit: when (unit & 0xfc00) == 0xd800, TMP1 becomes 1, is shifted left by
   one and added to STR_PTR, i.e. STR_PTR advances by 2 more. */
5652 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5653 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5654 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5655 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5656 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5658 #endif
5659 JUMPHERE(jump[0]);
5660 #endif /* COMPILE_PCRE[8|16] */
5661 return cc;
5662 }
5663 #endif
5664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5665 return cc;
5666
/* Advances STR_PTR by exactly one code unit. */
5667 case OP_ANYBYTE:
5668 if (check_str_ptr)
5669 detect_partial_match(common, backtracks);
5670 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5671 return cc;
5672
5673 #ifdef SUPPORT_UTF
5674 #ifdef SUPPORT_UCP
/* The property test is wrapped in a temporary one-item XCLASS descriptor and
   compiled by compile_xclass_matchingpath(). */
5675 case OP_NOTPROP:
5676 case OP_PROP:
5677 propdata[0] = XCL_HASPROP;
5678 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5679 propdata[2] = cc[0];
5680 propdata[3] = cc[1];
5681 propdata[4] = XCL_END;
5682 if (check_str_ptr)
5683 detect_partial_match(common, backtracks);
5684 compile_xclass_matchingpath(common, propdata, backtracks);
5685 return cc + 2;
5686 #endif
5687 #endif
5688
/* Reads one character. For CR, a directly following NL (if any) is consumed
   as well; any other character goes through check_newlinechar() with the
   bsr_nltype setting. */
5689 case OP_ANYNL:
5690 if (check_str_ptr)
5691 detect_partial_match(common, backtracks);
5692 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5693 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5694 /* We don't need to handle soft partial matching case. */
5695 end_list = NULL;
5696 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5697 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5698 else
5699 check_str_end(common, &end_list);
5700 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5701 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5703 jump[2] = JUMP(SLJIT_JUMP);
5704 JUMPHERE(jump[0]);
5705 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5706 set_jumps(end_list, LABEL());
5707 JUMPHERE(jump[1]);
5708 JUMPHERE(jump[2]);
5709 return cc;
5710
/* Reads one character and calls the shared hspace routine; the jump
   condition is inverted for the negated opcode. */
5711 case OP_NOT_HSPACE:
5712 case OP_HSPACE:
5713 if (check_str_ptr)
5714 detect_partial_match(common, backtracks);
5715 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5716 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5717 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5718 return cc;
5719
/* Same scheme as above, using the shared vspace routine. */
5720 case OP_NOT_VSPACE:
5721 case OP_VSPACE:
5722 if (check_str_ptr)
5723 detect_partial_match(common, backtracks);
5724 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5725 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5726 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5727 return cc;
5728
5729 #ifdef SUPPORT_UCP
/* Reads the first character and loads its gbprop byte from ucd_records into
   STACK_TOP (the real STACK_TOP value is parked in LOCALS0 meanwhile). The
   loop then reads further characters for as long as the ucp_gbtable entry of
   the previous gbprop has the bit of the next character's gbprop set. */
5730 case OP_EXTUNI:
5731 if (check_str_ptr)
5732 detect_partial_match(common, backtracks);
5733 read_char(common);
5734 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5736 /* Optimize register allocation: use a real register. */
5737 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5738 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5739
5740 label = LABEL();
5741 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers where the next character starts so that the read can be
   undone when the loop ends. */
5742 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5743 read_char(common);
5744 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5745 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5746 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5747
/* TMP1 = ucp_gbtable entry selected by the previous gbprop (index scaled by
   4, loaded with SLJIT_MOV_UI); STACK_TOP = current gbprop; the tested bit
   is 1 << current gbprop. */
5748 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5749 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5750 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5751 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5752 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5753 JUMPTO(SLJIT_NOT_ZERO, label);
5754
/* The last character read is not part of the sequence: restore STR_PTR. */
5755 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5756 JUMPHERE(jump[0]);
5757 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5758
5759 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5760 {
5761 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5762 /* Since we successfully read a char above, partial matching must occur. */
5763 check_partial(common, TRUE);
5764 JUMPHERE(jump[0]);
5765 }
5766 return cc;
5767 #endif
5768
5769 case OP_CHAR:
5770 case OP_CHARI:
/* length = number of code units of the pattern character at cc. */
5771 length = 1;
5772 #ifdef SUPPORT_UTF
5773 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5774 #endif
/* Fast path, JIT_COMPILE mode only: advance STR_PTR over the whole
   character, check it against STR_END and let byte_sequence_compare() emit
   the comparison. It is used for a caseful character, a character without
   another case, or one for which char_get_othercase_bit() is non-zero. */
5775 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5776 {
5777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5778 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5779
5780 context.length = IN_UCHARS(length);
5781 context.sourcereg = -1;
5782 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5783 context.ucharptr = 0;
5784 #endif
5785 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5786 }
5787
/* General path: read the subject character into TMP1 and compare values. */
5788 if (check_str_ptr)
5789 detect_partial_match(common, backtracks);
5790 #ifdef SUPPORT_UTF
5791 if (common->utf)
5792 {
5793 GETCHAR(c, cc);
5794 }
5795 else
5796 #endif
5797 c = *cc;
5798
5799 if (type == OP_CHAR || !char_has_othercase(common, cc))
5800 {
5801 read_char_range(common, c, c, FALSE);
5802 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5803 return cc + length;
5804 }
5805 oc = char_othercase(common, c);
5806 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5807 bit = c ^ oc;
/* When the two cases differ in a single bit, that bit is forced on in TMP1
   and one comparison covers both cases. */
5808 if (is_powerof2(bit))
5809 {
5810 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5811 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5812 return cc + length;
5813 }
5814 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5815 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5816 JUMPHERE(jump[0]);
5817 return cc + length;
5818
5819 case OP_NOT:
5820 case OP_NOTI:
5821 if (check_str_ptr)
5822 detect_partial_match(common, backtracks);
5823 length = 1;
5824 #ifdef SUPPORT_UTF
5825 if (common->utf)
5826 {
5827 #ifdef COMPILE_PCRE8
5828 c = *cc;
5829 if (c < 128)
5830 {
/* UTF-8 with a pattern character below 128: only the first subject unit is
   compared; the rest of the subject character is then skipped. */
5831 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5832 if (type == OP_NOT || !char_has_othercase(common, cc))
5833 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5834 else
5835 {
5836 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5837 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5838 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5839 }
5840 /* Skip the variable-length character. */
5841 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5842 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5843 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5844 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5845 JUMPHERE(jump[0]);
5846 return cc + 1;
5847 }
5848 else
5849 #endif /* COMPILE_PCRE8 */
5850 {
5851 GETCHARLEN(c, cc, length);
5852 }
5853 }
5854 else
5855 #endif /* SUPPORT_UTF */
5856 c = *cc;
5857
/* General path: read the subject character into TMP1 and backtrack when it
   equals c (or, for the caseless opcode, its other case oc). */
5858 if (type == OP_NOT || !char_has_othercase(common, cc))
5859 {
5860 read_char_range(common, c, c, TRUE);
5861 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5862 }
5863 else
5864 {
5865 oc = char_othercase(common, c);
5866 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5867 bit = c ^ oc;
5868 if (is_powerof2(bit))
5869 {
5870 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5871 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5872 }
5873 else
5874 {
5875 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5876 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5877 }
5878 }
5879 return cc + length;
5880
5881 case OP_CLASS:
5882 case OP_NCLASS:
5883 if (check_str_ptr)
5884 detect_partial_match(common, backtracks);
5885
/* The 32-byte class bitmap starts at cc. In UTF-8 mode `bit` is reused as
   the largest character value looked up in the bitmap (127 or 255). */
5886 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5887 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
5888 read_char_range(common, 0, bit, type == OP_NCLASS);
5889 #else
5890 read_char_range(common, 0, 255, type == OP_NCLASS);
5891 #endif
5892
/* Nothing more to emit when check_class_ranges() handled the class. */
5893 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
5894 return cc + 32 / sizeof(pcre_uchar);
5895
/* Characters above the bitmap range fail for OP_CLASS and skip the bitmap
   test (jump[0]) for OP_NCLASS. */
5896 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5897 jump[0] = NULL;
5898 if (common->utf)
5899 {
5900 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5901 if (type == OP_CLASS)
5902 {
5903 add_jump(compiler, backtracks, jump[0]);
5904 jump[0] = NULL;
5905 }
5906 }
5907 #elif !defined COMPILE_PCRE8
5908 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5909 if (type == OP_CLASS)
5910 {
5911 add_jump(compiler, backtracks, jump[0]);
5912 jump[0] = NULL;
5913 }
5914 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5915
/* Bitmap lookup: TMP2 = 1 << (c & 7), TMP1 = byte at cc + (c >> 3);
   backtrack when the bit is clear. */
5916 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5917 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5918 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5919 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5920 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5921 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5922
5923 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5924 if (jump[0] != NULL)
5925 JUMPHERE(jump[0]);
5926 #endif
5927 return cc + 32 / sizeof(pcre_uchar);
5928
5929 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The class data after the LINK_SIZE length field is compiled by
   compile_xclass_matchingpath(); GET(cc, 0) supplies the opcode length used
   for the return value. */
5930 case OP_XCLASS:
5931 if (check_str_ptr)
5932 detect_partial_match(common, backtracks);
5933 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5934 return cc + GET(cc, 0) - 1;
5935 #endif
5936 }
/* Reached only for an opcode that is not handled above. */
5937 SLJIT_ASSERT_STOP();
5938 return cc;
5939 }
5940
5941 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5942 {
5943 /* This function consumes at least one input character. */
5944 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5945 DEFINE_COMPILER;
5946 pcre_uchar *ccbegin = cc;
5947 compare_context context;
5948 int size;
5949
5950 context.length = 0;
5951 do
5952 {
5953 if (cc >= ccend)
5954 break;
5955
5956 if (*cc == OP_CHAR)
5957 {
5958 size = 1;
5959 #ifdef SUPPORT_UTF
5960 if (common->utf && HAS_EXTRALEN(cc[1]))
5961 size += GET_EXTRALEN(cc[1]);
5962 #endif
5963 }
5964 else if (*cc == OP_CHARI)
5965 {
5966 size = 1;
5967 #ifdef SUPPORT_UTF
5968 if (common->utf)
5969 {
5970 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5971 size = 0;
5972 else if (HAS_EXTRALEN(cc[1]))
5973 size += GET_EXTRALEN(cc[1]);
5974 }
5975 else
5976 #endif
5977 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5978 size = 0;
5979 }
5980 else
5981 size = 0;
5982
5983 cc += 1 + size;
5984 context.length += IN_UCHARS(size);
5985 }
5986 while (size > 0 && context.length <= 128);
5987
5988 cc = ccbegin;
5989 if (context.length > 0)
5990 {
5991 /* We have a fixed-length byte sequence. */
5992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5993 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5994
5995 context.sourcereg = -1;
5996 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5997 context.ucharptr = 0;
5998 #endif
5999 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6000 return cc;
6001 }
6002
6003 /* A non-fixed length character will be checked if length == 0. */
6004 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6005 }
6006
6007 /* Forward declarations of static functions that are defined elsewhere in this file. */
6008 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6009 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6010
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
   zero-fills it, stores `ccstart` in its cc field and pushes it on the
   parent's list (the old parent->top becomes ->prev). If the compiler is in
   an error state after the allocation, the enclosing function executes
   `return error;`. Relies on `compiler`, `backtrack` and `parent` being
   declared at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
  } while (0)
6023
/* Variant of the backtrack-record push for functions returning void:
   allocates `size` bytes with sljit_alloc_memory(), zero-fills the record,
   stores `ccstart` in its cc field and pushes it on the parent's list (the
   old parent->top becomes ->prev). If the compiler is in an error state after
   the allocation, the enclosing function executes a plain `return;`. Relies
   on `compiler`, `backtrack` and `parent` being declared at the point of
   use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
  } while (0)
6036
/* Casts the `backtrack` variable of the enclosing function to a pointer to
   the given backtrack structure type. */
6037 #define BACKTRACK_AS(type) ((type *)backtrack)
6038
6039 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6040 {
6041 /* The OVECTOR offset goes to TMP2. */
6042 DEFINE_COMPILER;
6043 int count = GET2(cc, 1 + IMM2_SIZE);
6044 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6045 unsigned int offset;
6046 jump_list *found = NULL;
6047
6048 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6049
6050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6051
6052 count--;
6053 while (count-- > 0)
6054 {
6055 offset = GET2(slot, 0) << 1;
6056 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6057 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6058 slot += common->name_entry_size;
6059 }
6060
6061 offset = GET2(slot, 0) << 1;
6062 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6063 if (backtracks != NULL && !common->jscript_compat)
6064 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6065
6066 set_jumps(found, LABEL());
6067 }
6068
/* Emits the code that matches the text of a capture group (a back reference)
against the subject at STR_PTR.

  cc          points to the reference opcode. For OP_REF / OP_REFI the group
              number is read from the pattern; for any other opcode TMP2 must
              already hold the address of the group's OVECTOR pair when the
              generated code runs.
  backtracks  jump list that receives every failure jump.
  withchecks  when TRUE the generated code also tests for a zero-length
              capture and, for OP_REF / OP_REFI outside jscript_compat mode,
              for a start pointer equal to OVECTOR(1).
  emptyfail   only used when withchecks produced the zero-length jump: TRUE
              adds that jump to backtracks, FALSE lets it land after the
              generated comparison code.

The generated code clobbers TMP1 and TMP2 and advances STR_PTR on success. */
6069 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6070 {
6071 DEFINE_COMPILER;
6072 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6073 int offset = 0;
/* Zero-length jump; stays NULL unless withchecks is set. */
6074 struct sljit_jump *jump = NULL;
6075 struct sljit_jump *partial;
6076 struct sljit_jump *nopartial;
6077
/* TMP1 = start pointer of the captured text. */
6078 if (ref)
6079 {
6080 offset = GET2(cc, 1) << 1;
6081 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6082 /* OVECTOR(1) contains the "string begin - 1" constant. */
6083 if (withchecks && !common->jscript_compat)
6084 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6085 }
6086 else
6087 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6088
6089 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* UTF caseless comparison is delegated to the do_utf_caselesscmp helper. */
6090 if (common->utf && *cc == OP_REFI)
6091 {
6092 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
/* TMP2 = end pointer of the captured text. */
6093 if (ref)
6094 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6095 else
6096 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6097
/* start == end: zero-length capture, resolved at the end of this function. */
6098 if (withchecks)
6099 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6100
6101 /* Needed to save important temporary registers. */
6102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* R1 (asserted above to be STACK_TOP) is reused for ARGUMENTS, and the
current STR_PTR is passed through arguments->uchar_ptr.
NOTE(review): presumably the helper receives (R0, R1, R2) = (start,
arguments, end) -- confirm against do_utf_caselesscmp. */
6103 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6105 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6106 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return values 0 and 1 are failures; any other value is the new STR_PTR.
In the partial matching modes a result of 1 additionally runs check_partial
before backtracking. NOTE(review): 1 presumably means that the subject ended
during the comparison -- confirm against do_utf_caselesscmp. */
6107 if (common->mode == JIT_COMPILE)
6108 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6109 else
6110 {
6111 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6112 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6113 check_partial(common, FALSE);
6114 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6115 JUMPHERE(nopartial);
6116 }
6117 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6118 }
6119 else
6120 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6121 {
/* TMP2 = end - start, the length of the captured text; the flags of the
subtraction record whether it is zero. */
6122 if (ref)
6123 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6124 else
6125 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6126
/* Zero-length capture, resolved at the end of this function. */
6127 if (withchecks)
6128 jump = JUMP(SLJIT_ZERO);
6129
/* Advance STR_PTR by the length first; `partial` is taken when that moves it
past STR_END. In JIT_COMPILE mode this is a plain failure. */
6130 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6131 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6132 if (common->mode == JIT_COMPILE)
6133 add_jump(compiler, backtracks, partial);
6134
/* Call the shared compare routine (caseful for OP_REF, caseless otherwise)
and fail when TMP2 is non-zero afterwards. */
6135 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6136 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6137
6138 if (common->mode != JIT_COMPILE)
6139 {
/* Partial matching modes: a full match skips the code below. */
6140 nopartial = JUMP(SLJIT_JUMP);
6141 JUMPHERE(partial);
6142 /* TMP2 -= STR_PTR - STR_END, which leaves in TMP2 the distance from the
STR_PTR value before the advance to STR_END. */
6143 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6144 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing is left to compare: skip the compare routine. */
6145 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6146 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6147 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6148 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6149 JUMPHERE(partial);
/* The available part matched (or was empty): run check_partial, then
backtrack. */
6150 check_partial(common, FALSE);
6151 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6152 JUMPHERE(nopartial);
6153 }
6154 }
6155
/* Zero-length capture: either a failure or a successful empty match that
continues after the code generated above. */
6156 if (jump != NULL)
6157 {
6158 if (emptyfail)
6159 add_jump(compiler, backtracks, jump);
6160 else
6161 JUMPHERE(jump);
6162 }
6163 }
6164
/* Emits the matching path of a back reference that is followed by a repeat
quantifier (OP_CRSTAR ... OP_CRMINRANGE).

  cc      points to the reference opcode
  parent  backtrack record that receives the new ref_iterator_backtrack

Returns the pattern position after the quantifier, or NULL when the backtrack
record cannot be allocated. */
6165 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6166 {
6167 DEFINE_COMPILER;
6168 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6169 backtrack_common *backtrack;
6170 pcre_uchar type;
6171 int offset = 0;
6172 struct sljit_label *label;
6173 struct sljit_jump *zerolength;
6174 struct sljit_jump *jump = NULL;
6175 pcre_uchar *ccbegin = cc;
6176 int min = 0, max = 0;
6177 BOOL minimize;
6178
6179 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
6180
/* Opcodes other than OP_REF / OP_REFI are one IMM2 operand longer; skipping
it here makes the quantifier offsets below the same for both kinds. */
6181 if (ref)
6182 offset = GET2(cc, 1) << 1;
6183 else
6184 cc += IMM2_SIZE;
6185 type = cc[1 + IMM2_SIZE];
6186
/* The low bit of the quantifier opcode selects the minimizing variant. */
6187 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6188 minimize = (type & 0x1) != 0;
/* Decode the repeat limits (max == 0 is used for "no upper limit") and move
cc past the quantifier. */
6189 switch(type)
6190 {
6191 case OP_CRSTAR:
6192 case OP_CRMINSTAR:
6193 min = 0;
6194 max = 0;
6195 cc += 1 + IMM2_SIZE + 1;
6196 break;
6197 case OP_CRPLUS:
6198 case OP_CRMINPLUS:
6199 min = 1;
6200 max = 0;
6201 cc += 1 + IMM2_SIZE + 1;
6202 break;
6203 case OP_CRQUERY:
6204 case OP_CRMINQUERY:
6205 min = 0;
6206 max = 1;
6207 cc += 1 + IMM2_SIZE + 1;
6208 break;
6209 case OP_CRRANGE:
6210 case OP_CRMINRANGE:
6211 min = GET2(cc, 1 + IMM2_SIZE + 1);
6212 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6213 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6214 break;
6215 default:
6216 SLJIT_ASSERT_STOP();
6217 break;
6218 }
6219
/* Greedy repeat. */
6220 if (!minimize)
6221 {
6222 if (min == 0)
6223 {
6224 allocate_stack(common, 2);
6225 if (ref)
6226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6229 /* Temporary release of STR_PTR. */
6230 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6231 /* Handles both invalid and empty cases. Since the minimum repeat
6232 is zero, the invalid case is basically the same as an empty case. */
6233 if (ref)
6234 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6235 else
6236 {
/* compile_dnref_search leaves the OVECTOR pair address in TMP2; it is kept
in POSSESSIVE1 so that the loop below can reload it. */
6237 compile_dnref_search(common, ccbegin, NULL);
6238 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6239 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6240 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6241 }
6242 /* Restore if not zero length. */
6243 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6244 }
6245 else
6246 {
/* min >= 1: one stack slot, initialised to 0. */
6247 allocate_stack(common, 1);
6248 if (ref)
6249 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6250 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6251 if (ref)
6252 {
/* A start pointer equal to OVECTOR(1) is a failure here; a zero-length
capture skips the loop. */
6253 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6254 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6255 }
6256 else
6257 {
6258 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6261 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6262 }
6263 }
6264
/* POSSESSIVE0 counts the completed iterations when a limit has to be
checked. */
6265 if (min > 1 || max > 1)
6266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6267
/* Loop head: one match of the referenced text per iteration. For opcodes
other than OP_REF / OP_REFI compile_ref_matchingpath reads the OVECTOR pair
address from TMP2, so it is reloaded first. */
6268 label = LABEL();
6269 if (!ref)
6270 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6271 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6272
6273 if (min > 1 || max > 1)
6274 {
6275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6276 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
/* Below the minimum: iterate again without saving a position. */
6278 if (min > 1)
6279 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6280 if (max > 1)
6281 {
/* Stop at the maximum; otherwise push the current STR_PTR and iterate. */
6282 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6283 allocate_stack(common, 1);
6284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6285 JUMPTO(SLJIT_JUMP, label);
6286 JUMPHERE(jump);
6287 }
6288 }
6289
/* No upper limit: push STR_PTR and loop unconditionally; the only exits are
the failure jumps added to topbacktracks. */
6290 if (max == 0)
6291 {
6292 /* Includes min > 1 case as well. */
6293 allocate_stack(common, 1);
6294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6295 JUMPTO(SLJIT_JUMP, label);
6296 }
6297
6298 JUMPHERE(zerolength);
6299 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6300
6301 count_match(common);
6302 return cc;
6303 }
6304
/* Minimizing repeat. STACK(0) and STACK(1) start as 0 (STACK(1) is left
unset for OP_CRMINSTAR); STACK(1) is used as the iteration counter below.
Opcodes other than OP_REF / OP_REFI get a third slot, STACK(2), for the
OVECTOR pair address. */
6305 allocate_stack(common, ref ? 2 : 3);
6306 if (ref)
6307 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6309 if (type != OP_CRMINSTAR)
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6311
6312 if (min == 0)
6313 {
6314 /* Handles both invalid and empty cases. Since the minimum repeat
6315 is zero, the invalid case is basically the same as an empty case. */
6316 if (ref)
6317 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6318 else
6319 {
6320 compile_dnref_search(common, ccbegin, NULL);
6321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6322 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6323 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6324 }
6325 /* Length is non-zero, we can match real repeats. */
6326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass jumps over the match code emitted below, so
zero iterations are tried first. */
6327 jump = JUMP(SLJIT_JUMP);
6328 }
6329 else
6330 {
6331 if (ref)
6332 {
6333 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6334 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6335 }
6336 else
6337 {
6338 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6341 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6342 }
6343 }
6344
/* Start of one iteration, recorded in the backtrack record.
NOTE(review): presumably the backtracking path re-enters here to try one
more repeat -- confirm against the ref iterator backtracking code. */
6345 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
/* Fail once the counter in STACK(1) has reached the maximum. */
6346 if (max > 0)
6347 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6348
6349 if (!ref)
6350 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6351 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
/* Remember the position reached by this iteration. */
6352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6353
6354 if (min > 1)
6355 {
/* Count the iteration and keep looping until the minimum is reached. */
6356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6357 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6359 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
6360 }
6361 else if (max > 0)
6362 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6363
6364 if (jump != NULL)
6365 JUMPHERE(jump);
6366 JUMPHERE(zerolength);
6367
6368 count_match(common);
6369 return cc;
6370 }
6371
6372 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6373 {
6374 DEFINE_COMPILER;
6375 backtrack_common *backtrack;
6376 recurse_entry *entry = common->entries;
6377 recurse_entry *prev = NULL;
6378 sljit_sw start = GET(cc, 1);
6379 pcre_uchar *start_cc;
6380 BOOL needs_control_head;
6381
6382 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6383
6384 /* Inlining simple patterns. */
6385 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6386 {
6387 start_cc = common->start + start;
6388 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6389 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6390 return cc + 1 + LINK_SIZE;
6391 }
6392
6393 while (entry != NULL)
6394 {
6395 if (entry->start == start)
6396 break;
6397 prev = entry;
6398 entry = entry->next;
6399 }
6400
6401 if (entry == NULL)
6402 {
6403 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6404 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6405 return NULL;
6406 entry->next = NULL;
6407 entry->entry = NULL;
6408 entry->calls = NULL;
6409 entry->start = start;
6410
6411 if (prev != NULL)
6412 prev->next = entry;
6413 else
6414 common->entries = entry;
6415 }
6416
6417 if (common->has_set_som && common->mark_ptr != 0)
6418 {
6419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6420 allocate_stack(common, 2);
6421 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6424 }
6425 else if (common->has_set_som || common->mark_ptr != 0)
6426 {
6427 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6428 allocate_stack(common, 1);
6429 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6430 }
6431
6432 if (entry->entry == NULL)
6433 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6434 else
6435 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6436 /* Leave if the match is failed. */
6437 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6438 return cc + 1 + LINK_SIZE;
6439 }
6440
6441 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6442 {
6443 const pcre_uchar *begin = arguments->begin;
6444 int *offset_vector = arguments->offsets;
6445 int offset_count = arguments->offset_count;
6446 int i;
6447
6448 if (PUBL(callout) == NULL)
6449 return 0;
6450
6451 callout_block->version = 2;
6452 callout_block->callout_data = arguments->callout_data;
6453
6454 /* Offsets in subject. */
6455 callout_block->subject_length = arguments->end - arguments->begin;
6456 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6457 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6458 #if defined COMPILE_PCRE8
6459 callout_block->subject = (PCRE_SPTR)begin;
6460 #elif defined COMPILE_PCRE16
6461 callout_block->subject = (PCRE_SPTR16)begin;
6462 #elif defined COMPILE_PCRE32
6463 callout_block->subject = (PCRE_SPTR32)begin;
6464 #endif
6465
6466 /* Convert and copy the JIT offset vector to the offset_vector array. */
6467 callout_block->capture_top = 0;
6468 callout_block->offset_vector = offset_vector;
6469 for (i = 2; i < offset_count; i += 2)
6470 {
6471 offset_vector[i] = jit_ovector[i] - begin;
6472 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6473 if (jit_ovector[i] >= begin)
6474 callout_block->capture_top = i;
6475 }
6476
6477 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6478 if (offset_count > 0)
6479 offset_vector[0] = -1;
6480 if (offset_count > 1)
6481 offset_vector[1] = -1;
6482 return (*PUBL(callout))(callout_block);
6483 }
6484
6485 /* Aligning to 8 byte. */
6486 #define CALLOUT_ARG_SIZE \
6487 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6488
6489 #define CALLOUT_ARG_OFFSET(arg) \
6490 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6491
6492 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6493 {
6494 DEFINE_COMPILER;
6495 backtrack_common *backtrack;
6496
6497 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6498
6499 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6500
6501 SLJIT_ASSERT(common->capture_last_ptr != 0);
6502 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6503 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6504 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6505 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6506
6507 /* These pointer sized fields temporarly stores internal variables. */
6508 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6511
6512 if (common->mark_ptr != 0)
6513 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6514 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6515 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6517
6518 /* Needed to save important temporary registers. */
6519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6520 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6521 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6522 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6523 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6524 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6525 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6526
6527 /* Check return value. */
6528 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6529 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6530 if (common->forced_quit_label == NULL)
6531 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6532 else
6533 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6534 return cc + 2 + 2 * LINK_SIZE;
6535 }
6536
6537 #undef CALLOUT_ARG_SIZE
6538 #undef CALLOUT_ARG_OFFSET
6539
6540 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
6541 {
6542 while (TRUE)
6543 {
6544 switch (*cc)
6545 {
6546 case OP_NOT_WORD_BOUNDARY:
6547 case OP_WORD_BOUNDARY:
6548 case OP_CIRC:
6549 case OP_CIRCM:
6550 case OP_DOLL:
6551 case OP_DOLLM:
6552 case OP_CALLOUT:
6553 case OP_ALT:
6554 cc += PRIV(OP_lengths)[*cc];
6555 break;
6556
6557 case OP_KET:
6558 return FALSE;
6559
6560 default:
6561 return TRUE;
6562 }
6563 }
6564 }
6565
6566 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6567 {
6568 DEFINE_COMPILER;
6569 int framesize;
6570 int extrasize;
6571 BOOL needs_control_head;
6572 int private_data_ptr;
6573 backtrack_common altbacktrack;
6574 pcre_uchar *ccbegin;
6575 pcre_uchar opcode;
6576 pcre_uchar bra = OP_BRA;
6577 jump_list *tmp = NULL;
6578 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6579 jump_list **found;
6580 /* Saving previous accept variables. */
6581 BOOL save_local_exit = common->local_exit;
6582 BOOL save_positive_assert = common->positive_assert;
6583 then_trap_backtrack *save_then_trap = common->then_trap;
6584 struct sljit_label *save_quit_label = common->quit_label;
6585 struct sljit_label *save_accept_label = common->accept_label;
6586 jump_list *save_quit = common->quit;
6587 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6588 jump_list *save_accept = common->accept;
6589 struct sljit_jump *jump;
6590 struct sljit_jump *brajump = NULL;
6591
6592 /* Assert captures then. */
6593 common->then_trap = NULL;
6594
6595 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6596 {
6597 SLJIT_ASSERT(!conditional);
6598 bra = *cc;
6599 cc++;
6600 }
6601 private_data_ptr = PRIVATE_DATA(cc);
6602 SLJIT_ASSERT(private_data_ptr != 0);
6603 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6604 backtrack->framesize = framesize;
6605 backtrack->private_data_ptr = private_data_ptr;
6606 opcode = *cc;
6607 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6608 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6609 ccbegin = cc;
6610 cc += GET(cc, 1);
6611
6612 if (bra == OP_BRAMINZERO)
6613 {
6614 /* This is a braminzero backtrack path. */
6615 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6616 free_stack(common, 1);
6617 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6618 }
6619
6620 if (framesize < 0)
6621 {
6622 extrasize = 1;
6623 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
6624 extrasize = 0;
6625
6626 if (needs_control_head)
6627 extrasize++;
6628
6629 if (framesize == no_frame)
6630 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6631
6632 if (extrasize > 0)
6633 allocate_stack(common, extrasize);
6634
6635 if (needs_control_head)
6636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6637
6638 if (extrasize > 0)
6639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6640
6641 if (needs_control_head)
6642 {
6643 SLJIT_ASSERT(extrasize == 2);
6644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6645 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6646 }
6647 }
6648 else
6649 {
6650 extrasize = needs_control_head ? 3 : 2;
6651 allocate_stack(common, framesize + extrasize);
6652
6653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6654 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6655 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6656 if (needs_control_head)
6657 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6658 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6659 if (needs_control_head)
6660 {
6661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6664 }
6665 else
6666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6667
6668 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6669 }
6670
6671 memset(&altbacktrack, 0, sizeof(backtrack_common));
6672 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6673 {
6674 /* Negative assert is stronger than positive assert. */
6675 common->local_exit = TRUE;
6676 common->quit_label = NULL;
6677 common->quit = NULL;
6678 common->positive_assert = FALSE;
6679 }
6680 else
6681 common->positive_assert = TRUE;
6682 common->positive_assert_quit = NULL;
6683
6684 while (1)
6685 {
6686 common->accept_label = NULL;
6687 common->accept = NULL;
6688 altbacktrack.top = NULL;
6689 altbacktrack.topbacktracks = NULL;
6690
6691 if (*ccbegin == OP_ALT && extrasize > 0)
6692 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6693
6694 altbacktrack.cc = ccbegin;
6695 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6696 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6697 {
6698 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6699 {
6700 common->local_exit = save_local_exit;
6701 common->quit_label = save_quit_label;
6702 common->quit = save_quit;
6703 }
6704 common->positive_assert = save_positive_assert;
6705 common->then_trap = save_then_trap;
6706 common->accept_label = save_accept_label;
6707 common->positive_assert_quit = save_positive_assert_quit;
6708 common->accept = save_accept;
6709 return NULL;
6710 }
6711 common->accept_label = LABEL();
6712 if (common->accept != NULL)
6713 set_jumps(common->accept, common->accept_label);
6714
6715 /* Reset stack. */
6716 if (framesize < 0)
6717 {
6718 if (framesize == no_frame)
6719 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6720 else if (extrasize > 0)
6721 free_stack(common, extrasize);
6722
6723 if (needs_control_head)
6724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6725 }
6726 else
6727 {
6728 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6729 {
6730 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6731 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6732 if (needs_control_head)
6733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6734 }
6735 else
6736 {
6737 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6738 if (needs_control_head)
6739 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6740 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6741 }
6742 }
6743
6744 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6745 {
6746 /* We know that STR_PTR was stored on the top of the stack. */
6747 if (conditional)
6748 {
6749 if (extrasize > 0)
6750 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6751 }
6752 else if (bra == OP_BRAZERO)
6753 {
6754 if (framesize < 0)
6755 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6756 else
6757 {
6758 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6759 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6761 }
6762 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6764 }
6765 else if (framesize >= 0)
6766 {
6767 /* For OP_BRA and OP_BRAMINZERO. */
6768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6769 }
6770 }
6771 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6772
6773 compile_backtrackingpath(common, altbacktrack.top);
6774 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6775 {
6776 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6777 {
6778 common->local_exit = save_local_exit;
6779 common->quit_label = save_quit_label;
6780 common->quit = save_quit;
6781 }
6782 common->positive_assert = save_positive_assert;
6783 common->then_trap = save_then_trap;
6784 common->accept_label = save_accept_label;
6785 common->positive_assert_quit = save_positive_assert_quit;
6786 common->accept = save_accept;
6787