/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1700 - (show annotations)
Sun May 7 07:28:03 2017 UTC (2 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 358854 byte(s)
JIT compiler update.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 sljit_u32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 sljit_u8 notbol;
175 sljit_u8 noteol;
176 sljit_u8 notempty;
177 sljit_u8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 sljit_u32 top_bracket;
187 sljit_u32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* Jump recorded by this node. */
  struct jump_list *next;    /* Following node of the list. */
} jump_list;
194
/* Node of a singly linked list of stubs. Each node pairs a jump
("start") with a label ("quit"). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;    /* Following node of the list. */
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values used in place of a frame size. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Tags that distinguish the kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct char_iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 union {
285 jump_list *backtracks;
286 struct {
287 unsigned int othercasebit;
288 pcre_uchar chr;
289 BOOL enabled;
290 } charpos;
291 } u;
292 } char_iterator_backtrack;
293
294 typedef struct ref_iterator_backtrack {
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 } ref_iterator_backtrack;
299
300 typedef struct recurse_entry {
301 struct recurse_entry *next;
302 /* Contains the function entry. */
303 struct sljit_label *entry;
304 /* Collects the calls until the function is not created. */
305 jump_list *calls;
306 /* Points to the starting opcode. */
307 sljit_sw start;
308 } recurse_entry;
309
310 typedef struct recurse_backtrack {
311 backtrack_common common;
312 BOOL inlined_pattern;
313 } recurse_backtrack;
314
315 #define OP_THEN_TRAP OP_TABLE_LENGTH
316
317 typedef struct then_trap_backtrack {
318 backtrack_common common;
319 /* If then_trap is not NULL, this structure contains the real
320 then_trap for the backtracking path. */
321 struct then_trap_backtrack *then_trap;
322 /* Points to the starting opcode. */
323 sljit_sw start;
324 /* Exit point for the then opcodes of this alternative. */
325 jump_list *quit;
326 /* Frame size of the current alternative. */
327 int framesize;
328 } then_trap_backtrack;
329
/* Size limit for ranges (NOTE(review): its users are outside this
part of the file -- confirm the exact meaning there). */
#define MAX_RANGE_SIZE 4
331
332 typedef struct compiler_common {
333 /* The sljit ceneric compiler. */
334 struct sljit_compiler *compiler;
335 /* First byte code. */
336 pcre_uchar *start;
337 /* Maps private data offset to each opcode. */
338 sljit_s32 *private_data_ptrs;
339 /* Chain list of read-only data ptrs. */
340 void *read_only_data_head;
341 /* Tells whether the capturing bracket is optimized. */
342 sljit_u8 *optimized_cbracket;
343 /* Tells whether the starting offset is a target of then. */
344 sljit_u8 *then_offsets;
345 /* Current position where a THEN must jump. */
346 then_trap_backtrack *then_trap;
347 /* Starting offset of private data for capturing brackets. */
348 sljit_s32 cbra_ptr;
349 /* Output vector starting point. Must be divisible by 2. */
350 sljit_s32 ovector_start;
351 /* Points to the starting character of the current match. */
352 sljit_s32 start_ptr;
353 /* Last known position of the requested byte. */
354 sljit_s32 req_char_ptr;
355 /* Head of the last recursion. */
356 sljit_s32 recursive_head_ptr;
357 /* First inspected character for partial matching.
358 (Needed for avoiding zero length partial matches.) */
359 sljit_s32 start_used_ptr;
360 /* Starting pointer for partial soft matches. */
361 sljit_s32 hit_start;
362 /* Pointer of the match end position. */
363 sljit_s32 match_end_ptr;
364 /* Points to the marked string. */
365 sljit_s32 mark_ptr;
366 /* Recursive control verb management chain. */
367 sljit_s32 control_head_ptr;
368 /* Points to the last matched capture block index. */
369 sljit_s32 capture_last_ptr;
370 /* Fast forward skipping byte code pointer. */
371 pcre_uchar *fast_forward_bc_ptr;
372 /* Locals used by fast fail optimization. */
373 sljit_s32 fast_fail_start_ptr;
374 sljit_s32 fast_fail_end_ptr;
375
376 /* Flipped and lower case tables. */
377 const sljit_u8 *fcc;
378 sljit_sw lcc;
379 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380 int mode;
381 /* TRUE, when minlength is greater than 0. */
382 BOOL might_be_empty;
383 /* \K is found in the pattern. */
384 BOOL has_set_som;
385 /* (*SKIP:arg) is found in the pattern. */
386 BOOL has_skip_arg;
387 /* (*THEN) is found in the pattern. */
388 BOOL has_then;
389 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390 BOOL has_skip_in_assert_back;
391 /* Currently in recurse or negative assert. */
392 BOOL local_exit;
393 /* Currently in a positive assert. */
394 BOOL positive_assert;
395 /* Newline control. */
396 int nltype;
397 sljit_u32 nlmax;
398 sljit_u32 nlmin;
399 int newline;
400 int bsr_nltype;
401 sljit_u32 bsr_nlmax;
402 sljit_u32 bsr_nlmin;
403 /* Dollar endonly. */
404 int endonly;
405 /* Tables. */
406 sljit_sw ctypes;
407 /* Named capturing brackets. */
408 pcre_uchar *name_table;
409 sljit_sw name_count;
410 sljit_sw name_entry_size;
411
412 /* Labels and jump lists. */
413 struct sljit_label *partialmatchlabel;
414 struct sljit_label *quit_label;
415 struct sljit_label *forced_quit_label;
416 struct sljit_label *accept_label;
417 struct sljit_label *ff_newline_shortcut;
418 stub_list *stubs;
419 label_addr_list *label_addrs;
420 recurse_entry *entries;
421 recurse_entry *currententry;
422 jump_list *partialmatch;
423 jump_list *quit;
424 jump_list *positive_assert_quit;
425 jump_list *forced_quit;
426 jump_list *accept;
427 jump_list *calllimit;
428 jump_list *stackalloc;
429 jump_list *revertframes;
430 jump_list *wordboundary;
431 jump_list *anynewline;
432 jump_list *hspace;
433 jump_list *vspace;
434 jump_list *casefulcmp;
435 jump_list *caselesscmp;
436 jump_list *reset_match;
437 BOOL jscript_compat;
438 #ifdef SUPPORT_UTF
439 BOOL utf;
440 #ifdef SUPPORT_UCP
441 BOOL use_ucp;
442 jump_list *getucd;
443 #endif
444 #ifdef COMPILE_PCRE8
445 jump_list *utfreadchar;
446 jump_list *utfreadchar16;
447 jump_list *utfreadtype8;
448 #endif
449 #endif /* SUPPORT_UTF */
450 } compiler_common;
451
/* State used by byte_sequence_compare. When SLJIT_UNALIGNED is
enabled, two unions ("c" and "oc") additionally overlay an integer,
a 16 bit value and an array of code units whose length depends on
the compiled code unit width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
485
/* Drop the sljit definition of CMP; this file defines its own
CMP shortcut further below. */
#undef CMP

/* Byte offset of the i-th machine word sized stack element. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* Names given to the sljit registers used by the generated code. */
#define TMP1          SLJIT_R0
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
502
503 /* Local space layout. */
504 /* These two locals can be used by the current opcode. */
505 #define LOCALS0 (0 * sizeof(sljit_sw))
506 #define LOCALS1 (1 * sizeof(sljit_sw))
507 /* Two local variables for possessive quantifiers (char1 cannot use them). */
508 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
509 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
510 /* Max limit of recursions. */
511 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
512 /* The output vector is stored on the stack, and contains pointers
513 to characters. The vector data is divided into two groups: the first
514 group contains the start / end character pointers, and the second is
515 the start pointers when the end of the capturing group has not yet reached. */
516 #define OVECTOR_START (common->ovector_start)
517 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
518 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
519 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520
521 #if defined COMPILE_PCRE8
522 #define MOV_UCHAR SLJIT_MOV_U8
523 #define MOVU_UCHAR SLJIT_MOVU_U8
524 #elif defined COMPILE_PCRE16
525 #define MOV_UCHAR SLJIT_MOV_U16
526 #define MOVU_UCHAR SLJIT_MOVU_U16
527 #elif defined COMPILE_PCRE32
528 #define MOV_UCHAR SLJIT_MOV_U32
529 #define MOVU_UCHAR SLJIT_MOVU_U32
530 #else
531 #error Unsupported compiling mode
532 #endif
533
/* Shortcuts for the sljit emitter calls. DEFINE_COMPILER declares the
local variable "compiler" (taken from common->compiler); every other
macro below expects that variable to be in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
563
564 static pcre_uchar *bracketend(pcre_uchar *cc)
565 {
566 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
567 do cc += GET(cc, 1); while (*cc == OP_ALT);
568 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
569 cc += 1 + LINK_SIZE;
570 return cc;
571 }
572
573 static int no_alternatives(pcre_uchar *cc)
574 {
575 int count = 0;
576 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
577 do
578 {
579 cc += GET(cc, 1);
580 count++;
581 }
582 while (*cc == OP_ALT);
583 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
584 return count;
585 }
586
587 /* Functions which might need modification for all new supported opcodes:
588 next_opcode
589 check_opcode_types
590 set_private_data_ptrs
591 get_framesize
592 init_frame
593 get_private_data_copy_length
594 copy_private_data
595 compile_matchingpath
596 compile_backtrackingpath
597 */
598
599 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
600 {
601 SLJIT_UNUSED_ARG(common);
602 switch(*cc)
603 {
604 case OP_SOD:
605 case OP_SOM:
606 case OP_SET_SOM:
607 case OP_NOT_WORD_BOUNDARY:
608 case OP_WORD_BOUNDARY:
609 case OP_NOT_DIGIT:
610 case OP_DIGIT:
611 case OP_NOT_WHITESPACE:
612 case OP_WHITESPACE:
613 case OP_NOT_WORDCHAR:
614 case OP_WORDCHAR:
615 case OP_ANY:
616 case OP_ALLANY:
617 case OP_NOTPROP:
618 case OP_PROP:
619 case OP_ANYNL:
620 case OP_NOT_HSPACE:
621 case OP_HSPACE:
622 case OP_NOT_VSPACE:
623 case OP_VSPACE:
624 case OP_EXTUNI:
625 case OP_EODN:
626 case OP_EOD:
627 case OP_CIRC:
628 case OP_CIRCM:
629 case OP_DOLL:
630 case OP_DOLLM:
631 case OP_CRSTAR:
632 case OP_CRMINSTAR:
633 case OP_CRPLUS:
634 case OP_CRMINPLUS:
635 case OP_CRQUERY:
636 case OP_CRMINQUERY:
637 case OP_CRRANGE:
638 case OP_CRMINRANGE:
639 case OP_CRPOSSTAR:
640 case OP_CRPOSPLUS:
641 case OP_CRPOSQUERY:
642 case OP_CRPOSRANGE:
643 case OP_CLASS:
644 case OP_NCLASS:
645 case OP_REF:
646 case OP_REFI:
647 case OP_DNREF:
648 case OP_DNREFI:
649 case OP_RECURSE:
650 case OP_CALLOUT:
651 case OP_ALT:
652 case OP_KET:
653 case OP_KETRMAX:
654 case OP_KETRMIN:
655 case OP_KETRPOS:
656 case OP_REVERSE:
657 case OP_ASSERT:
658 case OP_ASSERT_NOT:
659 case OP_ASSERTBACK:
660 case OP_ASSERTBACK_NOT:
661 case OP_ONCE:
662 case OP_ONCE_NC:
663 case OP_BRA:
664 case OP_BRAPOS:
665 case OP_CBRA:
666 case OP_CBRAPOS:
667 case OP_COND:
668 case OP_SBRA:
669 case OP_SBRAPOS:
670 case OP_SCBRA:
671 case OP_SCBRAPOS:
672 case OP_SCOND:
673 case OP_CREF:
674 case OP_DNCREF:
675 case OP_RREF:
676 case OP_DNRREF:
677 case OP_DEF:
678 case OP_BRAZERO:
679 case OP_BRAMINZERO:
680 case OP_BRAPOSZERO:
681 case OP_PRUNE:
682 case OP_SKIP:
683 case OP_THEN:
684 case OP_COMMIT:
685 case OP_FAIL:
686 case OP_ACCEPT:
687 case OP_ASSERT_ACCEPT:
688 case OP_CLOSE:
689 case OP_SKIPZERO:
690 return cc + PRIV(OP_lengths)[*cc];
691
692 case OP_CHAR:
693 case OP_CHARI:
694 case OP_NOT:
695 case OP_NOTI:
696 case OP_STAR:
697 case OP_MINSTAR:
698 case OP_PLUS:
699 case OP_MINPLUS:
700 case OP_QUERY:
701 case OP_MINQUERY:
702 case OP_UPTO:
703 case OP_MINUPTO:
704 case OP_EXACT:
705 case OP_POSSTAR:
706 case OP_POSPLUS:
707 case OP_POSQUERY:
708 case OP_POSUPTO:
709 case OP_STARI:
710 case OP_MINSTARI:
711 case OP_PLUSI:
712 case OP_MINPLUSI:
713 case OP_QUERYI:
714 case OP_MINQUERYI:
715 case OP_UPTOI:
716 case OP_MINUPTOI:
717 case OP_EXACTI:
718 case OP_POSSTARI:
719 case OP_POSPLUSI:
720 case OP_POSQUERYI:
721 case OP_POSUPTOI:
722 case OP_NOTSTAR:
723 case OP_NOTMINSTAR:
724 case OP_NOTPLUS:
725 case OP_NOTMINPLUS:
726 case OP_NOTQUERY:
727 case OP_NOTMINQUERY:
728 case OP_NOTUPTO:
729 case OP_NOTMINUPTO:
730 case OP_NOTEXACT:
731 case OP_NOTPOSSTAR:
732 case OP_NOTPOSPLUS:
733 case OP_NOTPOSQUERY:
734 case OP_NOTPOSUPTO:
735 case OP_NOTSTARI:
736 case OP_NOTMINSTARI:
737 case OP_NOTPLUSI:
738 case OP_NOTMINPLUSI:
739 case OP_NOTQUERYI:
740 case OP_NOTMINQUERYI:
741 case OP_NOTUPTOI:
742 case OP_NOTMINUPTOI:
743 case OP_NOTEXACTI:
744 case OP_NOTPOSSTARI:
745 case OP_NOTPOSPLUSI:
746 case OP_NOTPOSQUERYI:
747 case OP_NOTPOSUPTOI:
748 cc += PRIV(OP_lengths)[*cc];
749 #ifdef SUPPORT_UTF
750 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
751 #endif
752 return cc;
753
754 /* Special cases. */
755 case OP_TYPESTAR:
756 case OP_TYPEMINSTAR:
757 case OP_TYPEPLUS:
758 case OP_TYPEMINPLUS:
759 case OP_TYPEQUERY:
760 case OP_TYPEMINQUERY:
761 case OP_TYPEUPTO:
762 case OP_TYPEMINUPTO:
763 case OP_TYPEEXACT:
764 case OP_TYPEPOSSTAR:
765 case OP_TYPEPOSPLUS:
766 case OP_TYPEPOSQUERY:
767 case OP_TYPEPOSUPTO:
768 return cc + PRIV(OP_lengths)[*cc] - 1;
769
770 case OP_ANYBYTE:
771 #ifdef SUPPORT_UTF
772 if (common->utf) return NULL;
773 #endif
774 return cc + 1;
775
776 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
777 case OP_XCLASS:
778 return cc + GET(cc, 1);
779 #endif
780
781 case OP_MARK:
782 case OP_PRUNE_ARG:
783 case OP_SKIP_ARG:
784 case OP_THEN_ARG:
785 return cc + 1 + 2 + cc[1];
786
787 default:
788 /* All opcodes are supported now! */
789 SLJIT_UNREACHABLE();
790 return NULL;
791 }
792 }
793
794 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
795 {
796 int count;
797 pcre_uchar *slot;
798 pcre_uchar *assert_back_end = cc - 1;
799
800 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
801 while (cc < ccend)
802 {
803 switch(*cc)
804 {
805 case OP_SET_SOM:
806 common->has_set_som = TRUE;
807 common->might_be_empty = TRUE;
808 cc += 1;
809 break;
810
811 case OP_REF:
812 case OP_REFI:
813 common->optimized_cbracket[GET2(cc, 1)] = 0;
814 cc += 1 + IMM2_SIZE;
815 break;
816
817 case OP_CBRAPOS:
818 case OP_SCBRAPOS:
819 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
820 cc += 1 + LINK_SIZE + IMM2_SIZE;
821 break;
822
823 case OP_COND:
824 case OP_SCOND:
825 /* Only AUTO_CALLOUT can insert this opcode. We do
826 not intend to support this case. */
827 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
828 return FALSE;
829 cc += 1 + LINK_SIZE;
830 break;
831
832 case OP_CREF:
833 common->optimized_cbracket[GET2(cc, 1)] = 0;
834 cc += 1 + IMM2_SIZE;
835 break;
836
837 case OP_DNREF:
838 case OP_DNREFI:
839 case OP_DNCREF:
840 count = GET2(cc, 1 + IMM2_SIZE);
841 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
842 while (count-- > 0)
843 {
844 common->optimized_cbracket[GET2(slot, 0)] = 0;
845 slot += common->name_entry_size;
846 }
847 cc += 1 + 2 * IMM2_SIZE;
848 break;
849
850 case OP_RECURSE:
851 /* Set its value only once. */
852 if (common->recursive_head_ptr == 0)
853 {
854 common->recursive_head_ptr = common->ovector_start;
855 common->ovector_start += sizeof(sljit_sw);
856 }
857 cc += 1 + LINK_SIZE;
858 break;
859
860 case OP_CALLOUT:
861 if (common->capture_last_ptr == 0)
862 {
863 common->capture_last_ptr = common->ovector_start;
864 common->ovector_start += sizeof(sljit_sw);
865 }
866 cc += 2 + 2 * LINK_SIZE;
867 break;
868
869 case OP_ASSERTBACK:
870 slot = bracketend(cc);
871 if (slot > assert_back_end)
872 assert_back_end = slot;
873 cc += 1 + LINK_SIZE;
874 break;
875
876 case OP_THEN_ARG:
877 common->has_then = TRUE;
878 common->control_head_ptr = 1;
879 /* Fall through. */
880
881 case OP_PRUNE_ARG:
882 case OP_MARK:
883 if (common->mark_ptr == 0)
884 {
885 common->mark_ptr = common->ovector_start;
886 common->ovector_start += sizeof(sljit_sw);
887 }
888 cc += 1 + 2 + cc[1];
889 break;
890
891 case OP_THEN:
892 common->has_then = TRUE;
893 common->control_head_ptr = 1;
894 cc += 1;
895 break;
896
897 case OP_SKIP:
898 if (cc < assert_back_end)
899 common->has_skip_in_assert_back = TRUE;
900 cc += 1;
901 break;
902
903 case OP_SKIP_ARG:
904 common->control_head_ptr = 1;
905 common->has_skip_arg = TRUE;
906 if (cc < assert_back_end)
907 common->has_skip_in_assert_back = TRUE;
908 cc += 1 + 2 + cc[1];
909 break;
910
911 default:
912 cc = next_opcode(common, cc);
913 if (cc == NULL)
914 return FALSE;
915 break;
916 }
917 }
918 return TRUE;
919 }
920
921 static BOOL is_accelerated_repeat(pcre_uchar *cc)
922 {
923 switch(*cc)
924 {
925 case OP_TYPESTAR:
926 case OP_TYPEMINSTAR:
927 case OP_TYPEPLUS:
928 case OP_TYPEMINPLUS:
929 case OP_TYPEPOSSTAR:
930 case OP_TYPEPOSPLUS:
931 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
932
933 case OP_STAR:
934 case OP_MINSTAR:
935 case OP_PLUS:
936 case OP_MINPLUS:
937 case OP_POSSTAR:
938 case OP_POSPLUS:
939
940 case OP_STARI:
941 case OP_MINSTARI:
942 case OP_PLUSI:
943 case OP_MINPLUSI:
944 case OP_POSSTARI:
945 case OP_POSPLUSI:
946
947 case OP_NOTSTAR:
948 case OP_NOTMINSTAR:
949 case OP_NOTPLUS:
950 case OP_NOTMINPLUS:
951 case OP_NOTPOSSTAR:
952 case OP_NOTPOSPLUS:
953
954 case OP_NOTSTARI:
955 case OP_NOTMINSTARI:
956 case OP_NOTPLUSI:
957 case OP_NOTMINPLUSI:
958 case OP_NOTPOSSTARI:
959 case OP_NOTPOSPLUSI:
960 return TRUE;
961
962 case OP_CLASS:
963 case OP_NCLASS:
964 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
965 case OP_XCLASS:
966 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
967 #else
968 cc += (1 + (32 / sizeof(pcre_uchar)));
969 #endif
970
971 switch(*cc)
972 {
973 case OP_CRSTAR:
974 case OP_CRMINSTAR:
975 case OP_CRPLUS:
976 case OP_CRMINPLUS:
977 case OP_CRPOSSTAR:
978 case OP_CRPOSPLUS:
979 return TRUE;
980 }
981 break;
982 }
983 return FALSE;
984 }
985
986 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
987 {
988 pcre_uchar *cc = common->start;
989 pcre_uchar *end;
990
991 /* Skip not repeated brackets. */
992 while (TRUE)
993 {
994 switch(*cc)
995 {
996 case OP_SOD:
997 case OP_SOM:
998 case OP_SET_SOM:
999 case OP_NOT_WORD_BOUNDARY:
1000 case OP_WORD_BOUNDARY:
1001 case OP_EODN:
1002 case OP_EOD:
1003 case OP_CIRC:
1004 case OP_CIRCM:
1005 case OP_DOLL:
1006 case OP_DOLLM:
1007 /* Zero width assertions. */
1008 cc++;
1009 continue;
1010 }
1011
1012 if (*cc != OP_BRA && *cc != OP_CBRA)
1013 break;
1014
1015 end = cc + GET(cc, 1);
1016 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1017 return FALSE;
1018 if (*cc == OP_CBRA)
1019 {
1020 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1021 return FALSE;
1022 cc += IMM2_SIZE;
1023 }
1024 cc += 1 + LINK_SIZE;
1025 }
1026
1027 if (is_accelerated_repeat(cc))
1028 {
1029 common->fast_forward_bc_ptr = cc;
1030 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1031 *private_data_start += sizeof(sljit_sw);
1032 return TRUE;
1033 }
1034 return FALSE;
1035 }
1036
1037 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1038 {
1039 pcre_uchar *next_alt;
1040
1041 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1042
1043 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1044 return;
1045
1046 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1047 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1048 return;
1049
1050 do
1051 {
1052 next_alt = cc + GET(cc, 1);
1053
1054 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1055
1056 while (TRUE)
1057 {
1058 switch(*cc)
1059 {
1060 case OP_SOD:
1061 case OP_SOM:
1062 case OP_SET_SOM:
1063 case OP_NOT_WORD_BOUNDARY:
1064 case OP_WORD_BOUNDARY:
1065 case OP_EODN:
1066 case OP_EOD:
1067 case OP_CIRC:
1068 case OP_CIRCM:
1069 case OP_DOLL:
1070 case OP_DOLLM:
1071 /* Zero width assertions. */
1072 cc++;
1073 continue;
1074 }
1075 break;
1076 }
1077
1078 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1079 detect_fast_fail(common, cc, private_data_start, depth - 1);
1080
1081 if (is_accelerated_repeat(cc))
1082 {
1083 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1084
1085 if (common->fast_fail_start_ptr == 0)
1086 common->fast_fail_start_ptr = *private_data_start;
1087
1088 *private_data_start += sizeof(sljit_sw);
1089 common->fast_fail_end_ptr = *private_data_start;
1090
1091 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1092 return;
1093 }
1094
1095 cc = next_alt;
1096 }
1097 while (*cc == OP_ALT);
1098 }
1099
1100 static int get_class_iterator_size(pcre_uchar *cc)
1101 {
1102 sljit_u32 min;
1103 sljit_u32 max;
1104 switch(*cc)
1105 {
1106 case OP_CRSTAR:
1107 case OP_CRPLUS:
1108 return 2;
1109
1110 case OP_CRMINSTAR:
1111 case OP_CRMINPLUS:
1112 case OP_CRQUERY:
1113 case OP_CRMINQUERY:
1114 return 1;
1115
1116 case OP_CRRANGE:
1117 case OP_CRMINRANGE:
1118 min = GET2(cc, 1);
1119 max = GET2(cc, 1 + IMM2_SIZE);
1120 if (max == 0)
1121 return (*cc == OP_CRRANGE) ? 2 : 1;
1122 max -= min;
1123 if (max > 2)
1124 max = 2;
1125 return max;
1126
1127 default:
1128 return 0;
1129 }
1130 }
1131
1132 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1133 {
1134 pcre_uchar *end = bracketend(begin);
1135 pcre_uchar *next;
1136 pcre_uchar *next_end;
1137 pcre_uchar *max_end;
1138 pcre_uchar type;
1139 sljit_sw length = end - begin;
1140 int min, max, i;
1141
1142 /* Detect fixed iterations first. */
1143 if (end[-(1 + LINK_SIZE)] != OP_KET)
1144 return FALSE;
1145
1146 /* Already detected repeat. */
1147 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1148 return TRUE;
1149
1150 next = end;
1151 min = 1;
1152 while (1)
1153 {
1154 if (*next != *begin)
1155 break;
1156 next_end = bracketend(next);
1157 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1158 break;
1159 next = next_end;
1160 min++;
1161 }
1162
1163 if (min == 2)
1164 return FALSE;
1165
1166 max = 0;
1167 max_end = next;
1168 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1169 {
1170 type = *next;
1171 while (1)
1172 {
1173 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1174 break;
1175 next_end = bracketend(next + 2 + LINK_SIZE);
1176 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1177 break;
1178 next = next_end;
1179 max++;
1180 }
1181
1182 if (next[0] == type && next[1] == *begin && max >= 1)
1183 {
1184 next_end = bracketend(next + 1);
1185 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1186 {
1187 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1188 if (*next_end != OP_KET)
1189 break;
1190
1191 if (i == max)
1192 {
1193 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1194 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1195 /* +2 the original and the last. */
1196 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1197 if (min == 1)
1198 return TRUE;
1199 min--;
1200 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1201 }
1202 }
1203 }
1204 }
1205
1206 if (min >= 3)
1207 {
1208 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1209 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1210 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1211 return TRUE;
1212 }
1213
1214 return FALSE;
1215 }
1216
/* Groups of case labels shared by the switch statements which account for
the private data of character iterators. The digit in the name is the
number of words handled for that group by the users of these macros; the
A / B suffix separates opcodes without and with an IMM2 operand. Each macro
ends with a label, so it must be followed directly by the statements. */

/* Single character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators, two words, no IMM2 operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators, two words, with IMM2 operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators, two words, no IMM2 operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators, two words, with IMM2 operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1268
1269 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1270 {
1271 pcre_uchar *cc = common->start;
1272 pcre_uchar *alternative;
1273 pcre_uchar *end = NULL;
1274 int private_data_ptr = *private_data_start;
1275 int space, size, bracketlen;
1276 BOOL repeat_check = TRUE;
1277
1278 while (cc < ccend)
1279 {
1280 space = 0;
1281 size = 0;
1282 bracketlen = 0;
1283 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1284 break;
1285
1286 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1287 {
1288 if (detect_repeat(common, cc))
1289 {
1290 /* These brackets are converted to repeats, so no global
1291 based single character repeat is allowed. */
1292 if (cc >= end)
1293 end = bracketend(cc);
1294 }
1295 }
1296 repeat_check = TRUE;
1297
1298 switch(*cc)
1299 {
1300 case OP_KET:
1301 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1302 {
1303 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1304 private_data_ptr += sizeof(sljit_sw);
1305 cc += common->private_data_ptrs[cc + 1 - common->start];
1306 }
1307 cc += 1 + LINK_SIZE;
1308 break;
1309
1310 case OP_ASSERT:
1311 case OP_ASSERT_NOT:
1312 case OP_ASSERTBACK:
1313 case OP_ASSERTBACK_NOT:
1314 case OP_ONCE:
1315 case OP_ONCE_NC:
1316 case OP_BRAPOS:
1317 case OP_SBRA:
1318 case OP_SBRAPOS:
1319 case OP_SCOND:
1320 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1321 private_data_ptr += sizeof(sljit_sw);
1322 bracketlen = 1 + LINK_SIZE;
1323 break;
1324
1325 case OP_CBRAPOS:
1326 case OP_SCBRAPOS:
1327 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1328 private_data_ptr += sizeof(sljit_sw);
1329 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1330 break;
1331
1332 case OP_COND:
1333 /* Might be a hidden SCOND. */
1334 alternative = cc + GET(cc, 1);
1335 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1336 {
1337 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1338 private_data_ptr += sizeof(sljit_sw);
1339 }
1340 bracketlen = 1 + LINK_SIZE;
1341 break;
1342
1343 case OP_BRA:
1344 bracketlen = 1 + LINK_SIZE;
1345 break;
1346
1347 case OP_CBRA:
1348 case OP_SCBRA:
1349 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1350 break;
1351
1352 case OP_BRAZERO:
1353 case OP_BRAMINZERO:
1354 case OP_BRAPOSZERO:
1355 repeat_check = FALSE;
1356 size = 1;
1357 break;
1358
1359 CASE_ITERATOR_PRIVATE_DATA_1
1360 space = 1;
1361 size = -2;
1362 break;
1363
1364 CASE_ITERATOR_PRIVATE_DATA_2A
1365 space = 2;
1366 size = -2;
1367 break;
1368
1369 CASE_ITERATOR_PRIVATE_DATA_2B
1370 space = 2;
1371 size = -(2 + IMM2_SIZE);
1372 break;
1373
1374 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1375 space = 1;
1376 size = 1;
1377 break;
1378
1379 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1380 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1381 space = 2;
1382 size = 1;
1383 break;
1384
1385 case OP_TYPEUPTO:
1386 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1387 space = 2;
1388 size = 1 + IMM2_SIZE;
1389 break;
1390
1391 case OP_TYPEMINUPTO:
1392 space = 2;
1393 size = 1 + IMM2_SIZE;
1394 break;
1395
1396 case OP_CLASS:
1397 case OP_NCLASS:
1398 space = get_class_iterator_size(cc + size);
1399 size = 1 + 32 / sizeof(pcre_uchar);
1400 break;
1401
1402 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1403 case OP_XCLASS:
1404 space = get_class_iterator_size(cc + size);
1405 size = GET(cc, 1);
1406 break;
1407 #endif
1408
1409 default:
1410 cc = next_opcode(common, cc);
1411 SLJIT_ASSERT(cc != NULL);
1412 break;
1413 }
1414
1415 /* Character iterators, which are not inside a repeated bracket,
1416 gets a private slot instead of allocating it on the stack. */
1417 if (space > 0 && cc >= end)
1418 {
1419 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1420 private_data_ptr += sizeof(sljit_sw) * space;
1421 }
1422
1423 if (size != 0)
1424 {
1425 if (size < 0)
1426 {
1427 cc += -size;
1428 #ifdef SUPPORT_UTF
1429 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1430 #endif
1431 }
1432 else
1433 cc += size;
1434 }
1435
1436 if (bracketlen > 0)
1437 {
1438 if (cc >= end)
1439 {
1440 end = bracketend(cc);
1441 if (end[-1 - LINK_SIZE] == OP_KET)
1442 end = NULL;
1443 }
1444 cc += bracketlen;
1445 }
1446 }
1447 *private_data_start = private_data_ptr;
1448 }
1449
1450 /* Returns with a frame_types (always < 0) if no need for frame. */
1451 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1452 {
1453 int length = 0;
1454 int possessive = 0;
1455 BOOL stack_restore = FALSE;
1456 BOOL setsom_found = recursive;
1457 BOOL setmark_found = recursive;
1458 /* The last capture is a local variable even for recursions. */
1459 BOOL capture_last_found = FALSE;
1460
1461 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1462 SLJIT_ASSERT(common->control_head_ptr != 0);
1463 *needs_control_head = TRUE;
1464 #else
1465 *needs_control_head = FALSE;
1466 #endif
1467
1468 if (ccend == NULL)
1469 {
1470 ccend = bracketend(cc) - (1 + LINK_SIZE);
1471 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1472 {
1473 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1474 /* This is correct regardless of common->capture_last_ptr. */
1475 capture_last_found = TRUE;
1476 }
1477 cc = next_opcode(common, cc);
1478 }
1479
1480 SLJIT_ASSERT(cc != NULL);
1481 while (cc < ccend)
1482 switch(*cc)
1483 {
1484 case OP_SET_SOM:
1485 SLJIT_ASSERT(common->has_set_som);
1486 stack_restore = TRUE;
1487 if (!setsom_found)
1488 {
1489 length += 2;
1490 setsom_found = TRUE;
1491 }
1492 cc += 1;
1493 break;
1494
1495 case OP_MARK:
1496 case OP_PRUNE_ARG:
1497 case OP_THEN_ARG:
1498 SLJIT_ASSERT(common->mark_ptr != 0);
1499 stack_restore = TRUE;
1500 if (!setmark_found)
1501 {
1502 length += 2;
1503 setmark_found = TRUE;
1504 }
1505 if (common->control_head_ptr != 0)
1506 *needs_control_head = TRUE;
1507 cc += 1 + 2 + cc[1];
1508 break;
1509
1510 case OP_RECURSE:
1511 stack_restore = TRUE;
1512 if (common->has_set_som && !setsom_found)
1513 {
1514 length += 2;
1515 setsom_found = TRUE;
1516 }
1517 if (common->mark_ptr != 0 && !setmark_found)
1518 {
1519 length += 2;
1520 setmark_found = TRUE;
1521 }
1522 if (common->capture_last_ptr != 0 && !capture_last_found)
1523 {
1524 length += 2;
1525 capture_last_found = TRUE;
1526 }
1527 cc += 1 + LINK_SIZE;
1528 break;
1529
1530 case OP_CBRA:
1531 case OP_CBRAPOS:
1532 case OP_SCBRA:
1533 case OP_SCBRAPOS:
1534 stack_restore = TRUE;
1535 if (common->capture_last_ptr != 0 && !capture_last_found)
1536 {
1537 length += 2;
1538 capture_last_found = TRUE;
1539 }
1540 length += 3;
1541 cc += 1 + LINK_SIZE + IMM2_SIZE;
1542 break;
1543
1544 case OP_THEN:
1545 stack_restore = TRUE;
1546 if (common->control_head_ptr != 0)
1547 *needs_control_head = TRUE;
1548 cc ++;
1549 break;
1550
1551 default:
1552 stack_restore = TRUE;
1553 /* Fall through. */
1554
1555 case OP_NOT_WORD_BOUNDARY:
1556 case OP_WORD_BOUNDARY:
1557 case OP_NOT_DIGIT:
1558 case OP_DIGIT:
1559 case OP_NOT_WHITESPACE:
1560 case OP_WHITESPACE:
1561 case OP_NOT_WORDCHAR:
1562 case OP_WORDCHAR:
1563 case OP_ANY:
1564 case OP_ALLANY:
1565 case OP_ANYBYTE:
1566 case OP_NOTPROP:
1567 case OP_PROP:
1568 case OP_ANYNL:
1569 case OP_NOT_HSPACE:
1570 case OP_HSPACE:
1571 case OP_NOT_VSPACE:
1572 case OP_VSPACE:
1573 case OP_EXTUNI:
1574 case OP_EODN:
1575 case OP_EOD:
1576 case OP_CIRC:
1577 case OP_CIRCM:
1578 case OP_DOLL:
1579 case OP_DOLLM:
1580 case OP_CHAR:
1581 case OP_CHARI:
1582 case OP_NOT:
1583 case OP_NOTI:
1584
1585 case OP_EXACT:
1586 case OP_POSSTAR:
1587 case OP_POSPLUS:
1588 case OP_POSQUERY:
1589 case OP_POSUPTO:
1590
1591 case OP_EXACTI:
1592 case OP_POSSTARI:
1593 case OP_POSPLUSI:
1594 case OP_POSQUERYI:
1595 case OP_POSUPTOI:
1596
1597 case OP_NOTEXACT:
1598 case OP_NOTPOSSTAR:
1599 case OP_NOTPOSPLUS:
1600 case OP_NOTPOSQUERY:
1601 case OP_NOTPOSUPTO:
1602
1603 case OP_NOTEXACTI:
1604 case OP_NOTPOSSTARI:
1605 case OP_NOTPOSPLUSI:
1606 case OP_NOTPOSQUERYI:
1607 case OP_NOTPOSUPTOI:
1608
1609 case OP_TYPEEXACT:
1610 case OP_TYPEPOSSTAR:
1611 case OP_TYPEPOSPLUS:
1612 case OP_TYPEPOSQUERY:
1613 case OP_TYPEPOSUPTO:
1614
1615 case OP_CLASS:
1616 case OP_NCLASS:
1617 case OP_XCLASS:
1618 case OP_CALLOUT:
1619
1620 cc = next_opcode(common, cc);
1621 SLJIT_ASSERT(cc != NULL);
1622 break;
1623 }
1624
1625 /* Possessive quantifiers can use a special case. */
1626 if (SLJIT_UNLIKELY(possessive == length))
1627 return stack_restore ? no_frame : no_stack;
1628
1629 if (length > 0)
1630 return length + 1;
1631 return stack_restore ? no_frame : no_stack;
1632 }
1633
1634 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1635 {
1636 DEFINE_COMPILER;
1637 BOOL setsom_found = recursive;
1638 BOOL setmark_found = recursive;
1639 /* The last capture is a local variable even for recursions. */
1640 BOOL capture_last_found = FALSE;
1641 int offset;
1642
1643 /* >= 1 + shortest item size (2) */
1644 SLJIT_UNUSED_ARG(stacktop);
1645 SLJIT_ASSERT(stackpos >= stacktop + 2);
1646
1647 stackpos = STACK(stackpos);
1648 if (ccend == NULL)
1649 {
1650 ccend = bracketend(cc) - (1 + LINK_SIZE);
1651 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1652 cc = next_opcode(common, cc);
1653 }
1654
1655 SLJIT_ASSERT(cc != NULL);
1656 while (cc < ccend)
1657 switch(*cc)
1658 {
1659 case OP_SET_SOM:
1660 SLJIT_ASSERT(common->has_set_som);
1661 if (!setsom_found)
1662 {
1663 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1665 stackpos -= (int)sizeof(sljit_sw);
1666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1667 stackpos -= (int)sizeof(sljit_sw);
1668 setsom_found = TRUE;
1669 }
1670 cc += 1;
1671 break;
1672
1673 case OP_MARK:
1674 case OP_PRUNE_ARG:
1675 case OP_THEN_ARG:
1676 SLJIT_ASSERT(common->mark_ptr != 0);
1677 if (!setmark_found)
1678 {
1679 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1681 stackpos -= (int)sizeof(sljit_sw);
1682 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1683 stackpos -= (int)sizeof(sljit_sw);
1684 setmark_found = TRUE;
1685 }
1686 cc += 1 + 2 + cc[1];
1687 break;
1688
1689 case OP_RECURSE:
1690 if (common->has_set_som && !setsom_found)
1691 {
1692 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1694 stackpos -= (int)sizeof(sljit_sw);
1695 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1696 stackpos -= (int)sizeof(sljit_sw);
1697 setsom_found = TRUE;
1698 }
1699 if (common->mark_ptr != 0 && !setmark_found)
1700 {
1701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1703 stackpos -= (int)sizeof(sljit_sw);
1704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1705 stackpos -= (int)sizeof(sljit_sw);
1706 setmark_found = TRUE;
1707 }
1708 if (common->capture_last_ptr != 0 && !capture_last_found)
1709 {
1710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1712 stackpos -= (int)sizeof(sljit_sw);
1713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1714 stackpos -= (int)sizeof(sljit_sw);
1715 capture_last_found = TRUE;
1716 }
1717 cc += 1 + LINK_SIZE;
1718 break;
1719
1720 case OP_CBRA:
1721 case OP_CBRAPOS:
1722 case OP_SCBRA:
1723 case OP_SCBRAPOS:
1724 if (common->capture_last_ptr != 0 && !capture_last_found)
1725 {
1726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1728 stackpos -= (int)sizeof(sljit_sw);
1729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1730 stackpos -= (int)sizeof(sljit_sw);
1731 capture_last_found = TRUE;
1732 }
1733 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1735 stackpos -= (int)sizeof(sljit_sw);
1736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1739 stackpos -= (int)sizeof(sljit_sw);
1740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1741 stackpos -= (int)sizeof(sljit_sw);
1742
1743 cc += 1 + LINK_SIZE + IMM2_SIZE;
1744 break;
1745
1746 default:
1747 cc = next_opcode(common, cc);
1748 SLJIT_ASSERT(cc != NULL);
1749 break;
1750 }
1751
1752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1753 SLJIT_ASSERT(stackpos == STACK(stacktop));
1754 }
1755
1756 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1757 {
1758 int private_data_length = needs_control_head ? 3 : 2;
1759 int size;
1760 pcre_uchar *alternative;
1761 /* Calculate the sum of the private machine words. */
1762 while (cc < ccend)
1763 {
1764 size = 0;
1765 switch(*cc)
1766 {
1767 case OP_KET:
1768 if (PRIVATE_DATA(cc) != 0)
1769 {
1770 private_data_length++;
1771 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1772 cc += PRIVATE_DATA(cc + 1);
1773 }
1774 cc += 1 + LINK_SIZE;
1775 break;
1776
1777 case OP_ASSERT:
1778 case OP_ASSERT_NOT:
1779 case OP_ASSERTBACK:
1780 case OP_ASSERTBACK_NOT:
1781 case OP_ONCE:
1782 case OP_ONCE_NC:
1783 case OP_BRAPOS:
1784 case OP_SBRA:
1785 case OP_SBRAPOS:
1786 case OP_SCOND:
1787 private_data_length++;
1788 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1789 cc += 1 + LINK_SIZE;
1790 break;
1791
1792 case OP_CBRA:
1793 case OP_SCBRA:
1794 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1795 private_data_length++;
1796 cc += 1 + LINK_SIZE + IMM2_SIZE;
1797 break;
1798
1799 case OP_CBRAPOS:
1800 case OP_SCBRAPOS:
1801 private_data_length += 2;
1802 cc += 1 + LINK_SIZE + IMM2_SIZE;
1803 break;
1804
1805 case OP_COND:
1806 /* Might be a hidden SCOND. */
1807 alternative = cc + GET(cc, 1);
1808 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1809 private_data_length++;
1810 cc += 1 + LINK_SIZE;
1811 break;
1812
1813 CASE_ITERATOR_PRIVATE_DATA_1
1814 if (PRIVATE_DATA(cc))
1815 private_data_length++;
1816 cc += 2;
1817 #ifdef SUPPORT_UTF
1818 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1819 #endif
1820 break;
1821
1822 CASE_ITERATOR_PRIVATE_DATA_2A
1823 if (PRIVATE_DATA(cc))
1824 private_data_length += 2;
1825 cc += 2;
1826 #ifdef SUPPORT_UTF
1827 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1828 #endif
1829 break;
1830
1831 CASE_ITERATOR_PRIVATE_DATA_2B
1832 if (PRIVATE_DATA(cc))
1833 private_data_length += 2;
1834 cc += 2 + IMM2_SIZE;
1835 #ifdef SUPPORT_UTF
1836 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1837 #endif
1838 break;
1839
1840 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1841 if (PRIVATE_DATA(cc))
1842 private_data_length++;
1843 cc += 1;
1844 break;
1845
1846 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1847 if (PRIVATE_DATA(cc))
1848 private_data_length += 2;
1849 cc += 1;
1850 break;
1851
1852 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1853 if (PRIVATE_DATA(cc))
1854 private_data_length += 2;
1855 cc += 1 + IMM2_SIZE;
1856 break;
1857
1858 case OP_CLASS:
1859 case OP_NCLASS:
1860 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1861 case OP_XCLASS:
1862 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1863 #else
1864 size = 1 + 32 / (int)sizeof(pcre_uchar);
1865 #endif
1866 if (PRIVATE_DATA(cc))
1867 private_data_length += get_class_iterator_size(cc + size);
1868 cc += size;
1869 break;
1870
1871 default:
1872 cc = next_opcode(common, cc);
1873 SLJIT_ASSERT(cc != NULL);
1874 break;
1875 }
1876 }
1877 SLJIT_ASSERT(cc == ccend);
1878 return private_data_length;
1879 }
1880
1881 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1882 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1883 {
1884 DEFINE_COMPILER;
1885 int srcw[2];
1886 int count, size;
1887 BOOL tmp1next = TRUE;
1888 BOOL tmp1empty = TRUE;
1889 BOOL tmp2empty = TRUE;
1890 pcre_uchar *alternative;
1891 enum {
1892 loop,
1893 end
1894 } status;
1895
1896 status = loop;
1897 stackptr = STACK(stackptr);
1898 stacktop = STACK(stacktop - 1);
1899
1900 if (!save)
1901 {
1902 stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1903 if (stackptr < stacktop)
1904 {
1905 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1906 stackptr += sizeof(sljit_sw);
1907 tmp1empty = FALSE;
1908 }
1909 if (stackptr < stacktop)
1910 {
1911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1912 stackptr += sizeof(sljit_sw);
1913 tmp2empty = FALSE;
1914 }
1915 /* The tmp1next must be TRUE in either way. */
1916 }
1917
1918 SLJIT_ASSERT(common->recursive_head_ptr != 0);
1919
1920 do
1921 {
1922 count = 0;
1923 if (cc >= ccend)
1924 {
1925 if (!save)
1926 break;
1927
1928 count = 1;
1929 srcw[0] = common->recursive_head_ptr;
1930 if (needs_control_head)
1931 {
1932 SLJIT_ASSERT(common->control_head_ptr != 0);
1933 count = 2;
1934 srcw[0] = common->control_head_ptr;
1935 srcw[1] = common->recursive_head_ptr;
1936 }
1937 status = end;
1938 }
1939 else switch(*cc)
1940 {
1941 case OP_KET:
1942 if (PRIVATE_DATA(cc) != 0)
1943 {
1944 count = 1;
1945 srcw[0] = PRIVATE_DATA(cc);
1946 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1947 cc += PRIVATE_DATA(cc + 1);
1948 }
1949 cc += 1 + LINK_SIZE;
1950 break;
1951
1952 case OP_ASSERT:
1953 case OP_ASSERT_NOT:
1954 case OP_ASSERTBACK:
1955 case OP_ASSERTBACK_NOT:
1956 case OP_ONCE:
1957 case OP_ONCE_NC:
1958 case OP_BRAPOS:
1959 case OP_SBRA:
1960 case OP_SBRAPOS:
1961 case OP_SCOND:
1962 count = 1;
1963 srcw[0] = PRIVATE_DATA(cc);
1964 SLJIT_ASSERT(srcw[0] != 0);
1965 cc += 1 + LINK_SIZE;
1966 break;
1967
1968 case OP_CBRA:
1969 case OP_SCBRA:
1970 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1971 {
1972 count = 1;
1973 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1974 }
1975 cc += 1 + LINK_SIZE + IMM2_SIZE;
1976 break;
1977
1978 case OP_CBRAPOS:
1979 case OP_SCBRAPOS:
1980 count = 2;
1981 srcw[0] = PRIVATE_DATA(cc);
1982 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1983 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1984 cc += 1 + LINK_SIZE + IMM2_SIZE;
1985 break;
1986
1987 case OP_COND:
1988 /* Might be a hidden SCOND. */
1989 alternative = cc + GET(cc, 1);
1990 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1991 {
1992 count = 1;
1993 srcw[0] = PRIVATE_DATA(cc);
1994 SLJIT_ASSERT(srcw[0] != 0);
1995 }
1996 cc += 1 + LINK_SIZE;
1997 break;
1998
1999 CASE_ITERATOR_PRIVATE_DATA_1
2000 if (PRIVATE_DATA(cc))
2001 {
2002 count = 1;
2003 srcw[0] = PRIVATE_DATA(cc);
2004 }
2005 cc += 2;
2006 #ifdef SUPPORT_UTF
2007 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2008 #endif
2009 break;
2010
2011 CASE_ITERATOR_PRIVATE_DATA_2A
2012 if (PRIVATE_DATA(cc))
2013 {
2014 count = 2;
2015 srcw[0] = PRIVATE_DATA(cc);
2016 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2017 }
2018 cc += 2;
2019 #ifdef SUPPORT_UTF
2020 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2021 #endif
2022 break;
2023
2024 CASE_ITERATOR_PRIVATE_DATA_2B
2025 if (PRIVATE_DATA(cc))
2026 {
2027 count = 2;
2028 srcw[0] = PRIVATE_DATA(cc);
2029 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2030 }
2031 cc += 2 + IMM2_SIZE;
2032 #ifdef SUPPORT_UTF
2033 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2034 #endif
2035 break;
2036
2037 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2038 if (PRIVATE_DATA(cc))
2039 {
2040 count = 1;
2041 srcw[0] = PRIVATE_DATA(cc);
2042 }
2043 cc += 1;
2044 break;
2045
2046 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2047 if (PRIVATE_DATA(cc))
2048 {
2049 count = 2;
2050 srcw[0] = PRIVATE_DATA(cc);
2051 srcw[1] = srcw[0] + sizeof(sljit_sw);
2052 }
2053 cc += 1;
2054 break;
2055
2056 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2057 if (PRIVATE_DATA(cc))
2058 {
2059 count = 2;
2060 srcw[0] = PRIVATE_DATA(cc);
2061 srcw[1] = srcw[0] + sizeof(sljit_sw);
2062 }
2063 cc += 1 + IMM2_SIZE;
2064 break;
2065
2066 case OP_CLASS:
2067 case OP_NCLASS:
2068 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2069 case OP_XCLASS:
2070 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2071 #else
2072 size = 1 + 32 / (int)sizeof(pcre_uchar);
2073 #endif
2074 if (PRIVATE_DATA(cc))
2075 switch(get_class_iterator_size(cc + size))
2076 {
2077 case 1:
2078 count = 1;
2079 srcw[0] = PRIVATE_DATA(cc);
2080 break;
2081
2082 case 2:
2083 count = 2;
2084 srcw[0] = PRIVATE_DATA(cc);
2085 srcw[1] = srcw[0] + sizeof(sljit_sw);
2086 break;
2087
2088 default:
2089 SLJIT_UNREACHABLE();
2090 break;
2091 }
2092 cc += size;
2093 break;
2094
2095 default:
2096 cc = next_opcode(common, cc);
2097 SLJIT_ASSERT(cc != NULL);
2098 break;
2099 }
2100
2101 while (count > 0)
2102 {
2103 count--;
2104 if (save)
2105 {
2106 if (tmp1next)
2107 {
2108 if (!tmp1empty)
2109 {
2110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2111 stackptr += sizeof(sljit_sw);
2112 }
2113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2114 tmp1empty = FALSE;
2115 tmp1next = FALSE;
2116 }
2117 else
2118 {
2119 if (!tmp2empty)
2120 {
2121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2122 stackptr += sizeof(sljit_sw);
2123 }
2124 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2125 tmp2empty = FALSE;
2126 tmp1next = TRUE;
2127 }
2128 }
2129 else
2130 {
2131 if (tmp1next)
2132 {
2133 SLJIT_ASSERT(!tmp1empty);
2134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2135 tmp1empty = stackptr >= stacktop;
2136 if (!tmp1empty)
2137 {
2138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2139 stackptr += sizeof(sljit_sw);
2140 }
2141 tmp1next = FALSE;
2142 }
2143 else
2144 {
2145 SLJIT_ASSERT(!tmp2empty);
2146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2147 tmp2empty = stackptr >= stacktop;
2148 if (!tmp2empty)
2149 {
2150 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2151 stackptr += sizeof(sljit_sw);
2152 }
2153 tmp1next = TRUE;
2154 }
2155 }
2156 }
2157 }
2158 while (status != end);
2159
2160 if (save)
2161 {
2162 if (tmp1next)
2163 {
2164 if (!tmp1empty)
2165 {
2166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2167 stackptr += sizeof(sljit_sw);
2168 }
2169 if (!tmp2empty)
2170 {
2171 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2172 stackptr += sizeof(sljit_sw);
2173 }
2174 }
2175 else
2176 {
2177 if (!tmp2empty)
2178 {
2179 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2180 stackptr += sizeof(sljit_sw);
2181 }
2182 if (!tmp1empty)
2183 {
2184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2185 stackptr += sizeof(sljit_sw);
2186 }
2187 }
2188 }
2189 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2190 }
2191
2192 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2193 {
2194 pcre_uchar *end = bracketend(cc);
2195 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2196
2197 /* Assert captures then. */
2198 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2199 current_offset = NULL;
2200 /* Conditional block does not. */
2201 if (*cc == OP_COND || *cc == OP_SCOND)
2202 has_alternatives = FALSE;
2203
2204 cc = next_opcode(common, cc);
2205 if (has_alternatives)
2206 current_offset = common->then_offsets + (cc - common->start);
2207
2208 while (cc < end)
2209 {
2210 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2211 cc = set_then_offsets(common, cc, current_offset);
2212 else
2213 {
2214 if (*cc == OP_ALT && has_alternatives)
2215 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2216 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2217 *current_offset = 1;
2218 cc = next_opcode(common, cc);
2219 }
2220 }
2221
2222 return end;
2223 }
2224
/* The case label groups are not used below this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2231
2232 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2233 {
2234 return (value & (value - 1)) == 0;
2235 }
2236
2237 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2238 {
2239 while (list)
2240 {
2241 /* sljit_set_label is clever enough to do nothing
2242 if either the jump or the label is NULL. */
2243 SET_LABEL(list->jump, label);
2244 list = list->next;
2245 }
2246 }
2247
2248 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2249 {
2250 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2251 if (list_item)
2252 {
2253 list_item->next = *list;
2254 list_item->jump = jump;
2255 *list = list_item;
2256 }
2257 }
2258
2259 static void add_stub(compiler_common *common, struct sljit_jump *start)
2260 {
2261 DEFINE_COMPILER;
2262 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2263
2264 if (list_item)
2265 {
2266 list_item->start = start;
2267 list_item->quit = LABEL();
2268 list_item->next = common->stubs;
2269 common->stubs = list_item;
2270 }
2271 }
2272
2273 static void flush_stubs(compiler_common *common)
2274 {
2275 DEFINE_COMPILER;
2276 stub_list *list_item = common->stubs;
2277
2278 while (list_item)
2279 {
2280 JUMPHERE(list_item->start);
2281 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2282 JUMPTO(SLJIT_JUMP, list_item->quit);
2283 list_item = list_item->next;
2284 }
2285 common->stubs = NULL;
2286 }
2287
2288 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2289 {
2290 DEFINE_COMPILER;
2291 label_addr_list *label_addr;
2292
2293 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2294 if (label_addr == NULL)
2295 return;
2296 label_addr->label = LABEL();
2297 label_addr->update_addr = update_addr;
2298 label_addr->next = common->label_addrs;
2299 common->label_addrs = label_addr;
2300 }
2301
2302 static SLJIT_INLINE void count_match(compiler_common *common)
2303 {
2304 DEFINE_COMPILER;
2305
2306 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2307 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2308 }
2309
2310 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2311 {
2312 /* May destroy all locals and registers except TMP2. */
2313 DEFINE_COMPILER;
2314
2315 SLJIT_ASSERT(size > 0);
2316 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2317 #ifdef DESTROY_REGISTERS
2318 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2319 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2320 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2322 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2323 #endif
2324 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2325 }
2326
2327 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2328 {
2329 DEFINE_COMPILER;
2330
2331 SLJIT_ASSERT(size > 0);
2332 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2333 }
2334
2335 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2336 {
2337 DEFINE_COMPILER;
2338 sljit_uw *result;
2339
2340 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2341 return NULL;
2342
2343 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2344 if (SLJIT_UNLIKELY(result == NULL))
2345 {
2346 sljit_set_compiler_memory_error(compiler);
2347 return NULL;
2348 }
2349
2350 *(void**)result = common->read_only_data_head;
2351 common->read_only_data_head = (void *)result;
2352 return result + 1;
2353 }
2354
2355 static void free_read_only_data(void *current, void *allocator_data)
2356 {
2357 void *next;
2358
2359 SLJIT_UNUSED_ARG(allocator_data);
2360
2361 while (current != NULL)
2362 {
2363 next = *(void**)current;
2364 SLJIT_FREE(current, allocator_data);
2365 current = next;
2366 }
2367 }
2368
2369 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2370 {
2371 DEFINE_COMPILER;
2372 struct sljit_label *loop;
2373 int i;
2374
2375 /* At this point we can freely use all temporary registers. */
2376 SLJIT_ASSERT(length > 1);
2377 /* TMP1 returns with begin - 1. */
2378 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2379 if (length < 8)
2380 {
2381 for (i = 1; i < length; i++)
2382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2383 }
2384 else
2385 {
2386 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2387 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2388 loop = LABEL();
2389 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2390 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2391 JUMPTO(SLJIT_NOT_ZERO, loop);
2392 }
2393 }
2394
2395 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2396 {
2397 DEFINE_COMPILER;
2398 sljit_s32 i;
2399
2400 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2401
2402 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2403 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2405 }
2406
2407 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2408 {
2409 DEFINE_COMPILER;
2410 struct sljit_label *loop;
2411 int i;
2412
2413 SLJIT_ASSERT(length > 1);
2414 /* OVECTOR(1) contains the "string begin - 1" constant. */
2415 if (length > 2)
2416 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2417 if (length < 8)
2418 {
2419 for (i = 2; i < length; i++)
2420 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2421 }
2422 else
2423 {
2424 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2425 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2426 loop = LABEL();
2427 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2428 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2429 JUMPTO(SLJIT_NOT_ZERO, loop);
2430 }
2431
2432 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2433 if (common->mark_ptr != 0)
2434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2435 if (common->control_head_ptr != 0)
2436 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2437 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2439 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2440 }
2441
2442 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2443 {
2444 while (current != NULL)
2445 {
2446 switch (current[1])
2447 {
2448 case type_then_trap:
2449 break;
2450
2451 case type_mark:
2452 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2453 return current[3];
2454 break;
2455
2456 default:
2457 SLJIT_UNREACHABLE();
2458 break;
2459 }
2460 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2461 current = (sljit_sw*)current[0];
2462 }
2463 return -1;
2464 }
2465
/* Emits the code that exports the capture vector and computes the return
   value of a successful match.

   The old content of OVECTOR(1) is saved in SLJIT_S2 and OVECTOR(1) is
   overwritten with STR_PTR. When common->mark_ptr is in use, the mark local
   is copied to jit_arguments->mark_ptr. Then offset_count words, starting at
   OVECTOR_START, are converted to offsets relative to jit_arguments->begin
   (shifted right by UCHAR_SHIFT in the 16 and 32 bit libraries) and stored
   as S32 values into jit_arguments->offsets, stepping sizeof(int).

   topbracket selects how the return value is produced: for topbracket > 1
   the local vector is scanned downwards, one pair at a time, until an entry
   differs from the value saved in SLJIT_S2; otherwise the result is 1. */
2466 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2467 {
2468 DEFINE_COMPILER;
2469 struct sljit_label *loop;
2470 struct sljit_jump *early_quit;
2471
2472 /* At this point we can freely use all registers. */
2473 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2474 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2475
2476 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2477 if (common->mark_ptr != 0)
2478 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2479 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2480 if (common->mark_ptr != 0)
2481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2 starts one int before the output array because the store in the
   loop below pre-increments its address by sizeof(int). */
2482 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2483 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2484 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2485 /* Unlikely, but possible */
2486 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop: SLJIT_S0 = source word, SLJIT_R2 = destination, SLJIT_R1 =
   remaining count, SLJIT_R0 = subject begin. */
2487 loop = LABEL();
2488 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2489 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2490 /* Copy the integer value to the output buffer */
2491 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2492 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2493 #endif
2494 OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2495 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2496 JUMPTO(SLJIT_NOT_ZERO, loop);
2497 JUMPHERE(early_quit);
2498
2499 /* Calculate the return value, which is the maximum ovector value. */
2500 if (topbracket > 1)
2501 {
2502 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2503 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2504
2505 /* OVECTOR(0) is never equal to SLJIT_S2. */
/* Each iteration steps SLJIT_R0 back by one pair (pre-updating load) and
   decrements the candidate result in SLJIT_R1; the loop repeats while the
   loaded entry equals the value saved in SLJIT_S2. */
2506 loop = LABEL();
2507 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2508 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2509 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2510 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2511 }
2512 else
2513 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2514 }
2515
/* Emits the exit sequence of a partial match. The return register is set to
   PCRE_ERROR_PARTIAL and control is transferred to `quit`.

   When jit_arguments->real_offset_count is at least 2, three values are
   converted to offsets relative to jit_arguments->begin (shifted right by
   UCHAR_SHIFT in the 16 and 32 bit libraries) and stored as S32 values into
   jit_arguments->offsets:
     [0]  the start_used_ptr local (hard partial mode) or the hit_start local
     [1]  STR_END
     [2]  only with at least 3 slots: the start_ptr local (hard partial mode)
          or the word following the hit_start local
   The function relies on STR_END being SLJIT_S1 (compile time assert). */
2516 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2517 {
2518 DEFINE_COMPILER;
2519 struct sljit_jump *jump;
2520
2521 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2522 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2523 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2524
2525 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2526 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2527 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: nothing can be stored. */
2528 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2529
2530 /* Store match begin and end. */
2531 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2532 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2533
/* The third slot is written only when at least three slots exist. */
2534 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2535 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2536 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2537 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2538 #endif
2539 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2540 JUMPHERE(jump);
2541
/* The start value is loaded before SLJIT_S1 (STR_END) is overwritten by the
   end offset computation. */
2542 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2543 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2544 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2545 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2546 #endif
2547 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2548
2549 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2550 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2551 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2552 #endif
2553 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2554
2555 JUMPTO(SLJIT_JUMP, quit);
2556 }
2557
2558 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2559 {
2560 /* May destroy TMP1. */
2561 DEFINE_COMPILER;
2562 struct sljit_jump *jump;
2563
2564 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2565 {
2566 /* The value of -1 must be kept for start_used_ptr! */
2567 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2568 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2569 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2570 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2572 JUMPHERE(jump);
2573 }
2574 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2575 {
2576 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2578 JUMPHERE(jump);
2579 }
2580 }
2581
/* Returns TRUE when the character starting at cc differs from its other
   case.

   In UTF mode the character is decoded with GETCHAR. Values above 127 are
   answered by UCD_OTHERCASE (always FALSE when SUPPORT_UCP is not defined).
   Values up to 127 are looked up in common->fcc: directly in the 16 and 32
   bit libraries, and through the common tail of the function in the 8 bit
   library. Outside UTF mode the first code unit is used, and only values
   accepted by MAX_255 are looked up in common->fcc. */
2582 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2583 {
2584 /* Detects if the character has an othercase. */
2585 unsigned int c;
2586
2587 #ifdef SUPPORT_UTF
2588 if (common->utf)
2589 {
2590 GETCHAR(c, cc);
2591 if (c > 127)
2592 {
2593 #ifdef SUPPORT_UCP
2594 return c != UCD_OTHERCASE(c);
2595 #else
2596 return FALSE;
2597 #endif
2598 }
2599 #ifndef COMPILE_PCRE8
2600 return common->fcc[c] != c;
2601 #endif
2602 }
/* Note: the "else" below belongs to the "if (common->utf)" above and only
   exists when SUPPORT_UTF is defined. */
2603 else
2604 #endif
2605 c = *cc;
2606 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2607 }
2608
2609 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2610 {
2611 /* Returns with the othercase. */
2612 #ifdef SUPPORT_UTF
2613 if (common->utf && c > 127)
2614 {
2615 #ifdef SUPPORT_UCP
2616 return UCD_OTHERCASE(c);
2617 #else
2618 return c;
2619 #endif
2620 }
2621 #endif
2622 return TABLE_GET(c, common->fcc, c);
2623 }
2624
/* Examines the character starting at cc, which must have an other case
   (asserted), and tells whether the two cases differ in exactly one bit.

   Returns 0 when more than one bit differs. Otherwise the low 8 bits of the
   result hold the differing bit as a mask and the bits above hold a position
   (position << 8 | mask):
     - 8 bit library, UTF character above 127: the position starts at
       GET_EXTRALEN(*cc) and is decreased once for each 6 bit group the mask
       is shifted down by;
     - 16 and 32 bit libraries: position 0 when the bit is below 256, position
       1 (with the mask shifted right by 8) otherwise. For UTF characters
       above 65535 a bit below 1 << 10 gives position 2 or 3 instead, and a
       bit at or above 1 << 10 is first shifted right by 10.
   NOTE(review): the position presumably indexes the byte the caller has to
   patch inside the encoded character - confirm against the callers. */
2625 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2626 {
2627 /* Detects if the character and its othercase has only 1 bit difference. */
2628 unsigned int c, oc, bit;
2629 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2630 int n;
2631 #endif
2632
2633 #ifdef SUPPORT_UTF
2634 if (common->utf)
2635 {
2636 GETCHAR(c, cc);
2637 if (c <= 127)
2638 oc = common->fcc[c];
2639 else
2640 {
2641 #ifdef SUPPORT_UCP
2642 oc = UCD_OTHERCASE(c);
2643 #else
2644 oc = c;
2645 #endif
2646 }
2647 }
2648 else
2649 {
2650 c = *cc;
2651 oc = TABLE_GET(c, common->fcc, c);
2652 }
2653 #else
2654 c = *cc;
2655 oc = TABLE_GET(c, common->fcc, c);
2656 #endif
2657
2658 SLJIT_ASSERT(c != oc);
2659
/* The set bits of c ^ oc are exactly the bits in which the cases differ. */
2660 bit = c ^ oc;
2661 /* Optimized for English alphabet. */
2662 if (c <= 127 && bit == 0x20)
2663 return (0 << 8) | 0x20;
2664
2665 /* Since c != oc, they must have at least 1 bit difference. */
2666 if (!is_powerof2(bit))
2667 return 0;
2668
2669 #if defined COMPILE_PCRE8
2670
2671 #ifdef SUPPORT_UTF
2672 if (common->utf && c > 127)
2673 {
/* Each UTF-8 continuation byte carries 6 payload bits: move one byte towards
   the front for every empty low group. */
2674 n = GET_EXTRALEN(*cc);
2675 while ((bit & 0x3f) == 0)
2676 {
2677 n--;
2678 bit >>= 6;
2679 }
2680 return (n << 8) | bit;
2681 }
2682 #endif /* SUPPORT_UTF */
2683 return (0 << 8) | bit;
2684
2685 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2686
2687 #ifdef SUPPORT_UTF
2688 if (common->utf && c > 65535)
2689 {
2690 if (bit >= (1 << 10))
2691 bit >>= 10;
2692 else
2693 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2694 }
2695 #endif /* SUPPORT_UTF */
2696 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2697
2698 #endif /* COMPILE_PCRE[8|16|32] */
2699 }
2700
2701 static void check_partial(compiler_common *common, BOOL force)
2702 {
2703 /* Checks whether a partial matching is occurred. Does not modify registers. */
2704 DEFINE_COMPILER;
2705 struct sljit_jump *jump = NULL;
2706
2707 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2708
2709 if (common->mode == JIT_COMPILE)
2710 return;
2711
2712 if (!force)
2713 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2714 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2715 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2716
2717 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2719 else
2720 {
2721 if (common->partialmatchlabel != NULL)
2722 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2723 else
2724 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2725 }
2726
2727 if (jump != NULL)
2728 JUMPHERE(jump);
2729 }
2730
2731 static void check_str_end(compiler_common *common, jump_list **end_reached)
2732 {
2733 /* Does not affect registers. Usually used in a tight spot. */
2734 DEFINE_COMPILER;
2735 struct sljit_jump *jump;
2736
2737 if (common->mode == JIT_COMPILE)
2738 {
2739 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2740 return;
2741 }
2742
2743 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2744 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2745 {
2746 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2747 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2748 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2749 }
2750 else
2751 {
2752 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2753 if (common->partialmatchlabel != NULL)
2754 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2755 else
2756 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2757 }
2758 JUMPHERE(jump);
2759 }
2760
2761 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2762 {
2763 DEFINE_COMPILER;
2764 struct sljit_jump *jump;
2765
2766 if (common->mode == JIT_COMPILE)
2767 {
2768 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2769 return;
2770 }
2771
2772 /* Partial matching mode. */
2773 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2774 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2775 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2776 {
2777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2778 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2779 }
2780 else
2781 {
2782 if (common->partialmatchlabel != NULL)
2783 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2784 else
2785 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2786 }
2787 JUMPHERE(jump);
2788 }
2789
2790 static void peek_char(compiler_common *common, sljit_u32 max)
2791 {
2792 /* Reads the character into TMP1, keeps STR_PTR.
2793 Does not check STR_END. TMP2 Destroyed. */
2794 DEFINE_COMPILER;
2795 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2796 struct sljit_jump *jump;
2797 #endif
2798
2799 SLJIT_UNUSED_ARG(max);
2800
2801 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2802 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2803 if (common->utf)
2804 {
2805 if (max < 128) return;
2806
2807 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2808 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2809 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2810 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2811 JUMPHERE(jump);
2812 }
2813 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2814
2815 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2816 if (common->utf)
2817 {
2818 if (max < 0xd800) return;
2819
2820 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2821 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2822 /* TMP2 contains the high surrogate. */
2823 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2824 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2825 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2826 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2827 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2828 JUMPHERE(jump);
2829 }
2830 #endif
2831 }
2832
2833 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2834
2835 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2836 {
2837 /* Tells whether the character codes below 128 are enough
2838 to determine a match. */
2839 const sljit_u8 value = nclass ? 0xff : 0;
2840 const sljit_u8 *end = bitset + 32;
2841
2842 bitset += 16;
2843 do
2844 {
2845 if (*bitset++ != value)
2846 return FALSE;
2847 }
2848 while (bitset < end);
2849 return TRUE;
2850 }
2851
2852 static void read_char7_type(compiler_common *common, BOOL full_read)
2853 {
2854 /* Reads the precise character type of a character into TMP1, if the character
2855 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2856 full_read argument tells whether characters above max are accepted or not. */
2857 DEFINE_COMPILER;
2858 struct sljit_jump *jump;
2859
2860 SLJIT_ASSERT(common->utf);
2861
2862 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2864
2865 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2866
2867 if (full_read)
2868 {
2869 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2870 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2871 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2872 JUMPHERE(jump);
2873 }
2874 }
2875
2876 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2877
/* Emits code that reads one character into TMP1 and advances STR_PTR.

   min / max describe the range the caller wants to test against; the
   generated decoder is only guaranteed to be exact for characters inside
   that range, which allows shorter code for narrow ranges. The first code
   unit is always read and STR_PTR always moves past it. update_str_ptr
   selects whether STR_PTR must also be moved past the rest of a multi unit
   character that the chosen decoder does not decode. TMP2 is used as scratch
   in the UTF paths, and RETURN_ADDR in the UTF-8 paths that keep the sequence
   length. */
2878 static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2879 {
2880 /* Reads the precise value of a character into TMP1, if the character is
2881 between min and max (c >= min && c <= max). Otherwise it returns with a value
2882 outside the range. Does not check STR_END. */
2883 DEFINE_COMPILER;
2884 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2885 struct sljit_jump *jump;
2886 #endif
2887 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2888 struct sljit_jump *jump2;
2889 #endif
2890
2891 SLJIT_UNUSED_ARG(update_str_ptr);
2892 SLJIT_UNUSED_ARG(min);
2893 SLJIT_UNUSED_ARG(max);
2894 SLJIT_ASSERT(min <= max);
2895
2896 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2897 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2898
2899 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2900 if (common->utf)
2901 {
/* Single byte range and no need to skip the rest: the first byte is enough. */
2902 if (max < 128 && !update_str_ptr) return;
2903
/* Bytes below 0xc0 are not decoded any further. */
2904 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2905 if (min >= 0x10000)
2906 {
/* Only four byte sequences are decoded. TMP2 = lead byte - 0xf0; the
   unsigned comparison with 7 rejects every other lead byte, in which case
   TMP1 holds just the second byte, a value below min. */
2907 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2908 if (update_str_ptr)
2909 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2910 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2911 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2912 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2913 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2914 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2915 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2916 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2917 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2918 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2919 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920 if (!update_str_ptr)
2921 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2922 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2923 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2924 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925 JUMPHERE(jump2);
/* With update_str_ptr the table value loaded into RETURN_ADDR above is
   added on both the decoded and the rejected path. */
2926 if (update_str_ptr)
2927 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2928 }
2929 else if (min >= 0x800 && max <= 0xffff)
2930 {
/* Only three byte sequences are decoded. TMP2 = lead byte - 0xe0; the
   unsigned comparison with 0xf rejects every other lead byte, in which case
   TMP1 holds just the second byte, a value below min. */
2931 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2932 if (update_str_ptr)
2933 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2934 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2935 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2936 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2937 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2938 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2940 if (!update_str_ptr)
2941 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2945 JUMPHERE(jump2);
2946 if (update_str_ptr)
2947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2948 }
/* General case: call the shared decoder; the shorter utfreadchar16 variant
   is used when max is below 0x10000. */
2949 else if (max >= 0x800)
2950 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2951 else if (max < 128)
2952 {
/* Reached only with update_str_ptr (see the early return above): nothing is
   decoded, STR_PTR is advanced by the table value of the lead byte. */
2953 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955 }
2956 else
2957 {
/* max < 0x800: the lead byte and one continuation byte are combined. For
   lead bytes >= 0xe0 bit 0x20 of the lead byte ends up as bit 0x800 of the
   result, so the value is above max. */
2958 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2959 if (!update_str_ptr)
2960 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2961 else
2962 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2963 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2964 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2965 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2966 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2967 if (update_str_ptr)
2968 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2969 }
2970 JUMPHERE(jump);
2971 }
2972 #endif
2973
2974 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2975 if (common->utf)
2976 {
2977 if (max >= 0x10000)
2978 {
/* Full surrogate pair decoding. TMP2 = unit - 0xd800; the unsigned
   comparison selects the units 0xd800..0xdbff. STR_PTR already points to the
   second unit here. */
2979 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2980 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2981 /* TMP2 contains the high surrogate. */
2982 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2983 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2984 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2985 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2986 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2987 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2988 JUMPHERE(jump);
2989 return;
2990 }
2991
2992 if (max < 0xd800 && !update_str_ptr) return;
2993
2994 /* Skip low surrogate if necessary. */
2995 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2996 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2997 if (update_str_ptr)
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* The pair is not decoded; 0x10000 is returned as a value above max
   (max < 0x10000 on this path). */
2999 if (max >= 0xd800)
3000 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3001 JUMPHERE(jump);
3002 }
3003 #endif
3004 }
3005
/* Emits code that reads the next character: read_char_range() over the
   whole 0 .. READ_CHAR_MAX range with update_str_ptr set to TRUE. */
3006 static SLJIT_INLINE void read_char(compiler_common *common)
3007 {
3008 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3009 }
3010
/* Emits code that loads the common->ctypes entry of the next character into
   TMP1 and advances STR_PTR. Characters above 255 get type 0 because the
   table has only 256 entries. TMP2 is destroyed. update_str_ptr selects how
   multi unit UTF characters are handled (see the comments in the body). */
3011 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3012 {
3013 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3014 DEFINE_COMPILER;
3015 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3016 struct sljit_jump *jump;
3017 #endif
3018 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3019 struct sljit_jump *jump2;
3020 #endif
3021
3022 SLJIT_UNUSED_ARG(update_str_ptr);
3023
3024 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3026
3027 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3028 if (common->utf)
3029 {
3030 /* This can be an extra read in some situations, but hopefully
3031 it is needed in most cases. */
3032 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3033 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3034 if (!update_str_ptr)
3035 {
/* Inline variant: combines the lead byte with exactly one continuation byte
   and advances STR_PTR by that one byte only; the type is 0 when the
   combined value is above 255.
   NOTE(review): presumably callers pass FALSE only when the bytes of longer
   sequences do not need to be skipped here - confirm against the callers. */
3036 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3037 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3038 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3039 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3040 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3041 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3043 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3044 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3045 JUMPHERE(jump2);
3046 }
3047 else
/* Out of line variant: the shared utfreadtype8 helper is called. */
3048 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3049 JUMPHERE(jump);
3050 return;
3051 }
3052 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3053
3054 #if !defined COMPILE_PCRE8
3055 /* The ctypes array contains only 256 values. */
3056 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3057 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3058 #endif
3059 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3060 #if !defined COMPILE_PCRE8
3061 JUMPHERE(jump);
3062 #endif
3063
3064 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3065 if (common->utf && update_str_ptr)
3066 {
3067 /* Skip low surrogate if necessary. */
/* TMP2 = unit - 0xd800; the unsigned comparison selects 0xd800..0xdbff. */
3068 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3069 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3070 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3071 JUMPHERE(jump);
3072 }
3073 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3074 }
3075
/* Emits code that moves STR_PTR back by one character. Outside UTF mode (and
   always in the 32 bit library) this is a single subtraction of one code
   unit; the UTF-8 and UTF-16 variants also step over the additional units of
   a multi unit character and destroy TMP1. */
3076 static void skip_char_back(compiler_common *common)
3077 {
3078 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3079 DEFINE_COMPILER;
3080 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3081 #if defined COMPILE_PCRE8
3082 struct sljit_label *label;
3083
3084 if (common->utf)
3085 {
/* Step back byte by byte while the byte stepped onto has the form 10xxxxxx,
   that is (byte & 0xc0) == 0x80. */
3086 label = LABEL();
3087 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3088 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3089 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3090 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3091 return;
3092 }
3093 #elif defined COMPILE_PCRE16
3094 if (common->utf)
3095 {
3096 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3097 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098 /* Skip low surrogate if necessary. */
/* Branch free: TMP1 becomes 1 when (unit & 0xfc00) == 0xdc00 and 0 otherwise;
   shifted left by one it is subtracted from STR_PTR as a byte count. */
3099 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3100 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3101 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3102 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3103 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3104 return;
3105 }
3106 #endif /* COMPILE_PCRE[8|16] */
3107 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3108 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3109 }
3110
3111 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3112 {
3113 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3114 DEFINE_COMPILER;
3115 struct sljit_jump *jump;
3116
3117 if (nltype == NLTYPE_ANY)
3118 {
3119 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3120 sljit_set_current_flags(compiler, SLJIT_SET_Z);
3121 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3122 }
3123 else if (nltype == NLTYPE_ANYCRLF)
3124 {
3125 if (jumpifmatch)
3126 {
3127 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3128 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3129 }
3130 else
3131 {
3132 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3133 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3134 JUMPHERE(jump);
3135 }
3136 }
3137 else
3138 {
3139 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3140 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3141 }
3142 }
3143
3144 #ifdef SUPPORT_UTF
3145
3146 #if defined COMPILE_PCRE8
/* Emits the shared UTF-8 decoder helper, entered by a fast call.

   On entry TMP1 holds the lead byte and STR_PTR points to the first
   continuation byte. On return TMP1 holds the decoded character, TMP2 the
   length of the sequence in code units (2, 3 or 4) and STR_PTR points past
   the sequence. RETURN_ADDR holds the fast call return address. */
3147 static void do_utfreadchar(compiler_common *common)
3148 {
3149 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3150 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3151 DEFINE_COMPILER;
3152 struct sljit_jump *jump;
3153
3154 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3155 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3156 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3157 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3158 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3159 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3160
3161 /* Searching for the first zero. */
/* Bit 0x20 of the lead byte has been shifted to bit 0x800; it is clear only
   for two byte sequences. */
3162 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3163 jump = JUMP(SLJIT_NOT_ZERO);
3164 /* Two byte sequence. */
3165 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3166 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3168
3169 JUMPHERE(jump);
/* The XOR removes the length marker bit tested above before the next byte is
   merged in. */
3170 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3171 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3172 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3173 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3174 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3175
/* Bit 0x10 of the lead byte is now bit 0x10000; it is clear for three byte
   sequences. */
3176 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3177 jump = JUMP(SLJIT_NOT_ZERO);
3178 /* Three byte sequence. */
3179 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3181 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3182
3183 /* Four byte sequence. */
3184 JUMPHERE(jump);
3185 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3186 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3187 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3188 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3189 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3190 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3191 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3192 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3193 }
3194
/* Emits the shared UTF-8 decoder helper for callers that only need exact
   values below 0x10000, entered by a fast call.

   On entry TMP1 holds the lead byte and STR_PTR points to the first
   continuation byte. On return STR_PTR points past the sequence and TMP1
   holds the decoded character for two and three byte sequences. Four byte
   sequences are only skipped: the 0x400 marker bit is not cleared, so the
   returned value has bit 0x10000 set. TMP2 is destroyed and, unlike in
   do_utfreadchar, does not return the length. */
3195 static void do_utfreadchar16(compiler_common *common)
3196 {
3197 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3198 of the character (>= 0xc0). Return value in TMP1. */
3199 DEFINE_COMPILER;
3200 struct sljit_jump *jump;
3201
3202 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3203 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3204 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3205 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3206 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3207 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3208
3209 /* Searching for the first zero. */
/* Bit 0x20 of the lead byte has been shifted to bit 0x800; it is clear only
   for two byte sequences. */
3210 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3211 jump = JUMP(SLJIT_NOT_ZERO);
3212 /* Two byte sequence. */
3213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3214 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3215
3216 JUMPHERE(jump);
/* Bit 0x10 of the lead byte is bit 0x400 here. TMP2 becomes 1 when it is set
   (four byte lead) and 0 otherwise, and is added to STR_PTR as the extra
   byte to skip. */
3217 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3218 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
3219 /* This code runs only in 8 bit mode. No need to shift the value. */
3220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3221 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3222 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3223 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3224 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3225 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3226 /* Three byte sequence. */
3227 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3228 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3229 }
3230
3231 static void do_utfreadtype8(compiler_common *common)
3232 {
3233 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3234 of the character (>= 0xc0). Return value in TMP1. */
3235 DEFINE_COMPILER;
3236 struct sljit_jump *jump;
3237 struct sljit_jump *compare;
3238
3239 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3240
3241 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3242 jump = JUMP(SLJIT_NOT_ZERO);
3243 /* Two byte sequence. */
3244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3246 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3247 /* The upper 5 bits are known at this point. */
3248 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3249 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3250 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3251 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3252 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3253 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3254
3255 JUMPHERE(compare);
3256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3257 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3258
3259 /* We only have types for characters less than 256. */
3260 JUMPHERE(jump);
3261 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3263 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3264 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3265 }
3266
3267 #endif /* COMPILE_PCRE8 */
3268
3269 #endif /* SUPPORT_UTF */
3270
3271 #ifdef SUPPORT_UCP
3272
3273 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3274 #define UCD_BLOCK_MASK 127
3275 #define UCD_BLOCK_SHIFT 7
3276
3277 static void do_getucd(compiler_common *common)
3278 {
3279 /* Search the UCD record for the character comes in TMP1.
3280 Returns chartype in TMP1 and UCD offset in TMP2. */
3281 DEFINE_COMPILER;
3282 #ifdef COMPILE_PCRE32
3283 struct sljit_jump *jump;
3284 #endif
3285
3286 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3287 /* dummy_ucd_record */
3288 const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
3289 SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
3290 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
3291 #endif
3292
3293 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3294
3295 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3296
3297 #ifdef COMPILE_PCRE32
3298 if (!common->utf)
3299 {
3300 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
3301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
3302 JUMPHERE(jump);
3303 }
3304 #endif
3305
3306 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3307 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3308 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3309 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3310 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3311 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3312 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3313 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3314 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3315 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3316 }
3317 #endif
3318
3319 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3320 {
3321 DEFINE_COMPILER;
3322 struct sljit_label *mainloop;
3323 struct sljit_label *newlinelabel = NULL;
3324 struct sljit_jump *start;
3325 struct sljit_jump *end = NULL;
3326 struct sljit_jump *end2 = NULL;
3327 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3328 struct sljit_jump *singlechar;
3329 #endif
3330 jump_list *newline = NULL;
3331 BOOL newlinecheck = FALSE;
3332 BOOL readuchar = FALSE;
3333
3334 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3335 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3336 newlinecheck = TRUE;
3337
3338 if (common->match_end_ptr != 0)
3339 {
3340 /* Search for the end of the first line. */
3341 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3342
3343 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3344 {
3345 mainloop = LABEL();
3346 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3347 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3348 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3349 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3350 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3351 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3352 JUMPHERE(end);
3353 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3354 }
3355 else
3356 {
3357 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3358 mainloop = LABEL();
3359 /* Continual stores does not cause data dependency. */
3360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3361 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3362 check_newlinechar(common, common->nltype, &newline, TRUE);
3363 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3364 JUMPHERE(end);
3365 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3366 set_jumps(newline, LABEL());
3367 }
3368
3369 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3370 }
3371
3372 start = JUMP(SLJIT_JUMP);
3373
3374 if (newlinecheck)
3375 {
3376 newlinelabel = LABEL();
3377 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3378 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3379 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3380 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3381 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3382 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3383 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3384 #endif
3385 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3386 end2 = JUMP(SLJIT_JUMP);
3387 }
3388
3389 mainloop = LABEL();
3390
3391 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3392 #ifdef SUPPORT_UTF
3393 if (common->utf) readuchar = TRUE;
3394 #endif
3395 if (newlinecheck) readuchar = TRUE;
3396
3397 if (readuchar)
3398 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3399
3400 if (newlinecheck)
3401 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3402
3403 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3404 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3405 #if defined COMPILE_PCRE8
3406 if (common->utf)
3407 {
3408 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3409 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3410 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3411 JUMPHERE(singlechar);
3412 }
3413 #elif defined COMPILE_PCRE16
3414 if (common->utf)
3415 {
3416 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3417 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3418 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3419 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3420 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3421 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3422 JUMPHERE(singlechar);
3423 }
3424 #endif /* COMPILE_PCRE[8|16] */
3425 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3426 JUMPHERE(start);
3427
3428 if (newlinecheck)
3429 {
3430 JUMPHERE(end);
3431 JUMPHERE(end2);
3432 }
3433
3434 return mainloop;
3435 }
3436
3437 #define MAX_N_CHARS 16
3438 #define MAX_DIFF_CHARS 6
3439
3440 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3441 {
3442 pcre_uchar i, len;
3443
3444 len = chars[0];
3445 if (len == 255)
3446 return;
3447
3448 if (len == 0)
3449 {
3450 chars[0] = 1;
3451 chars[1] = chr;
3452 return;
3453 }
3454
3455 for (i = len; i > 0; i--)
3456 if (chars[i] == chr)
3457 return;
3458
3459 if (len >= MAX_DIFF_CHARS - 1)
3460 {
3461 chars[0] = 255;
3462 return;
3463 }
3464
3465 len++;
3466 chars[len] = chr;
3467 chars[0] = len;
3468 }
3469
/* Collects, for the leading character positions of the compiled pattern
   starting at cc, the code units which may appear at each position.

   cc         - compiled pattern code to scan
   chars      - per-position sets; every set occupies MAX_DIFF_CHARS entries,
                entry 0 is the number of stored code units (255 = any)
   max_chars  - maximum number of positions which may be filled
   rec_count  - iteration budget shared with the recursive calls; it is
                decremented on every loop iteration and the scan returns 0
                as soon as it is exhausted

   Returns the number of positions processed by this invocation. */
3470 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
3471 {
3472 /* Recursive function, which scans prefix literals. */
3473 BOOL last, any, class, caseless;
3474 int len, repeat, len_save, consumed = 0;
3475 sljit_u32 chr; /* Any unicode character. */
3476 sljit_u8 *bytes, *bytes_end, byte;
3477 pcre_uchar *alternative, *cc_save, *oc;
/* Holds the other case form of a caseless literal. */
3478 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3479 pcre_uchar othercase[8];
3480 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3481 pcre_uchar othercase[2];
3482 #else
3483 pcre_uchar othercase[1];
3484 #endif
3485
/* repeat is the number of times the next item is recorded. It is set by
   the EXACT style opcodes and reset to 1 after each recorded item. */
3486 repeat = 1;
3487 while (TRUE)
3488 {
3489 if (*rec_count == 0)
3490 return 0;
3491 (*rec_count)--;
3492
/* last:     stop scanning after the current item has been recorded.
   any:      the item matches a character which is recorded as 255.
   class:    the item is a 32 byte bitmap class.
   caseless: the literal is matched caselessly. */
3493 last = TRUE;
3494 any = FALSE;
3495 class = FALSE;
3496 caseless = FALSE;
3497
3498 switch (*cc)
3499 {
3500 case OP_CHARI:
3501 caseless = TRUE;
/* Fall through. */
3502 case OP_CHAR:
3503 last = FALSE;
3504 cc++;
3505 break;
3506
3507 case OP_SOD:
3508 case OP_SOM:
3509 case OP_SET_SOM:
3510 case OP_NOT_WORD_BOUNDARY:
3511 case OP_WORD_BOUNDARY:
3512 case OP_EODN:
3513 case OP_EOD:
3514 case OP_CIRC:
3515 case OP_CIRCM:
3516 case OP_DOLL:
3517 case OP_DOLLM:
3518 /* Zero width assertions. */
3519 cc++;
3520 continue;
3521
/* Assertions are skipped as a whole. */
3522 case OP_ASSERT:
3523 case OP_ASSERT_NOT:
3524 case OP_ASSERTBACK:
3525 case OP_ASSERTBACK_NOT:
3526 cc = bracketend(cc);
3527 continue;
3528
3529 case OP_PLUSI:
3530 case OP_MINPLUSI:
3531 case OP_POSPLUSI:
3532 caseless = TRUE;
/* Fall through. */
/* The character is recorded once; last stays TRUE so the scan ends there. */
3533 case OP_PLUS:
3534 case OP_MINPLUS:
3535 case OP_POSPLUS:
3536 cc++;
3537 break;
3538
3539 case OP_EXACTI:
3540 caseless = TRUE;
/* Fall through. */
3541 case OP_EXACT:
3542 repeat = GET2(cc, 1);
3543 last = FALSE;
3544 cc += 1 + IMM2_SIZE;
3545 break;
3546
3547 case OP_QUERYI:
3548 case OP_MINQUERYI:
3549 case OP_POSQUERYI:
3550 caseless = TRUE;
/* Fall through. */
/* Optional character: the code following it (cc + len) is scanned first
   into the same sets, and the number of positions covered there becomes
   the new limit. The optional character itself is recorded afterwards. */
3551 case OP_QUERY:
3552 case OP_MINQUERY:
3553 case OP_POSQUERY:
3554 len = 1;
3555 cc++;
3556 #ifdef SUPPORT_UTF
3557 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3558 #endif
3559 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3560 if (max_chars == 0)
3561 return consumed;
3562 last = FALSE;
3563 break;
3564
3565 case OP_KET:
3566 cc += 1 + LINK_SIZE;
3567 continue;
3568
/* Skip forward by the link value stored after the opcode. */
3569 case OP_ALT:
3570 cc += GET(cc, 1);
3571 continue;
3572
/* Brackets: every OP_ALT alternative reached through the link values is
   scanned recursively into the same sets (each result limits max_chars),
   then scanning continues inside the first alternative. */
3573 case OP_ONCE:
3574 case OP_ONCE_NC:
3575 case OP_BRA:
3576 case OP_BRAPOS:
3577 case OP_CBRA:
3578 case OP_CBRAPOS:
3579 alternative = cc + GET(cc, 1);
3580 while (*alternative == OP_ALT)
3581 {
3582 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3583 if (max_chars == 0)
3584 return consumed;
3585 alternative += GET(alternative, 1);
3586 }
3587
3588 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3589 cc += IMM2_SIZE;
3590 cc += 1 + LINK_SIZE;
3591 continue;
3592
/* NOTE(review): is_char7_bitset is defined elsewhere; presumably it checks
   that the bitmap only contains 7 bit characters - confirm. */
3593 case OP_CLASS:
3594 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3595 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3596 return consumed;
3597 #endif
3598 class = TRUE;
3599 break;
3600
3601 case OP_NCLASS:
3602 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3603 if (common->utf) return consumed;
3604 #endif
3605 class = TRUE;
3606 break;
3607
3608 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3609 case OP_XCLASS:
3610 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3611 if (common->utf) return consumed;
3612 #endif
3613 any = TRUE;
3614 cc += GET(cc, 1);
3615 break;
3616 #endif
3617
3618 case OP_DIGIT:
3619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3620 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3621 return consumed;
3622 #endif
3623 any = TRUE;
3624 cc++;
3625 break;
3626
3627 case OP_WHITESPACE:
3628 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3629 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3630 return consumed;
3631 #endif
3632 any = TRUE;
3633 cc++;
3634 break;
3635
3636 case OP_WORDCHAR:
3637 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3638 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3639 return consumed;
3640 #endif
3641 any = TRUE;
3642 cc++;
3643 break;
3644
/* OP_NOT / OP_NOTI are followed by one more code unit, hence the extra
   increment before the shared code. */
3645 case OP_NOT:
3646 case OP_NOTI:
3647 cc++;
3648 /* Fall through. */
3649 case OP_NOT_DIGIT:
3650 case OP_NOT_WHITESPACE:
3651 case OP_NOT_WORDCHAR:
3652 case OP_ANY:
3653 case OP_ALLANY:
3654 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3655 if (common->utf) return consumed;
3656 #endif
3657 any = TRUE;
3658 cc++;
3659 break;
3660
3661 #ifdef SUPPORT_UTF
3662 case OP_NOTPROP:
3663 case OP_PROP:
3664 #ifndef COMPILE_PCRE32
3665 if (common->utf) return consumed;
3666 #endif
3667 any = TRUE;
3668 cc += 1 + 2;
3669 break;
3670 #endif
3671
/* Only the count is stored here; the type opcode which follows is
   processed by the next iteration with this repeat value. */
3672 case OP_TYPEEXACT:
3673 repeat = GET2(cc, 1);
3674 cc += 1 + IMM2_SIZE;
3675 continue;
3676
3677 case OP_NOTEXACT:
3678 case OP_NOTEXACTI:
3679 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3680 if (common->utf) return consumed;
3681 #endif
3682 any = TRUE;
3683 repeat = GET2(cc, 1);
3684 cc += 1 + IMM2_SIZE + 1;
3685 break;
3686
/* Every other opcode ends the prefix. */
3687 default:
3688 return consumed;
3689 }
3690
/* Mark 'repeat' consecutive positions as "any character". */
3691 if (any)
3692 {
3693 do
3694 {
3695 chars[0] = 255;
3696
3697 consumed++;
3698 if (--max_chars == 0)
3699 return consumed;
3700 chars += MAX_DIFF_CHARS;
3701 }
3702 while (--repeat > 0);
3703
3704 repeat = 1;
3705 continue;
3706 }
3707
3708 if (class)
3709 {
/* bytes points to the 32 byte bitmap; cc is moved to the opcode after it. */
3710 bytes = (sljit_u8*) (cc + 1);
3711 cc += 1 + 32 / sizeof(pcre_uchar);
3712
3713 switch (*cc)
3714 {
/* The class may match zero times: scan what follows it into the same
   sets first and use the result as the new limit. */
3715 case OP_CRSTAR:
3716 case OP_CRMINSTAR:
3717 case OP_CRPOSSTAR:
3718 case OP_CRQUERY:
3719 case OP_CRMINQUERY:
3720 case OP_CRPOSQUERY:
3721 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3722 if (max_chars == 0)
3723 return consumed;
3724 break;
3725
3726 default:
3727 case OP_CRPLUS:
3728 case OP_CRMINPLUS:
3729 case OP_CRPOSPLUS:
3730 break;
3731
/* The first GET2 value of the range is used as the repeat count. */
3732 case OP_CRRANGE:
3733 case OP_CRMINRANGE:
3734 case OP_CRPOSRANGE:
3735 repeat = GET2(cc, 1);
3736 if (repeat <= 0)
3737 return consumed;
3738 break;
3739 }
3740
3741 do
3742 {
/* A bitmap with the bit of character 255 set is recorded as "any". */
3743 if (bytes[31] & 0x80)
3744 chars[0] = 255;
3745 else if (chars[0] != 255)
3746 {
/* Add every character whose bit is set. chr is the character of the
   lowest bit of the current byte; the walk stops early once the set has
   overflowed to 255. */
3747 bytes_end = bytes + 32;
3748 chr = 0;
3749 do
3750 {
3751 byte = *bytes++;
3752 SLJIT_ASSERT((chr & 0x7) == 0);
3753 if (byte == 0)
3754 chr += 8;
3755 else
3756 {
3757 do
3758 {
3759 if ((byte & 0x1) != 0)
3760 add_prefix_char(chr, chars);
3761 byte >>= 1;
3762 chr++;
3763 }
3764 while (byte != 0);
/* Round up to the first character of the next bitmap byte. */
3765 chr = (chr + 7) & ~7;
3766 }
3767 }
3768 while (chars[0] != 255 && bytes < bytes_end);
/* Rewind to the start of the bitmap for the next repetition. */
3769 bytes = bytes_end - 32;
3770 }
3771
3772 consumed++;
3773 if (--max_chars == 0)
3774 return consumed;
3775 chars += MAX_DIFF_CHARS;
3776 }
3777 while (--repeat > 0);
3778
/* Decide whether scanning can continue after the class. Opcodes without
   a case here (including the PLUS forms) leave cc unchanged, so the next
   iteration of the outer loop looks at that opcode. */
3779 switch (*cc)
3780 {
3781 case OP_CRSTAR:
3782 case OP_CRMINSTAR:
3783 case OP_CRPOSSTAR:
3784 return consumed;
3785
3786 case OP_CRQUERY:
3787 case OP_CRMINQUERY:
3788 case OP_CRPOSQUERY:
3789 cc++;
3790 break;
3791
/* Scanning only continues past a range whose two GET2 values are equal. */
3792 case OP_CRRANGE:
3793 case OP_CRMINRANGE:
3794 case OP_CRPOSRANGE:
3795 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3796 return consumed;
3797 cc += 1 + 2 * IMM2_SIZE;
3798 break;
3799 }
3800
3801 repeat = 1;
3802 continue;
3803 }
3804
/* Literal character: cc points to its first code unit and len is the
   number of code units it occupies. */
3805 len = 1;
3806 #ifdef SUPPORT_UTF
3807 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3808 #endif
3809
3810 if (caseless && char_has_othercase(common, cc))
3811 {
3812 #ifdef SUPPORT_UTF
3813 if (common->utf)
3814 {
/* The other case is only usable when it is encoded in the same number
   of code units, because both forms are recorded position by position. */
3815 GETCHAR(chr, cc);
3816 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3817 return consumed;
3818 }
3819 else
3820 #endif
3821 {
3822 chr = *cc;
3823 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3824 }
3825 }
3826 else
3827 {
3828 caseless = FALSE;
3829 othercase[0] = 0; /* Stops compiler warning - PH */
3830 }
3831
/* Record the code units of the character (and of its other case form)
   into consecutive positions, 'repeat' times. cc and len are restored
   from the saved values before every further repetition. */
3832 len_save = len;
3833 cc_save = cc;
3834 while (TRUE)
3835 {
3836 oc = othercase;
3837 do
3838 {
/* NOTE(review): the value stored in chr here is not read again before it
   is overwritten within this function. */
3839 chr = *cc;
3840 add_prefix_char(*cc, chars);
3841
3842 if (caseless)
3843 add_prefix_char(*oc, chars);
3844
3845 len--;
3846 consumed++;
3847 if (--max_chars == 0)
3848 return consumed;
3849 chars += MAX_DIFF_CHARS;
3850 cc++;
3851 oc++;
3852 }
3853 while (len > 0);
3854
3855 if (--repeat == 0)
3856 break;
3857
3858 len = len_save;
3859 cc = cc_save;
3860 }
3861
3862 repeat = 1;
3863 if (last)
3864 return consumed;
3865 }
3866 }
3867
3868 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3869
3870 static sljit_s32 character_to_int32(pcre_uchar chr)
3871 {
3872 sljit_s32 value = (sljit_s32)chr;
3873 #if defined COMPILE_PCRE8
3874 #define SSE2_COMPARE_TYPE_INDEX 0
3875 return (value << 24) | (value << 16) | (value << 8) | value;
3876 #elif defined COMPILE_PCRE16
3877 #define SSE2_COMPARE_TYPE_INDEX 1
3878 return (value << 16) | value;
3879 #elif defined COMPILE_PCRE32
3880 #define SSE2_COMPARE_TYPE_INDEX 2
3881 return value;
3882 #else
3883 #error "Unsupported unit width"
3884 #endif
3885 }
3886
/* Emits an SSE2 based search which moves STR_PTR forward to the first code
   unit equal to char1 or char2 (both arguments are the same when a single
   character is searched).

   The subject is compared in aligned 16 byte blocks, so the position left
   in STR_PTR may be at or beyond STR_END; the caller has to compare STR_PTR
   with STR_END afterwards. TMP1 and TMP2 are used as scratch registers, and
   RETURN_ADDR as well when two compares are needed.

   The machine instructions are assembled by hand in instruction[]. Bytes
   which are not rewritten keep their value from the previous emission, so
   the order of the assignments below matters. */
3887 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3888 {
3889 DEFINE_COMPILER;
3890 struct sljit_label *start;
3891 struct sljit_jump *quit[3];
3892 struct sljit_jump *nomatch;
3893 sljit_u8 instruction[8];
3894 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3895 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3896 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3897 BOOL load_twice = FALSE;
3898 pcre_uchar bit;
3899
/* When the two characters differ in exactly one bit, that bit is forced
   to 1 in the subject data (POR) and a single compare against
   (char1 | bit) is enough. Otherwise bit is 0, and two different
   characters need two loads and two compares (load_twice). */
3900 bit = char1 ^ char2;
3901 if (!is_powerof2(bit))
3902 bit = 0;
3903
3904 if ((char1 != char2) && bit == 0)
3905 load_twice = TRUE;
3906
/* quit[0]: nothing to search. */
3907 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3908
3909 /* First part (unaligned start) */
3910
3911 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3912
/* The register indexes are placed into 3 bit ModRM fields without a REX
   prefix. */
3913 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3914
/* The ModRM reg field (2 << 3) selects xmm2 as the destination. */
3915 /* MOVD xmm, r/m32 */
3916 instruction[0] = 0x66;
3917 instruction[1] = 0x0f;
3918 instruction[2] = 0x6e;
3919 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3920 sljit_emit_op_custom(compiler, instruction, 4);
3921
3922 if (char1 != char2)
3923 {
/* xmm3 receives either the differing bit (used by POR) or char2 (used by
   the second compare). */
3924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3925
3926 /* MOVD xmm, r/m32 */
3927 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3928 sljit_emit_op_custom(compiler, instruction, 4);
3929 }
3930
/* Shuffle with imm8 == 0: copy the lowest 32 bits of xmm2 to all four
   32 bit lanes. */
3931 /* PSHUFD xmm1, xmm2/m128, imm8 */
3932 instruction[2] = 0x70;
3933 instruction[3] = 0xc0 | (2 << 3) | 2;
3934 instruction[4] = 0;
3935 sljit_emit_op_custom(compiler, instruction, 5);
3936
3937 if (char1 != char2)
3938 {
/* Same broadcast for xmm3. */
3939 /* PSHUFD xmm1, xmm2/m128, imm8 */
3940 instruction[3] = 0xc0 | (3 << 3) | 3;
3941 instruction[4] = 0;
3942 sljit_emit_op_custom(compiler, instruction, 5);
3943 }
3944
/* TMP2 = offset of STR_PTR inside its 16 byte block; STR_PTR is rounded
   down to the start of that block. */
3945 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3946 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3947
/* Load the block at [STR_PTR] into xmm0 (and into xmm1 as well when two
   compares are needed). */
3948 /* MOVDQA xmm1, xmm2/m128 */
3949 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3950
3951 if (str_ptr_ind < 8)
3952 {
3953 instruction[2] = 0x6f;
3954 instruction[3] = (0 << 3) | str_ptr_ind;
3955 sljit_emit_op_custom(compiler, instruction, 4);
3956
3957 if (load_twice)
3958 {
3959 instruction[3] = (1 << 3) | str_ptr_ind;
3960 sljit_emit_op_custom(compiler, instruction, 4);
3961 }
3962 }
3963 else
3964 {
/* Register index >= 8: a 0x41 (REX.B) prefix byte is inserted after 0x66
   and the remaining bytes move up by one. */
3965 instruction[1] = 0x41;
3966 instruction[2] = 0x0f;
3967 instruction[3] = 0x6f;
3968 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3969 sljit_emit_op_custom(compiler, instruction, 5);
3970
3971 if (load_twice)
3972 {
/* NOTE(review): unlike the line above, str_ptr_ind is not masked with 0x7
   here. For indexes 8..15 bit 3 of the index coincides with the (1 << 3)
   reg field, so the resulting byte is the same. */
3973 instruction[4] = (1 << 3) | str_ptr_ind;
3974 sljit_emit_op_custom(compiler, instruction, 5);
3975 }
/* Restore the 0x66 0x0f layout expected by the emissions below. */
3976 instruction[1] = 0x0f;
3977 }
3978
3979 #else
3980
3981 instruction[2] = 0x6f;
3982 instruction[3] = (0 << 3) | str_ptr_ind;
3983 sljit_emit_op_custom(compiler, instruction, 4);
3984
3985 if (load_twice)
3986 {
3987 instruction[3] = (1 << 3) | str_ptr_ind;
3988 sljit_emit_op_custom(compiler, instruction, 4);
3989 }
3990
3991 #endif
3992
3993 if (bit != 0)
3994 {
/* xmm0 |= xmm3: force the differing bit to 1 in every code unit. */
3995 /* POR xmm1, xmm2/m128 */
3996 instruction[2] = 0xeb;
3997 instruction[3] = 0xc0 | (0 << 3) | 3;
3998 sljit_emit_op_custom(compiler, instruction, 4);
3999 }
4000
/* Compare xmm0 with xmm2; the opcode is selected by the code unit width. */
4001 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4002 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4003 instruction[3] = 0xc0 | (0 << 3) | 2;
4004 sljit_emit_op_custom(compiler, instruction, 4);
4005
4006 if (load_twice)
4007 {
/* Second compare: xmm1 with xmm3 (char2). */
4008 instruction[3] = 0xc0 | (1 << 3) | 3;
4009 sljit_emit_op_custom(compiler, instruction, 4);
4010 }
4011
/* TMP1 = byte mask of the compare result in xmm0. */
4012 /* PMOVMSKB reg, xmm */
4013 instruction[2] = 0xd7;
4014 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4015 sljit_emit_op_custom(compiler, instruction, 4);
4016
4017 if (load_twice)
4018 {
/* TMP2 still holds the block offset, so it is saved in RETURN_ADDR while
   the mask of xmm1 is fetched into it and merged into TMP1. */
4019 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4020 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4021 sljit_emit_op_custom(compiler, instruction, 4);
4022
4023 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4024 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4025 }
4026
/* Drop the mask bits which belong to bytes before the original STR_PTR. */
4027 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4028
/* TMP1 = index of the lowest set bit; the zero flag reports an empty mask. */
4029 /* BSF r32, r/m32 */
4030 instruction[0] = 0x0f;
4031 instruction[1] = 0xbc;
4032 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4033 sljit_emit_op_custom(compiler, instruction, 3);
4034 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4035
4036 nomatch = JUMP(SLJIT_ZERO);
4037
/* Found in the first block: aligned address + block offset + bit index. */
4038 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4039 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4040 quit[1] = JUMP(SLJIT_JUMP);
4041
4042 JUMPHERE(nomatch);
4043
/* Main loop: step to the next 16 byte block, quit[2] leaves at the end. */
4044 start = LABEL();
4045 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4046 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4047
4048 /* Second part (aligned) */
4049
/* The BSF emission above overwrote the first two bytes. */
4050 instruction[0] = 0x66;
4051 instruction[1] = 0x0f;
4052
4053 /* MOVDQA xmm1, xmm2/m128 */
4054 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4055
4056 if (str_ptr_ind < 8)
4057 {
4058 instruction[2] = 0x6f;
4059 instruction[3] = (0 << 3) | str_ptr_ind;
4060 sljit_emit_op_custom(compiler, instruction, 4);
4061
4062 if (load_twice)
4063 {
4064 instruction[3] = (1 << 3) | str_ptr_ind;
4065 sljit_emit_op_custom(compiler, instruction, 4);
4066 }
4067 }
4068 else
4069 {
4070 instruction[1] = 0x41;
4071 instruction[2] = 0x0f;
4072 instruction[3] = 0x6f;
4073 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4074 sljit_emit_op_custom(compiler, instruction, 5);
4075
4076 if (load_twice)
4077 {
/* NOTE(review): same unmasked str_ptr_ind as in the first part. */
4078 instruction[4] = (1 << 3) | str_ptr_ind;
4079 sljit_emit_op_custom(compiler, instruction, 5);
4080 }
4081 instruction[1] = 0x0f;
4082 }
4083
4084 #else
4085
4086 instruction[2] = 0x6f;
4087 instruction[3] = (0 << 3) | str_ptr_ind;
4088 sljit_emit_op_custom(compiler, instruction, 4);
4089
4090 if (load_twice)
4091 {
4092 instruction[3] = (1 << 3) | str_ptr_ind;
4093 sljit_emit_op_custom(compiler, instruction, 4);
4094 }
4095
4096 #endif
4097
4098 if (bit != 0)
4099 {
4100 /* POR xmm1, xmm2/m128 */
4101 instruction[2] = 0xeb;
4102 instruction[3] = 0xc0 | (0 << 3) | 3;
4103 sljit_emit_op_custom(compiler, instruction, 4);
4104 }
4105
4106 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4107 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4108 instruction[3] = 0xc0 | (0 << 3) | 2;
4109 sljit_emit_op_custom(compiler, instruction, 4);
4110
4111 if (load_twice)
4112 {
4113 instruction[3] = 0xc0 | (1 << 3) | 3;
4114 sljit_emit_op_custom(compiler, instruction, 4);
4115 }
4116
4117 /* PMOVMSKB reg, xmm */
4118 instruction[2] = 0xd7;
4119 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4120 sljit_emit_op_custom(compiler, instruction, 4);
4121
4122 if (load_twice)
4123 {
/* TMP2 holds nothing which is needed later in the aligned loop, so it is
   not saved here. */
4124 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4125 sljit_emit_op_custom(compiler, instruction, 4);
4126
4127 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4128 }
4129
4130 /* BSF r32, r/m32 */
4131 instruction[0] = 0x0f;
4132 instruction[1] = 0xbc;
4133 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4134 sljit_emit_op_custom(compiler, instruction, 3);
4135 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4136
/* Empty mask: continue with the next block. */
4137 JUMPTO(SLJIT_ZERO, start);
4138
/* Found: the bit index is the byte offset inside the aligned block. */
4139 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4140
/* Common exit of all three quit jumps. */
4141 start = LABEL();
4142 SET_LABEL(quit[0], start);
4143 SET_LABEL(quit[1], start);
4144 SET_LABEL(quit[2], start);
4145 }
4146
4147 #undef SSE2_COMPARE_TYPE_INDEX
4148
4149 #endif
4150
4151 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
4152 {
4153 DEFINE_COMPILER;
4154 struct sljit_label *start;
4155 struct sljit_jump *quit;
4156 struct sljit_jump *found;
4157 pcre_uchar mask;
4158 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4159 struct sljit_label *utf_start = NULL;
4160 struct sljit_jump *utf_quit = NULL;
4161 #endif
4162 BOOL has_match_end = (common->match_end_ptr != 0);
4163
4164 if (offset > 0)
4165 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4166
4167 if (has_match_end)
4168 {
4169 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4170
4171 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4172 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4173 sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4174 }
4175
4176 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4177 if (common->utf && offset > 0)
4178 utf_start = LABEL();
4179 #endif
4180
4181 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4182
4183 /* SSE2 accelerated first character search. */
4184
4185 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
4186 {
4187 fast_forward_first_char2_sse2(common, char1, char2);
4188
4189 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4190 if (common->mode == JIT_COMPILE)
4191 {
4192 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4193 SLJIT_ASSERT(common->forced_quit_label == NULL);
4194 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4195 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4196
4197 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4198 if (common->utf && offset > 0)
4199 {
4200 SLJIT_ASSERT(common->mode == JIT_COMPILE);
4201
4202 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4203 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4204 #if defined COMPILE_PCRE8
4205 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4206 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4207 #elif defined COMPILE_PCRE16
4208 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4209 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4210 #else
4211 #error "Unknown code width"
4212 #endif
4213 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4214 }
4215 #endif
4216
4217 if (offset > 0)
4218 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4219 }
4220 else
4221 {
4222 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4223 if (has_match_end)
4224 {
4225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4226 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
4227 }
4228 else
4229 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
4230 }
4231
4232 if (has_match_end)
4233 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4234 return;
4235 }
4236
4237 #endif
4238
4239 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4240
4241 start = LABEL();
4242 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4243
4244 if (char1 == char2)
4245 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4246 else
4247 {
4248 mask = char1 ^ char2;
4249 if (is_powerof2(mask))
4250 {
4251 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4252 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4253 }
4254 else
4255 {
4256 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4257 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
4258 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4259 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
4260 found = JUMP(SLJIT_NOT_ZERO);
4261 }
4262 }
4263
4264 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4265 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4266
4267 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4268 if (common->utf && offset > 0)
4269 utf_quit = JUMP(SLJIT_JUMP);
4270 #endif
4271
4272 JUMPHERE(found);
4273
4274 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4275 if (common->utf && offset > 0)
4276 {
4277 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4278 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4279 #if defined COMPILE_PCRE8
4280 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4281 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4282 #elif defined COMPILE_PCRE16
4283 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4284 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4285 #else
4286 #error "Unknown code width"
4287 #endif
4288 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4289 JUMPHERE(utf_quit);
4290 }
4291 #endif
4292
4293 JUMPHERE(quit);
4294
4295 if (has_match_end)
4296 {
4297 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4299 if (offset > 0)
4300 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4301 JUMPHERE(quit);
4302 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4303 }
4304
4305 if (offset > 0)
4306 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4307 }
4308
/* Emits the "fast forward" code that uses the leading characters of the
   pattern to skip subject positions which cannot start a match.
   scan_prefix() records, for up to MAX_N_CHARS leading positions, the set of
   characters each position may hold. When at least four consecutive
   positions have a bounded set, a 256 entry skip table is built from them
   and a table driven search loop is emitted; one position with at most two
   candidate characters may be verified with a direct compare as well.
   Returns FALSE when the prefix yields nothing usable, TRUE otherwise
   (including the case when the table allocation fails). */
4309 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4310 {
4311 DEFINE_COMPILER;
4312 struct sljit_label *start;
4313 struct sljit_jump *quit;
4314 struct sljit_jump *match;
4315 /* chars[i * MAX_DIFF_CHARS] is the number of different characters recorded
4316 for position i (255 represents any character); the characters follow it. */
4317 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4318 sljit_s32 offset;
4319 pcre_uchar mask;
4320 pcre_uchar *char_set, *char_set_end;
4321 int i, max, from;
4322 int range_right = -1, range_len;
4323 sljit_u8 *update_table = NULL;
4324 BOOL in_range;
4325 sljit_u32 rec_count;
4326
/* Start with an empty character set for every position. */
4327 for (i = 0; i < MAX_N_CHARS; i++)
4328 chars[i * MAX_DIFF_CHARS] = 0;
4329
/* Work budget handed to scan_prefix (presumably limits how deep it scans -
   TODO confirm against scan_prefix). */
4330 rec_count = 10000;
4331 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4332
4333 if (max < 1)
4334 return FALSE;
4335
/* Find the longest run of consecutive positions whose character set is
   bounded (count below 255). Only runs of at least four positions qualify.
   range_right becomes the last position of the run, range_len its length. */
4336 in_range = FALSE;
4337 /* Prevent compiler "uninitialized" warning */
4338 from = 0;
4339 range_len = 4 /* minimum length */ - 1;
4340 for (i = 0; i <= max; i++)
4341 {
4342 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4343 {
4344 range_len = i - from;
4345 range_right = i - 1;
4346 }
4347
4348 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4349 {
4350 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4351 if (!in_range)
4352 {
4353 in_range = TRUE;
4354 from = i;
4355 }
4356 }
4357 else
4358 in_range = FALSE;
4359 }
4360
/* Build the skip table, indexed by the low byte of a character. Every entry
   starts at the full run length; a character that may occur i positions to
   the left of range_right limits the skip for its low byte to i characters. */
4361 if (range_right >= 0)
4362 {
4363 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4364 if (update_table == NULL)
4365 return TRUE;
4366 memset(update_table, IN_UCHARS(range_len), 256);
4367
4368 for (i = 0; i < range_len; i++)
4369 {
4370 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4371 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4372 char_set_end = char_set + char_set[0];
4373 char_set++;
4374 while (char_set <= char_set_end)
4375 {
4376 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4377 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4378 char_set++;
4379 }
4380 }
4381 }
4382
/* Select one position ("offset") for a direct compare: a position with at
   most two candidate characters, preferring a single character, and among
   two character positions one whose characters differ in a single bit. */
4383 offset = -1;
4384 /* Scan forward. */
4385 for (i = 0; i < max; i++)
4386 {
4387 if (offset == -1)
4388 {
4389 if (chars[i * MAX_DIFF_CHARS] <= 2)
4390 offset = i;
4391 }
4392 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4393 {
4394 if (chars[i * MAX_DIFF_CHARS] == 1)
4395 offset = i;
4396 else
4397 {
4398 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4399 if (!is_powerof2(mask))
4400 {
4401 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4402 if (is_powerof2(mask))
4403 offset = i;
4404 }
4405 }
4406 }
4407 }
4408
/* Without a skip table fall back to searching for the one or two characters
   known at "offset", if there is such a position. */
4409 if (range_right < 0)
4410 {
4411 if (offset < 0)
4412 return FALSE;
4413 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4414 /* Works regardless the value is 1 or 2. */
4415 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4416 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4417 return TRUE;
4418 }
4419
/* The table lookup already inspects the character at range_right, so no
   separate compare is emitted for that position. */
4420 if (range_right == offset)
4421 offset = -1;
4422
4423 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4424
/* From here on max is the largest position index. STR_END is lowered by that
   many characters while the search loop runs and is restored afterwards. */
4425 max -= 1;
4426 SLJIT_ASSERT(max > 0);
4427 if (common->match_end_ptr != 0)
4428 {
/* STR_END = min(STR_END - max, value stored at match_end_ptr); the original
   STR_END is kept in TMP3. */
4429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4430 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4431 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4432 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4433 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4434 JUMPHERE(quit);
4435 }
4436 else
4437 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4438
4439 SLJIT_ASSERT(range_right >= 0);
4440
/* Except on x86-32, RETURN_ADDR holds the table address for the indexed load
   below; x86-32 uses the table address as an immediate displacement. */
4441 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4442 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4443 #endif
4444
/* Search loop: leave it when STR_PTR reaches the lowered STR_END. */
4445 start = LABEL();
4446 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4447
/* Load one byte of the character at position range_right. In the 8 bit
   library or on little endian targets it is the first byte of the code
   unit, otherwise the last one. */
4448 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4449 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4450 #else
4451 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4452 #endif
4453
/* Advance STR_PTR by the table value. A non-zero skip means this position
   cannot start a match, so the loop is repeated. */
4454 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4455 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4456 #else
4457 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4458 #endif
4459 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4460 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4461
/* Verify the character at "offset". STR_PTR is advanced first, so a failed
   compare resumes the loop one character further on. */
4462 if (offset >= 0)
4463 {
4464 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4465 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4466
4467 if (chars[offset * MAX_DIFF_CHARS] == 1)
4468 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4469 else
4470 {
4471 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4472 if (is_powerof2(mask))
4473 {
/* The two candidates differ in one bit: force that bit and compare once. */
4474 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4475 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4476 }
4477 else
4478 {
4479 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4480 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4481 JUMPHERE(match);
4482 }
4483 }
4484 }
4485
/* In UTF-8 / UTF-16 mode the candidate position must not be in the middle of
   a character: the loop is repeated when the unit at the candidate position
   is a UTF-8 continuation byte (10xxxxxx) or a UTF-16 low surrogate. This
   check is not emitted when offset is 0. In both branches STR_PTR is one
   character past the candidate while the test runs. */
4486 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4487 if (common->utf && offset != 0)
4488 {
4489 if (offset < 0)
4490 {
4491 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4492 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4493 }
4494 else
4495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4496 #if defined COMPILE_PCRE8
4497 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4498 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4499 #elif defined COMPILE_PCRE16
4500 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4501 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4502 #else
4503 #error "Unknown code width"
4504 #endif
4505 if (offset < 0)
4506 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507 }
4508 #endif
4509
/* Undo the one character advance made before the compare at "offset". */
4510 if (offset >= 0)
4511 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4512
4513 JUMPHERE(quit);
4514
/* Restore STR_END. In the match_end_ptr case STR_PTR is also clamped so that
   it does not exceed the value stored at match_end_ptr. */
4515 if (common->match_end_ptr != 0)
4516 {
4517 if (range_right >= 0)
4518 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4519 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4520 if (range_right >= 0)
4521 {
4522 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4523 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4524 JUMPHERE(quit);
4525 }
4526 }
4527 else
4528 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4529 return TRUE;
4530 }
4531
/* The dimensions of the prefix character table are not available past this
   point. */
4532 #undef MAX_N_CHARS
4533 #undef MAX_DIFF_CHARS
4534
/* Emits the fast forward search for a known first character. For a caseless
   match the other case of first_char is determined first, and both values
   are handed to fast_forward_first_char2() with a position offset of 0. */
4535 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4536 {
4537 pcre_uchar oc;
4538
/* Without caseless matching the "other case" is the character itself. */
4539 oc = first_char;
4540 if (caseless)
4541 {
/* Look the other case up in the fcc table; first_char is passed as the third
   argument (presumably the value used when the character is outside the
   table - TODO confirm against TABLE_GET). */
4542 oc = TABLE_GET(first_char, common->fcc, first_char);
4543 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
/* In UTF mode the Unicode data is consulted for characters above 127. */
4544 if (first_char > 127 && common->utf)
4545 oc = UCD_OTHERCASE(first_char);
4546 #endif
4547 }
4548
4549 fast_forward_first_char2(common, first_char, oc, 0);
4550 }
4551
/* Emits code that moves STR_PTR forward to a position that directly follows
   a newline. Nothing is skipped when STR_PTR is not above the "str" field of
   jit_arguments. While the search runs, STR_END is replaced by the value
   stored at match_end_ptr (when that slot is in use) and restored at the
   end. */
4552 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4553 {
4554 DEFINE_COMPILER;
4555 struct sljit_label *loop;
4556 struct sljit_jump *lastchar;
4557 struct sljit_jump *firstchar;
4558 struct sljit_jump *quit;
4559 struct sljit_jump *foundcr = NULL;
4560 struct sljit_jump *notfoundnl;
4561 jump_list *newline = NULL;
4562
/* Keep the real STR_END in TMP3 and search only up to the stored limit. */
4563 if (common->match_end_ptr != 0)
4564 {
4565 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4566 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4567 }
4568
/* Fixed newline made of two code units: (newline >> 8) followed by
   (newline & 0xff). */
4569 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4570 {
4571 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4572 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4573 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4575 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4576
/* Step STR_PTR back by one character when at least two characters precede
   it (STR_PTR >= begin + 2); the loop below starts by advancing again. */
4577 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4578 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4579 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4580 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4581 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4582 #endif
4583 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4584
/* Advance one character at a time until the two characters just before
   STR_PTR form the newline, or STR_END is reached. */
4585 loop = LABEL();
4586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4587 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4588 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4589 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4590 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4591 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4592
4593 JUMPHERE(quit);
4594 JUMPHERE(firstchar);
4595 JUMPHERE(lastchar);
4596
4597 if (common->match_end_ptr != 0)
4598 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4599 return;
4600 }
4601
/* All other newline types: step back one character, then read characters
   until check_newlinechar() accepts one. */
4602 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4603 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4604 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4605 skip_char_back(common);
4606
/* The loop label is also published in common->ff_newline_shortcut. */
4607 loop = LABEL();
4608 common->ff_newline_shortcut = loop;
4609
4610 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4611 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4612 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4613 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in "newline" lead back to the loop (presumably they are
   taken when the character is not a newline - TODO confirm against
   check_newlinechar). */
4614 check_newlinechar(common, common->nltype, &newline, FALSE);
4615 set_jumps(newline, loop);
4616
4617 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4618 {
/* After a CR: when the next character exists and is NL, skip it as well by
   adding the 0/1 compare result (scaled to the character size) to STR_PTR. */
4619 quit = JUMP(SLJIT_JUMP);
4620 JUMPHERE(foundcr);
4621 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4622 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4623 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4624 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4625 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4626 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4627 #endif
4628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4629 JUMPHERE(notfoundnl);
4630 JUMPHERE(quit);
4631 }
4632 JUMPHERE(lastchar);
4633 JUMPHERE(firstchar);
4634
4635 if (common->match_end_ptr != 0)
4636 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4637 }
4638
/* Forward declaration: fast_forward_start_bits() below uses this function
   before its definition. */
4639 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4640
/* Emits a loop that advances STR_PTR until the character at STR_PTR has its
   bit set in the start_bits bitmap, or until STR_END is reached. The bitmap
   is indexed by character value (byte = value >> 3, bit = value & 7). When
   the match_end_ptr slot is in use, STR_END is temporarily replaced by the
   value stored there; the original is kept in RETURN_ADDR. */
4641 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4642 {
4643 DEFINE_COMPILER;
4644 struct sljit_label *start;
4645 struct sljit_jump *quit;
4646 struct sljit_jump *found = NULL;
4647 jump_list *matches = NULL;
4648 #ifndef COMPILE_PCRE8
4649 struct sljit_jump *jump;
4650 #endif
4651
4652 if (common->match_end_ptr != 0)
4653 {
4654 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4655 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4656 }
4657
4658 start = LABEL();
4659 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4660 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The tests below overwrite TMP1; in UTF mode the original code unit is
   needed later to compute the character length, so keep a copy in TMP3. */
4661 #ifdef SUPPORT_UTF
4662 if (common->utf)
4663 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4664 #endif
4665
/* First try to express the bitmap as a few range compares (inverted, so the
   jumps collected in "matches" are taken for characters in the bitmap). If
   that is not possible, test the bit in the table directly. */
4666 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4667 {
/* Characters above 255 are tested as if they were 255. */
4668 #ifndef COMPILE_PCRE8
4669 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4670 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4671 JUMPHERE(jump);
4672 #endif
/* TMP1 = start_bits[c >> 3], TMP2 = 1 << (c & 7); a non-zero AND means the
   character may start a match. */
4673 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4674 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4675 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4676 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4677 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4678 found = JUMP(SLJIT_NOT_ZERO);
4679 }
4680
/* Not a possible start: skip this character and try the next one. */
4681 #ifdef SUPPORT_UTF
4682 if (common->utf)
4683 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4684 #endif
4685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4686 #ifdef SUPPORT_UTF
4687 #if defined COMPILE_PCRE8
4688 if (common->utf)
4689 {
/* UTF-8: for a lead byte of 0xc0 or above, also skip the number of bytes
   read from utf8_table4 (indexed by lead byte - 0xc0). */
4690 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4691 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4693 }
4694 #elif defined COMPILE_PCRE16
4695 if (common->utf)
4696 {
/* UTF-16: when the unit is a high surrogate ((c & 0xfc00) == 0xd800), skip
   one more code unit: the 0/1 compare result shifted by one is two bytes. */
4697 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4699 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4700 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4701 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4703 }
4704 #endif /* COMPILE_PCRE[8|16] */
4705 #endif /* SUPPORT_UTF */
4706 JUMPTO(SLJIT_JUMP, start);
/* Common exit for "found" and "end of subject"; STR_PTR is left on the
   character that was just tested. */
4707 if (found != NULL)
4708 JUMPHERE(found);
4709 if (matches != NULL)
4710 set_jumps(matches, LABEL());
4711 JUMPHERE(quit);
4712
4713 if (common->match_end_ptr != 0)
4714 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4715 }
4716
/* Emits code that looks for req_char (or its other case when caseless is
   set) in the subject ahead of STR_PTR. The search is skipped when the
   subject continues beyond STR_PTR + REQ_BYTE_MAX, or when the position
   remembered in the req_char_ptr stack slot is still ahead of STR_PTR. When
   the character is found its address is stored in that slot. Returns the
   jump taken when the end of the subject is reached without finding the
   character; binding it is left to the caller. With has_firstchar set the
   search starts one character after STR_PTR. TMP1 and TMP2 are used. */
4717 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4718 {
4719 DEFINE_COMPILER;
4720 struct sljit_label *loop;
4721 struct sljit_jump *toolong;
4722 struct sljit_jump *alreadyfound;
4723 struct sljit_jump *found;
4724 struct sljit_jump *foundoc = NULL;
4725 struct sljit_jump *notfound;
4726 sljit_u32 oc, bit;
4727
4728 SLJIT_ASSERT(common->req_char_ptr != 0);
4729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4730 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4731 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4732 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4733
/* TMP1 is the search pointer. */
4734 if (has_firstchar)
4735 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4736 else
4737 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4738
4739 loop = LABEL();
4740 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4741
4742 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* The other case is computed at compile time; it equals req_char when the
   match is caseful. */
4743 oc = req_char;
4744 if (caseless)
4745 {
4746 oc = TABLE_GET(req_char, common->fcc, req_char);
4747 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4748 if (req_char > 127 && common->utf)
4749 oc = UCD_OTHERCASE(req_char);
4750 #endif
4751 }
4752 if (req_char == oc)
4753 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4754 else
4755 {
4756 bit = req_char ^ oc;
4757 if (is_powerof2(bit))
4758 {
/* The two cases differ in a single bit: force it and compare once. */
4759 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4760 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4761 }
4762 else
4763 {
4764 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4765 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4766 }
4767 }
4768 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4769 JUMPTO(SLJIT_JUMP, loop);
4770
/* Found: remember where, so that later calls can skip the search while
   STR_PTR is still before that position. */
4771 JUMPHERE(found);
4772 if (foundoc)
4773 JUMPHERE(foundoc);
4774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4775 JUMPHERE(alreadyfound);
4776 JUMPHERE(toolong);
4777 return notfound;
4778 }
4779
/* Emits a helper, entered through sljit's fast call mechanism (the return
   address is received in RETURN_ADDR), that walks the entries below
   STACK_TOP and copies the saved values back to locals addressed relative
   to the local base. The word at STACK_TOP - sizeof(sljit_sw) selects the
   entry kind:
     positive: offset of a local; two saved words are restored to it,
     negative: negated offset of a local; one saved word is restored to it,
     zero: end of the list.
   STACK_TOP is restored to its entry value before returning. TMP1, TMP2 and
   TMP3 are used. */
4780 static void do_revertframes(compiler_common *common)
4781 {
4782 DEFINE_COMPILER;
4783 struct sljit_jump *jump;
4784 struct sljit_label *mainloop;
4785
4786 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP3 keeps the original STACK_TOP, TMP1 the base address of the locals. */
4787 OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
4788 GET_LOCAL_BASE(TMP1, 0, 0);
4789
4790 /* Drop frames until we reach STACK_TOP. */
4791 mainloop = LABEL();
4792 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
4793 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4794
/* Positive offset: restore two words and drop the three word entry. */
4795 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4796 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4797 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
4798 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4799 JUMPTO(SLJIT_JUMP, mainloop);
4800
/* Here TMP2 <= 0, so "not zero" is the same as "negative". */
4801 JUMPHERE(jump);
4802 jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
4803 /* End of reverting values. */
4804 OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
4805 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4806
/* Negative offset: restore one word and drop the two word entry. */
4807 JUMPHERE(jump);
4808 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4809 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4810 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4811 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4812 JUMPTO(SLJIT_JUMP, mainloop);
4813 }
4814
/* Emits a fast call helper that decides whether STR_PTR is at a word
   boundary. The "word" property (0 or 1) of the character before STR_PTR is
   stored in LOCALS1 and that of the character at STR_PTR is computed in
   TMP2; a missing character on either side counts as 0. The final XOR of the
   two values sets the zero flag, so the flag is set when there is no
   boundary. The return address is kept in LOCALS0. With use_ucp the
   property is derived from the Unicode data, otherwise from the ctype_word
   bit of the ctypes table (characters above 255 count as non-word). */
4815 static void check_wordboundary(compiler_common *common)
4816 {
4817 DEFINE_COMPILER;
4818 struct sljit_jump *skipread;
4819 jump_list *skipread_list = NULL;
4820 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4821 struct sljit_jump *jump;
4822 #endif
4823
/* The shifts by 4 below rely on this value. */
4824 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4825
4826 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4827 /* Get type of the previous char, and put it to LOCALS1. */
4828 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4829 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* At the beginning of the subject there is no previous character. */
4831 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4832 skip_char_back(common);
4833 check_start_used_ptr(common);
4834 read_char(common);
4835
4836 /* Testing char type. */
4837 #ifdef SUPPORT_UCP
4838 if (common->use_ucp)
4839 {
/* An underscore is a word character. Otherwise the value left in TMP1 by the
   getucd helper (presumably the character type - TODO confirm) is tested
   against the ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
4840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4841 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4842 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4843 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4844 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4845 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4846 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4847 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4848 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4849 JUMPHERE(jump);
4850 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4851 }
4852 else
4853 #endif
4854 {
/* Characters above 255 have no ctypes entry; LOCALS1 stays 0 for them. In
   the 8 bit library this can only happen in UTF mode. */
4855 #ifndef COMPILE_PCRE8
4856 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4857 #elif defined SUPPORT_UTF
4858 /* Here LOCALS1 has already been zeroed. */
4859 jump = NULL;
4860 if (common->utf)
4861 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4862 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (0x10) of the ctypes entry as 0 or 1. */
4863 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4864 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4865 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4866 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4867 #ifndef COMPILE_PCRE8
4868 JUMPHERE(jump);
4869 #elif defined SUPPORT_UTF
4870 if (jump != NULL)
4871 JUMPHERE(jump);
4872 #endif /* COMPILE_PCRE8 */
4873 }
4874 JUMPHERE(skipread);
4875
/* Now the character at STR_PTR: TMP2 stays 0 when check_str_end() adds a
   jump that is taken (collected in skipread_list). */
4876 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4877 check_str_end(common, &skipread_list);
4878 peek_char(common, READ_CHAR_MAX);
4879
4880 /* Testing char type. This is a code duplication. */
4881 #ifdef SUPPORT_UCP
4882 if (common->use_ucp)
4883 {
4884 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4885 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4886 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4887 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4888 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4889 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4890 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4891 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4892 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4893 JUMPHERE(jump);
4894 }
4895 else
4896 #endif
4897 {
4898 #ifndef COMPILE_PCRE8
4899 /* TMP2 may be destroyed by peek_char. */
4900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4901 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4902 #elif defined SUPPORT_UTF
4903 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4904 jump = NULL;
4905 if (common->utf)
4906 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4907 #endif
4908 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4909 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4910 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4911 #ifndef COMPILE_PCRE8
4912 JUMPHERE(jump);
4913 #elif defined SUPPORT_UTF
4914 if (jump != NULL)
4915 JUMPHERE(jump);
4916 #endif /* COMPILE_PCRE8 */
4917 }
4918 set_jumps(skipread_list, LABEL());
4919
/* Zero flag set: both sides have the same word property (no boundary). */
4920 OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4921 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4922 }
4923
/* Tries to test membership of the character in TMP1 in the 256 bit class
   bitmap "bits" with a few compare instructions instead of a table lookup.
   The bitmap is scanned for the character values at which membership
   toggles. nclass gives the membership assumed for characters from 256 up:
   a final toggle at 256 is recorded when it differs from the state reached
   at the end of the bitmap. When more than four toggles exist (or
   MAX_RANGE_SIZE is exceeded) FALSE is returned and no code is emitted.
   Otherwise the compare code is emitted, its jumps are appended to
   "backtracks" and TRUE is returned. With invert == FALSE the jumps are
   taken for characters outside the class, with invert == TRUE for
   characters inside it. */
4924 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4925 {
4926 /* May destroy TMP1. */
4927 DEFINE_COMPILER;
4928 int ranges[MAX_RANGE_SIZE];
4929 sljit_u8 bit, cbit, all;
4930 int i, byte, length = 0;
4931
4932 bit = bits[0] & 0x1;
4933 /* All bits will be zero or one (since bit is zero or one). */
4934 all = -bit;
4935
/* Record every character value at which the bit differs from the previous
   one. Whole bytes equal to the current state are skipped eight at a time. */
4936 for (i = 0; i < 256; )
4937 {
4938 byte = i >> 3;
4939 if ((i & 0x7) == 0 && bits[byte] == all)
4940 i += 8;
4941 else
4942 {
4943 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4944 if (cbit != bit)
4945 {
4946 if (length >= MAX_RANGE_SIZE)
4947 return FALSE;
4948 ranges[length] = i;
4949 length++;
4950 bit = cbit;
4951 all = -cbit;
4952 }
4953 i++;
4954 }
4955 }
4956
/* Close the last range at 256 when the final state differs from nclass. */
4957 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4958 {
4959 if (length >= MAX_RANGE_SIZE)
4960 return FALSE;
4961 ranges[length] = 256;
4962 length++;
4963 }
4964
/* Only up to four toggle points are handled by the code below. */
4965 if (length < 0 || length > 4)
4966 return FALSE;
4967
/* From here on "bit" is the (possibly inverted) membership of character 0,
   i.e. of everything below ranges[0]. */
4968 bit = bits[0] & 0x1;
4969 if (invert) bit ^= 0x1;
4970
4971 /* No character is accepted. */
4972 if (length == 0 && bit == 0)
4973 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4974
4975 switch(length)
4976 {
4977 case 0:
4978 /* When bit != 0, all characters are accepted. */
4979 return TRUE;
4980
4981 case 1:
/* A single threshold. */
4982 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4983 return TRUE;
4984
4985 case 2:
/* One range [ranges[0], ranges[1]): a subtract plus one unsigned compare, or
   a plain equality test when the range holds a single value. */
4986 if (ranges[0] + 1 != ranges[1])
4987 {
4988 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4989 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4990 }
4991 else
4992 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4993 return TRUE;
4994
4995 case 3:
/* One threshold combined with one range. */
4996 if (bit != 0)
4997 {
4998 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4999 if (ranges[0] + 1 != ranges[1])
5000 {
5001 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5002 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5003 }
5004 else
5005 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5006 return TRUE;
5007 }
5008
5009 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5010 if (ranges[1] + 1 != ranges[2])
5011 {
5012 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5013 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5014 }
5015 else
5016 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5017 return TRUE;
5018
5019 case 4:
/* Two ranges of equal length whose start values differ in exactly one bit:
   set that bit in TMP1 and test against the upper range only. */
5020 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5021 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5022 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5023 && is_powerof2(ranges[2] - ranges[0]))
5024 {
5025 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5026 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5027 if (ranges[2] + 1 != ranges[3])
5028 {
5029 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5030 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5031 }
5032 else
5033 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5034 return TRUE;
5035 }
5036
/* Two independent ranges. "i" remembers how much has already been
   subtracted from TMP1 by the first test. */
5037 if (bit != 0)
5038 {
5039 i = 0;
5040 if (ranges[0] + 1 != ranges[1])
5041 {
5042 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5043 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5044 i = ranges[0];
5045 }
5046 else
5047 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5048
5049 if (ranges[2] + 1 != ranges[3])
5050 {
5051 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5052 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5053 }
5054 else
5055 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5056 return TRUE;
5057 }
5058
/* bit == 0: jump for everything outside [ranges[0], ranges[3]) and for the
   gap [ranges[1], ranges[2]) in between. */
5059 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5060 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5061 if (ranges[1] + 1 != ranges[2])
5062 {
5063 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5064 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5065 }
5066 else
5067 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5068 return TRUE;
5069
5070 default:
5071 SLJIT_UNREACHABLE();
5072 return FALSE;
5073 }
5074 }
5075
/* Emits a fast call helper (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when
   wide characters are possible (UTF mode, or the 16/32 bit libraries). */
5076 static void check_anynewline(compiler_common *common)
5077 {
5078 /* Check whether TMP1 contains a newline character. The result (0 or 1) is left in TMP2, and the final OR also sets the zero flag from it. TMP1 is modified as well. */
5079 DEFINE_COMPILER;
5080
5081 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5082
/* 0x0a .. 0x0d with one subtract and one unsigned compare, then 0x85. */
5083 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5084 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5085 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5086 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5087 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5088 #ifdef COMPILE_PCRE8
5089 if (common->utf)
5090 {
5091 #endif
/* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one
   compare covers both characters. */
5092 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5093 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5094 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5095 #ifdef COMPILE_PCRE8
5096 }
5097 #endif
5098 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare emitted above. */
5099 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5100 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5101 }
5102
/* Emits a fast call helper (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
   0x2000..0x200a, 0x202f, 0x205f and 0x3000 when wide characters are
   possible (UTF mode, or the 16/32 bit libraries). */
5103 static void check_hspace(compiler_common *common)
5104 {
5105 /* Check whether TMP1 contains a horizontal white space character. The result (0 or 1) is left in TMP2, and the final OR also sets the zero flag from it. TMP1 is modified in the wide character case. */
5106 DEFINE_COMPILER;
5107
5108 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5109
5110 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5113 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5114 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5115 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5116 #ifdef COMPILE_PCRE8
5117 if (common->utf)
5118 {
5119 #endif
5120 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5121 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5122 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5123 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5124 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* The remaining characters are tested relative to 0x2000. */
5125 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5126 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5127 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5128 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5129 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5130 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5131 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5132 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5133 #ifdef COMPILE_PCRE8
5134 }
5135 #endif
5136 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare emitted above. */
5137 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5138
5139 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5140 }
5141
/* Emits a fast call helper (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when
   wide characters are possible (UTF mode, or the 16/32 bit libraries). The
   emitted instruction sequence is the same as in check_anynewline(). */
5142 static void check_vspace(compiler_common *common)
5143 {
5144 /* Check whether TMP1 contains a vertical white space character. The result (0 or 1) is left in TMP2, and the final OR also sets the zero flag from it. TMP1 is modified as well. */
5145 DEFINE_COMPILER;
5146
5147 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5148
/* 0x0a .. 0x0d with one subtract and one unsigned compare, then 0x85. */
5149 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5150 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5152 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5153 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5154 #ifdef COMPILE_PCRE8
5155 if (common->utf)
5156 {
5157 #endif
/* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one
   compare covers both characters. */
5158 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5159 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5160 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5161 #ifdef COMPILE_PCRE8
5162 }
5163 #endif
5164 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare emitted above. */
5165 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5166
5167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5168 }
5169
/* The string compare helpers below borrow the STR_END and STACK_TOP
   registers as character temporaries; each helper saves and restores them. */
5170 #define CHAR1 STR_END
5171 #define CHAR2 STACK_TOP
5172
/* Emits a fast call helper (return address in RETURN_ADDR) that compares two
   character sequences unit by unit. As used by the code below: TMP1
   addresses the first sequence, TMP2 holds the length in bytes, and STR_PTR
   is moved back by TMP2 to address the second sequence (presumably the
   caller has already advanced STR_PTR by the length - TODO confirm with the
   callers). The loop ends at the first difference or when TMP2 reaches
   zero, so TMP2 is zero on return only if every unit was equal (for a
   non-zero length that is a multiple of the character size). CHAR1 and
   CHAR2 are saved in TMP3 and LOCALS0 and restored before returning. */
5173 static void do_casefulcmp(compiler_common *common)
5174 {
5175 DEFINE_COMPILER;
5176 struct sljit_jump *jump;
5177 struct sljit_label *label;
5178
5179 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5180 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5181 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5182 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because the loads in the loop
   use an offset of one character (MOVU_UCHAR presumably also updates the
   base register by that offset - TODO confirm against the sljit docs). */
5183 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5184 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5185
5186 label = LABEL();
5187 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5188 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5189 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5190 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5191 JUMPTO(SLJIT_NOT_ZERO, label);
5192
/* Common exit: undo the initial one character adjustment of STR_PTR and
   restore the borrowed registers. */
5193 JUMPHERE(jump);
5194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5195 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5196 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5197 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5198 }
5199
/* do_caselesscmp() borrows the STACK_LIMIT register to hold the address of
   the lower case table. */
5200 #define LCC_TABLE STACK_LIMIT
5201
/* Emits a fast call helper (return address in RETURN_ADDR) that compares two
   character sequences after mapping each unit through the common->lcc table.
   Outside the 8 bit library, units above 255 are compared unmapped. The
   register usage follows the code below: TMP1 addresses the first sequence,
   TMP2 holds the length in bytes, and STR_PTR is moved back by TMP2 to
   address the second sequence (presumably the caller has already advanced
   STR_PTR by the length - TODO confirm with the callers). TMP2 is zero on
   return only if every unit was equal (for a non-zero length that is a
   multiple of the character size). LCC_TABLE, CHAR1 and CHAR2 are saved in
   TMP3, LOCALS0 and LOCALS1 and restored before returning. */
5202 static void do_caselesscmp(compiler_common *common)
5203 {
5204 DEFINE_COMPILER;
5205 struct sljit_jump *jump;
5206 struct sljit_label *label;
5207
5208 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5210
5211 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5214 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one character back because the loads in the loop
   use an offset of one character (MOVU_UCHAR presumably also updates the
   base register by that offset - TODO confirm against the sljit docs). */
5215 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5216 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5217
5218 label = LABEL();
5219 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5220 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each unit through the table; units above 255 skip the lookup. */
5221 #ifndef COMPILE_PCRE8
5222 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5223 #endif
5224 OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5225 #ifndef COMPILE_PCRE8
5226 JUMPHERE(jump);
5227 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5228 #endif
5229 OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5230 #ifndef COMPILE_PCRE8
5231 JUMPHERE(jump);
5232 #endif
5233 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5234 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5235 JUMPTO(SLJIT_NOT_ZERO, label);
5236
/* Common exit: undo the initial one character adjustment of STR_PTR and
   restore the borrowed registers. */
5237 JUMPHERE(jump);
5238 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5239 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5240 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5241 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5242 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5243 }
5244
/* End of the scope of the register aliases used by the compare helpers. */
5245 #undef LCC_TABLE
5246 #undef CHAR1
5247 #undef CHAR2
5248
5249 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5250
/* Caseless comparison of two character sequences using the Unicode data.
   The first sequence is [src1, end1); the second starts at args->uchar_ptr
   and must not run past args->end. Returns:
     (pcre_uchar *)1 when the second sequence ends before the first one,
     NULL when a pair of characters does not match,
     otherwise the position in the second sequence after the compared part.
   Two characters match when they are equal, when the first is the other
   case of the second, or when the first is found in the caseless set of
   the second. */
5251 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
5252 {
5253 /* This function would be inefficient to implement at JIT level. */
5254 sljit_u32 c1, c2;
5255 const pcre_uchar *src2 = args->uchar_ptr;
5256 const pcre_uchar *end2 = args->end;
5257 const ucd_record *ur;
5258 const sljit_u32 *pp;
5259
5260 while (src1 < end1)
5261 {
5262 if (src2 >= end2)
5263 return (pcre_uchar*)1;
/* Read one character from each sequence; both pointers are advanced. */
5264 GETCHARINC(c1, src1);
5265 GETCHARINC(c2, src2);
5266 ur = GET_UCD(c2);
5267 if (c1 != c2 && c1 != c2 + ur->other_case)
5268 {
/* Search the caseless set of c2. The scan stops with a mismatch at the first
   entry greater than c1 (so the list is presumably sorted in ascending order
   and ends with a value above every character - TODO confirm). */
5269 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5270 for (;;)
5271 {
5272 if (c1 < *pp) return NULL;
5273 if (c1 == *pp++) break;
5274 }
5275 }
5276 }
5277 return src2;
5278 }
5279
5280 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5281
5282 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5283 compare_context *context, jump_list **backtracks)
5284 {
5285 DEFINE_COMPILER;
5286 unsigned int othercasebit = 0;
5287 pcre_uchar *othercasechar = NULL;
5288 #ifdef SUPPORT_UTF
5289 int utflength;
5290 #endif
5291
5292 if (caseless && char_has_othercase(common, cc))
5293 {
5294 othercasebit = char_get_othercase_bit(common, cc);
5295 SLJIT_ASSERT(othercasebit);
5296 /* Extracting bit difference info. */
5297 #if defined COMPILE_PCRE8
5298 othercasechar = cc + (othercasebit >> 8);
5299 othercasebit &= 0xff;
5300 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5301 /* Note that this code only handles characters in the BMP. If there
5302 ever are characters outside the BMP whose othercase differs in only one
5303 bit from itself (there currently are none), this code will need to be
5304 revised for COMPILE_PCRE32. */
5305 othercasechar = cc + (othercasebit >> 9);
5306 if ((othercasebit & 0x100) != 0)
5307 othercasebit = (othercasebit & 0xff) << 8;
5308 else
5309 othercasebit &= 0xff;
5310 #endif /* COMPILE_PCRE[8|16|32] */
5311 }
5312
5313 if (context->sourcereg == -1)
5314 {
5315 #if defined COMPILE_PCRE8
5316 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5317 if (context->length >= 4)
5318 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5319 else if (context->length >= 2)
5320 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5321 else
5322 #endif
5323 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5324 #elif defined COMPILE_PCRE16
5325 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5326 if (context->length >= 4)
5327 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5328 else
5329 #endif
5330 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5331 #elif defined COMPILE_PCRE32
5332 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5333 #endif /* COMPILE_PCRE[8|16|32] */
5334 context->sourcereg = TMP2;
5335 }
5336
5337 #ifdef SUPPORT_UTF
5338 utflength = 1;
5339 if (common->utf && HAS_EXTRALEN(*cc))
5340 utflength += GET_EXTRALEN(*cc);
5341
5342 do
5343 {
5344 #endif
5345
5346 context->length -= IN_UCHARS(1);
5347 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5348
5349 /* Unaligned read is supported. */
5350 if (othercasebit != 0 && othercasechar == cc)
5351 {
5352 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5353 context->oc.asuchars[context->ucharptr] = othercasebit;
5354 }
5355 else
5356 {
5357 context->c.asuchars[context->ucharptr] = *cc;
5358 context->oc.asuchars[context->ucharptr] = 0;
5359 }
5360 context->ucharptr++;
5361
5362 #if defined COMPILE_PCRE8
5363 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5364 #else
5365 if (context->ucharptr >= 2 || context->length == 0)
5366 #endif
5367 {
5368 if (context->length >= 4)
5369 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5370 else if (context->length >= 2)
5371 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5372 #if defined COMPILE_PCRE8
5373 else if (context->length >= 1)
5374 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5375 #endif /* COMPILE_PCRE8 */
5376 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5377
5378 switch(context->ucharptr)
5379 {
5380 case 4 / sizeof(pcre_uchar):
5381 if (context->oc.asint != 0)
5382 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5383 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5384 break;
5385
5386 case 2 / sizeof(pcre_uchar):
5387 if (context->oc.asushort != 0)
5388 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5389 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5390 break;
5391
5392 #ifdef COMPILE_PCRE8
5393 case 1:
5394 if (context->oc.asbyte != 0)
5395 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5396 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5397 break;
5398 #endif
5399
5400 default:
5401 SLJIT_UNREACHABLE();
5402 break;
5403 }
5404 context->ucharptr = 0;
5405 }
5406
5407 #else
5408
5409 /* Unaligned read is unsupported or in 32 bit mode. */
5410 if (context->length >= 1)
5411 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5412
5413 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5414
5415 if (othercasebit != 0 && othercasechar == cc)
5416 {
5417 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5418 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5419 }
5420 else
5421 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5422
5423 #endif
5424
5425 cc++;
5426 #ifdef SUPPORT_UTF
5427 utflength--;
5428 }
5429 while (utflength > 0);
5430 #endif
5431
5432 return cc;
5433 }
5434
5435 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5436
/* Offset tracking helpers for the extended class compiler.

SET_TYPE_OFFSET(value) emits an add or subtract on typereg so that the
register holds (type - value), then records value in typeoffset.
SET_CHAR_OFFSET(value) does the same for TMP1 and charoffset. No code is
emitted when the requested offset is already the current one.

Both macros expect a local "compiler" (through OP2) plus the locals named in
their bodies (typereg / typeoffset, charoffset). They are wrapped in
do { ... } while (0) so that each use is a single statement, even as the body
of an un-braced if/else. The argument is evaluated more than once, so it must
not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5456
5457 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5458
5459 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5460 {
5461 DEFINE_COMPILER;
5462 jump_list *found = NULL;
5463 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5464 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5465 struct sljit_jump *jump = NULL;
5466 pcre_uchar *ccbegin;
5467 int compares, invertcmp, numberofcmps;
5468 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5469 BOOL utf = common->utf;
5470 #endif
5471
5472 #ifdef SUPPORT_UCP
5473 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5474 BOOL charsaved = FALSE;
5475 int typereg = TMP1;
5476 const sljit_u32 *other_cases;
5477 sljit_uw typeoffset;
5478 #endif
5479
5480 /* Scanning the necessary info. */
5481 cc++;
5482 ccbegin = cc;
5483 compares = 0;
5484 if (cc[-1] & XCL_MAP)
5485 {
5486 min = 0;
5487 cc += 32 / sizeof(pcre_uchar);
5488 }
5489
5490 while (*cc != XCL_END)
5491 {
5492 compares++;
5493 if (*cc == XCL_SINGLE)
5494 {
5495 cc ++;
5496 GETCHARINCTEST(c, cc);
5497 if (c > max) max = c;
5498 if (c < min) min = c;
5499 #ifdef SUPPORT_UCP
5500 needschar = TRUE;
5501 #endif
5502 }
5503 else if (*cc == XCL_RANGE)
5504 {
5505 cc ++;
5506 GETCHARINCTEST(c, cc);
5507 if (c < min) min = c;
5508 GETCHARINCTEST(c, cc);
5509 if (c > max) max = c;
5510 #ifdef SUPPORT_UCP
5511 needschar = TRUE;
5512 #endif
5513 }
5514 #ifdef SUPPORT_UCP
5515 else
5516 {
5517 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5518 cc++;
5519 if (*cc == PT_CLIST)
5520 {
5521 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5522 while (*other_cases != NOTACHAR)
5523 {
5524 if (*other_cases > max) max = *other_cases;
5525 if (*other_cases < min) min = *other_cases;
5526 other_cases++;
5527 }
5528 }
5529 else
5530 {
5531 max = READ_CHAR_MAX;
5532 min = 0;
5533 }
5534
5535 switch(*cc)
5536 {
5537 case PT_ANY:
5538 /* Any either accepts everything or ignored. */
5539 if (cc[-1] == XCL_PROP)
5540 {
5541 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5542 if (list == backtracks)
5543 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5544 return;
5545 }
5546 break;
5547
5548 case PT_LAMP:
5549 case PT_GC:
5550 case PT_PC:
5551 case PT_ALNUM:
5552 needstype = TRUE;
5553 break;
5554
5555 case PT_SC:
5556 needsscript = TRUE;
5557 break;
5558
5559 case PT_SPACE:
5560 case PT_PXSPACE:
5561 case PT_WORD:
5562 case PT_PXGRAPH:
5563 case PT_PXPRINT:
5564 case PT_PXPUNCT:
5565 needstype = TRUE;
5566 needschar = TRUE;
5567 break;
5568
5569 case PT_CLIST:
5570 case PT_UCNC:
5571 needschar = TRUE;
5572 break;
5573
5574 default:
5575 SLJIT_UNREACHABLE();
5576 break;
5577 }
5578 cc += 2;
5579 }
5580 #endif
5581 }
5582 SLJIT_ASSERT(compares > 0);
5583
5584 /* We are not necessary in utf mode even in 8 bit mode. */
5585 cc = ccbegin;
5586 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5587
5588 if ((cc[-1] & XCL_HASPROP) == 0)
5589 {
5590 if ((cc[-1] & XCL_MAP) != 0)
5591 {
5592 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5593 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5594 {
5595 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5596 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5597 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5598 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5599 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5600 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5601 }
5602
5603 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5604 JUMPHERE(jump);
5605
5606 cc += 32 / sizeof(pcre_uchar);
5607 }
5608 else
5609 {
5610 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5611 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5612 }
5613 }
5614 else if ((cc[-1] & XCL_MAP) != 0)
5615 {
5616 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5617 #ifdef SUPPORT_UCP
5618 charsaved = TRUE;
5619 #endif
5620 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5621 {
5622 #ifdef COMPILE_PCRE8
5623 jump = NULL;
5624 if (common->utf)
5625 #endif
5626 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5627
5628 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5629 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5630 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5631 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5632 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5633 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5634
5635 #ifdef COMPILE_PCRE8
5636 if (common->utf)
5637 #endif
5638 JUMPHERE(jump);
5639 }
5640
5641 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5642 cc += 32 / sizeof(pcre_uchar);
5643 }
5644
5645 #ifdef SUPPORT_UCP
5646 if (needstype || needsscript)
5647 {
5648 if (needschar && !charsaved)
5649 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5650
5651 #ifdef COMPILE_PCRE32
5652 if (!common->utf)
5653 {
5654 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5655 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5656 JUMPHERE(jump);
5657 }
5658 #endif
5659
5660 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5661 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5662 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5663 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5664 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5665 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5666 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5667
5668 /* Before anything else, we deal with scripts. */
5669 if (needsscript)
5670 {
5671 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5672 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5673
5674 ccbegin = cc;
5675
5676 while (*cc != XCL_END)
5677 {
5678 if (*cc == XCL_SINGLE)
5679 {
5680 cc ++;
5681 GETCHARINCTEST(c, cc);
5682 }
5683 else if (*cc == XCL_RANGE)
5684 {
5685 cc ++;
5686 GETCHARINCTEST(c, cc);
5687 GETCHARINCTEST(c, cc);
5688 }
5689 else
5690 {
5691 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5692 cc++;
5693 if (*cc == PT_SC)
5694 {
5695 compares--;
5696 invertcmp = (compares == 0 && list != backtracks);
5697 if (cc[-1] == XCL_NOTPROP)
5698 invertcmp ^= 0x1;
5699 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5700 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5701 }
5702 cc += 2;
5703 }
5704 }
5705
5706 cc = ccbegin;
5707 }
5708
5709 if (needschar)
5710 {
5711 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5712 }
5713
5714 if (needstype)
5715 {
5716 if (!needschar)
5717 {
5718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5719 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5720 }
5721 else
5722 {
5723 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5724 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5725 typereg = RETURN_ADDR;
5726 }
5727 }
5728 }
5729 #endif
5730
5731 /* Generating code. */
5732 charoffset = 0;
5733 numberofcmps = 0;
5734 #ifdef SUPPORT_UCP
5735 typeoffset = 0;
5736 #endif
5737
5738 while (*cc != XCL_END)
5739 {
5740 compares--;
5741 invertcmp = (compares == 0 && list != backtracks);
5742 jump = NULL;
5743
5744 if (*cc == XCL_SINGLE)
5745 {
5746 cc ++;
5747 GETCHARINCTEST(c, cc);
5748
5749 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5750 {
5751 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5752 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5753 numberofcmps++;
5754 }
5755 else if (numberofcmps > 0)
5756 {
5757 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5758 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5759 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5760 numberofcmps = 0;
5761 }
5762 else
5763 {
5764 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5765 numberofcmps = 0;
5766 }
5767 }
5768 else if (*cc == XCL_RANGE)
5769 {
5770 cc ++;
5771 GETCHARINCTEST(c, cc);
5772 SET_CHAR_OFFSET(c);
5773 GETCHARINCTEST(c, cc);
5774
5775 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5776 {
5777 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5778 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5779 numberofcmps++;
5780 }
5781 else if (numberofcmps > 0)
5782 {
5783 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5784 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5785 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5786 numberofcmps = 0;
5787 }
5788 else
5789 {
5790 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5791 numberofcmps = 0;
5792 }
5793 }
5794 #ifdef SUPPORT_UCP
5795 else
5796 {
5797 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5798 if (*cc == XCL_NOTPROP)
5799 invertcmp ^= 0x1;
5800 cc++;
5801 switch(*cc)
5802 {
5803 case PT_ANY:
5804 if (!invertcmp)
5805 jump = JUMP(SLJIT_JUMP);
5806 break;
5807
5808 case PT_LAMP:
5809 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5810 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5811 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5812 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5813 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5814 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5815 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5816 break;
5817
5818 case PT_GC:
5819 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5820 SET_TYPE_OFFSET(c);
5821 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5822 break;
5823
5824 case PT_PC:
5825 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5826 break;
5827
5828 case PT_SC:
5829 compares++;
5830 /* Do nothing. */
5831 break;
5832
5833 case PT_SPACE:
5834 case PT_PXSPACE:
5835 SET_CHAR_OFFSET(9);
5836 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5837 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5838
5839 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5840 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5841
5842 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5843 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5844
5845 SET_TYPE_OFFSET(ucp_Zl);
5846 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5847 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5848 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5849 break;
5850
5851 case PT_WORD:
5852 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5853 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5854 /* Fall through. */
5855
5856 case PT_ALNUM:
5857 SET_TYPE_OFFSET(ucp_Ll);
5858 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5859 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5860 SET_TYPE_OFFSET(ucp_Nd);
5861 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5862 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5863 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5864 break;
5865
5866 case PT_CLIST:
5867 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5868
5869 /* At least three characters are required.
5870 Otherwise this case would be handled by the normal code path. */
5871 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5872 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5873
5874 /* Optimizing character pairs, if their difference is power of 2. */
5875 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5876 {
5877 if (charoffset == 0)
5878 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5879 else
5880 {
5881 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5882 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5883 }
5884 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5885 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5886 other_cases += 2;
5887 }
5888 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5889 {
5890 if (charoffset == 0)
5891 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5892 else
5893 {
5894 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5895 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5896 }
5897 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5898 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5899
5900 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5901 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
5902
5903 other_cases += 3;
5904 }
5905 else
5906 {
5907 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5908 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5909 }
5910
5911 while (*other_cases != NOTACHAR)
5912 {
5913 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5914 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
5915 }
5916 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5917 break;
5918
5919 case PT_UCNC:
5920 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5921 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5922 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5923 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5924 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5925 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5926
5927 SET_CHAR_OFFSET(0xa0);
5928 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5929 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5930 SET_CHAR_OFFSET(0);
5931 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5932 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
5933 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5934 break;
5935
5936 case PT_PXGRAPH:
5937 /* C and Z groups are the farthest two groups. */
5938 SET_TYPE_OFFSET(ucp_Ll);
5939 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5940 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
5941
5942 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5943
5944 /* In case of ucp_Cf, we overwrite the result. */
5945 SET_CHAR_OFFSET(0x2066);
5946 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5947 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5948
5949 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5950 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5951
5952 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5953 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5954
5955 JUMPHERE(jump);
5956 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5957 break;
5958
5959 case PT_PXPRINT:
5960 /* C and Z groups are the farthest two groups. */
5961 SET_TYPE_OFFSET(ucp_Ll);
5962 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5963 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
5964
5965 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5966 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
5967
5968 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5969
5970 /* In case of ucp_Cf, we overwrite the result. */
5971 SET_CHAR_OFFSET(0x2066);
5972 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5973 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5974
5975 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5976 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5977
5978 JUMPHERE(jump);
5979 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5980 break;
5981
5982 case PT_PXPUNCT:
5983 SET_TYPE_OFFSET(ucp_Sc);
5984 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5985 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5986
5987 SET_CHAR_OFFSET(0);
5988 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5989 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
5990
5991 SET_TYPE_OFFSET(ucp_Pc);
5992 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5993 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5994 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5995 break;
5996
5997 default:
5998 SLJIT_UNREACHABLE();
5999 break;
6000 }
6001 cc += 2;
6002 }
6003 #endif
6004
6005 if (jump != NULL)
6006 add_jump(compiler, compares > 0 ? list : backtracks, jump);
6007 }
6008
6009 if (found != NULL)
6010 set_jumps(found, LABEL());
6011 }
6012
6013 #undef SET_TYPE_OFFSET
6014 #undef SET_CHAR_OFFSET
6015
6016 #endif
6017
6018 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6019 {
6020 DEFINE_COMPILER;
6021 int length;
6022 struct sljit_jump *jump[4];
6023 #ifdef SUPPORT_UTF
6024 struct sljit_label *label;
6025 #endif /* SUPPORT_UTF */
6026
6027 switch(type)
6028 {
6029 case OP_SOD:
6030 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6032 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6033 return cc;
6034
6035 case OP_SOM:
6036 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6038 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6039 return cc;
6040
6041 case OP_NOT_WORD_BOUNDARY:
6042 case OP_WORD_BOUNDARY:
6043 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6044 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6045 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6046 return cc;
6047
6048 case OP_EODN:
6049 /* Requires rather complex checks. */
6050 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6051 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6052 {
6053 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6054 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6055 if (common->mode == JIT_COMPILE)
6056 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6057 else
6058 {
6059 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6060 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6061 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6062 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6063 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6064 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6065 check_partial(common, TRUE);
6066 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6067 JUMPHERE(jump[1]);
6068 }
6069 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6070 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6071 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6072 }
6073 else if (common->nltype == NLTYPE_FIXED)
6074 {
6075 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6076 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6077 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6078 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6079 }
6080 else
6081 {
6082 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6083 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6084 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6085 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6086 jump[2] = JUMP(SLJIT_GREATER);
6087 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6088 /* Equal. */
6089 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6090 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6091 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6092
6093 JUMPHERE(jump[1]);
6094 if (common->nltype == NLTYPE_ANYCRLF)
6095 {
6096 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6097 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6098 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6099 }
6100 else
6101 {
6102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6103 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6104 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6105 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6106 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6107 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6108 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6109 }
6110 JUMPHERE(jump[2]);
6111 JUMPHERE(jump[3]);
6112 }
6113 JUMPHERE(jump[0]);
6114 check_partial(common, FALSE);
6115 return cc;
6116
6117 case OP_EOD:
6118 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6119 check_partial(common, FALSE);
6120 return cc;
6121
6122 case OP_DOLL:
6123 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6124 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6125 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6126
6127 if (!common->endonly)
6128 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6129 else
6130 {
6131 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6132 check_partial(common, FALSE);
6133 }
6134 return cc;
6135
6136 case OP_DOLLM:
6137 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6138 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6139 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6140 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6141 check_partial(common, FALSE);
6142 jump[0] = JUMP(SLJIT_JUMP);
6143 JUMPHERE(jump[1]);
6144
6145 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6146 {
6147 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6148 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6149 if (common->mode == JIT_COMPILE)
6150 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6151 else
6152 {
6153 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6154 /* STR_PTR = STR_END - IN_UCHARS(1) */
6155 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6156 check_partial(common, TRUE);
6157 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6158 JUMPHERE(jump[1]);
6159 }
6160
6161 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6162 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6163 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6164 }
6165 else
6166 {
6167 peek_char(common, common->nlmax);
6168 check_newlinechar(common, common->nltype, backtracks, FALSE);
6169 }
6170 JUMPHERE(jump[0]);
6171 return cc;
6172
6173 case OP_CIRC:
6174 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6176 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6177 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6178 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6179 return cc;
6180
6181 case OP_CIRCM:
6182 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6183 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6184 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6185 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6186 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6187 jump[0] = JUMP(SLJIT_JUMP);
6188 JUMPHERE(jump[1]);
6189
6190 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6191 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6192 {
6193 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6194 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6196 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6197 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6198 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6199 }
6200 else
6201 {
6202 skip_char_back(common);
6203 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6204 check_newlinechar(common, common->nltype, backtracks, FALSE);
6205 }
6206 JUMPHERE(jump[0]);
6207 return cc;
6208
6209 case OP_REVERSE:
6210 length = GET(cc, 0);
6211 if (length == 0)
6212 return cc + LINK_SIZE;
6213 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6214 #ifdef SUPPORT_UTF
6215 if (common->utf)
6216 {
6217 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6219 label = LABEL();
6220 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6221 skip_char_back(common);
6222 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6223 JUMPTO(SLJIT_NOT_ZERO, label);
6224 }
6225 else
6226 #endif
6227 {
6228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6229 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6230 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6231 }
6232 check_start_used_ptr(common);
6233 return cc + LINK_SIZE;
6234 }
6235 SLJIT_UNREACHABLE();
6236 return cc;
6237 }
6238
/* Emits the matching path for one single-character opcode.

   common        - shared compiler state (options, newline settings, helper lists)
   type          - the opcode to compile
   cc            - pattern position right after the opcode; the caller in
                   compile_charn_matchingpath() passes *cc as type and cc + 1 here
   backtracks    - list that collects the jumps taken when the item does not match
   check_str_ptr - when set, detect_partial_match() is called before the
                   character is consumed (presumably the end-of-subject check;
                   confirm against its definition)

   Returns cc advanced past the operand data the opcode used, or cc itself for
   opcodes that have no operand. */
6239 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6240 {
6241 DEFINE_COMPILER;
6242 int length;
6243 unsigned int c, oc, bit;
6244 compare_context context;
6245 struct sljit_jump *jump[3];
6246 jump_list *end_list;
6247 #ifdef SUPPORT_UTF
6248 struct sljit_label *label;
6249 #ifdef SUPPORT_UCP
6250 pcre_uchar propdata[5];
6251 #endif
6252 #endif /* SUPPORT_UTF */
6253
6254 switch(type)
6255 {
6256 case OP_NOT_DIGIT:
6257 case OP_DIGIT:
6258 /* Digits are usually 0-9, so it is worth to optimize them. */
6259 if (check_str_ptr)
6260 detect_partial_match(common, backtracks);
/* In 8-bit UTF mode the 7-bit reader is used when is_char7_bitset() accepts
   the digit bitmap; otherwise the 8-bit reader is used. */
6261 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6262 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6263 read_char7_type(common, type == OP_NOT_DIGIT);
6264 else
6265 #endif
6266 read_char8_type(common, type == OP_NOT_DIGIT);
6267 /* Flip the starting bit in the negative case. */
/* Test the ctype_digit bit of TMP1: OP_DIGIT backtracks when it is clear,
   OP_NOT_DIGIT backtracks when it is set. */
6268 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6269 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6270 return cc;
6271
6272 case OP_NOT_WHITESPACE:
6273 case OP_WHITESPACE:
/* Same scheme as the digit case, using the space bitmap and ctype_space. */
6274 if (check_str_ptr)
6275 detect_partial_match(common, backtracks);
6276 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6277 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6278 read_char7_type(common, type == OP_NOT_WHITESPACE);
6279 else
6280 #endif
6281 read_char8_type(common, type == OP_NOT_WHITESPACE);
6282 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6283 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6284 return cc;
6285
6286 case OP_NOT_WORDCHAR:
6287 case OP_WORDCHAR:
/* Same scheme as the digit case, using the word bitmap and ctype_word. */
6288 if (check_str_ptr)
6289 detect_partial_match(common, backtracks);
6290 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6291 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6292 read_char7_type(common, type == OP_NOT_WORDCHAR);
6293 else
6294 #endif
6295 read_char8_type(common, type == OP_NOT_WORDCHAR);
6296 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6297 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6298 return cc;
6299
6300 case OP_ANY:
6301 if (check_str_ptr)
6302 detect_partial_match(common, backtracks);
6303 read_char_range(common, common->nlmin, common->nlmax, TRUE);
/* Fixed newline whose value is above 255 (two code units): when TMP1 equals
   the high byte, the unit at STR_PTR is loaded and the item backtracks only
   if that unit equals the low byte as well. Reaching the end of the subject
   (end_list) skips the second comparison. */
6304 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6305 {
6306 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6307 end_list = NULL;
6308 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6309 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6310 else
6311 check_str_end(common, &end_list);
6312
6313 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6314 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6315 set_jumps(end_list, LABEL());
6316 JUMPHERE(jump[0]);
6317 }
6318 else
6319 check_newlinechar(common, common->nltype, backtracks, TRUE);
6320 return cc;
6321
6322 case OP_ALLANY:
6323 if (check_str_ptr)
6324 detect_partial_match(common, backtracks);
6325 #ifdef SUPPORT_UTF
6326 if (common->utf)
6327 {
/* Load the first code unit into TMP1 and step over it; the code below adds
   the extra length of a multi-unit character to STR_PTR. */
6328 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6330 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6331 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no extra step; otherwise the extra length is read
   from utf8_table4, indexed by the unit value minus 0xc0. */
6332 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6333 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6335 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no extra step; otherwise TMP1 becomes the result of
   the test (unit & 0xfc00) == 0xd800, shifted left by one, and is added to
   STR_PTR. */
6336 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6337 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6338 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6339 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6340 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6341 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6342 #endif
6343 JUMPHERE(jump[0]);
6344 #endif /* COMPILE_PCRE[8|16] */
6345 return cc;
6346 }
6347 #endif
/* Non-UTF mode: every character is exactly one code unit. */
6348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6349 return cc;
6350
6351 case OP_ANYBYTE:
/* Always advances by a single code unit, regardless of UTF mode. */
6352 if (check_str_ptr)
6353 detect_partial_match(common, backtracks);
6354 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6355 return cc;
6356
6357 #ifdef SUPPORT_UTF
6358 #ifdef SUPPORT_UCP
6359 case OP_NOTPROP:
6360 case OP_PROP:
/* Build a temporary five-entry item list from the two operand units at cc and
   hand it to compile_xclass_matchingpath(); the two operand units are then
   skipped. */
6361 propdata[0] = XCL_HASPROP;
6362 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6363 propdata[2] = cc[0];
6364 propdata[3] = cc[1];
6365 propdata[4] = XCL_END;
6366 if (check_str_ptr)
6367 detect_partial_match(common, backtracks);
6368 compile_xclass_matchingpath(common, propdata, backtracks);
6369 return cc + 2;
6370 #endif
6371 #endif
6372
6373 case OP_ANYNL:
6374 if (check_str_ptr)
6375 detect_partial_match(common, backtracks);
6376 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
/* A CR is accepted on its own; when the unit that follows it is NL, that unit
   is consumed too. Any other character is handed to check_newlinechar(). */
6377 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6378 /* We don't need to handle soft partial matching case. */
6379 end_list = NULL;
6380 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6381 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6382 else
6383 check_str_end(common, &end_list);
6384 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6385 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6386 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6387 jump[2] = JUMP(SLJIT_JUMP);
6388 JUMPHERE(jump[0]);
6389 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6390 set_jumps(end_list, LABEL());
6391 JUMPHERE(jump[1]);
6392 JUMPHERE(jump[2]);
6393 return cc;
6394
6395 case OP_NOT_HSPACE:
6396 case OP_HSPACE:
/* The test itself lives in the routine collected in common->hspace, reached
   through a fast call; the zero flag it leaves decides the backtrack. */
6397 if (check_str_ptr)
6398 detect_partial_match(common, backtracks);
6399 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6400 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6401 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6402 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6403 return cc;
6404
6405 case OP_NOT_VSPACE:
6406 case OP_VSPACE:
/* Same scheme as the horizontal space case, using common->vspace. */
6407 if (check_str_ptr)
6408 detect_partial_match(common, backtracks);
6409 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6410 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6411 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6412 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6413 return cc;
6414
6415 #ifdef SUPPORT_UCP
6416 case OP_EXTUNI:
6417 if (check_str_ptr)
6418 detect_partial_match(common, backtracks);
/* Read the first character and load its gbprop field from ucd_records
   (TMP2 is presumably the record offset produced by the getucd routine). */
6419 read_char(common);
6420 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6421 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6422 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and then reused to hold the gbprop value of
   the previous character; it is restored after the loop. */
6423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6424 OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6425
/* Loop: remember STR_PTR in TMP3, read the next character and fetch its
   gbprop value into TMP2. The loop continues while the ucp_gbtable entry
   selected by the previous value has the bit numbered by the new value set.
   The loop is left without rewinding when the end of the subject is reached. */
6426 label = LABEL();
6427 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6428 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6429 read_char(common);
6430 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6432 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6433
6434 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6435 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6436 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6437 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6438 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6439 JUMPTO(SLJIT_NOT_ZERO, label);
6440
/* The character read last is not part of the sequence: rewind STR_PTR to the
   position saved before it was read. */
6441 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6442 JUMPHERE(jump[0]);
6443 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6444
6445 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6446 {
6447 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6448 /* Since we successfully read a char above, partial matching must occur. */
6449 check_partial(common, TRUE);
6450 JUMPHERE(jump[0]);
6451 }
6452 return cc;
6453 #endif
6454
6455 case OP_CHAR:
6456 case OP_CHARI:
6457 length = 1;
6458 #ifdef SUPPORT_UTF
6459 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6460 #endif
/* Fast path, only in JIT_COMPILE mode with check_str_ptr set: advance STR_PTR
   by the whole length, check it against STR_END once and compare the units
   with byte_sequence_compare(). Used for OP_CHAR, for characters without
   another case, and when char_get_othercase_bit() returns non-zero. */
6461 if (common->mode == JIT_COMPILE && check_str_ptr
6462 && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6463 {
6464 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6465 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6466
6467 context.length = IN_UCHARS(length);
6468 context.sourcereg = -1;
6469 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6470 context.ucharptr = 0;
6471 #endif
6472 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6473 }
6474
/* General path: read one character and compare its value in TMP1. */
6475 if (check_str_ptr)
6476 detect_partial_match(common, backtracks);
6477 #ifdef SUPPORT_UTF
6478 if (common->utf)
6479 {
6480 GETCHAR(c, cc);
6481 }
6482 else
6483 #endif
6484 c = *cc;
6485
6486 if (type == OP_CHAR || !char_has_othercase(common, cc))
6487 {
6488 read_char_range(common, c, c, FALSE);
6489 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6490 return cc + length;
6491 }
/* Caseless: when c and oc differ in a single bit, set that bit in TMP1 and
   compare once; otherwise compare against c and then against oc. */
6492 oc = char_othercase(common, c);
6493 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6494 bit = c ^ oc;
6495 if (is_powerof2(bit))
6496 {
6497 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6498 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6499 return cc + length;
6500 }
6501 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6502 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6503 JUMPHERE(jump[0]);
6504 return cc + length;
6505
6506 case OP_NOT:
6507 case OP_NOTI:
6508 if (check_str_ptr)
6509 detect_partial_match(common, backtracks);
6510 length = 1;
6511 #ifdef SUPPORT_UTF
6512 if (common->utf)
6513 {
6514 #ifdef COMPILE_PCRE8
6515 c = *cc;
/* 8-bit UTF with a pattern character below 128: only the first subject unit
   has to be compared; the subject character is then skipped whatever its
   length is. */
6516 if (c < 128)
6517 {
6518 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6519 if (type == OP_NOT || !char_has_othercase(common, cc))
6520 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6521 else
6522 {
6523 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6524 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6525 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6526 }
6527 /* Skip the variable-length character. */
6528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6529 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6530 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6532 JUMPHERE(jump[0]);
6533 return cc + 1;
6534 }
6535 else
6536 #endif /* COMPILE_PCRE8 */
6537 {
6538 GETCHARLEN(c, cc, length);
6539 }
6540 }
6541 else
6542 #endif /* SUPPORT_UTF */
6543 c = *cc;
6544
/* General path: the comparisons mirror OP_CHAR / OP_CHARI with the sense
   inverted, so an equal character causes the backtrack. */
6545 if (type == OP_NOT || !char_has_othercase(common, cc))
6546 {
6547 read_char_range(common, c, c, TRUE);
6548 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6549 }
6550 else
6551 {
6552 oc = char_othercase(common, c);
6553 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6554 bit = c ^ oc;
6555 if (is_powerof2(bit))
6556 {
6557 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6558 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6559 }
6560 else
6561 {
6562 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6563 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6564 }
6565 }
6566 return cc + length;
6567
6568 case OP_CLASS:
6569 case OP_NCLASS:
6570 if (check_str_ptr)
6571 detect_partial_match(common, backtracks);
6572
/* In 8-bit UTF mode "bit" is the largest value covered by the bitmap test:
   127 when is_char7_bitset() accepts the class, 255 otherwise. */
6573 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6574 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6575 read_char_range(common, 0, bit, type == OP_NCLASS);
6576 #else
6577 read_char_range(common, 0, 255, type == OP_NCLASS);
6578 #endif
6579
/* When check_class_ranges() returns non-zero nothing more is emitted here;
   only the 32-byte bitmap is skipped. */
6580 if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6581 return cc + 32 / sizeof(pcre_uchar);
6582
/* Characters above the bitmap range: OP_CLASS backtracks; for OP_NCLASS the
   jump is kept in jump[0] and lands after the bitmap test below. */
6583 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6584 jump[0] = NULL;
6585 if (common->utf)
6586 {
6587 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6588 if (type == OP_CLASS)
6589 {
6590 add_jump(compiler, backtracks, jump[0]);
6591 jump[0] = NULL;
6592 }
6593 }
6594 #elif !defined COMPILE_PCRE8
6595 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6596 if (type == OP_CLASS)
6597 {
6598 add_jump(compiler, backtracks, jump[0]);
6599 jump[0] = NULL;
6600 }
6601 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6602
/* Bitmap test: load the byte at index TMP1 >> 3 of the map at cc and test the
   bit numbered TMP1 & 7; backtrack when it is clear. */
6603 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6604 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6605 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6606 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6607 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6608 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6609
6610 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6611 if (jump[0] != NULL)
6612 JUMPHERE(jump[0]);
6613 #endif
6614 return cc + 32 / sizeof(pcre_uchar);
6615
6616 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6617 case OP_XCLASS:
/* The item data starts LINK_SIZE units after cc; the value read by
   GET(cc, 0), minus one, is the distance to the next opcode. */
6618 if (check_str_ptr)
6619 detect_partial_match(common, backtracks);
6620 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6621 return cc + GET(cc, 0) - 1;
6622 #endif
6623 }
/* Every handled opcode returns from inside the switch. */
6624 SLJIT_UNREACHABLE();
6625 return cc;
6626 }
6627
/* Compiles the item that starts at cc, merging a run of OP_CHAR / OP_CHARI
   items into one fixed-length comparison when possible.

   common     - shared compiler state
   cc         - pattern position of the first opcode
   ccend      - the scan for further character items stops at this position
   backtracks - list that collects the jumps taken when the match fails

   Returns the pattern position after the compiled item(s). When no
   fixed-length run is found the single opcode at cc is passed on to
   compile_char1_matchingpath(). */
6628 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6629 {
6630 /* This function consumes at least one input character. */
6631 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6632 DEFINE_COMPILER;
6633 pcre_uchar *ccbegin = cc;
6634 compare_context context;
6635 int size;
6636
/* First pass: add up the length of the run in context.length. "size" is the
   number of code units of the current item, or 0 when the item cannot be part
   of a fixed-length run, which ends the scan. */
6637 context.length = 0;
6638 do
6639 {
6640 if (cc >= ccend)
6641 break;
6642
6643 if (*cc == OP_CHAR)
6644 {
6645 size = 1;
6646 #ifdef SUPPORT_UTF
6647 if (common->utf && HAS_EXTRALEN(cc[1]))
6648 size += GET_EXTRALEN(cc[1]);
6649 #endif
6650 }
6651 else if (*cc == OP_CHARI)
6652 {
6653 size = 1;
/* A caseless character ends the run when it has another case and
   char_get_othercase_bit() returns 0 for it. */
6654 #ifdef SUPPORT_UTF
6655 if (common->utf)
6656 {
6657 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6658 size = 0;
6659 else if (HAS_EXTRALEN(cc[1]))
6660 size += GET_EXTRALEN(cc[1]);
6661 }
6662 else
6663 #endif
6664 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6665 size = 0;
6666 }
6667 else
6668 size = 0;
6669
6670 cc += 1 + size;
6671 context.length += IN_UCHARS(size);
6672 }
/* The scan also stops once the accumulated length exceeds 128. */
6673 while (size > 0 && context.length <= 128);
6674
/* Second pass: restart from the first opcode and emit the code. */
6675 cc = ccbegin;
6676 if (context.length > 0)
6677 {
6678 /* We have a fixed-length byte sequence. */
/* One length check covers the whole run: STR_PTR is advanced first and then
   compared with STR_END. */
6679 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6680 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6681
6682 context.sourcereg = -1;
6683 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6684 context.ucharptr = 0;
6685 #endif
/* The loop runs until context.length reaches 0 (byte_sequence_compare()
   presumably decreases it by the amount it compared). */
6686 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6687 return cc;
6688 }
6689
6690 /* A non-fixed length character will be checked if length == 0. */
6691 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6692 }
6693
6694 /* Forward declarations of functions that are used before their definitions. */
6695 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6696 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6697
/* Allocates a backtrack record of "size" bytes with sljit_alloc_memory(),
   clears it, links it in front of parent->top and records "ccstart" as its
   pattern position. The record is left in the "backtrack" variable.
   If the compiler reports an error after the allocation, the enclosing
   function returns "error".
   The variables "compiler", "backtrack" and "parent" must exist in the scope
   where the macro is used. */
6698 #define PUSH_BACKTRACK(size, ccstart, error) \
6699 do \
6700 { \
6701 backtrack = sljit_alloc_memory(compiler, (size)); \
6702 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6703 return error; \
6704 memset(backtrack, 0, size); \
6705 backtrack->prev = parent->top; \
6706 backtrack->cc = (ccstart); \
6707 parent->top = backtrack; \
6708 } \
6709 while (0)
6710
/* Variant of PUSH_BACKTRACK for functions that return no value: the same
   allocation, clearing and linking steps, but a compiler error makes the
   enclosing function return without a value.
   The variables "compiler", "backtrack" and "parent" must exist in the scope
   where the macro is used. */
6711 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
6712 do \
6713 { \
6714 backtrack = sljit_alloc_memory(compiler, (size)); \
6715 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6716 return; \
6717 memset(backtrack, 0, size); \
6718 backtrack->prev = parent->top; \
6719 backtrack->cc = (ccstart); \
6720 parent->top = backtrack; \
6721 } \
6722 while (0)
6723
/* Casts the "backtrack" variable of the current scope to a pointer to "type". */
6724 #define BACKTRACK_AS(type) ((type *)backtrack)
6725
/* Emits the search through the name-table entries of an OP_DNREF / OP_DNREFI
   item.

   common     - shared compiler state (name table, jscript_compat option)
   cc         - pattern position of the opcode; the index of the first
                name-table entry is read at cc + 1 and the entry count at
                cc + 1 + IMM2_SIZE
   backtracks - list that receives the failure jump; may be NULL, in which
                case no failure jump is emitted

   Each entry supplies a group number; its OVECTOR slot is compared with
   OVECTOR(1), which the comment in compile_ref_matchingpath() describes as
   the "string begin - 1" constant. The generated code stops at the first
   entry whose slot differs from that value. */
6726 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6727 {
6728 /* The OVECTOR offset goes to TMP2. */
6729 DEFINE_COMPILER;
6730 int count = GET2(cc, 1 + IMM2_SIZE);
6731 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6732 unsigned int offset;
6733 jump_list *found = NULL;
6734
6735 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6736
6737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6738
/* All entries except the last: leave through "found" as soon as a slot
   differs from TMP1. */
6739 count--;
6740 while (count-- > 0)
6741 {
6742 offset = GET2(slot, 0) << 1;
6743 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6744 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6745 slot += common->name_entry_size;
6746 }
6747
/* Last entry: TMP2 is set for it unconditionally. When a backtrack list is
   given and jscript_compat is off, a slot equal to TMP1 causes a backtrack. */
6748 offset = GET2(slot, 0) << 1;
6749 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6750 if (backtracks != NULL && !common->jscript_compat)
6751 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6752
6753 set_jumps(found, LABEL());
6754 }
6755
6756 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6757 {
6758 DEFINE_COMPILER;
6759 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6760 int offset = 0;
6761 struct sljit_jump *jump = NULL;
6762 struct sljit_jump *partial;
6763 struct sljit_jump *nopartial;
6764
6765 if (ref)
6766 {
6767 offset = GET2(cc, 1) << 1;
6768 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6769 /* OVECTOR(1) contains the "string begin - 1" constant. */
6770 if (withchecks && !common->jscript_compat)
6771 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6772 }
6773 else
6774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6775
6776 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6777 if (common->utf && *cc == OP_REFI)
6778 {
6779 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6780 if (ref)
6781 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6782 else
6783 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6784
6785 if (withchecks)
6786 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6787
6788 /* Needed to save important temporary registers. */
6789 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6790 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6791 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6792 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6793 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6794 if (common->mode == JIT_COMPILE)
6795 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6796 else
6797 {
6798 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6799 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6800 check_partial(common, FALSE);
6801 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6802 JUMPHERE(nopartial);
6803 }
6804 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6805 }
6806 else
6807 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6808 {
6809 if (ref)
6810 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6811 else
6812 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6813
6814 if (withchecks)
6815 jump = JUMP(SLJIT_ZERO);
6816
6817 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6818 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6819 if (common->mode == JIT_COMPILE)
6820 add_jump(compiler, backtracks, partial);
6821
6822 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6823 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6824
6825 if (common->mode != JIT_COMPILE)