/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1630 - (show annotations)
Wed Feb 10 10:53:45 2016 UTC (3 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 352723 byte(s)
Migrating single character optimizations from PCRE2-JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Allocate memory for the regex stack on the real machine stack.
   Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
   Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194
/* Singly linked list node pairing a jump ("start") with a label ("quit"). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values for frame-size results. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Tags for control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct char_iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 union {
285 jump_list *backtracks;
286 struct {
287 unsigned int othercasebit;
288 pcre_uchar chr;
289 BOOL enabled;
290 } charpos;
291 } u;
292 } char_iterator_backtrack;
293
294 typedef struct ref_iterator_backtrack {
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 } ref_iterator_backtrack;
299
300 typedef struct recurse_entry {
301 struct recurse_entry *next;
302 /* Contains the function entry. */
303 struct sljit_label *entry;
304 /* Collects the calls until the function is not created. */
305 jump_list *calls;
306 /* Points to the starting opcode. */
307 sljit_sw start;
308 } recurse_entry;
309
310 typedef struct recurse_backtrack {
311 backtrack_common common;
312 BOOL inlined_pattern;
313 } recurse_backtrack;
314
315 #define OP_THEN_TRAP OP_TABLE_LENGTH
316
317 typedef struct then_trap_backtrack {
318 backtrack_common common;
319 /* If then_trap is not NULL, this structure contains the real
320 then_trap for the backtracking path. */
321 struct then_trap_backtrack *then_trap;
322 /* Points to the starting opcode. */
323 sljit_sw start;
324 /* Exit point for the then opcodes of this alternative. */
325 jump_list *quit;
326 /* Frame size of the current alternative. */
327 int framesize;
328 } then_trap_backtrack;
329
330 #define MAX_RANGE_SIZE 4
331
332 typedef struct compiler_common {
333 /* The sljit ceneric compiler. */
334 struct sljit_compiler *compiler;
335 /* First byte code. */
336 pcre_uchar *start;
337 /* Maps private data offset to each opcode. */
338 sljit_si *private_data_ptrs;
339 /* Chain list of read-only data ptrs. */
340 void *read_only_data_head;
341 /* Tells whether the capturing bracket is optimized. */
342 sljit_ub *optimized_cbracket;
343 /* Tells whether the starting offset is a target of then. */
344 sljit_ub *then_offsets;
345 /* Current position where a THEN must jump. */
346 then_trap_backtrack *then_trap;
347 /* Starting offset of private data for capturing brackets. */
348 sljit_si cbra_ptr;
349 /* Output vector starting point. Must be divisible by 2. */
350 sljit_si ovector_start;
351 /* Points to the starting character of the current match. */
352 sljit_si start_ptr;
353 /* Last known position of the requested byte. */
354 sljit_si req_char_ptr;
355 /* Head of the last recursion. */
356 sljit_si recursive_head_ptr;
357 /* First inspected character for partial matching.
358 (Needed for avoiding zero length partial matches.) */
359 sljit_si start_used_ptr;
360 /* Starting pointer for partial soft matches. */
361 sljit_si hit_start;
362 /* End pointer of the first line. */
363 sljit_si first_line_end;
364 /* Points to the marked string. */
365 sljit_si mark_ptr;
366 /* Recursive control verb management chain. */
367 sljit_si control_head_ptr;
368 /* Points to the last matched capture block index. */
369 sljit_si capture_last_ptr;
370 /* Fast forward skipping byte code pointer. */
371 pcre_uchar *fast_forward_bc_ptr;
372 /* Locals used by fast fail optimization. */
373 sljit_si fast_fail_start_ptr;
374 sljit_si fast_fail_end_ptr;
375
376 /* Flipped and lower case tables. */
377 const sljit_ub *fcc;
378 sljit_sw lcc;
379 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380 int mode;
381 /* TRUE, when minlength is greater than 0. */
382 BOOL might_be_empty;
383 /* \K is found in the pattern. */
384 BOOL has_set_som;
385 /* (*SKIP:arg) is found in the pattern. */
386 BOOL has_skip_arg;
387 /* (*THEN) is found in the pattern. */
388 BOOL has_then;
389 /* Currently in recurse or negative assert. */
390 BOOL local_exit;
391 /* Currently in a positive assert. */
392 BOOL positive_assert;
393 /* Newline control. */
394 int nltype;
395 sljit_ui nlmax;
396 sljit_ui nlmin;
397 int newline;
398 int bsr_nltype;
399 sljit_ui bsr_nlmax;
400 sljit_ui bsr_nlmin;
401 /* Dollar endonly. */
402 int endonly;
403 /* Tables. */
404 sljit_sw ctypes;
405 /* Named capturing brackets. */
406 pcre_uchar *name_table;
407 sljit_sw name_count;
408 sljit_sw name_entry_size;
409
410 /* Labels and jump lists. */
411 struct sljit_label *partialmatchlabel;
412 struct sljit_label *quit_label;
413 struct sljit_label *forced_quit_label;
414 struct sljit_label *accept_label;
415 struct sljit_label *ff_newline_shortcut;
416 stub_list *stubs;
417 label_addr_list *label_addrs;
418 recurse_entry *entries;
419 recurse_entry *currententry;
420 jump_list *partialmatch;
421 jump_list *quit;
422 jump_list *positive_assert_quit;
423 jump_list *forced_quit;
424 jump_list *accept;
425 jump_list *calllimit;
426 jump_list *stackalloc;
427 jump_list *revertframes;
428 jump_list *wordboundary;
429 jump_list *anynewline;
430 jump_list *hspace;
431 jump_list *vspace;
432 jump_list *casefulcmp;
433 jump_list *caselesscmp;
434 jump_list *reset_match;
435 BOOL jscript_compat;
436 #ifdef SUPPORT_UTF
437 BOOL utf;
438 #ifdef SUPPORT_UCP
439 BOOL use_ucp;
440 jump_list *getucd;
441 #endif
442 #ifdef COMPILE_PCRE8
443 jump_list *utfreadchar;
444 jump_list *utfreadchar16;
445 jump_list *utfreadtype8;
446 #endif
447 #endif /* SUPPORT_UTF */
448 } compiler_common;
449
/* For byte_sequence_compare. */

/* When SLJIT_UNALIGNED is defined and non-zero, the context also
   carries two unions ("c" and "oc") that overlay an int / ushort view
   on an array of code units whose element type depends on the
   COMPILE_PCRE8 / COMPILE_PCRE16 / COMPILE_PCRE32 build mode. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
483
/* Undefine sljit macros. (CMP is redefined further below as a
   shortcut around sljit_emit_cmp.) */
#undef CMP

/* Used for accessing the elements of the stack: STACK(0) is
   -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw), and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by the generator. */
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
500
501 /* Local space layout. */
502 /* These two locals can be used by the current opcode. */
503 #define LOCALS0 (0 * sizeof(sljit_sw))
504 #define LOCALS1 (1 * sizeof(sljit_sw))
505 /* Two local variables for possessive quantifiers (char1 cannot use them). */
506 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
507 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
508 /* Max limit of recursions. */
509 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
510 /* The output vector is stored on the stack, and contains pointers
511 to characters. The vector data is divided into two groups: the first
512 group contains the start / end character pointers, and the second is
513 the start pointers when the end of the capturing group has not yet reached. */
514 #define OVECTOR_START (common->ovector_start)
515 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
516 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
517 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
518
519 #if defined COMPILE_PCRE8
520 #define MOV_UCHAR SLJIT_MOV_UB
521 #define MOVU_UCHAR SLJIT_MOVU_UB
522 #elif defined COMPILE_PCRE16
523 #define MOV_UCHAR SLJIT_MOV_UH
524 #define MOVU_UCHAR SLJIT_MOVU_UH
525 #elif defined COMPILE_PCRE32
526 #define MOV_UCHAR SLJIT_MOV_UI
527 #define MOVU_UCHAR SLJIT_MOVU_UI
528 #else
529 #error Unsupported compiling mode
530 #endif
531
/* Shortcuts. */
/* DEFINE_COMPILER declares a local "compiler" taken from common->compiler;
   every other shortcut below expects that local to be in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff
559
560 static pcre_uchar *bracketend(pcre_uchar *cc)
561 {
562 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
563 do cc += GET(cc, 1); while (*cc == OP_ALT);
564 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
565 cc += 1 + LINK_SIZE;
566 return cc;
567 }
568
569 static int no_alternatives(pcre_uchar *cc)
570 {
571 int count = 0;
572 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
573 do
574 {
575 cc += GET(cc, 1);
576 count++;
577 }
578 while (*cc == OP_ALT);
579 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
580 return count;
581 }
582
/* Population count lookup table: entry i is the number of set bits in
   the 4-bit value i (0..15). The table is read-only, hence const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
587
588 /* Functions which might need modification for each newly supported opcode:
589 next_opcode
590 check_opcode_types
591 set_private_data_ptrs
592 get_framesize
593 init_frame
594 get_private_data_copy_length
595 copy_private_data
596 compile_matchingpath
597 compile_backtrackingpath
598 */
599
600 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
601 {
602 SLJIT_UNUSED_ARG(common);
603 switch(*cc)
604 {
605 case OP_SOD:
606 case OP_SOM:
607 case OP_SET_SOM:
608 case OP_NOT_WORD_BOUNDARY:
609 case OP_WORD_BOUNDARY:
610 case OP_NOT_DIGIT:
611 case OP_DIGIT:
612 case OP_NOT_WHITESPACE:
613 case OP_WHITESPACE:
614 case OP_NOT_WORDCHAR:
615 case OP_WORDCHAR:
616 case OP_ANY:
617 case OP_ALLANY:
618 case OP_NOTPROP:
619 case OP_PROP:
620 case OP_ANYNL:
621 case OP_NOT_HSPACE:
622 case OP_HSPACE:
623 case OP_NOT_VSPACE:
624 case OP_VSPACE:
625 case OP_EXTUNI:
626 case OP_EODN:
627 case OP_EOD:
628 case OP_CIRC:
629 case OP_CIRCM:
630 case OP_DOLL:
631 case OP_DOLLM:
632 case OP_CRSTAR:
633 case OP_CRMINSTAR:
634 case OP_CRPLUS:
635 case OP_CRMINPLUS:
636 case OP_CRQUERY:
637 case OP_CRMINQUERY:
638 case OP_CRRANGE:
639 case OP_CRMINRANGE:
640 case OP_CRPOSSTAR:
641 case OP_CRPOSPLUS:
642 case OP_CRPOSQUERY:
643 case OP_CRPOSRANGE:
644 case OP_CLASS:
645 case OP_NCLASS:
646 case OP_REF:
647 case OP_REFI:
648 case OP_DNREF:
649 case OP_DNREFI:
650 case OP_RECURSE:
651 case OP_CALLOUT:
652 case OP_ALT:
653 case OP_KET:
654 case OP_KETRMAX:
655 case OP_KETRMIN:
656 case OP_KETRPOS:
657 case OP_REVERSE:
658 case OP_ASSERT:
659 case OP_ASSERT_NOT:
660 case OP_ASSERTBACK:
661 case OP_ASSERTBACK_NOT:
662 case OP_ONCE:
663 case OP_ONCE_NC:
664 case OP_BRA:
665 case OP_BRAPOS:
666 case OP_CBRA:
667 case OP_CBRAPOS:
668 case OP_COND:
669 case OP_SBRA:
670 case OP_SBRAPOS:
671 case OP_SCBRA:
672 case OP_SCBRAPOS:
673 case OP_SCOND:
674 case OP_CREF:
675 case OP_DNCREF:
676 case OP_RREF:
677 case OP_DNRREF:
678 case OP_DEF:
679 case OP_BRAZERO:
680 case OP_BRAMINZERO:
681 case OP_BRAPOSZERO:
682 case OP_PRUNE:
683 case OP_SKIP:
684 case OP_THEN:
685 case OP_COMMIT:
686 case OP_FAIL:
687 case OP_ACCEPT:
688 case OP_ASSERT_ACCEPT:
689 case OP_CLOSE:
690 case OP_SKIPZERO:
691 return cc + PRIV(OP_lengths)[*cc];
692
693 case OP_CHAR:
694 case OP_CHARI:
695 case OP_NOT:
696 case OP_NOTI:
697 case OP_STAR:
698 case OP_MINSTAR:
699 case OP_PLUS:
700 case OP_MINPLUS:
701 case OP_QUERY:
702 case OP_MINQUERY:
703 case OP_UPTO:
704 case OP_MINUPTO:
705 case OP_EXACT:
706 case OP_POSSTAR:
707 case OP_POSPLUS:
708 case OP_POSQUERY:
709 case OP_POSUPTO:
710 case OP_STARI:
711 case OP_MINSTARI:
712 case OP_PLUSI:
713 case OP_MINPLUSI:
714 case OP_QUERYI:
715 case OP_MINQUERYI:
716 case OP_UPTOI:
717 case OP_MINUPTOI:
718 case OP_EXACTI:
719 case OP_POSSTARI:
720 case OP_POSPLUSI:
721 case OP_POSQUERYI:
722 case OP_POSUPTOI:
723 case OP_NOTSTAR:
724 case OP_NOTMINSTAR:
725 case OP_NOTPLUS:
726 case OP_NOTMINPLUS:
727 case OP_NOTQUERY:
728 case OP_NOTMINQUERY:
729 case OP_NOTUPTO:
730 case OP_NOTMINUPTO:
731 case OP_NOTEXACT:
732 case OP_NOTPOSSTAR:
733 case OP_NOTPOSPLUS:
734 case OP_NOTPOSQUERY:
735 case OP_NOTPOSUPTO:
736 case OP_NOTSTARI:
737 case OP_NOTMINSTARI:
738 case OP_NOTPLUSI:
739 case OP_NOTMINPLUSI:
740 case OP_NOTQUERYI:
741 case OP_NOTMINQUERYI:
742 case OP_NOTUPTOI:
743 case OP_NOTMINUPTOI:
744 case OP_NOTEXACTI:
745 case OP_NOTPOSSTARI:
746 case OP_NOTPOSPLUSI:
747 case OP_NOTPOSQUERYI:
748 case OP_NOTPOSUPTOI:
749 cc += PRIV(OP_lengths)[*cc];
750 #ifdef SUPPORT_UTF
751 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
752 #endif
753 return cc;
754
755 /* Special cases. */
756 case OP_TYPESTAR:
757 case OP_TYPEMINSTAR:
758 case OP_TYPEPLUS:
759 case OP_TYPEMINPLUS:
760 case OP_TYPEQUERY:
761 case OP_TYPEMINQUERY:
762 case OP_TYPEUPTO:
763 case OP_TYPEMINUPTO:
764 case OP_TYPEEXACT:
765 case OP_TYPEPOSSTAR:
766 case OP_TYPEPOSPLUS:
767 case OP_TYPEPOSQUERY:
768 case OP_TYPEPOSUPTO:
769 return cc + PRIV(OP_lengths)[*cc] - 1;
770
771 case OP_ANYBYTE:
772 #ifdef SUPPORT_UTF
773 if (common->utf) return NULL;
774 #endif
775 return cc + 1;
776
777 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
778 case OP_XCLASS:
779 return cc + GET(cc, 1);
780 #endif
781
782 case OP_MARK:
783 case OP_PRUNE_ARG:
784 case OP_SKIP_ARG:
785 case OP_THEN_ARG:
786 return cc + 1 + 2 + cc[1];
787
788 default:
789 /* All opcodes are supported now! */
790 SLJIT_ASSERT_STOP();
791 return NULL;
792 }
793 }
794
795 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
796 {
797 int count;
798 pcre_uchar *slot;
799
800 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
801 while (cc < ccend)
802 {
803 switch(*cc)
804 {
805 case OP_SET_SOM:
806 common->has_set_som = TRUE;
807 common->might_be_empty = TRUE;
808 cc += 1;
809 break;
810
811 case OP_REF:
812 case OP_REFI:
813 common->optimized_cbracket[GET2(cc, 1)] = 0;
814 cc += 1 + IMM2_SIZE;
815 break;
816
817 case OP_CBRAPOS:
818 case OP_SCBRAPOS:
819 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
820 cc += 1 + LINK_SIZE + IMM2_SIZE;
821 break;
822
823 case OP_COND:
824 case OP_SCOND:
825 /* Only AUTO_CALLOUT can insert this opcode. We do
826 not intend to support this case. */
827 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
828 return FALSE;
829 cc += 1 + LINK_SIZE;
830 break;
831
832 case OP_CREF:
833 common->optimized_cbracket[GET2(cc, 1)] = 0;
834 cc += 1 + IMM2_SIZE;
835 break;
836
837 case OP_DNREF:
838 case OP_DNREFI:
839 case OP_DNCREF:
840 count = GET2(cc, 1 + IMM2_SIZE);
841 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
842 while (count-- > 0)
843 {
844 common->optimized_cbracket[GET2(slot, 0)] = 0;
845 slot += common->name_entry_size;
846 }
847 cc += 1 + 2 * IMM2_SIZE;
848 break;
849
850 case OP_RECURSE:
851 /* Set its value only once. */
852 if (common->recursive_head_ptr == 0)
853 {
854 common->recursive_head_ptr = common->ovector_start;
855 common->ovector_start += sizeof(sljit_sw);
856 }
857 cc += 1 + LINK_SIZE;
858 break;
859
860 case OP_CALLOUT:
861 if (common->capture_last_ptr == 0)
862 {
863 common->capture_last_ptr = common->ovector_start;
864 common->ovector_start += sizeof(sljit_sw);
865 }
866 cc += 2 + 2 * LINK_SIZE;
867 break;
868
869 case OP_THEN_ARG:
870 common->has_then = TRUE;
871 common->control_head_ptr = 1;
872 /* Fall through. */
873
874 case OP_PRUNE_ARG:
875 case OP_MARK:
876 if (common->mark_ptr == 0)
877 {
878 common->mark_ptr = common->ovector_start;
879 common->ovector_start += sizeof(sljit_sw);
880 }
881 cc += 1 + 2 + cc[1];
882 break;
883
884 case OP_THEN:
885 common->has_then = TRUE;
886 common->control_head_ptr = 1;
887 /* Fall through. */
888
889 case OP_PRUNE:
890 case OP_SKIP:
891 cc += 1;
892 break;
893
894 case OP_SKIP_ARG:
895 common->control_head_ptr = 1;
896 common->has_skip_arg = TRUE;
897 cc += 1 + 2 + cc[1];
898 break;
899
900 default:
901 cc = next_opcode(common, cc);
902 if (cc == NULL)
903 return FALSE;
904 break;
905 }
906 }
907 return TRUE;
908 }
909
910 static int get_class_iterator_size(pcre_uchar *cc)
911 {
912 switch(*cc)
913 {
914 case OP_CRSTAR:
915 case OP_CRPLUS:
916 return 2;
917
918 case OP_CRMINSTAR:
919 case OP_CRMINPLUS:
920 case OP_CRQUERY:
921 case OP_CRMINQUERY:
922 return 1;
923
924 case OP_CRRANGE:
925 case OP_CRMINRANGE:
926 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
927 return 0;
928 return 2;
929
930 default:
931 return 0;
932 }
933 }
934
935 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
936 {
937 pcre_uchar *end = bracketend(begin);
938 pcre_uchar *next;
939 pcre_uchar *next_end;
940 pcre_uchar *max_end;
941 pcre_uchar type;
942 sljit_sw length = end - begin;
943 int min, max, i;
944
945 /* Detect fixed iterations first. */
946 if (end[-(1 + LINK_SIZE)] != OP_KET)
947 return FALSE;
948
949 /* Already detected repeat. */
950 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
951 return TRUE;
952
953 next = end;
954 min = 1;
955 while (1)
956 {
957 if (*next != *begin)
958 break;
959 next_end = bracketend(next);
960 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
961 break;
962 next = next_end;
963 min++;
964 }
965
966 if (min == 2)
967 return FALSE;
968
969 max = 0;
970 max_end = next;
971 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
972 {
973 type = *next;
974 while (1)
975 {
976 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
977 break;
978 next_end = bracketend(next + 2 + LINK_SIZE);
979 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
980 break;
981 next = next_end;
982 max++;
983 }
984
985 if (next[0] == type && next[1] == *begin && max >= 1)
986 {
987 next_end = bracketend(next + 1);
988 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
989 {
990 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
991 if (*next_end != OP_KET)
992 break;
993
994 if (i == max)
995 {
996 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
997 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
998 /* +2 the original and the last. */
999 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1000 if (min == 1)
1001 return TRUE;
1002 min--;
1003 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1004 }
1005 }
1006 }
1007 }
1008
1009 if (min >= 3)
1010 {
1011 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1012 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1013 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1014 return TRUE;
1015 }
1016
1017 return FALSE;
1018 }
1019
/* Groups of "case" labels for use inside a switch on an opcode. Each
   macro expands to a run of labels (including the final colon), so it
   must be followed directly by the statements for that group. */

/* Single character iterators: min-star, min-plus, query and min-query
   forms (plain, caseless, negated and negated caseless). */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators: greedy star and plus forms. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators: upto and min-upto forms. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: min-star, min-plus, query, min-query. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: greedy star and plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators: upto and min-upto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1071
/* Scans the compiled pattern from common->start up to ccend and records, in
common->private_data_ptrs, the private data offset assigned to every opcode
which needs one. On entry *private_data_start is the first free offset; on
return it holds the first free offset after the assignments made here. The
scan stops early once the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
1072 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1073 {
1074 pcre_uchar *cc = common->start;
1075 pcre_uchar *alternative;
/* While cc < end, single character iterators get no private slot. */
1076 pcre_uchar *end = NULL;
1077 int private_data_ptr = *private_data_start;
/* space: private words wanted by a character iterator.
size: code units to skip; a negative value means a character follows the
opcode, so extra UTF code units may have to be skipped as well.
bracketlen: length of a bracket header. */
1078 int space, size, bracketlen;
/* Cleared for one iteration after a BRAZERO-type opcode, so the bracket which
follows it is not passed to detect_repeat(). */
1079 BOOL repeat_check = TRUE;
1080
1081 while (cc < ccend)
1082 {
1083 space = 0;
1084 size = 0;
1085 bracketlen = 0;
1086 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1087 break;
1088
1089 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1090 {
1091 if (detect_repeat(common, cc))
1092 {
1093 /* These brackets are converted to repeats, so no global
1094 based single character repeat is allowed. */
1095 if (cc >= end)
1096 end = bracketend(cc);
1097 }
1098 }
1099 repeat_check = TRUE;
1100
1101 switch(*cc)
1102 {
1103 case OP_KET:
/* A non-zero entry right after the KET gives this KET its own slot and is
also used as an extra distance to skip. NOTE(review): presumably stored by
detect_repeat() - confirm. */
1104 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1105 {
1106 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1107 private_data_ptr += sizeof(sljit_sw);
1108 cc += common->private_data_ptrs[cc + 1 - common->start];
1109 }
1110 cc += 1 + LINK_SIZE;
1111 break;
1112
/* Brackets which always get one private word. */
1113 case OP_ASSERT:
1114 case OP_ASSERT_NOT:
1115 case OP_ASSERTBACK:
1116 case OP_ASSERTBACK_NOT:
1117 case OP_ONCE:
1118 case OP_ONCE_NC:
1119 case OP_BRAPOS:
1120 case OP_SBRA:
1121 case OP_SBRAPOS:
1122 case OP_SCOND:
1123 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1124 private_data_ptr += sizeof(sljit_sw);
1125 bracketlen = 1 + LINK_SIZE;
1126 break;
1127
1128 case OP_CBRAPOS:
1129 case OP_SCBRAPOS:
1130 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1131 private_data_ptr += sizeof(sljit_sw);
1132 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1133 break;
1134
1135 case OP_COND:
1136 /* Might be a hidden SCOND. */
1137 alternative = cc + GET(cc, 1);
1138 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1139 {
1140 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1141 private_data_ptr += sizeof(sljit_sw);
1142 }
1143 bracketlen = 1 + LINK_SIZE;
1144 break;
1145
/* Brackets without a private word of their own. */
1146 case OP_BRA:
1147 bracketlen = 1 + LINK_SIZE;
1148 break;
1149
1150 case OP_CBRA:
1151 case OP_SCBRA:
1152 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1153 break;
1154
1155 case OP_BRAZERO:
1156 case OP_BRAMINZERO:
1157 case OP_BRAPOSZERO:
1158 repeat_check = FALSE;
1159 size = 1;
1160 break;
1161
1162 CASE_ITERATOR_PRIVATE_DATA_1
1163 space = 1;
1164 size = -2;
1165 break;
1166
1167 CASE_ITERATOR_PRIVATE_DATA_2A
1168 space = 2;
1169 size = -2;
1170 break;
1171
1172 CASE_ITERATOR_PRIVATE_DATA_2B
1173 space = 2;
1174 size = -(2 + IMM2_SIZE);
1175 break;
1176
1177 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1178 space = 1;
1179 size = 1;
1180 break;
1181
1182 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
/* No private words are reserved when the repeated type is OP_ANYNL or
OP_EXTUNI. */
1183 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1184 space = 2;
1185 size = 1;
1186 break;
1187
1188 case OP_TYPEUPTO:
1189 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1190 space = 2;
1191 size = 1 + IMM2_SIZE;
1192 break;
1193
1194 case OP_TYPEMINUPTO:
1195 space = 2;
1196 size = 1 + IMM2_SIZE;
1197 break;
1198
1199 case OP_CLASS:
1200 case OP_NCLASS:
/* size is 0 here, so this sets it to the opcode plus the 32 byte bitmap. */
1201 size += 1 + 32 / sizeof(pcre_uchar);
1202 space = get_class_iterator_size(cc + size);
1203 break;
1204
1205 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1206 case OP_XCLASS:
1207 size = GET(cc, 1);
1208 space = get_class_iterator_size(cc + size);
1209 break;
1210 #endif
1211
1212 default:
/* cc is advanced here; space, size and bracketlen all stay zero. */
1213 cc = next_opcode(common, cc);
1214 SLJIT_ASSERT(cc != NULL);
1215 break;
1216 }
1217
1218 /* Character iterators, which are not inside a repeated bracket,
1219 get a private slot instead of allocating it on the stack. */
1220 if (space > 0 && cc >= end)
1221 {
1222 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1223 private_data_ptr += sizeof(sljit_sw) * space;
1224 }
1225
1226 if (size != 0)
1227 {
1228 if (size < 0)
1229 {
1230 cc += -size;
1231 #ifdef SUPPORT_UTF
/* The last skipped code unit starts a character: skip its extra units. */
1232 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1233 #endif
1234 }
1235 else
1236 cc += size;
1237 }
1238
1239 if (bracketlen > 0)
1240 {
1241 if (cc >= end)
1242 {
/* Only a bracket which does not end with a plain OP_KET keeps iterators
inside it from getting private slots. */
1243 end = bracketend(cc);
1244 if (end[-1 - LINK_SIZE] == OP_KET)
1245 end = NULL;
1246 }
1247 cc += bracketlen;
1248 }
1249 }
1250 *private_data_start = private_data_ptr;
1251 }
1252
1253 /* Returns with a frame_types (always < 0) if no need for frame. */
/* Otherwise returns the number of words counted for the code range plus one.
The counting mirrors init_frame(): two words for the first SET_SOM, two for
the first MARK / PRUNE_ARG / THEN_ARG, two for the last capture index (when
common->capture_last_ptr is set) and three for every capturing bracket.
NOTE(review): the extra word is presumably the terminating zero stored by
init_frame() - confirm.

When ccend is NULL the range is the inside of the bracket starting at cc.
When recursive is TRUE, SET_SOM and mark values are treated as already
counted. *needs_control_head is set to TRUE when a THEN, MARK, PRUNE_ARG or
THEN_ARG opcode is found and common->control_head_ptr is non-zero (or always
when DEBUG_FORCE_CONTROL_HEAD is enabled). */
1254 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1255 {
1256 int length = 0;
/* Length contributed by a leading CBRAPOS / SCBRAPOS bracket alone. */
1257 int possessive = 0;
/* Decides between no_frame (TRUE) and no_stack (FALSE) when no frame is
needed. */
1258 BOOL stack_restore = FALSE;
1259 BOOL setsom_found = recursive;
1260 BOOL setmark_found = recursive;
1261 /* The last capture is a local variable even for recursions. */
1262 BOOL capture_last_found = FALSE;
1263
1264 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1265 SLJIT_ASSERT(common->control_head_ptr != 0);
1266 *needs_control_head = TRUE;
1267 #else
1268 *needs_control_head = FALSE;
1269 #endif
1270
1271 if (ccend == NULL)
1272 {
1273 ccend = bracketend(cc) - (1 + LINK_SIZE);
1274 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1275 {
1276 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1277 /* This is correct regardless of common->capture_last_ptr. */
1278 capture_last_found = TRUE;
1279 }
/* The opening bracket itself is always skipped here; a leading CBRAPOS /
SCBRAPOS was already counted above. */
1280 cc = next_opcode(common, cc);
1281 }
1282
1283 SLJIT_ASSERT(cc != NULL);
1284 while (cc < ccend)
1285 switch(*cc)
1286 {
1287 case OP_SET_SOM:
1288 SLJIT_ASSERT(common->has_set_som);
1289 stack_restore = TRUE;
1290 if (!setsom_found)
1291 {
1292 length += 2;
1293 setsom_found = TRUE;
1294 }
1295 cc += 1;
1296 break;
1297
1298 case OP_MARK:
1299 case OP_PRUNE_ARG:
1300 case OP_THEN_ARG:
1301 SLJIT_ASSERT(common->mark_ptr != 0);
1302 stack_restore = TRUE;
1303 if (!setmark_found)
1304 {
1305 length += 2;
1306 setmark_found = TRUE;
1307 }
1308 if (common->control_head_ptr != 0)
1309 *needs_control_head = TRUE;
/* cc[1] is used as the length of the argument which follows. */
1310 cc += 1 + 2 + cc[1];
1311 break;
1312
1313 case OP_RECURSE:
/* A recursion needs every value which has not been counted yet. */
1314 stack_restore = TRUE;
1315 if (common->has_set_som && !setsom_found)
1316 {
1317 length += 2;
1318 setsom_found = TRUE;
1319 }
1320 if (common->mark_ptr != 0 && !setmark_found)
1321 {
1322 length += 2;
1323 setmark_found = TRUE;
1324 }
1325 if (common->capture_last_ptr != 0 && !capture_last_found)
1326 {
1327 length += 2;
1328 capture_last_found = TRUE;
1329 }
1330 cc += 1 + LINK_SIZE;
1331 break;
1332
1333 case OP_CBRA:
1334 case OP_CBRAPOS:
1335 case OP_SCBRA:
1336 case OP_SCBRAPOS:
1337 stack_restore = TRUE;
1338 if (common->capture_last_ptr != 0 && !capture_last_found)
1339 {
1340 length += 2;
1341 capture_last_found = TRUE;
1342 }
1343 length += 3;
1344 cc += 1 + LINK_SIZE + IMM2_SIZE;
1345 break;
1346
1347 case OP_THEN:
1348 stack_restore = TRUE;
1349 if (common->control_head_ptr != 0)
1350 *needs_control_head = TRUE;
1351 cc ++;
1352 break;
1353
1354 default:
/* Any opcode which is not listed below requests a stack restore. */
1355 stack_restore = TRUE;
1356 /* Fall through. */
1357
/* The opcodes listed here are skipped without setting stack_restore. */
1358 case OP_NOT_WORD_BOUNDARY:
1359 case OP_WORD_BOUNDARY:
1360 case OP_NOT_DIGIT:
1361 case OP_DIGIT:
1362 case OP_NOT_WHITESPACE:
1363 case OP_WHITESPACE:
1364 case OP_NOT_WORDCHAR:
1365 case OP_WORDCHAR:
1366 case OP_ANY:
1367 case OP_ALLANY:
1368 case OP_ANYBYTE:
1369 case OP_NOTPROP:
1370 case OP_PROP:
1371 case OP_ANYNL:
1372 case OP_NOT_HSPACE:
1373 case OP_HSPACE:
1374 case OP_NOT_VSPACE:
1375 case OP_VSPACE:
1376 case OP_EXTUNI:
1377 case OP_EODN:
1378 case OP_EOD:
1379 case OP_CIRC:
1380 case OP_CIRCM:
1381 case OP_DOLL:
1382 case OP_DOLLM:
1383 case OP_CHAR:
1384 case OP_CHARI:
1385 case OP_NOT:
1386 case OP_NOTI:
1387
1388 case OP_EXACT:
1389 case OP_POSSTAR:
1390 case OP_POSPLUS:
1391 case OP_POSQUERY:
1392 case OP_POSUPTO:
1393
1394 case OP_EXACTI:
1395 case OP_POSSTARI:
1396 case OP_POSPLUSI:
1397 case OP_POSQUERYI:
1398 case OP_POSUPTOI:
1399
1400 case OP_NOTEXACT:
1401 case OP_NOTPOSSTAR:
1402 case OP_NOTPOSPLUS:
1403 case OP_NOTPOSQUERY:
1404 case OP_NOTPOSUPTO:
1405
1406 case OP_NOTEXACTI:
1407 case OP_NOTPOSSTARI:
1408 case OP_NOTPOSPLUSI:
1409 case OP_NOTPOSQUERYI:
1410 case OP_NOTPOSUPTOI:
1411
1412 case OP_TYPEEXACT:
1413 case OP_TYPEPOSSTAR:
1414 case OP_TYPEPOSPLUS:
1415 case OP_TYPEPOSQUERY:
1416 case OP_TYPEPOSUPTO:
1417
1418 case OP_CLASS:
1419 case OP_NCLASS:
1420 case OP_XCLASS:
1421 case OP_CALLOUT:
1422
1423 cc = next_opcode(common, cc);
1424 SLJIT_ASSERT(cc != NULL);
1425 break;
1426 }
1427
1428 /* Possessive quantifiers can use a special case. */
/* Nothing was added to the length set for the leading CBRAPOS / SCBRAPOS
(this also holds when both values are still zero). */
1429 if (SLJIT_UNLIKELY(possessive == length))
1430 return stack_restore ? no_frame : no_stack;
1431
1432 if (length > 0)
1433 return length + 1;
1434 return stack_restore ? no_frame : no_stack;
1435 }
1436
/* Emits the code which stores a frame on the stack (STACK_TOP based),
starting at stack position stackpos. The code range is walked the same way
as in get_framesize(), and for every item found stores are generated:

  first SET_SOM:            -OVECTOR(0), then the value of OVECTOR(0)
  first mark type opcode:   -common->mark_ptr, then the mark value
  last capture index:       -common->capture_last_ptr, then its value
  each capturing bracket:   OVECTOR(offset), then both ovector values

The frame is closed by a zero word. When ccend is NULL the range is the
inside of the bracket starting at cc; a leading CBRAPOS / SCBRAPOS is itself
part of the frame unless recursive is TRUE. stacktop is only used by the
assertions. */
1437 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1438 {
1439 DEFINE_COMPILER;
1440 BOOL setsom_found = recursive;
1441 BOOL setmark_found = recursive;
1442 /* The last capture is a local variable even for recursions. */
1443 BOOL capture_last_found = FALSE;
1444 int offset;
1445
1446 /* >= 1 + shortest item size (2) */
1447 SLJIT_UNUSED_ARG(stacktop);
1448 SLJIT_ASSERT(stackpos >= stacktop + 2);
1449
/* From here on stackpos is the value STACK() yields for the slot index; it is
advanced by sizeof(sljit_sw) after every store. */
1450 stackpos = STACK(stackpos);
1451 if (ccend == NULL)
1452 {
1453 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive CBRAPOS / SCBRAPOS is not skipped: the loop below saves
its capture as well. */
1454 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1455 cc = next_opcode(common, cc);
1456 }
1457
1458 SLJIT_ASSERT(cc != NULL);
1459 while (cc < ccend)
1460 switch(*cc)
1461 {
1462 case OP_SET_SOM:
1463 SLJIT_ASSERT(common->has_set_som);
1464 if (!setsom_found)
1465 {
1466 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1468 stackpos += (int)sizeof(sljit_sw);
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1470 stackpos += (int)sizeof(sljit_sw);
1471 setsom_found = TRUE;
1472 }
1473 cc += 1;
1474 break;
1475
1476 case OP_MARK:
1477 case OP_PRUNE_ARG:
1478 case OP_THEN_ARG:
1479 SLJIT_ASSERT(common->mark_ptr != 0);
1480 if (!setmark_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setmark_found = TRUE;
1488 }
1489 cc += 1 + 2 + cc[1];
1490 break;
1491
1492 case OP_RECURSE:
/* A recursion saves every value which has not been saved yet. */
1493 if (common->has_set_som && !setsom_found)
1494 {
1495 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1497 stackpos += (int)sizeof(sljit_sw);
1498 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1499 stackpos += (int)sizeof(sljit_sw);
1500 setsom_found = TRUE;
1501 }
1502 if (common->mark_ptr != 0 && !setmark_found)
1503 {
1504 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1505 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1506 stackpos += (int)sizeof(sljit_sw);
1507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1508 stackpos += (int)sizeof(sljit_sw);
1509 setmark_found = TRUE;
1510 }
1511 if (common->capture_last_ptr != 0 && !capture_last_found)
1512 {
1513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1514 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1515 stackpos += (int)sizeof(sljit_sw);
1516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1517 stackpos += (int)sizeof(sljit_sw);
1518 capture_last_found = TRUE;
1519 }
1520 cc += 1 + LINK_SIZE;
1521 break;
1522
1523 case OP_CBRA:
1524 case OP_CBRAPOS:
1525 case OP_SCBRA:
1526 case OP_SCBRAPOS:
1527 if (common->capture_last_ptr != 0 && !capture_last_found)
1528 {
1529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1531 stackpos += (int)sizeof(sljit_sw);
1532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1533 stackpos += (int)sizeof(sljit_sw);
1534 capture_last_found = TRUE;
1535 }
/* The bracket number doubled is the index of the first ovector entry of the
capture; three words are stored: that entry's offset and both values. */
1536 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1538 stackpos += (int)sizeof(sljit_sw);
1539 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1540 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1541 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1542 stackpos += (int)sizeof(sljit_sw);
1543 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1544 stackpos += (int)sizeof(sljit_sw);
1545
1546 cc += 1 + LINK_SIZE + IMM2_SIZE;
1547 break;
1548
1549 default:
1550 cc = next_opcode(common, cc);
1551 SLJIT_ASSERT(cc != NULL);
1552 break;
1553 }
1554
/* Terminating zero word; it must land exactly on the slot given by stacktop. */
1555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1556 SLJIT_ASSERT(stackpos == STACK(stacktop));
1557 }
1558
1559 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1560 {
1561 int private_data_length = needs_control_head ? 3 : 2;
1562 int size;
1563 pcre_uchar *alternative;
1564 /* Calculate the sum of the private machine words. */
1565 while (cc < ccend)
1566 {
1567 size = 0;
1568 switch(*cc)
1569 {
1570 case OP_KET:
1571 if (PRIVATE_DATA(cc) != 0)
1572 {
1573 private_data_length++;
1574 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1575 cc += PRIVATE_DATA(cc + 1);
1576 }
1577 cc += 1 + LINK_SIZE;
1578 break;
1579
1580 case OP_ASSERT:
1581 case OP_ASSERT_NOT:
1582 case OP_ASSERTBACK:
1583 case OP_ASSERTBACK_NOT:
1584 case OP_ONCE:
1585 case OP_ONCE_NC:
1586 case OP_BRAPOS:
1587 case OP_SBRA:
1588 case OP_SBRAPOS:
1589 case OP_SCOND:
1590 private_data_length++;
1591 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1592 cc += 1 + LINK_SIZE;
1593 break;
1594
1595 case OP_CBRA:
1596 case OP_SCBRA:
1597 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1598 private_data_length++;
1599 cc += 1 + LINK_SIZE + IMM2_SIZE;
1600 break;
1601
1602 case OP_CBRAPOS:
1603 case OP_SCBRAPOS:
1604 private_data_length += 2;
1605 cc += 1 + LINK_SIZE + IMM2_SIZE;
1606 break;
1607
1608 case OP_COND:
1609 /* Might be a hidden SCOND. */
1610 alternative = cc + GET(cc, 1);
1611 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1612 private_data_length++;
1613 cc += 1 + LINK_SIZE;
1614 break;
1615
1616 CASE_ITERATOR_PRIVATE_DATA_1
1617 if (PRIVATE_DATA(cc))
1618 private_data_length++;
1619 cc += 2;
1620 #ifdef SUPPORT_UTF
1621 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1622 #endif
1623 break;
1624
1625 CASE_ITERATOR_PRIVATE_DATA_2A
1626 if (PRIVATE_DATA(cc))
1627 private_data_length += 2;
1628 cc += 2;
1629 #ifdef SUPPORT_UTF
1630 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1631 #endif
1632 break;
1633
1634 CASE_ITERATOR_PRIVATE_DATA_2B
1635 if (PRIVATE_DATA(cc))
1636 private_data_length += 2;
1637 cc += 2 + IMM2_SIZE;
1638 #ifdef SUPPORT_UTF
1639 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1640 #endif
1641 break;
1642
1643 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1644 if (PRIVATE_DATA(cc))
1645 private_data_length++;
1646 cc += 1;
1647 break;
1648
1649 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1650 if (PRIVATE_DATA(cc))
1651 private_data_length += 2;
1652 cc += 1;
1653 break;
1654
1655 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += 2;
1658 cc += 1 + IMM2_SIZE;
1659 break;
1660
1661 case OP_CLASS:
1662 case OP_NCLASS:
1663 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1664 case OP_XCLASS:
1665 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1666 #else
1667 size = 1 + 32 / (int)sizeof(pcre_uchar);
1668 #endif
1669 if (PRIVATE_DATA(cc))
1670 private_data_length += get_class_iterator_size(cc + size);
1671 cc += size;
1672 break;
1673
1674 default:
1675 cc = next_opcode(common, cc);
1676 SLJIT_ASSERT(cc != NULL);
1677 break;
1678 }
1679 }
1680 SLJIT_ASSERT(cc == ccend);
1681 return private_data_length;
1682 }
1683
/* Emits the code which copies the private data words of the code range
[cc, ccend) between the local area (SLJIT_SP based) and the stack
(STACK_TOP based).

save == TRUE:  locals are written to the stack. The first words are
               common->recursive_head_ptr and, when needs_control_head is
               TRUE, common->control_head_ptr, followed by the private
               words of the opcodes in the range.
save == FALSE: the stack content is written back to the locals. The leading
               one or two head words are skipped and not restored.

The words travel through TMP1 and TMP2 alternately, so a load and a store
belonging to different words can follow each other. stackptr and stacktop
are stack slot indexes which are converted with STACK() below. */
1684 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1685 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1686 {
1687 DEFINE_COMPILER;
/* Local offsets of the (at most two) words owned by the current opcode. */
1688 int srcw[2];
1689 int count, size;
/* tmp1next: the next word goes through TMP1 (otherwise TMP2).
tmp1empty / tmp2empty: the register holds no pending word. */
1690 BOOL tmp1next = TRUE;
1691 BOOL tmp1empty = TRUE;
1692 BOOL tmp2empty = TRUE;
1693 pcre_uchar *alternative;
1694 enum {
1695 start,
1696 loop,
1697 end
1698 } status;
1699
/* The head words (start state) are only processed when saving. */
1700 status = save ? start : loop;
1701 stackptr = STACK(stackptr - 2);
1702 stacktop = STACK(stacktop - 1);
1703
1704 if (!save)
1705 {
/* Skip the head word(s), then preload up to two words into TMP1 / TMP2. */
1706 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1707 if (stackptr < stacktop)
1708 {
1709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1710 stackptr += sizeof(sljit_sw);
1711 tmp1empty = FALSE;
1712 }
1713 if (stackptr < stacktop)
1714 {
1715 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1716 stackptr += sizeof(sljit_sw);
1717 tmp2empty = FALSE;
1718 }
1719 /* The tmp1next must be TRUE in either way. */
1720 }
1721
/* Each iteration selects the words of one item (count and srcw[]), then the
inner while loop emits the moves for them. */
1722 do
1723 {
1724 count = 0;
1725 switch(status)
1726 {
1727 case start:
1728 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1729 count = 1;
1730 srcw[0] = common->recursive_head_ptr;
1731 if (needs_control_head)
1732 {
1733 SLJIT_ASSERT(common->control_head_ptr != 0);
1734 count = 2;
1735 srcw[1] = common->control_head_ptr;
1736 }
1737 status = loop;
1738 break;
1739
1740 case loop:
1741 if (cc >= ccend)
1742 {
1743 status = end;
1744 break;
1745 }
1746
/* The cases below follow get_private_data_copy_length(): the same opcodes
contribute the same number of words. */
1747 switch(*cc)
1748 {
1749 case OP_KET:
1750 if (PRIVATE_DATA(cc) != 0)
1751 {
1752 count = 1;
1753 srcw[0] = PRIVATE_DATA(cc);
1754 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1755 cc += PRIVATE_DATA(cc + 1);
1756 }
1757 cc += 1 + LINK_SIZE;
1758 break;
1759
1760 case OP_ASSERT:
1761 case OP_ASSERT_NOT:
1762 case OP_ASSERTBACK:
1763 case OP_ASSERTBACK_NOT:
1764 case OP_ONCE:
1765 case OP_ONCE_NC:
1766 case OP_BRAPOS:
1767 case OP_SBRA:
1768 case OP_SBRAPOS:
1769 case OP_SCOND:
1770 count = 1;
1771 srcw[0] = PRIVATE_DATA(cc);
1772 SLJIT_ASSERT(srcw[0] != 0);
1773 cc += 1 + LINK_SIZE;
1774 break;
1775
1776 case OP_CBRA:
1777 case OP_SCBRA:
1778 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1779 {
1780 count = 1;
1781 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1782 }
1783 cc += 1 + LINK_SIZE + IMM2_SIZE;
1784 break;
1785
1786 case OP_CBRAPOS:
1787 case OP_SCBRAPOS:
1788 count = 2;
1789 srcw[0] = PRIVATE_DATA(cc);
1790 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1791 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1792 cc += 1 + LINK_SIZE + IMM2_SIZE;
1793 break;
1794
1795 case OP_COND:
1796 /* Might be a hidden SCOND. */
1797 alternative = cc + GET(cc, 1);
1798 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1799 {
1800 count = 1;
1801 srcw[0] = PRIVATE_DATA(cc);
1802 SLJIT_ASSERT(srcw[0] != 0);
1803 }
1804 cc += 1 + LINK_SIZE;
1805 break;
1806
1807 CASE_ITERATOR_PRIVATE_DATA_1
1808 if (PRIVATE_DATA(cc))
1809 {
1810 count = 1;
1811 srcw[0] = PRIVATE_DATA(cc);
1812 }
1813 cc += 2;
1814 #ifdef SUPPORT_UTF
1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1816 #endif
1817 break;
1818
1819 CASE_ITERATOR_PRIVATE_DATA_2A
1820 if (PRIVATE_DATA(cc))
1821 {
/* Two consecutive words. */
1822 count = 2;
1823 srcw[0] = PRIVATE_DATA(cc);
1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1825 }
1826 cc += 2;
1827 #ifdef SUPPORT_UTF
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1829 #endif
1830 break;
1831
1832 CASE_ITERATOR_PRIVATE_DATA_2B
1833 if (PRIVATE_DATA(cc))
1834 {
1835 count = 2;
1836 srcw[0] = PRIVATE_DATA(cc);
1837 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1838 }
1839 cc += 2 + IMM2_SIZE;
1840 #ifdef SUPPORT_UTF
1841 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1842 #endif
1843 break;
1844
1845 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1846 if (PRIVATE_DATA(cc))
1847 {
1848 count = 1;
1849 srcw[0] = PRIVATE_DATA(cc);
1850 }
1851 cc += 1;
1852 break;
1853
1854 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1855 if (PRIVATE_DATA(cc))
1856 {
1857 count = 2;
1858 srcw[0] = PRIVATE_DATA(cc);
1859 srcw[1] = srcw[0] + sizeof(sljit_sw);
1860 }
1861 cc += 1;
1862 break;
1863
1864 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1865 if (PRIVATE_DATA(cc))
1866 {
1867 count = 2;
1868 srcw[0] = PRIVATE_DATA(cc);
1869 srcw[1] = srcw[0] + sizeof(sljit_sw);
1870 }
1871 cc += 1 + IMM2_SIZE;
1872 break;
1873
1874 case OP_CLASS:
1875 case OP_NCLASS:
1876 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1877 case OP_XCLASS:
1878 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1879 #else
1880 size = 1 + 32 / (int)sizeof(pcre_uchar);
1881 #endif
/* The iterator which follows the class decides the number of words; only
one or two are expected here. */
1882 if (PRIVATE_DATA(cc))
1883 switch(get_class_iterator_size(cc + size))
1884 {
1885 case 1:
1886 count = 1;
1887 srcw[0] = PRIVATE_DATA(cc);
1888 break;
1889
1890 case 2:
1891 count = 2;
1892 srcw[0] = PRIVATE_DATA(cc);
1893 srcw[1] = srcw[0] + sizeof(sljit_sw);
1894 break;
1895
1896 default:
1897 SLJIT_ASSERT_STOP();
1898 break;
1899 }
1900 cc += size;
1901 break;
1902
1903 default:
1904 cc = next_opcode(common, cc);
1905 SLJIT_ASSERT(cc != NULL);
1906 break;
1907 }
1908 break;
1909
1910 case end:
1911 SLJIT_ASSERT_STOP();
1912 break;
1913 }
1914
/* The words are taken from srcw[] in reverse order (srcw[count - 1] first). */
1915 while (count > 0)
1916 {
1917 count--;
1918 if (save)
1919 {
/* Saving: write out the word pending in the chosen register, if any, then
load the next local into that register. */
1920 if (tmp1next)
1921 {
1922 if (!tmp1empty)
1923 {
1924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1925 stackptr += sizeof(sljit_sw);
1926 }
1927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1928 tmp1empty = FALSE;
1929 tmp1next = FALSE;
1930 }
1931 else
1932 {
1933 if (!tmp2empty)
1934 {
1935 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1936 stackptr += sizeof(sljit_sw);
1937 }
1938 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1939 tmp2empty = FALSE;
1940 tmp1next = TRUE;
1941 }
1942 }
1943 else
1944 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack while words remain. */
1945 if (tmp1next)
1946 {
1947 SLJIT_ASSERT(!tmp1empty);
1948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1949 tmp1empty = stackptr >= stacktop;
1950 if (!tmp1empty)
1951 {
1952 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1953 stackptr += sizeof(sljit_sw);
1954 }
1955 tmp1next = FALSE;
1956 }
1957 else
1958 {
1959 SLJIT_ASSERT(!tmp2empty);
1960 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1961 tmp2empty = stackptr >= stacktop;
1962 if (!tmp2empty)
1963 {
1964 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1965 stackptr += sizeof(sljit_sw);
1966 }
1967 tmp1next = TRUE;
1968 }
1969 }
1970 }
1971 }
1972 while (status != end);
1973
1974 if (save)
1975 {
/* Write out the words still pending in the registers; the register which
would be used next holds the older word, so it is stored first. */
1976 if (tmp1next)
1977 {
1978 if (!tmp1empty)
1979 {
1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1981 stackptr += sizeof(sljit_sw);
1982 }
1983 if (!tmp2empty)
1984 {
1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1986 stackptr += sizeof(sljit_sw);
1987 }
1988 }
1989 else
1990 {
1991 if (!tmp2empty)
1992 {
1993 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1994 stackptr += sizeof(sljit_sw);
1995 }
1996 if (!tmp1empty)
1997 {
1998 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1999 stackptr += sizeof(sljit_sw);
2000 }
2001 }
2002 }
2003 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2004 }
2005
2006 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
2007 {
2008 pcre_uchar *end = bracketend(cc);
2009 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2010
2011 /* Assert captures then. */
2012 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2013 current_offset = NULL;
2014 /* Conditional block does not. */
2015 if (*cc == OP_COND || *cc == OP_SCOND)
2016 has_alternatives = FALSE;
2017
2018 cc = next_opcode(common, cc);
2019 if (has_alternatives)
2020 current_offset = common->then_offsets + (cc - common->start);
2021
2022 while (cc < end)
2023 {
2024 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2025 cc = set_then_offsets(common, cc, current_offset);
2026 else
2027 {
2028 if (*cc == OP_ALT && has_alternatives)
2029 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2030 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2031 *current_offset = 1;
2032 cc = next_opcode(common, cc);
2033 }
2034 }
2035
2036 return end;
2037 }
2038
/* The case-label helper macros are not used past this point. */
2039 #undef CASE_ITERATOR_PRIVATE_DATA_1
2040 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2041 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2042 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2043 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2044 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2045
2046 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2047 {
2048 return (value & (value - 1)) == 0;
2049 }
2050
2051 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2052 {
2053 while (list)
2054 {
2055 /* sljit_set_label is clever enough to do nothing
2056 if either the jump or the label is NULL. */
2057 SET_LABEL(list->jump, label);
2058 list = list->next;
2059 }
2060 }
2061
2062 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2063 {
2064 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2065 if (list_item)
2066 {
2067 list_item->next = *list;
2068 list_item->jump = jump;
2069 *list = list_item;
2070 }
2071 }
2072
2073 static void add_stub(compiler_common *common, struct sljit_jump *start)
2074 {
2075 DEFINE_COMPILER;
2076 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2077
2078 if (list_item)
2079 {
2080 list_item->start = start;
2081 list_item->quit = LABEL();
2082 list_item->next = common->stubs;
2083 common->stubs = list_item;
2084 }
2085 }
2086
2087 static void flush_stubs(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090 stub_list *list_item = common->stubs;
2091
2092 while (list_item)
2093 {
2094 JUMPHERE(list_item->start);
2095 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2096 JUMPTO(SLJIT_JUMP, list_item->quit);
2097 list_item = list_item->next;
2098 }
2099 common->stubs = NULL;
2100 }
2101
2102 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2103 {
2104 DEFINE_COMPILER;
2105 label_addr_list *label_addr;
2106
2107 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2108 if (label_addr == NULL)
2109 return;
2110 label_addr->label = LABEL();
2111 label_addr->update_addr = update_addr;
2112 label_addr->next = common->label_addrs;
2113 common->label_addrs = label_addr;
2114 }
2115
2116 static SLJIT_INLINE void count_match(compiler_common *common)
2117 {
2118 DEFINE_COMPILER;
2119
2120 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2121 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2122 }
2123
2124 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2125 {
2126 /* May destroy all locals and registers except TMP2. */
2127 DEFINE_COMPILER;
2128
2129 SLJIT_ASSERT(size > 0);
2130 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2131 #ifdef DESTROY_REGISTERS
2132 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2133 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2134 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2137 #endif
2138 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2139 }
2140
/* Emits the code which lowers STACK_TOP by size machine words; size must be
positive. */
2141 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2142 {
2143 DEFINE_COMPILER;
2144
2145 SLJIT_ASSERT(size > 0);
2146 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2147 }
2148
2149 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2150 {
2151 DEFINE_COMPILER;
2152 sljit_uw *result;
2153
2154 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2155 return NULL;
2156
2157 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2158 if (SLJIT_UNLIKELY(result == NULL))
2159 {
2160 sljit_set_compiler_memory_error(compiler);
2161 return NULL;
2162 }
2163
2164 *(void**)result = common->read_only_data_head;
2165 common->read_only_data_head = (void *)result;
2166 return result + 1;
2167 }
2168
2169 static void free_read_only_data(void *current, void *allocator_data)
2170 {
2171 void *next;
2172
2173 SLJIT_UNUSED_ARG(allocator_data);
2174
2175 while (current != NULL)
2176 {
2177 next = *(void**)current;
2178 SLJIT_FREE(current, allocator_data);
2179 current = next;
2180 }
2181 }
2182
/* Emits the code which fills OVECTOR(1) .. OVECTOR(length - 1) with the
"begin - 1 character" value computed from the jit_arguments structure.
Fewer than eight entries are written by separate stores, otherwise a counted
loop is generated. */
2183 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2184 {
2185 DEFINE_COMPILER;
2186 struct sljit_label *loop;
2187 int i;
2188
2189 /* At this point we can freely use all temporary registers. */
2190 SLJIT_ASSERT(length > 1);
2191 /* TMP1 returns with begin - 1. */
2192 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2193 if (length < 8)
2194 {
2195 for (i = 1; i < length; i++)
2196 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2197 }
2198 else
2199 {
/* SLJIT_R1 is the write pointer and SLJIT_R2 counts the length - 1 stores.
NOTE(review): SLJIT_MOVU presumably advances SLJIT_R1 by the given offset
before each store - confirm against the SLJIT documentation. */
2200 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2201 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2202 loop = LABEL();
2203 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2204 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2205 JUMPTO(SLJIT_NOT_ZERO, loop);
2206 }
2207 }
2208
/* Emits the code which resets the match state: OVECTOR(2) ..
OVECTOR(length - 1) are overwritten with the value held in OVECTOR(1), the
mark and control head locals (when present) are cleared, STACK_TOP is
reloaded from the base field of the stack referenced by the jit_arguments
structure, and TMP1 is loaded from common->start_ptr. */
2209 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2210 {
2211 DEFINE_COMPILER;
2212 struct sljit_label *loop;
2213 int i;
2214
2215 SLJIT_ASSERT(length > 1);
2216 /* OVECTOR(1) contains the "string begin - 1" constant. */
2217 if (length > 2)
2218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2219 if (length < 8)
2220 {
2221 for (i = 2; i < length; i++)
2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2223 }
2224 else
2225 {
/* TMP2 is the write pointer; STACK_TOP is borrowed as the loop counter and
is reloaded after the loop. */
2226 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2227 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2228 loop = LABEL();
2229 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2230 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2231 JUMPTO(SLJIT_NOT_ZERO, loop);
2232 }
2233
/* STACK_TOP is loaded in three steps: arguments, arguments->stack, then the
base field of that stack. */
2234 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2235 if (common->mark_ptr != 0)
2236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2237 if (common->control_head_ptr != 0)
2238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2239 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2241 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2242 }
2243
2244 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2245 {
2246 while (current != NULL)
2247 {
2248 switch (current[-2])
2249 {
2250 case type_then_trap:
2251 break;
2252
2253 case type_mark:
2254 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2255 return current[-4];
2256 break;
2257
2258 default:
2259 SLJIT_ASSERT_STOP();
2260 break;
2261 }
2262 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2263 current = (sljit_sw*)current[-1];
2264 }
2265 return -1;
2266 }
2267
/* Emits the code which finishes a successful match: STR_PTR is stored into
OVECTOR(1), the mark value (when marks are used) is stored into the mark_ptr
field of the jit_arguments structure, and offset_count entries of the local
ovector are converted to offsets relative to the begin field and written as
int values to the offsets array. The return register is set to 1 when
topbracket is not greater than one, otherwise to a count computed by
scanning the local ovector downwards from topbracket. */
2268 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2269 {
2270 DEFINE_COMPILER;
2271 struct sljit_label *loop;
2272 struct sljit_jump *early_quit;
2273
2274 /* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the old OVECTOR(1) value; the scan at the end compares
entries against it. */
2275 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2277
2278 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2279 if (common->mark_ptr != 0)
2280 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2281 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2282 if (common->mark_ptr != 0)
2283 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2 starts one int before the offsets array, since the store in the
loop uses an offset of sizeof(int). */
2284 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2285 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2286 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2287 /* Unlikely, but possible */
2288 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2289 loop = LABEL();
/* Local entry minus begin gives a byte distance; the shift below turns it
into a character count in the 16 and 32 bit libraries. */
2290 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2291 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2292 /* Copy the integer value to the output buffer */
2293 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2294 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2295 #endif
2296 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2297 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2298 JUMPTO(SLJIT_NOT_ZERO, loop);
2299 JUMPHERE(early_quit);
2300
2301 /* Calculate the return value, which is the maximum ovector value. */
2302 if (topbracket > 1)
2303 {
/* SLJIT_R1 counts down from topbracket + 1 while the loaded entries are
equal to the value saved in SLJIT_S2. */
2304 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2305 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2306
2307 /* OVECTOR(0) is never equal to SLJIT_S2. */
2308 loop = LABEL();
2309 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2310 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2311 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2312 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2313 }
2314 else
2315 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2316 }
2317
2318 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2319 {
2320 DEFINE_COMPILER;
2321 struct sljit_jump *jump;
2322
2323 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2324 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2325 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2326
2327 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2328 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2329 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2330 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2331
2332 /* Store match begin and end. */
2333 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2334 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2335
2336 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2337 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2338 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2339 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2340 #endif
2341 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2342 JUMPHERE(jump);
2343
2344 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2345 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2346 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2347 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2348 #endif
2349 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2350
2351 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2352 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2353 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2354 #endif
2355 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2356
2357 JUMPTO(SLJIT_JUMP, quit);
2358 }
2359
2360 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2361 {
2362 /* May destroy TMP1. */
2363 DEFINE_COMPILER;
2364 struct sljit_jump *jump;
2365
2366 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2367 {
2368 /* The value of -1 must be kept for start_used_ptr! */
2369 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2370 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2371 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2372 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2373 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2374 JUMPHERE(jump);
2375 }
2376 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2377 {
2378 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2379 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2380 JUMPHERE(jump);
2381 }
2382 }
2383
2384 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2385 {
2386 /* Detects if the character has an othercase. */
2387 unsigned int c;
2388
2389 #ifdef SUPPORT_UTF
2390 if (common->utf)
2391 {
2392 GETCHAR(c, cc);
2393 if (c > 127)
2394 {
2395 #ifdef SUPPORT_UCP
2396 return c != UCD_OTHERCASE(c);
2397 #else
2398 return FALSE;
2399 #endif
2400 }
2401 #ifndef COMPILE_PCRE8
2402 return common->fcc[c] != c;
2403 #endif
2404 }
2405 else
2406 #endif
2407 c = *cc;
2408 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2409 }
2410
2411 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2412 {
2413 /* Returns with the othercase. */
2414 #ifdef SUPPORT_UTF
2415 if (common->utf && c > 127)
2416 {
2417 #ifdef SUPPORT_UCP
2418 return UCD_OTHERCASE(c);
2419 #else
2420 return c;
2421 #endif
2422 }
2423 #endif
2424 return TABLE_GET(c, common->fcc, c);
2425 }
2426
2427 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2428 {
2429 /* Detects if the character and its othercase has only 1 bit difference. */
2430 unsigned int c, oc, bit;
2431 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2432 int n;
2433 #endif
2434
2435 #ifdef SUPPORT_UTF
2436 if (common->utf)
2437 {
2438 GETCHAR(c, cc);
2439 if (c <= 127)
2440 oc = common->fcc[c];
2441 else
2442 {
2443 #ifdef SUPPORT_UCP
2444 oc = UCD_OTHERCASE(c);
2445 #else
2446 oc = c;
2447 #endif
2448 }
2449 }
2450 else
2451 {
2452 c = *cc;
2453 oc = TABLE_GET(c, common->fcc, c);
2454 }
2455 #else
2456 c = *cc;
2457 oc = TABLE_GET(c, common->fcc, c);
2458 #endif
2459
2460 SLJIT_ASSERT(c != oc);
2461
2462 bit = c ^ oc;
2463 /* Optimized for English alphabet. */
2464 if (c <= 127 && bit == 0x20)
2465 return (0 << 8) | 0x20;
2466
2467 /* Since c != oc, they must have at least 1 bit difference. */
2468 if (!is_powerof2(bit))
2469 return 0;
2470
2471 #if defined COMPILE_PCRE8
2472
2473 #ifdef SUPPORT_UTF
2474 if (common->utf && c > 127)
2475 {
2476 n = GET_EXTRALEN(*cc);
2477 while ((bit & 0x3f) == 0)
2478 {
2479 n--;
2480 bit >>= 6;
2481 }
2482 return (n << 8) | bit;
2483 }
2484 #endif /* SUPPORT_UTF */
2485 return (0 << 8) | bit;
2486
2487 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2488
2489 #ifdef SUPPORT_UTF
2490 if (common->utf && c > 65535)
2491 {
2492 if (bit >= (1 << 10))
2493 bit >>= 10;
2494 else
2495 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2496 }
2497 #endif /* SUPPORT_UTF */
2498 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2499
2500 #endif /* COMPILE_PCRE[8|16|32] */
2501 }
2502
2503 static void check_partial(compiler_common *common, BOOL force)
2504 {
2505 /* Checks whether a partial matching is occurred. Does not modify registers. */
2506 DEFINE_COMPILER;
2507 struct sljit_jump *jump = NULL;
2508
2509 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2510
2511 if (common->mode == JIT_COMPILE)
2512 return;
2513
2514 if (!force)
2515 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2516 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2517 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2518
2519 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2520 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2521 else
2522 {
2523 if (common->partialmatchlabel != NULL)
2524 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2525 else
2526 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2527 }
2528
2529 if (jump != NULL)
2530 JUMPHERE(jump);
2531 }
2532
2533 static void check_str_end(compiler_common *common, jump_list **end_reached)
2534 {
2535 /* Does not affect registers. Usually used in a tight spot. */
2536 DEFINE_COMPILER;
2537 struct sljit_jump *jump;
2538
2539 if (common->mode == JIT_COMPILE)
2540 {
2541 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2542 return;
2543 }
2544
2545 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2546 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2547 {
2548 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2550 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2551 }
2552 else
2553 {
2554 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2555 if (common->partialmatchlabel != NULL)
2556 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2557 else
2558 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2559 }
2560 JUMPHERE(jump);
2561 }
2562
2563 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2564 {
2565 DEFINE_COMPILER;
2566 struct sljit_jump *jump;
2567
2568 if (common->mode == JIT_COMPILE)
2569 {
2570 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2571 return;
2572 }
2573
2574 /* Partial matching mode. */
2575 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2576 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2577 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2578 {
2579 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2580 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2581 }
2582 else
2583 {
2584 if (common->partialmatchlabel != NULL)
2585 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2586 else
2587 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2588 }
2589 JUMPHERE(jump);
2590 }
2591
2592 static void peek_char(compiler_common *common, sljit_ui max)
2593 {
2594 /* Reads the character into TMP1, keeps STR_PTR.
2595 Does not check STR_END. TMP2 Destroyed. */
2596 DEFINE_COMPILER;
2597 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2598 struct sljit_jump *jump;
2599 #endif
2600
2601 SLJIT_UNUSED_ARG(max);
2602
2603 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2604 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2605 if (common->utf)
2606 {
2607 if (max < 128) return;
2608
2609 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2610 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2611 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2612 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2613 JUMPHERE(jump);
2614 }
2615 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2616
2617 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2618 if (common->utf)
2619 {
2620 if (max < 0xd800) return;
2621
2622 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2623 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2624 /* TMP2 contains the high surrogate. */
2625 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2626 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2627 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2628 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2629 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2630 JUMPHERE(jump);
2631 }
2632 #endif
2633 }
2634
2635 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2636
2637 static BOOL is_char7_bitset(const sljit_ub *bitset, BOOL nclass)
2638 {
2639 /* Tells whether the character codes below 128 are enough
2640 to determine a match. */
2641 const sljit_ub value = nclass ? 0xff : 0;
2642 const sljit_ub *end = bitset + 32;
2643
2644 bitset += 16;
2645 do
2646 {
2647 if (*bitset++ != value)
2648 return FALSE;
2649 }
2650 while (bitset < end);
2651 return TRUE;
2652 }
2653
2654 static void read_char7_type(compiler_common *common, BOOL full_read)
2655 {
2656 /* Reads the precise character type of a character into TMP1, if the character
2657 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2658 full_read argument tells whether characters above max are accepted or not. */
2659 DEFINE_COMPILER;
2660 struct sljit_jump *jump;
2661
2662 SLJIT_ASSERT(common->utf);
2663
2664 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2665 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2666
2667 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2668
2669 if (full_read)
2670 {
2671 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2672 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2674 JUMPHERE(jump);
2675 }
2676 }
2677
2678 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2679
2680 static void read_char_range(compiler_common *common, sljit_ui min, sljit_ui max, BOOL update_str_ptr)
2681 {
2682 /* Reads the precise value of a character into TMP1, if the character is
2683 between min and max (c >= min && c <= max). Otherwise it returns with a value
2684 outside the range. Does not check STR_END. */
2685 DEFINE_COMPILER;
2686 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2687 struct sljit_jump *jump;
2688 #endif
2689 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2690 struct sljit_jump *jump2;
2691 #endif
2692
2693 SLJIT_UNUSED_ARG(update_str_ptr);
2694 SLJIT_UNUSED_ARG(min);
2695 SLJIT_UNUSED_ARG(max);
2696 SLJIT_ASSERT(min <= max);
2697
2698 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2699 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2700
2701 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2702 if (common->utf)
2703 {
2704 if (max < 128 && !update_str_ptr) return;
2705
2706 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2707 if (min >= 0x10000)
2708 {
2709 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2710 if (update_str_ptr)
2711 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2712 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2713 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2714 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2715 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2716 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2717 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2718 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2719 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2720 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2721 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2722 if (!update_str_ptr)
2723 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2724 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2725 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2726 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2727 JUMPHERE(jump2);
2728 if (update_str_ptr)
2729 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2730 }
2731 else if (min >= 0x800 && max <= 0xffff)
2732 {
2733 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2734 if (update_str_ptr)
2735 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2736 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2737 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2738 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2739 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2740 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2741 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2742 if (!update_str_ptr)
2743 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2744 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2745 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2746 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2747 JUMPHERE(jump2);
2748 if (update_str_ptr)
2749 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2750 }
2751 else if (max >= 0x800)
2752 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2753 else if (max < 128)
2754 {
2755 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2756 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2757 }
2758 else
2759 {
2760 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2761 if (!update_str_ptr)
2762 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2763 else
2764 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2765 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2766 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2767 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2768 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2769 if (update_str_ptr)
2770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2771 }
2772 JUMPHERE(jump);
2773 }
2774 #endif
2775
2776 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2777 if (common->utf)
2778 {
2779 if (max >= 0x10000)
2780 {
2781 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2782 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2783 /* TMP2 contains the high surrogate. */
2784 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2785 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2786 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2787 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2788 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2789 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2790 JUMPHERE(jump);
2791 return;
2792 }
2793
2794 if (max < 0xd800 && !update_str_ptr) return;
2795
2796 /* Skip low surrogate if necessary. */
2797 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2798 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2799 if (update_str_ptr)
2800 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2801 if (max >= 0xd800)
2802 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2803 JUMPHERE(jump);
2804 }
2805 #endif
2806 }
2807
2808 static SLJIT_INLINE void read_char(compiler_common *common)
2809 {
2810 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2811 }
2812
2813 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2814 {
2815 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2816 DEFINE_COMPILER;
2817 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2818 struct sljit_jump *jump;
2819 #endif
2820 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2821 struct sljit_jump *jump2;
2822 #endif
2823
2824 SLJIT_UNUSED_ARG(update_str_ptr);
2825
2826 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2827 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828
2829 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2830 if (common->utf)
2831 {
2832 /* This can be an extra read in some situations, but hopefully
2833 it is needed in most cases. */
2834 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2835 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2836 if (!update_str_ptr)
2837 {
2838 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2839 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2840 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2841 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2842 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2843 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2844 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2845 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2846 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2847 JUMPHERE(jump2);
2848 }
2849 else
2850 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2851 JUMPHERE(jump);
2852 return;
2853 }
2854 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2855
2856 #if !defined COMPILE_PCRE8
2857 /* The ctypes array contains only 256 values. */
2858 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2859 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2860 #endif
2861 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2862 #if !defined COMPILE_PCRE8
2863 JUMPHERE(jump);
2864 #endif
2865
2866 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2867 if (common->utf && update_str_ptr)
2868 {
2869 /* Skip low surrogate if necessary. */
2870 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2871 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2872 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2873 JUMPHERE(jump);
2874 }
2875 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2876 }
2877
2878 static void skip_char_back(compiler_common *common)
2879 {
2880 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2881 DEFINE_COMPILER;
2882 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2883 #if defined COMPILE_PCRE8
2884 struct sljit_label *label;
2885
2886 if (common->utf)
2887 {
2888 label = LABEL();
2889 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2890 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2891 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2892 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2893 return;
2894 }
2895 #elif defined COMPILE_PCRE16
2896 if (common->utf)
2897 {
2898 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2899 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 /* Skip low surrogate if necessary. */
2901 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2902 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2903 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2904 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2905 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2906 return;
2907 }
2908 #endif /* COMPILE_PCRE[8|16] */
2909 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2910 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2911 }
2912
2913 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2914 {
2915 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2916 DEFINE_COMPILER;
2917 struct sljit_jump *jump;
2918
2919 if (nltype == NLTYPE_ANY)
2920 {
2921 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2922 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2923 }
2924 else if (nltype == NLTYPE_ANYCRLF)
2925 {
2926 if (jumpifmatch)
2927 {
2928 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2929 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2930 }
2931 else
2932 {
2933 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2934 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2935 JUMPHERE(jump);
2936 }
2937 }
2938 else
2939 {
2940 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2941 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2942 }
2943 }
2944
2945 #ifdef SUPPORT_UTF
2946
2947 #if defined COMPILE_PCRE8
2948 static void do_utfreadchar(compiler_common *common)
2949 {
2950 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2951 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2952 DEFINE_COMPILER;
2953 struct sljit_jump *jump;
2954
2955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2956 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2957 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2958 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2959 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2960 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2961
2962 /* Searching for the first zero. */
2963 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2964 jump = JUMP(SLJIT_NOT_ZERO);
2965 /* Two byte sequence. */
2966 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2967 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2968 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2969
2970 JUMPHERE(jump);
2971 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2972 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2973 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976
2977 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2978 jump = JUMP(SLJIT_NOT_ZERO);
2979 /* Three byte sequence. */
2980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2981 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2982 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2983
2984 /* Four byte sequence. */
2985 JUMPHERE(jump);
2986 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2987 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2988 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2989 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2990 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2991 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2992 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2993 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2994 }
2995
2996 static void do_utfreadchar16(compiler_common *common)
2997 {
2998 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2999 of the character (>= 0xc0). Return value in TMP1. */
3000 DEFINE_COMPILER;
3001 struct sljit_jump *jump;
3002
3003 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3004 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3005 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3006 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3007 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3008 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3009
3010 /* Searching for the first zero. */
3011 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3012 jump = JUMP(SLJIT_NOT_ZERO);
3013 /* Two byte sequence. */
3014 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3015 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3016
3017 JUMPHERE(jump);
3018 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3019 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3020 /* This code runs only in 8 bit mode. No need to shift the value. */
3021 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3022 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3023 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3024 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3025 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3026 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3027 /* Three byte sequence. */
3028 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3030 }
3031
3032 static void do_utfreadtype8(compiler_common *common)
3033 {
3034 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3035 of the character (>= 0xc0). Return value in TMP1. */
3036 DEFINE_COMPILER;
3037 struct sljit_jump *jump;
3038 struct sljit_jump *compare;
3039
3040 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3041
3042 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3043 jump = JUMP(SLJIT_NOT_ZERO);
3044 /* Two byte sequence. */
3045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3046 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3047 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3048 /* The upper 5 bits are known at this point. */
3049 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3050 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3051 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3052 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3053 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3054 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3055
3056 JUMPHERE(compare);
3057 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3058 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3059
3060 /* We only have types for characters less than 256. */
3061 JUMPHERE(jump);
3062 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3063 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3064 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3065 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3066 }
3067
3068 #endif /* COMPILE_PCRE8 */
3069
3070 #endif /* SUPPORT_UTF */
3071
3072 #ifdef SUPPORT_UCP
3073
3074 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3075 #define UCD_BLOCK_MASK 127
3076 #define UCD_BLOCK_SHIFT 7
3077
3078 static void do_getucd(compiler_common *common)
3079 {
3080 /* Search the UCD record for the character comes in TMP1.
3081 Returns chartype in TMP1 and UCD offset in TMP2. */
3082 DEFINE_COMPILER;
3083
3084 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3085
3086 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3087 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3088 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3089 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3090 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3091 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3092 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3093 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3095 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3096 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3097 }
3098 #endif
3099
/* Emits the entry sequence of the main matching loop.

The returned label (mainloop) marks the code which advances STR_PTR by one
character; the first entry jumps over that code (see "start" below), so the
initial position is tried without advancing.

Arguments:
  common      compiler state (newline type, UTF mode, first_line_end slot)
  hascrorlf   together with firstline, disables the two unit newline skip
  firstline   when TRUE, the end of the first line is located first and
              stored in the stack slot at common->first_line_end

Returns the label to jump to for advancing to the next start position. */
3100 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3101 {
3102 DEFINE_COMPILER;
3103 struct sljit_label *mainloop;
3104 struct sljit_label *newlinelabel = NULL;
3105 struct sljit_jump *start;
3106 struct sljit_jump *end = NULL;
3107 struct sljit_jump *nl = NULL;
3108 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3109 struct sljit_jump *singlechar;
3110 #endif
3111 jump_list *newline = NULL;
3112 BOOL newlinecheck = FALSE;
3113 BOOL readuchar = FALSE;
3114
/* The extra newline check is only emitted when neither hascrorlf nor
firstline is set, and the newline is ANY, ANYCRLF or a two unit sequence. */
3115 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3116 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3117 newlinecheck = TRUE;
3118
3119 if (firstline)
3120 {
3121 /* Search for the end of the first line. */
3122 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is used as the scan pointer below; TMP3 keeps its original value. */
3123 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3124
3125 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3126 {
/* Two unit fixed newline: compare the units at STR_PTR - 1 and STR_PTR
with the high and low byte of common->newline. */
3127 mainloop = LABEL();
3128 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3129 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3130 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3131 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3132 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3133 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3134 JUMPHERE(end);
/* Store STR_PTR - 1 unit: the position of the first newline unit, or the
last unit before STR_END when the loop ran off the end. */
3135 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3136 }
3137 else
3138 {
3139 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3140 mainloop = LABEL();
3141 /* Continual stores do not cause data dependency. */
3142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3143 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3144 check_newlinechar(common, common->nltype, &newline, TRUE);
3145 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3146 JUMPHERE(end);
/* No newline was found: the end of the subject is stored instead. The
jumps collected in "newline" land after this store, so they keep the
position stored before the newline character was read. */
3147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3148 set_jumps(newline, LABEL());
3149 }
3150
/* Restore the start position saved above. */
3151 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3152 }
3153
/* The first entry skips the pointer advancing code emitted below. */
3154 start = JUMP(SLJIT_JUMP);
3155
3156 if (newlinecheck)
3157 {
/* Reached from the main loop when the current unit equals the high byte
of common->newline. One unit is skipped, and a second one is skipped as
well when the next unit equals the low byte of common->newline. */
3158 newlinelabel = LABEL();
3159 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3160 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3161 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3162 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
/* TMP1 becomes 1 on equality, 0 otherwise; it is scaled to a byte offset
for the 16 and 32 bit libraries before being added to STR_PTR. */
3163 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3164 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3165 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3166 #endif
3167 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3168 nl = JUMP(SLJIT_JUMP);
3169 }
3170
/* This is the label returned to the caller. */
3171 mainloop = LABEL();
3172
3173 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when it is needed: for the UTF length
computation or for the newline check. */
3174 #ifdef SUPPORT_UTF
3175 if (common->utf) readuchar = TRUE;
3176 #endif
3177 if (newlinecheck) readuchar = TRUE;
3178
3179 if (readuchar)
3180 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3181
3182 if (newlinecheck)
3183 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3184
3185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3186 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3187 #if defined COMPILE_PCRE8
3188 if (common->utf)
3189 {
/* Units below 0xc0 need no further advance. Otherwise the value read from
PRIV(utf8_table4), indexed by (unit - 0xc0), is added to STR_PTR. */
3190 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3191 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3192 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3193 JUMPHERE(singlechar);
3194 }
3195 #elif defined COMPILE_PCRE16
3196 if (common->utf)
3197 {
/* Units below 0xd800 need no further advance. Otherwise two more bytes
(one 16 bit unit) are skipped when (unit & 0xfc00) == 0xd800. */
3198 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3199 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3200 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3201 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3202 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3203 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3204 JUMPHERE(singlechar);
3205 }
3206 #endif /* COMPILE_PCRE[8|16] */
3207 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3208 JUMPHERE(start);
3209
3210 if (newlinecheck)
3211 {
/* Both exits of the newline skipping code continue here. */
3212 JUMPHERE(end);
3213 JUMPHERE(nl);
3214 }
3215
3216 return mainloop;
3217 }
3218
3219 #define MAX_N_CHARS 16
3220 #define MAX_N_BYTES 8
3221
3222 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3223 {
3224 pcre_uint8 len = bytes[0];
3225 int i;
3226
3227 if (len == 255)
3228 return;
3229
3230 if (len == 0)
3231 {
3232 bytes[0] = 1;
3233 bytes[1] = byte;
3234 return;
3235 }
3236
3237 for (i = len; i > 0; i--)
3238 if (bytes[i] == byte)
3239 return;
3240
3241 if (len >= MAX_N_BYTES - 1)
3242 {
3243 bytes[0] = 255;
3244 return;
3245 }
3246
3247 len++;
3248 bytes[len] = byte;
3249 bytes[0] = len;
3250 }
3251
/* Collects information about the characters a match must start with.

Arguments:
  common      compiler state (UTF mode, character tables)
  cc          current position in the compiled pattern
  chars       pairs of (character value, mask of differing bits), one pair
              for each prefix position
  bytes       one set of MAX_N_BYTES entries for each prefix position, in
              the format maintained by add_prefix_byte()
  max_chars   maximum number of prefix positions to fill
  rec_count   shared budget, decremented once for each iteration of the
              main loop below, including those in recursive calls

Returns the number of prefix positions processed on this path. Zero is
returned as soon as the budget in rec_count is exhausted. */
3252 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3253 {
3254 /* Recursive function, which scans prefix literals. */
3255 BOOL last, any, caseless;
3256 int len, repeat, len_save, consumed = 0;
3257 pcre_uint32 chr, mask;
3258 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the encoded other case of a character. */
3259 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3260 pcre_uchar othercase[8];
3261 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3262 pcre_uchar othercase[2];
3263 #else
3264 pcre_uchar othercase[1];
3265 #endif
3266
3267 repeat = 1;
3268 while (TRUE)
3269 {
3270 if (*rec_count == 0)
3271 return 0;
3272 (*rec_count)--;
3273
/* last:     stop scanning after the current item has been recorded
   any:      the item is recorded as "every bit may differ"
   caseless: the other case of a literal is recorded as well */
3274 last = TRUE;
3275 any = FALSE;
3276 caseless = FALSE;
3277
3278 switch (*cc)
3279 {
3280 case OP_CHARI:
3281 caseless = TRUE;
/* Fall through. */
3282 case OP_CHAR:
3283 last = FALSE;
3284 cc++;
3285 break;
3286
3287 case OP_SOD:
3288 case OP_SOM:
3289 case OP_SET_SOM:
3290 case OP_NOT_WORD_BOUNDARY:
3291 case OP_WORD_BOUNDARY:
3292 case OP_EODN:
3293 case OP_EOD:
3294 case OP_CIRC:
3295 case OP_CIRCM:
3296 case OP_DOLL:
3297 case OP_DOLLM:
3298 /* Zero width assertions. */
3299 cc++;
3300 continue;
3301
3302 case OP_ASSERT:
3303 case OP_ASSERT_NOT:
3304 case OP_ASSERTBACK:
3305 case OP_ASSERTBACK_NOT:
/* The whole assertion is skipped. */
3306 cc = bracketend(cc);
3307 continue;
3308
3309 case OP_PLUSI:
3310 case OP_MINPLUSI:
3311 case OP_POSPLUSI:
3312 caseless = TRUE;
/* Fall through. */
3313 case OP_PLUS:
3314 case OP_MINPLUS:
3315 case OP_POSPLUS:
/* "last" stays TRUE: one occurrence is recorded, then scanning stops. */
3316 cc++;
3317 break;
3318
3319 case OP_EXACTI:
3320 caseless = TRUE;
/* Fall through. */
3321 case OP_EXACT:
3322 repeat = GET2(cc, 1);
3323 last = FALSE;
3324 cc += 1 + IMM2_SIZE;
3325 break;
3326
3327 case OP_QUERYI:
3328 case OP_MINQUERYI:
3329 case OP_POSQUERYI:
3330 caseless = TRUE;
/* Fall through. */
3331 case OP_QUERY:
3332 case OP_MINQUERY:
3333 case OP_POSQUERY:
3334 len = 1;
3335 cc++;
3336 #ifdef SUPPORT_UTF
3337 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3338 #endif
/* The code following the optional character is scanned first. The value
returned by that scan replaces max_chars, so the rest of this path is
limited to the number of positions processed there. */
3339 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3340 if (max_chars == 0)
3341 return consumed;
3342 last = FALSE;
3343 break;
3344
3345 case OP_KET:
3346 cc += 1 + LINK_SIZE;
3347 continue;
3348
3349 case OP_ALT:
/* Jump over the alternative using its link. */
3350 cc += GET(cc, 1);
3351 continue;
3352
3353 case OP_ONCE:
3354 case OP_ONCE_NC:
3355 case OP_BRA:
3356 case OP_BRAPOS:
3357 case OP_CBRA:
3358 case OP_CBRAPOS:
/* Every OP_ALT branch is scanned recursively, each return value becoming
the new max_chars. The first branch is then scanned by this loop. */
3359 alternative = cc + GET(cc, 1);
3360 while (*alternative == OP_ALT)
3361 {
3362 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3363 if (max_chars == 0)
3364 return consumed;
3365 alternative += GET(alternative, 1);
3366 }
3367
3368 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3369 cc += IMM2_SIZE;
3370 cc += 1 + LINK_SIZE;
3371 continue;
3372
3373 case OP_CLASS:
3374 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3375 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3376 #endif
3377 any = TRUE;
/* Skip the opcode and the 32 byte bit map. */
3378 cc += 1 + 32 / sizeof(pcre_uchar);
3379 break;
3380
3381 case OP_NCLASS:
3382 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3383 if (common->utf) return consumed;
3384 #endif
3385 any = TRUE;
3386 cc += 1 + 32 / sizeof(pcre_uchar);
3387 break;
3388
3389 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3390 case OP_XCLASS:
3391 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3392 if (common->utf) return consumed;
3393 #endif
3394 any = TRUE;
3395 cc += GET(cc, 1);
3396 break;
3397 #endif
3398
3399 case OP_DIGIT:
3400 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3401 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3402 return consumed;
3403 #endif
3404 any = TRUE;
3405 cc++;
3406 break;
3407
3408 case OP_WHITESPACE:
3409 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3410 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3411 return consumed;
3412 #endif
3413 any = TRUE;
3414 cc++;
3415 break;
3416
3417 case OP_WORDCHAR:
3418 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3419 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3420 return consumed;
3421 #endif
3422 any = TRUE;
3423 cc++;
3424 break;
3425
3426 case OP_NOT:
3427 case OP_NOTI:
/* Together with the increment after the fall through, this skips the
opcode and one code unit. */
3428 cc++;
3429 /* Fall through. */
3430 case OP_NOT_DIGIT:
3431 case OP_NOT_WHITESPACE:
3432 case OP_NOT_WORDCHAR:
3433 case OP_ANY:
3434 case OP_ALLANY:
3435 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3436 if (common->utf) return consumed;
3437 #endif
3438 any = TRUE;
3439 cc++;
3440 break;
3441
3442 #ifdef SUPPORT_UCP
3443 case OP_NOTPROP:
3444 case OP_PROP:
3445 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3446 if (common->utf) return consumed;
3447 #endif
3448 any = TRUE;
3449 cc += 1 + 2;
3450 break;
3451 #endif
3452
3453 case OP_TYPEEXACT:
/* Only the repeat count is set here; it is applied to the item processed
by the next iteration. */
3454 repeat = GET2(cc, 1);
3455 cc += 1 + IMM2_SIZE;
3456 continue;
3457
3458 case OP_NOTEXACT:
3459 case OP_NOTEXACTI:
3460 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3461 if (common->utf) return consumed;
3462 #endif
3463 any = TRUE;
3464 repeat = GET2(cc, 1);
3465 cc += 1 + IMM2_SIZE + 1;
3466 break;
3467
3468 default:
/* Any other opcode ends the prefix. */
3469 return consumed;
3470 }
3471
3472 if (any)
3473 {
/* Every bit of a code unit is set in the mask. */
3474 #if defined COMPILE_PCRE8
3475 mask = 0xff;
3476 #elif defined COMPILE_PCRE16
3477 mask = 0xffff;
3478 #elif defined COMPILE_PCRE32
3479 mask = 0xffffffff;
3480 #else
3481 SLJIT_ASSERT_STOP();
3482 #endif
3483
/* One position is recorded for each repetition: value and mask are both
set to the full mask, and the byte set is marked as untracked (255). */
3484 do
3485 {
3486 chars[0] = mask;
3487 chars[1] = mask;
3488 bytes[0] = 255;
3489
3490 consumed++;
3491 if (--max_chars == 0)
3492 return consumed;
3493 chars += 2;
3494 bytes += MAX_N_BYTES;
3495 }
3496 while (--repeat > 0);
3497
3498 repeat = 1;
3499 continue;
3500 }
3501
/* From here on cc points to a literal character; len is its length in
code units. */
3502 len = 1;
3503 #ifdef SUPPORT_UTF
3504 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3505 #endif
3506
3507 if (caseless && char_has_othercase(common, cc))
3508 {
3509 #ifdef SUPPORT_UTF
3510 if (common->utf)
3511 {
3512 GETCHAR(chr, cc);
/* Scanning stops if the other case is encoded in a different number of
code units. */
3513 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3514 return consumed;
3515 }
3516 else
3517 #endif
3518 {
3519 chr = *cc;
3520 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3521 }
3522 }
3523 else
3524 caseless = FALSE;
3525
/* The literal is recorded "repeat" times; the saved values rewind to its
first code unit before each repetition. */
3526 len_save = len;
3527 cc_save = cc;
3528 while (TRUE)
3529 {
3530 oc = othercase;
3531 do
3532 {
3533 chr = *cc;
3534 #ifdef COMPILE_PCRE32
3535 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3536 return consumed;
3537 #endif
/* Only the low 8 bits of the code unit are added to the byte set. */
3538 add_prefix_byte((pcre_uint8)chr, bytes);
3539
3540 mask = 0;
3541 if (caseless)
3542 {
3543 add_prefix_byte((pcre_uint8)*oc, bytes);
/* The bits which differ between the two cases are set in both the mask
and the stored value. */
3544 mask = *cc ^ *oc;
3545 chr |= mask;
3546 }
3547
/* A position is unused while it holds NOTACHAR (with a zero mask in the
32 bit library). */
3548 #ifdef COMPILE_PCRE32
3549 if (chars[0] == NOTACHAR && chars[1] == 0)
3550 #else
3551 if (chars[0] == NOTACHAR)
3552 #endif
3553 {
3554 chars[0] = chr;
3555 chars[1] = mask;
3556 }
3557 else
3558 {
/* Merge with the value recorded by another path: every bit which differs
is added to the mask and set in the stored value. */
3559 mask |= chars[0] ^ chr;
3560 chr |= mask;
3561 chars[0] = chr;
3562 chars[1] |= mask;
3563 }
3564
3565 len--;
3566 consumed++;
3567 if (--max_chars == 0)
3568 return consumed;
3569 chars += 2;
3570 bytes += MAX_N_BYTES;
3571 cc++;
3572 oc++;
3573 }
3574 while (len > 0);
3575
3576 if (--repeat == 0)
3577 break;
3578
3579 len = len_save;
3580 cc = cc_save;
3581 }
3582
3583 repeat = 1;
3584 if (last)
3585 return consumed;
3586 }
3587 }
3588
3589 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3590
/* Replicates a code unit so that it fills a 32 bit value: four copies in
the 8 bit library, two copies in the 16 bit library, and a single copy in
the 32 bit library.

As a side effect, SSE2_COMPARE_TYPE_INDEX is defined here (0, 1 or 2 for
the 8, 16 and 32 bit library); it is added to opcode 0x74 by the SSE2 code
which follows to select PCMPEQB, PCMPEQW or PCMPEQD.

The replication is computed on an unsigned 32 bit value, because shifting
a set bit into the sign bit of a signed integer is undefined behaviour. */
3591 static sljit_si character_to_int32(pcre_uchar chr)
3592 {
3593 pcre_uint32 value = (pcre_uint32)chr;
3594 #if defined COMPILE_PCRE8
3595 #define SSE2_COMPARE_TYPE_INDEX 0
3596 return (sljit_si)((value << 24) | (value << 16) | (value << 8) | value);
3597 #elif defined COMPILE_PCRE16
3598 #define SSE2_COMPARE_TYPE_INDEX 1
3599 return (sljit_si)((value << 16) | value);
3600 #elif defined COMPILE_PCRE32
3601 #define SSE2_COMPARE_TYPE_INDEX 2
3602 return (sljit_si)value;
3603 #else
3604 #error "Unsupported unit width"
3605 #endif
3606 }
3607
/* Emits an SSE2 based search which advances STR_PTR to the first code unit
equal to char1 or char2, or to a position at or after STR_END when there is
no such unit.

Three strategies are used:
  char1 == char2            one compare against the replicated character
  chars differ in one bit   that bit is set in the loaded data (POR) and
                            one compare is made against (char1 | bit)
  otherwise (load_twice)    the data is loaded twice and compared against
                            both characters, the two bit masks are OR-ed

XMM usage: xmm0 and xmm1 hold the loaded data, xmm2 holds the first
pattern, xmm3 holds the bit or the second pattern.

The first part processes the aligned 16 byte block containing STR_PTR and
discards the matches located before STR_PTR; the second part is a loop over
the following aligned blocks.

TMP1, TMP2 and (when load_twice is set) RETURN_ADDR are overwritten. TMP3
is not touched: fast_forward_first_char2() keeps the saved STR_END there
while this code runs. */
3608 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3609 {
3610 DEFINE_COMPILER;
3611 struct sljit_label *start;
3612 struct sljit_jump *quit[3];
3613 struct sljit_jump *nomatch;
3614 sljit_ub instruction[8];
3615 sljit_si tmp1_ind = sljit_get_register_index(TMP1);
3616 sljit_si tmp2_ind = sljit_get_register_index(TMP2);
3617 sljit_si str_ptr_ind = sljit_get_register_index(STR_PTR);
3618 BOOL load_twice = FALSE;
3619 pcre_uchar bit;
3620
/* bit is non-zero only if the two characters differ in exactly one bit. */
3621 bit = char1 ^ char2;
3622 if (!is_powerof2(bit))
3623 bit = 0;
3624
3625 if ((char1 != char2) && bit == 0)
3626 load_twice = TRUE;
3627
3628 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3629
3630 /* First part (unaligned start) */
3631
3632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3633
/* The ModRM bytes below encode the register indexes directly. */
3634 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3635
3636 /* MOVD xmm, r/m32 */
3637 instruction[0] = 0x66;
3638 instruction[1] = 0x0f;
3639 instruction[2] = 0x6e;
3640 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3641 sljit_emit_op_custom(compiler, instruction, 4);
3642
3643 if (char1 != char2)
3644 {
3645 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3646
3647 /* MOVD xmm, r/m32 */
3648 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3649 sljit_emit_op_custom(compiler, instruction, 4);
3650 }
3651
/* Broadcast the low 32 bits of xmm2 to all four lanes. */
3652 /* PSHUFD xmm1, xmm2/m128, imm8 */
3653 instruction[2] = 0x70;
3654 instruction[3] = 0xc0 | (2 << 3) | 2;
3655 instruction[4] = 0;
3656 sljit_emit_op_custom(compiler, instruction, 5);
3657
3658 if (char1 != char2)
3659 {
3660 /* PSHUFD xmm1, xmm2/m128, imm8 */
3661 instruction[3] = 0xc0 | (3 << 3) | 3;
3662 instruction[4] = 0;
3663 sljit_emit_op_custom(compiler, instruction, 5);
3664 }
3665
/* TMP2 holds the offset of STR_PTR inside its 16 byte block, and STR_PTR
is rounded down to the start of that block. */
3666 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3667 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3668
3669 /* MOVDQA xmm1, xmm2/m128 */
3670 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3671
3672 if (str_ptr_ind < 8)
3673 {
3674 instruction[2] = 0x6f;
3675 instruction[3] = (0 << 3) | str_ptr_ind;
3676 sljit_emit_op_custom(compiler, instruction, 4);
3677
3678 if (load_twice)
3679 {
3680 instruction[3] = (1 << 3) | str_ptr_ind;
3681 sljit_emit_op_custom(compiler, instruction, 4);
3682 }
3683 }
3684 else
3685 {
/* Register indexes 8 and above need the 0x41 prefix byte, and only the
low three bits of the index are stored in the ModRM byte. */
3686 instruction[1] = 0x41;
3687 instruction[2] = 0x0f;
3688 instruction[3] = 0x6f;
3689 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3690 sljit_emit_op_custom(compiler, instruction, 5);
3691
3692 if (load_twice)
3693 {
3694 instruction[4] = (1 << 3) | (str_ptr_ind & 0x7);
3695 sljit_emit_op_custom(compiler, instruction, 5);
3696 }
3697 instruction[1] = 0x0f;
3698 }
3699
3700 #else
3701
3702 instruction[2] = 0x6f;
3703 instruction[3] = (0 << 3) | str_ptr_ind;
3704 sljit_emit_op_custom(compiler, instruction, 4);
3705
3706 if (load_twice)
3707 {
3708 instruction[3] = (1 << 3) | str_ptr_ind;
3709 sljit_emit_op_custom(compiler, instruction, 4);
3710 }
3711
3712 #endif
3713
3714 if (bit != 0)
3715 {
3716 /* POR xmm1, xmm2/m128 */
3717 instruction[2] = 0xeb;
3718 instruction[3] = 0xc0 | (0 << 3) | 3;
3719 sljit_emit_op_custom(compiler, instruction, 4);
3720 }
3721
3722 /* PCMPEQB/W/D xmm1, xmm2/m128 */
3723 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3724 instruction[3] = 0xc0 | (0 << 3) | 2;
3725 sljit_emit_op_custom(compiler, instruction, 4);
3726
3727 if (load_twice)
3728 {
3729 instruction[3] = 0xc0 | (1 << 3) | 3;
3730 sljit_emit_op_custom(compiler, instruction, 4);
3731 }
3732
3733 /* PMOVMSKB reg, xmm */
3734 instruction[2] = 0xd7;
3735 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
3736 sljit_emit_op_custom(compiler, instruction, 4);
3737
3738 if (load_twice)
3739 {
/* TMP2 is needed as the target of the second PMOVMSKB, but the block
offset stored in it is still required below. It is saved in RETURN_ADDR:
TMP3 must not be used, because the caller keeps the original STR_END
there when the first line limit is active. */
3740 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
3741 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
3742 sljit_emit_op_custom(compiler, instruction, 4);
3743
3744 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3745 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
3746 }
3747
/* Drop the mask bits which belong to the bytes before STR_PTR. */
3748 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
3749
3750 /* BSF r32, r/m32 */
3751 instruction[0] = 0x0f;
3752 instruction[1] = 0xbc;
3753 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
3754 sljit_emit_op_custom(compiler, instruction, 3);
3755
/* The zero flag is set by BSF when no mask bit is set. */
3756 nomatch = JUMP(SLJIT_ZERO);
3757
/* Match found: block start + offset inside the block + bit index. */
3758 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3760 quit[1] = JUMP(SLJIT_JUMP);
3761
3762 JUMPHERE(nomatch);
3763
3764 start = LABEL();
3765 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
3766 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3767
3768 /* Second part (aligned) */
3769
3770 instruction[0] = 0x66;
3771 instruction[1] = 0x0f;
3772
3773 /* MOVDQA xmm1, xmm2/m128 */
3774 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3775
3776 if (str_ptr_ind < 8)
3777 {
3778 instruction[2] = 0x6f;
3779 instruction[3] = (0 << 3) | str_ptr_ind;
3780 sljit_emit_op_custom(compiler, instruction, 4);
3781
3782 if (load_twice)
3783 {
3784 instruction[3] = (1 << 3) | str_ptr_ind;
3785 sljit_emit_op_custom(compiler, instruction, 4);
3786 }
3787 }
3788 else
3789 {
3790 instruction[1] = 0x41;
3791 instruction[2] = 0x0f;
3792 instruction[3] = 0x6f;
3793 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3794 sljit_emit_op_custom(compiler, instruction, 5);
3795
3796 if (load_twice)
3797 {
3798 instruction[4] = (1 << 3) | (str_ptr_ind & 0x7);
3799 sljit_emit_op_custom(compiler, instruction, 5);
3800 }
3801 instruction[1] = 0x0f;
3802 }
3803
3804 #else
3805
3806 instruction[2] = 0x6f;
3807 instruction[3] = (0 << 3) | str_ptr_ind;
3808 sljit_emit_op_custom(compiler, instruction, 4);
3809
3810 if (load_twice)
3811 {
3812 instruction[3] = (1 << 3) | str_ptr_ind;
3813 sljit_emit_op_custom(compiler, instruction, 4);
3814 }
3815
3816 #endif
3817
3818 if (bit != 0)
3819 {
3820 /* POR xmm1, xmm2/m128 */
3821 instruction[2] = 0xeb;
3822 instruction[3] = 0xc0 | (0 << 3) | 3;
3823 sljit_emit_op_custom(compiler, instruction, 4);
3824 }
3825
3826 /* PCMPEQB/W/D xmm1, xmm2/m128 */
3827 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3828 instruction[3] = 0xc0 | (0 << 3) | 2;
3829 sljit_emit_op_custom(compiler, instruction, 4);
3830
3831 if (load_twice)
3832 {
3833 instruction[3] = 0xc0 | (1 << 3) | 3;
3834 sljit_emit_op_custom(compiler, instruction, 4);
3835 }
3836
3837 /* PMOVMSKB reg, xmm */
3838 instruction[2] = 0xd7;
3839 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
3840 sljit_emit_op_custom(compiler, instruction, 4);
3841
3842 if (load_twice)
3843 {
/* The block offset in TMP2 is not needed in the aligned part, so TMP2 is
overwritten without saving it. */
3844 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
3845 sljit_emit_op_custom(compiler, instruction, 4);
3846
3847 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3848 }
3849
3850 /* BSF r32, r/m32 */
3851 instruction[0] = 0x0f;
3852 instruction[1] = 0xbc;
3853 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
3854 sljit_emit_op_custom(compiler, instruction, 3);
3855
3856 JUMPTO(SLJIT_ZERO, start);
3857
3858 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3859
/* Common exit of all three quit paths. */
3860 start = LABEL();
3861 SET_LABEL(quit[0], start);
3862 SET_LABEL(quit[1], start);
3863 SET_LABEL(quit[2], start);
3864 }
3865
3866 #undef SSE2_COMPARE_TYPE_INDEX
3867
3868 #endif
3869
/* Emits code which advances STR_PTR to the next position where the code
unit located "offset" units after STR_PTR equals char1 or char2.

Arguments:
  common   compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be equal to char1)
  offset   distance, in code units, of the checked unit from STR_PTR

When common->first_line_end is non-zero, the search is limited by the
first line end stored on the stack; STR_END is saved in TMP3 and restored
before the emitted code finishes. On x86 with SSE2 available the search
itself is emitted by fast_forward_first_char2_sse2(). */
3870 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_si offset)
3871 {
3872 DEFINE_COMPILER;
3873 struct sljit_label *start;
3874 struct sljit_jump *quit;
3875 struct sljit_jump *found;
3876 pcre_uchar mask;
3877 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3878 struct sljit_label *utf_start = NULL;
3879 struct sljit_jump *utf_quit = NULL;
3880 #endif
3881 BOOL has_first_line_end = (common->first_line_end != 0);
3882
/* STR_PTR points to the checked unit during the search; the offset is
subtracted again at the end. */
3883 if (offset > 0)
3884 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
3885
3886 if (has_first_line_end)
3887 {
3888 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3889
/* STR_END = min(first line end + offset + 1, original STR_END). */
3890 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
3891 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3892 if (sljit_x86_is_cmov_available())
3893 {
3894 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
3895 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
3896 }
/* The compare and jump form is only needed when cmov is not available;
without this "else" both forms were emitted one after the other. */
else
3897 #endif
3898 {
3899 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
3900 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3901 JUMPHERE(quit);
3902 }
3903 }
3904
/* The search is restarted from here when a match turns out to belong to
a position inside a multi unit character. */
3905 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3906 if (common->utf && offset > 0)
3907 utf_start = LABEL();
3908 #endif
3909
3910 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3911
3912 /* SSE2 accelerated first character search. */
3913
3914 if (sljit_x86_is_sse2_available())
3915 {
3916 fast_forward_first_char2_sse2(common, char1, char2);
3917
3918 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
3919 if (common->mode == JIT_COMPILE)
3920 {
3921 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
3922 SLJIT_ASSERT(common->forced_quit_label == NULL);
3923 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
3924 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3925
3926 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3927 if (common->utf && offset > 0)
3928 {
3929 SLJIT_ASSERT(common->mode == JIT_COMPILE);
3930
/* Load the unit at the would-be start position. If it is a UTF-8
continuation byte or a UTF-16 low surrogate, continue the search from the
next unit. */
3931 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
3932 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3933 #if defined COMPILE_PCRE8
3934 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3935 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
3936 #elif defined COMPILE_PCRE16
3937 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3938 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
3939 #else
3940 #error "Unknown code width"
3941 #endif
3942 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3943 }
3944 #endif
3945
3946 if (offset > 0)
3947 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
3948 }
3949 else if (sljit_x86_is_cmov_available())
3950 {
/* If the search ran past the limit, STR_PTR is set to the first line end
(or to STR_END when there is no first line limit). */
3951 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
3952 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_first_line_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_first_line_end ? common->first_line_end : 0);
3953 }
3954 else
3955 {
3956 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3957 OP1(SLJIT_MOV, STR_PTR, 0, has_first_line_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_first_line_end ? common->first_line_end : 0);
3958 JUMPHERE(quit);
3959 }
3960
3961 if (has_first_line_end)
3962 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3963 return;
3964 }
3965
3966 #endif
3967
/* Generic search loop, one code unit for each iteration. */
3968 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3969
3970 start = LABEL();
3971 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3972
3973 if (char1 == char2)
3974 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
3975 else
3976 {
3977 mask = char1 ^ char2;
3978 if (is_powerof2(mask))
3979 {
/* The characters differ in a single bit: set it, then compare once. */
3980 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
3981 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
3982 }
3983 else
3984 {
/* Two compares; their results are combined in TMP2. */
3985 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
3986 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3987 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
3988 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3989 found = JUMP(SLJIT_NOT_ZERO);
3990 }
3991 }
3992
3993 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3994 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
3995
/* The end was reached without a match: skip the UTF start position check. */
3996 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3997 if (common->utf && offset > 0)
3998 utf_quit = JUMP(SLJIT_JUMP);
3999 #endif
4000
4001 JUMPHERE(found);
4002
4003 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4004 if (common->utf && offset > 0)
4005 {
/* Same start position check as in the SSE2 path above. */
4006 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4007 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4008 #if defined COMPILE_PCRE8
4009 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4010 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4011 #elif defined COMPILE_PCRE16
4012 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4013 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4014 #else
4015 #error "Unknown code width"
4016 #endif
4017 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4018 JUMPHERE(utf_quit);
4019 }
4020 #endif
4021
4022 JUMPHERE(quit);
4023
4024 if (has_first_line_end)
4025 {
/* If the limit was reached, continue from the first line end (plus the
offset, which is subtracted below), then restore STR_END. */
4026 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4027 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4028 if (offset > 0)
4029 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4030 JUMPHERE(quit);
4031 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4032 }
4033
4034 if (offset > 0)
4035 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4036 }
4037
/* Emits a fast forward search based on the first characters of the pattern.

The prefix is collected by scan_prefix(). Two kinds of checks may be
emitted:
  - a table driven skip based on one byte of the subject (used when a long
    enough run of positions with tracked byte sets exists), and
  - a direct comparison of up to three prefix positions whose masks have at
    most two bits set.

Arguments:
  common      compiler state
  firstline   when TRUE, the search is limited by the first line end stored
              in the stack slot at common->first_line_end

Returns TRUE when code was emitted, and FALSE when the prefix is not
suitable. TRUE is also returned when the skip table cannot be allocated. */
4038 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
4039 {
4040 DEFINE_COMPILER;
4041 struct sljit_label *start;
4042 struct sljit_jump *quit;
4043 pcre_uint32 chars[MAX_N_CHARS * 2];
4044 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
4045 pcre_uint8 ones[MAX_N_CHARS];
4046 int offsets[3];
4047 pcre_uint32 mask;
4048 pcre_uint8 *byte_set, *byte_set_end;
4049 int i, max, from;
4050 int range_right = -1, range_len = 3 - 1;
4051 sljit_ub *update_table = NULL;
4052 BOOL in_range;
4053 pcre_uint32 rec_count;
4054
/* Mark every position as unused: (NOTACHAR, 0) and an empty byte set. */
4055 for (i = 0; i < MAX_N_CHARS; i++)
4056 {
4057 chars[i << 1] = NOTACHAR;
4058 chars[(i << 1) + 1] = 0;
4059 bytes[i * MAX_N_BYTES] = 0;
4060 }
4061
/* Upper limit for the work done by scan_prefix(). */
4062 rec_count = 10000;
4063 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
4064
4065 if (max <= 1)
4066 return FALSE;
4067
/* ones[i] is the number of bits set in the mask of position i. */
4068 for (i = 0; i < max; i++)
4069 {
4070 mask = chars[(i << 1) + 1];
4071 ones[i] = ones_in_half_byte[mask & 0xf];
4072 mask >>= 4;
4073 while (mask != 0)
4074 {
4075 ones[i] += ones_in_half_byte[mask & 0xf];
4076 mask >>= 4;
4077 }
4078 }
4079
/* Find the longest run of consecutive positions with tracked byte sets
(first entry below 255). A run is only accepted if it is longer than
range_len (initially 2) and the byte set of its last position holds at
most 4 values. range_right is the index of the last position of the run. */
4080 in_range = FALSE;
4081 from = 0; /* Prevent compiler "uninitialized" warning */
4082 for (i = 0; i <= max; i++)
4083 {
4084 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
4085 {
4086 range_len = i - from;
4087 range_right = i - 1;
4088 }
4089
4090 if (i < max && bytes[i * MAX_N_BYTES] < 255)
4091 {
4092 if (!in_range)
4093 {
4094 in_range = TRUE;
4095 from = i;
4096 }
4097 }
4098 else if (in_range)
4099 in_range = FALSE;
4100 }
4101
4102 if (range_right >= 0)
4103 {
/* Build a 256 entry table indexed by a subject byte. Each entry is the
number of bytes STR_PTR can be advanced by when that byte is seen at
position range_right: the full run length by default, or the smallest
distance i at which the byte occurs in the set of position
(range_right - i). */
4104 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
/* NOTE(review): presumably the allocation failure is recorded by
allocate_read_only_data() and reported later - confirm. */
4105 if (update_table == NULL)
4106 return TRUE;
4107 memset(update_table, IN_UCHARS(range_len), 256);
4108
4109 for (i = 0; i < range_len; i++)
4110 {
4111 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
4112 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
4113 byte_set_end = byte_set + byte_set[0];
4114 byte_set++;
4115 while (byte_set <= byte_set_end)
4116 {
4117 if (update_table[*byte_set] > IN_UCHARS(i))
4118 update_table[*byte_set] = IN_UCHARS(i);
4119 byte_set++;
4120 }
4121 }
4122 }
4123
/* offsets[0]: the first position with at most two mask bits set. */
4124 offsets[0] = -1;
4125 /* Scan forward. */
4126 for (i = 0; i < max; i++)
4127 if (ones[i] <= 2) {
4128 offsets[0] = i;
4129 break;
4130 }
4131
4132 if (offsets[0] < 0 && range_right < 0)
4133 return FALSE;
4134
4135 if (offsets[0] >= 0)
4136 {
/* offsets[1]: the last suitable position after offsets[0], excluding
range_right. */
4137 /* Scan backward. */
4138 offsets[1] = -1;
4139 for (i = max - 1; i > offsets[0]; i--)
4140 if (ones[i] <= 2 && i != range_right)
4141 {
4142 offsets[1] = i;
4143 break;
4144 }
4145
4146 /* This case is handled better by fast_forward_first_char. */
4147 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
4148 return FALSE;
4149
4150 offsets[2] = -1;
4151 /* We only search for a middle character if there is no range check. */
4152 if (offsets[1] >= 0 && range_right == -1)
4153 {
4154 /* Scan from middle. */
4155 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
4156 if (ones[i] <= 2)
4157 {
4158 offsets[2] = i;
4159 break;
4160 }
4161
4162 if (offsets[2] == -1)
4163 {
4164 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
4165 if (ones[i] <= 2)
4166 {
4167 offsets[2] = i;
4168 break;
4169 }
4170 }
4171 }
4172
4173 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
4174 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
4175
/* Move the selected (value, mask) pairs to the front of the array:
chars[0..1] for offsets[0], chars[2..3] for offsets[2] and chars[4..5] for
offsets[1]. The copies are made in increasing position order. */
4176 chars[0] = chars[offsets[0] << 1];
4177 chars[1] = chars[(offsets[0] << 1) + 1];
4178 if (offsets[2] >= 0)
4179 {
4180 chars[2] = chars[offsets[2] << 1];
4181 chars[3] = chars[(offsets[2] << 1) + 1];
4182 }
4183 if (offsets[1] >= 0)
4184 {
4185 chars[4] = chars[offsets[1] << 1];
4186 chars[5] = chars[(offsets[1] << 1) + 1];
4187 }
4188 }
4189
/* From here on max is the largest offset which is read relative to STR_PTR.
STR_END is temporarily lowered by that amount. */
4190 max -= 1;
4191 if (firstline)
4192 {
4193 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_END = min(STR_END - max, first line end); the original STR_END is
kept in TMP3. */
4194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4195 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4196 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4197 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4198 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4199 JUMPHERE(quit);
4200 }
4201 else
4202 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4203
/* Except on 32 bit x86, the table address is kept in RETURN_ADDR. */
4204 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4205 if (range_right >= 0)
4206 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4207 #endif
4208
4209 start = LABEL();
4210 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4211
4212 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
4213
4214 if (range_right >= 0)
4215 {
/* Load one byte of the code unit at position range_right; on big endian
targets with 16 or 32 bit units the last byte of the unit is addressed. */
4216 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4217 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4218 #else
4219 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4220 #endif
4221
4222 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4223 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4224 #else
4225 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4226 #endif
/* Advance by the table value; a non-zero value restarts the loop. */
4227 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4228 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4229 }
4230
4231 if (offsets[0] >= 0)
4232 {
4233 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
4234 if (offsets[1] >= 0)
4235 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
/* STR_PTR is advanced before the compares, so every failing compare can
jump straight to the start of the loop. */
4236 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4237
4238 if (chars[1] != 0)
4239 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
4240 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been advanced, hence the "- 1". */
4241 if (offsets[2] >= 0)
4242 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
4243
4244 if (offsets[1] >= 0)
4245 {
4246 if (chars[5] != 0)
4247 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
4248 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
4249 }
4250
4251 if (offsets[2] >= 0)
4252 {
4253 if (chars[3] != 0)
4254 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
4255 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
4256 }
/* All compares succeeded: undo the advance. */
4257 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4258 }
4259
4260 JUMPHERE(quit);
4261
4262 if (firstline)
4263 {
/* Restore STR_END. After a table driven skip STR_PTR is also limited to
the first line end. */
4264 if (range_right >= 0)
4265 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4266 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4267 if (range_right >= 0)
4268 {
4269 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4270 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4271 JUMPHERE(quit);
4272 }
4273 }
4274 else
4275 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4276 return TRUE;
4277 }
4278
4279 #undef MAX_N_CHARS
4280 #undef MAX_N_BYTES
4281
4282 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4283 {
4284 pcre_uchar oc;
4285
4286 oc = first_char;
4287 if (caseless)
4288 {
4289 oc = TABLE_GET(first_char, common->fcc, first_char);
4290 #if defined SUPPORT_UTF && !defined COMPILE_PCRE8
4291 if (first_char > 127 && common->utf)
4292 oc = UCD_OTHERCASE(first_char);
4293 #endif
4294 }
4295
4296 fast_forward_first_char2(common, first_char, oc, 0);
4297 }
4298
/* Emits code that advances STR_PTR past the next newline sequence, according
   to common->nltype / common->newline. When common->first_line_end is set,
   STR_END is temporarily replaced by the value stored in that local and the
   real STR_END is kept in TMP3 until the end of the emitted sequence. */
4299 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4300 {
4301 DEFINE_COMPILER;
4302 struct sljit_label *loop;
4303 struct sljit_jump *lastchar;
4304 struct sljit_jump *firstchar;
4305 struct sljit_jump *quit;
4306 struct sljit_jump *foundcr = NULL;
4307 struct sljit_jump *notfoundnl;
4308 jump_list *newline = NULL;
4309
4310 if (common->first_line_end != 0)
4311 {
4312 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4313 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4314 }
4315
/* Fixed newline whose value does not fit in one byte: it is matched as two
   code units, the high byte of common->newline followed by its low byte. */
4316 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4317 {
4318 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4319 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4320 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Nothing to skip when STR_PTR is still at (or before) args->str. */
4322 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4323
/* TMP2 = (STR_PTR >= begin + 2 units) ? 1 : 0, scaled to bytes in 16/32 bit
   builds, then subtracted: STR_PTR steps back one code unit only when two
   units are available in front of it for the loop's -2/-1 loads. */
4324 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4325 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4326 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4327 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4328 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4329 #endif
4330 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4331
/* Advance one unit at a time until the two units just before STR_PTR are
   the newline pair, or STR_END is reached. */
4332 loop = LABEL();
4333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4334 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4335 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4336 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4337 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4338 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4339
4340 JUMPHERE(quit);
4341 JUMPHERE(firstchar);
4342 JUMPHERE(lastchar);
4343
/* Restore the real STR_END saved above. */
4344 if (common->first_line_end != 0)
4345 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4346 return;
4347 }
4348
/* All other newline types. */
4349 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4350 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4351 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4352 skip_char_back(common);
4353
/* The loop head is also published in common->ff_newline_shortcut. */
4354 loop = LABEL();
4355 common->ff_newline_shortcut = loop;
4356
4357 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4358 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately for ANY/ANYCRLF so a following NL can be consumed. */
4359 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4360 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* Jumps collected by check_newlinechar() are bound back to the loop head
   (presumably the "not a newline" exits - confirm against check_newlinechar). */
4361 check_newlinechar(common, common->nltype, &newline, FALSE);
4362 set_jumps(newline, loop);
4363
4364 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4365 {
4366 quit = JUMP(SLJIT_JUMP);
4367 JUMPHERE(foundcr);
/* After CR: if not at the end, TMP1 = (next unit == NL) ? 1 : 0, scaled to
   bytes, and added to STR_PTR so that CRLF is skipped as one newline. */
4368 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4370 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4371 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4372 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4373 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4374 #endif
4375 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4376 JUMPHERE(notfoundnl);
4377 JUMPHERE(quit);
4378 }
4379 JUMPHERE(lastchar);
4380 JUMPHERE(firstchar);
4381
/* Restore the real STR_END saved above. */
4382 if (common->first_line_end != 0)
4383 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4384 }
4385
4386 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4387
/* Emits a loop that advances STR_PTR until the code unit at STR_PTR is
   accepted by the start_bits bitmap, or until STR_END is reached. When
   common->first_line_end is set, STR_END is temporarily replaced by the
   value stored in that local; the real STR_END is kept in RETURN_ADDR. */
4388 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_ub *start_bits)
4389 {
4390 DEFINE_COMPILER;
4391 struct sljit_label *start;
4392 struct sljit_jump *quit;
4393 struct sljit_jump *found = NULL;
4394 jump_list *matches = NULL;
4395 #ifndef COMPILE_PCRE8
4396 struct sljit_jump *jump;
4397 #endif
4398
4399 if (common->first_line_end != 0)
4400 {
4401 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4402 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4403 }
4404
4405 start = LABEL();
4406 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4407 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The class test below may modify TMP1; keep the raw unit in TMP3 because the
   UTF skip code further down needs it. */
4408 #ifdef SUPPORT_UTF
4409 if (common->utf)
4410 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4411 #endif
4412
/* First try to express the bitmap as a few range compares (invert == TRUE, so
   the collected jumps are taken on a match). If that is not possible, fall
   back to an explicit bitmap lookup. */
4413 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4414 {
4415 #ifndef COMPILE_PCRE8
/* Units of 255 and above all share bit 255 of the bitmap. */
4416 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4417 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4418 JUMPHERE(jump);
4419 #endif
/* TMP2 = bit index within the byte, TMP1 = byte index; load the bitmap byte
   and test it against (1 << TMP2). */
4420 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4421 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4422 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4423 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4424 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4425 found = JUMP(SLJIT_NOT_ZERO);
4426 }
4427
/* No match at this position: step over the current character and retry. */
4428 #ifdef SUPPORT_UTF
4429 if (common->utf)
4430 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4431 #endif
4432 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4433 #ifdef SUPPORT_UTF
4434 #if defined COMPILE_PCRE8
4435 if (common->utf)
4436 {
/* UTF-8: units below 0xc0 need no extra skip; otherwise add the value found
   in utf8_table4[unit - 0xc0] (presumably the number of trailing bytes). */
4437 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4438 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4439 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4440 }
4441 #elif defined COMPILE_PCRE16
4442 if (common->utf)
4443 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 the unit is a high surrogate, so
   skip one more unit (flag value 1 shifted left by 1 = 2 bytes). */
4444 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4445 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4447 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4448 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4449 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4450 }
4451 #endif /* COMPILE_PCRE[8|16] */
4452 #endif /* SUPPORT_UTF */
4453 JUMPTO(SLJIT_JUMP, start);
/* All exits (bitmap hit, range hit, end of subject) land here. */
4454 if (found != NULL)
4455 JUMPHERE(found);
4456 if (matches != NULL)
4457 set_jumps(matches, LABEL());
4458 JUMPHERE(quit);
4459
/* Restore the real STR_END saved above. */
4460 if (common->first_line_end != 0)
4461 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4462 }
4463
/* Emits a forward search for the required character req_char.

   The scan starts at STR_PTR (one code unit later when has_firstchar is set)
   and runs up to STR_END. The position where the character was found is
   cached in the local at common->req_char_ptr.

   Returns the jump that is taken when STR_END is reached without finding the
   character; the caller is responsible for binding it. */
4464 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4465 {
4466 DEFINE_COMPILER;
4467 struct sljit_label *loop;
4468 struct sljit_jump *toolong;
4469 struct sljit_jump *alreadyfound;
4470 struct sljit_jump *found;
4471 struct sljit_jump *foundoc = NULL;
4472 struct sljit_jump *notfound;
4473 pcre_uint32 oc, bit;
4474
4475 SLJIT_ASSERT(common->req_char_ptr != 0);
/* Skip the search entirely when more than REQ_BYTE_MAX remains before
   STR_END, or when STR_PTR is still before the cached position. */
4476 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4477 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4478 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4479 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4480
/* TMP1 is the scan pointer. */
4481 if (has_firstchar)
4482 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4483 else
4484 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4485
4486 loop = LABEL();
4487 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4488
4489 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compile-time computation of the other-case form of req_char. */
4490 oc = req_char;
4491 if (caseless)
4492 {
4493 oc = TABLE_GET(req_char, common->fcc, req_char);
4494 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4495 if (req_char > 127 && common->utf)
4496 oc = UCD_OTHERCASE(req_char);
4497 #endif
4498 }
4499 if (req_char == oc)
4500 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4501 else
4502 {
4503 bit = req_char ^ oc;
/* When the two cases differ in a single bit, force that bit on and do one
   compare; otherwise compare against each case separately. */
4504 if (is_powerof2(bit))
4505 {
4506 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4507 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4508 }
4509 else
4510 {
4511 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4512 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4513 }
4514 }
4515 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4516 JUMPTO(SLJIT_JUMP, loop);
4517
4518 JUMPHERE(found);
4519 if (foundoc)
4520 JUMPHERE(foundoc);
/* Remember where the character was found for later calls. */
4521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4522 JUMPHERE(alreadyfound);
4523 JUMPHERE(toolong);
4524 return notfound;
4525 }
4526
/* Emits the fast-call helper that replays saved frame records.

   Records are read starting at STACK_TOP (copied to TMP1); TMP3 holds the
   local base. The first word of each record selects its layout:
     > 0  offset from the local base; the next two words are copied to that
          location and the one after it, then TMP1 advances by three words;
     == 0 end of the records, the helper returns;
     < 0  negated offset from the local base; the next word is copied to that
          location, then TMP1 advances by two words. */
4527 static void do_revertframes(compiler_common *common)
4528 {
4529 DEFINE_COMPILER;
4530 struct sljit_jump *jump;
4531 struct sljit_label *mainloop;
4532
4533 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4534 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4535 GET_LOCAL_BASE(TMP3, 0, 0);
4536
4537 /* Drop frames until we reach STACK_TOP. */
4538 mainloop = LABEL();
4539 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed compare of the record header against zero; the flags are reused by
   the second conditional jump below. */
4540 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4541 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4542
/* Positive header: restore two words. */
4543 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4544 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4545 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4546 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4547 JUMPTO(SLJIT_JUMP, mainloop);
4548
4549 JUMPHERE(jump);
4550 jump = JUMP(SLJIT_SIG_LESS);
4551 /* End of dropping frames. */
4552 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4553
/* Negative header: restore a single word. */
4554 JUMPHERE(jump);
4555 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4556 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4557 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4558 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4559 JUMPTO(SLJIT_JUMP, mainloop);
4560 }
4561
/* Emits the fast-call helper that tests for a word boundary at STR_PTR.

   The return address is kept in LOCALS0. The "is a word character" value
   (0 or 1) of the character before STR_PTR is stored in LOCALS1 (0 when
   STR_PTR is at the start of the subject); the value for the character at
   STR_PTR is computed in TMP2 (0 when the end-of-string check jumps out).
   The final XOR of the two sets the equality flags, so the result is
   non-zero exactly when the two sides differ. */
4562 static void check_wordboundary(compiler_common *common)
4563 {
4564 DEFINE_COMPILER;
4565 struct sljit_jump *skipread;
4566 jump_list *skipread_list = NULL;
4567 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4568 struct sljit_jump *jump;
4569 #endif
4570
/* The non-UCP path extracts the word flag with a shift by 4 and a mask of 1. */
4571 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4572
4573 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4574 /* Get type of the previous char, and put it to LOCALS1. */
4575 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4576 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the start of the subject: LOCALS1 stays 0. */
4578 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4579 skip_char_back(common);
4580 check_start_used_ptr(common);
4581 read_char(common);
4582
4583 /* Testing char type. */
4584 #ifdef SUPPORT_UCP
4585 if (common->use_ucp)
4586 {
/* Underscore is a word character; otherwise the value returned by the getucd
   helper is tested for the ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No using
   subtract + unsigned compare. */
4587 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4588 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4589 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4590 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4591 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4592 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4593 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4594 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4595 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4596 JUMPHERE(jump);
4597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4598 }
4599 else
4600 #endif
4601 {
/* Table lookup in common->ctypes; characters above 255 skip the lookup and
   keep the 0 already stored in LOCALS1. */
4602 #ifndef COMPILE_PCRE8
4603 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4604 #elif defined SUPPORT_UTF
4605 /* Here LOCALS1 has already been zeroed. */
4606 jump = NULL;
4607 if (common->utf)
4608 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4609 #endif /* COMPILE_PCRE8 */
4610 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4611 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4612 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4613 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4614 #ifndef COMPILE_PCRE8
4615 JUMPHERE(jump);
4616 #elif defined SUPPORT_UTF
4617 if (jump != NULL)
4618 JUMPHERE(jump);
4619 #endif /* COMPILE_PCRE8 */
4620 }
4621 JUMPHERE(skipread);
4622
/* Now the character at STR_PTR; TMP2 defaults to 0 for the end-of-string case. */
4623 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4624 check_str_end(common, &skipread_list);
4625 peek_char(common, READ_CHAR_MAX);
4626
4627 /* Testing char type. This is a code duplication. */
4628 #ifdef SUPPORT_UCP
4629 if (common->use_ucp)
4630 {
4631 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4632 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4633 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4634 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4635 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4636 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4637 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4638 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4639 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4640 JUMPHERE(jump);
4641 }
4642 else
4643 #endif
4644 {
4645 #ifndef COMPILE_PCRE8
4646 /* TMP2 may be destroyed by peek_char. */
4647 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4648 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4649 #elif defined SUPPORT_UTF
4650 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4651 jump = NULL;
4652 if (common->utf)
4653 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4654 #endif
4655 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4656 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4657 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4658 #ifndef COMPILE_PCRE8
4659 JUMPHERE(jump);
4660 #elif defined SUPPORT_UTF
4661 if (jump != NULL)
4662 JUMPHERE(jump);
4663 #endif /* COMPILE_PCRE8 */
4664 }
4665 set_jumps(skipread_list, LABEL());
4666
/* Boundary <=> the two word flags differ. */
4667 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4668 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4669 }
4670
/* Tries to replace a 256-bit class bitmap lookup with a few compares on TMP1.

   bits        The class bitmap (bit i of byte i >> 3 describes character i).
   nclass      Selects whether a closing boundary at 256 is appended after the
               scan (see the condition below); presumably it tells how
               characters above 255 are treated - confirm with callers.
   invert      Flips the sense of the emitted tests. With invert == FALSE the
               jumps are taken for characters outside the class; with
               invert == TRUE they are taken for characters inside it.
   backtracks  List receiving the emitted jumps.

   The bitmap is first converted into a list of boundaries where membership
   changes. Returns FALSE, without emitting any code, when more than four
   boundaries are needed. Otherwise emits the compares and returns TRUE.
   The emitted code may modify TMP1. */
4671 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4672 {
4673 DEFINE_COMPILER;
4674 int ranges[MAX_RANGE_SIZE];
4675 pcre_uint8 bit, cbit, all;
4676 int i, byte, length = 0;
4677
4678 bit = bits[0] & 0x1;
4679 /* All bits will be zero or one (since bit is zero or one). */
4680 all = -bit;
4681
/* Record every position where the membership bit changes. Whole bytes equal
   to the current state (0x00 or 0xff) are skipped eight bits at a time. */
4682 for (i = 0; i < 256; )
4683 {
4684 byte = i >> 3;
4685 if ((i & 0x7) == 0 && bits[byte] == all)
4686 i += 8;
4687 else
4688 {
4689 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4690 if (cbit != bit)
4691 {
4692 if (length >= MAX_RANGE_SIZE)
4693 return FALSE;
4694 ranges[length] = i;
4695 length++;
4696 bit = cbit;
4697 all = -cbit;
4698 }
4699 i++;
4700 }
4701 }
4702
/* Append a final boundary at 256 when the state at the end of the bitmap and
   nclass require one. */
4703 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4704 {
4705 if (length >= MAX_RANGE_SIZE)
4706 return FALSE;
4707 ranges[length] = 256;
4708 length++;
4709 }
4710
/* Only up to four boundaries are handled by the switch below. */
4711 if (length < 0 || length > 4)
4712 return FALSE;
4713
/* From here on, bit is the (possibly inverted) state of character 0. */
4714 bit = bits[0] & 0x1;
4715 if (invert) bit ^= 0x1;
4716
4717 /* No character is accepted. */
4718 if (length == 0 && bit == 0)
4719 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4720
4721 switch(length)
4722 {
4723 case 0:
4724 /* When bit != 0, all characters are accepted. */
4725 return TRUE;
4726
/* One boundary: a single threshold compare. */
4727 case 1:
4728 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4729 return TRUE;
4730
/* Two boundaries: one interval [ranges[0], ranges[1]), tested with
   subtract + compare, or a plain equality when it holds one character. */
4731 case 2:
4732 if (ranges[0] + 1 != ranges[1])
4733 {
4734 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4735 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4736 }
4737 else
4738 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4739 return TRUE;
4740
/* Three boundaries: a threshold compare plus one interval test. */
4741 case 3:
4742 if (bit != 0)
4743 {
4744 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4745 if (ranges[0] + 1 != ranges[1])
4746 {
4747 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4748 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4749 }
4750 else
4751 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4752 return TRUE;
4753 }
4754
4755 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4756 if (ranges[1] + 1 != ranges[2])
4757 {
4758 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4759 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4760 }
4761 else
4762 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4763 return TRUE;
4764
4765 case 4:
/* Two intervals of equal length whose start points differ in exactly one bit
   (e.g. matching upper and lower case ranges): OR that bit into TMP1 and test
   only the second interval. */
4766 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4767 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4768 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4769 && is_powerof2(ranges[2] - ranges[0]))
4770 {
4771 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4772 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4773 if (ranges[2] + 1 != ranges[3])
4774 {
4775 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4776 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4777 }
4778 else
4779 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4780 return TRUE;
4781 }
4782
/* General four-boundary case. i tracks how much has already been subtracted
   from TMP1 so the second test can compensate. */
4783 if (bit != 0)
4784 {
4785 i = 0;
4786 if (ranges[0] + 1 != ranges[1])
4787 {
4788 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4789 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4790 i = ranges[0];
4791 }
4792 else
4793 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4794
4795 if (ranges[2] + 1 != ranges[3])
4796 {
4797 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4798 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4799 }
4800 else
4801 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4802 return TRUE;
4803 }
4804
/* bit == 0: jump when outside [ranges[0], ranges[3]) or inside the gap
   [ranges[1], ranges[2]). */
4805 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4806 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4807 if (ranges[1] + 1 != ranges[2])
4808 {
4809 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4810 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4811 }
4812 else
4813 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4814 return TRUE;
4815
4816 default:
4817 SLJIT_ASSERT_STOP();
4818 return FALSE;
4819 }
4820 }
4821
/* Emits the fast-call helper that tests whether TMP1 is one of the "any
   newline" characters: 0x0a..0x0d, 0x85 and, in UTF mode or in 16/32 bit
   builds, 0x2028/0x2029. The result is accumulated in TMP2 and the last
   OP_FLAGS also sets the equality flags from it. */
4822 static void check_anynewline(compiler_common *common)
4823 {
4824 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Note: TMP1 is also modified (0x0a is subtracted, bit 0 may be set). */
4825 DEFINE_COMPILER;
4826
4827 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4828
/* 0x0a..0x0d as one unsigned range check on (TMP1 - 0x0a). */
4829 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4830 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4831 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4832 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4833 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4834 #ifdef COMPILE_PCRE8
4835 if (common->utf)
4836 {
4837 #endif
/* Setting bit 0 folds 0x2028 onto 0x2029 so a single compare covers both. */
4838 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4839 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4840 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4841 #ifdef COMPILE_PCRE8
4842 }
4843 #endif
4844 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last pending compare and expose it via the flags. */
4845 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4846 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4847 }
4848
/* Emits the fast-call helper that tests whether TMP1 is a horizontal space:
   0x09, 0x20, 0xa0 and, in UTF mode or in 16/32 bit builds, 0x1680, 0x180e,
   0x2000..0x200a, 0x202f, 0x205f and 0x3000. The result is accumulated in
   TMP2 and the last OP_FLAGS also sets the equality flags from it. */
4849 static void check_hspace(compiler_common *common)
4850 {
4851 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
/* Note: on the wide-character path TMP1 is also modified (0x2000 is subtracted). */
4852 DEFINE_COMPILER;
4853
4854 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4855
/* Each compare leaves its result in the flags; the following OP_FLAGS merges
   it into TMP2 before the next compare overwrites the flags. */
4856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4857 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4858 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4859 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4860 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4861 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4862 #ifdef COMPILE_PCRE8
4863 if (common->utf)
4864 {
4865 #endif
4866 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4867 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4868 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4869 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4870 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Rebase TMP1 to 0x2000: 0x2000..0x200a becomes one unsigned range check and
   the remaining constants are expressed relative to 0x2000. */
4871 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4872 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4873 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4874 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4875 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4876 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4877 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4878 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4879 #ifdef COMPILE_PCRE8
4880 }
4881 #endif
4882 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last pending compare and expose it via the flags. */
4883 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4884
4885 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4886 }
4887
/* Emits the fast-call helper that tests whether TMP1 is a vertical space:
   0x0a..0x0d, 0x85 and, in UTF mode or in 16/32 bit builds, 0x2028/0x2029.
   The result is accumulated in TMP2 and the last OP_FLAGS also sets the
   equality flags from it. */
4888 static void check_vspace(compiler_common *common)
4889 {
4890 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* Note: TMP1 is also modified (0x0a is subtracted, bit 0 may be set). */
4891 DEFINE_COMPILER;
4892
4893 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4894
/* 0x0a..0x0d as one unsigned range check on (TMP1 - 0x0a). */
4895 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4896 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4897 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4898 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4899 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4900 #ifdef COMPILE_PCRE8
4901 if (common->utf)
4902 {
4903 #endif
/* NOTE(review): unlike check_anynewline(), this intermediate OP_FLAGS also
   requests SLJIT_SET_E; the flags are set again by the compare two lines
   below, so the request looks redundant - confirm before changing. */
4904 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting bit 0 folds 0x2028 onto 0x2029 so a single compare covers both. */
4905 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4906 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4907 #ifdef COMPILE_PCRE8
4908 }
4909 #endif
4910 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last pending compare and expose it via the flags. */
4911 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4912
4913 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4914 }
4915
4916 #define CHAR1 STR_END
4917 #define CHAR2 STACK_TOP
4918
/* Emits the fast-call helper for a caseful comparison of two code unit
   sequences of TMP2 bytes: one read through TMP1, the other read from the
   subject starting at STR_PTR - TMP2.

   CHAR1 and CHAR2 alias STR_END and STACK_TOP, so those registers are saved
   (in TMP3 and LOCALS0) on entry and restored before returning. TMP2 is
   decremented once per matching unit, so it reaches zero only when every
   unit matched; on a mismatch the loop exits before the decrement. */
4919 static void do_casefulcmp(compiler_common *common)
4920 {
4921 DEFINE_COMPILER;
4922 struct sljit_jump *jump;
4923 struct sljit_label *label;
4924
4925 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4926 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4927 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4928 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are biased by one unit because the loop loads use an offset
   of one unit (MOVU_UCHAR is presumably a load that also updates the base
   register - confirm against its definition). */
4929 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4930 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4931
4932 label = LABEL();
4933 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4934 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4935 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4936 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4937 JUMPTO(SLJIT_NOT_ZERO, label);
4938
4939 JUMPHERE(jump);
/* Undo the one-unit bias on STR_PTR and restore the borrowed registers. */
4940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4941 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4942 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4943 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4944 }
4945
4946 #define LCC_TABLE STACK_LIMIT
4947
/* Emits the fast-call helper for a caseless comparison of two code unit
   sequences of TMP2 bytes: one read through TMP1, the other read from the
   subject starting at STR_PTR - TMP2. Each unit is mapped through the
   common->lcc table before comparing (in 16/32 bit builds only units up to
   255 are mapped; larger ones are compared as they are).

   LCC_TABLE aliases STACK_LIMIT, and CHAR1/CHAR2 alias STR_END/STACK_TOP;
   all three are saved on entry (TMP3, LOCALS0, LOCALS1) and restored before
   returning. TMP2 reaches zero only when every unit matched. */
4948 static void do_caselesscmp(compiler_common *common)
4949 {
4950 DEFINE_COMPILER;
4951 struct sljit_jump *jump;
4952 struct sljit_label *label;
4953
4954 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4955 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4956
4957 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4959 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4960 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased by one unit because the loop loads use an offset
   of one unit. */
4961 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4962 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4963
4964 label = LABEL();
4965 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4966 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The lcc table is indexed only by values up to 255. */
4967 #ifndef COMPILE_PCRE8
4968 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4969 #endif
4970 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4971 #ifndef COMPILE_PCRE8
4972 JUMPHERE(jump);
4973 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4974 #endif
4975 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4976 #ifndef COMPILE_PCRE8
4977 JUMPHERE(jump);
4978 #endif
4979 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4980 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4981 JUMPTO(SLJIT_NOT_ZERO, label);
4982
4983 JUMPHERE(jump);
/* Undo the one-unit bias on STR_PTR and restore the borrowed registers. */
4984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4985 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4986 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4987 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4988 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4989 }
4990
4991 #undef LCC_TABLE
4992 #undef CHAR1
4993 #undef CHAR2
4994
4995 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4996
4997 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4998 {
4999 /* This function would be ineffective to do in JIT level. */
5000 pcre_uint32 c1, c2;
5001 const pcre_uchar *src2 = args->uchar_ptr;
5002 const pcre_uchar *end2 = args->end;
5003 const ucd_record *ur;
5004 const pcre_uint32 *pp;
5005
5006 while (src1 < end1)
5007 {
5008 if (src2 >= end2)
5009 return (pcre_uchar*)1;
5010 GETCHARINC(c1, src1);
5011 GETCHARINC(c2, src2);
5012 ur = GET_UCD(c2);
5013 if (c1 != c2 && c1 != c2 + ur->other_case)
5014 {
5015 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5016 for (;;)
5017 {
5018 if (c1 < *pp) return NULL;
5019 if (c1 == *pp++) break;
5020 }
5021 }
5022 }
5023 return src2;
5024 }
5025
5026 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5027
/* Emits the compare for one pattern character (several code units in UTF
   mode) against the subject text located context->length bytes before
   STR_PTR.

   caseless    Request caseless matching. It is only honoured when
               char_has_othercase() reports an other case; the difference is
               then handled by OR-ing a single bit (char_get_othercase_bit()).
   cc          The pattern character to match.
   context     Running state shared by consecutive calls: remaining length,
               the register holding the preloaded subject data, and (with
               unaligned reads) the code units batched for one wide compare.
   backtracks  Receives the jumps taken on mismatch.

   Returns cc advanced past the character. */
5028 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5029 compare_context *context, jump_list **backtracks)
5030 {
5031 DEFINE_COMPILER;
5032 unsigned int othercasebit = 0;
5033 pcre_uchar *othercasechar = NULL;
5034 #ifdef SUPPORT_UTF
5035 int utflength;
5036 #endif
5037
5038 if (caseless && char_has_othercase(common, cc))
5039 {
5040 othercasebit = char_get_othercase_bit(common, cc);
5041 SLJIT_ASSERT(othercasebit);
5042 /* Extracting bit difference info. */
/* The upper part of the value selects the code unit that carries the
   differing bit; the low 8 bits are the bit mask itself. */
5043 #if defined COMPILE_PCRE8
5044 othercasechar = cc + (othercasebit >> 8);
5045 othercasebit &= 0xff;
5046 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5047 /* Note that this code only handles characters in the BMP. If there
5048 ever are characters outside the BMP whose othercase differs in only one
5049 bit from itself (there currently are none), this code will need to be
5050 revised for COMPILE_PCRE32. */
5051 othercasechar = cc + (othercasebit >> 9);
5052 if ((othercasebit & 0x100) != 0)
5053 othercasebit = (othercasebit & 0xff) << 8;
5054 else
5055 othercasebit &= 0xff;
5056 #endif /* COMPILE_PCRE[8|16|32] */
5057 }
5058
/* First call for this context: preload the first chunk of subject data into
   TMP1, using the widest load that the remaining length allows. */
5059 if (context->sourcereg == -1)
5060 {
5061 #if defined COMPILE_PCRE8
5062 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5063 if (context->length >= 4)
5064 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5065 else if (context->length >= 2)
5066 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5067 else
5068 #endif
5069 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5070 #elif defined COMPILE_PCRE16
5071 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5072 if (context->length >= 4)
5073 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5074 else
5075 #endif
5076 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5077 #elif defined COMPILE_PCRE32
5078 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5079 #endif /* COMPILE_PCRE[8|16|32] */
5080 context->sourcereg = TMP2;
5081 }
5082
/* In UTF mode one character may span several code units; loop over them. */
5083 #ifdef SUPPORT_UTF
5084 utflength = 1;
5085 if (common->utf && HAS_EXTRALEN(*cc))
5086 utflength += GET_EXTRALEN(*cc);
5087
5088 do
5089 {
5090 #endif
5091
5092 context->length -= IN_UCHARS(1);
5093 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5094
5095 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit forced on, if any) in
   context->c and the OR mask in context->oc. */
5096 if (othercasebit != 0 && othercasechar == cc)
5097 {
5098 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5099 context->oc.asuchars[context->ucharptr] = othercasebit;
5100 }
5101 else
5102 {
5103 context->c.asuchars[context->ucharptr] = *cc;
5104 context->oc.asuchars[context->ucharptr] = 0;
5105 }
5106 context->ucharptr++;
5107
/* Flush the batch when it is full, when the sequence ends, or (8 bit) when
   two units are collected and only one more remains. */
5108 #if defined COMPILE_PCRE8
5109 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5110 #else
5111 if (context->ucharptr >= 2 || context->length == 0)
5112 #endif
5113 {
/* Issue the load of the next chunk into the idle register, then switch to
   the register that already holds the data for this batch. */
5114 if (context->length >= 4)
5115 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5116 else if (context->length >= 2)
5117 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5118 #if defined COMPILE_PCRE8
5119 else if (context->length >= 1)
5120 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5121 #endif /* COMPILE_PCRE8 */
5122 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5123
/* Compare the whole batch at once; the case labels are the batch sizes in
   code units for 4, 2 and 1 byte wide compares. */
5124 switch(context->ucharptr)
5125 {
5126 case 4 / sizeof(pcre_uchar):
5127 if (context->oc.asint != 0)
5128 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5129 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5130 break;
5131
5132 case 2 / sizeof(pcre_uchar):
5133 if (context->oc.asushort != 0)
5134 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5135 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5136 break;
5137
5138 #ifdef COMPILE_PCRE8
5139 case 1:
5140 if (context->oc.asbyte != 0)
5141 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5142 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5143 break;
5144 #endif
5145
5146 default:
5147 SLJIT_ASSERT_STOP();
5148 break;
5149 }
5150 context->ucharptr = 0;
5151 }
5152
5153 #else
5154
5155 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: load the next unit into the idle register, then
   test the unit held in the other one. */
5156 if (context->length >= 1)
5157 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5158
5159 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5160
5161 if (othercasebit != 0 && othercasechar == cc)
5162 {
5163 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5164 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5165 }
5166 else
5167 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5168
5169 #endif
5170
5171 cc++;
5172 #ifdef SUPPORT_UTF
5173 utflength--;
5174 }
5175 while (utflength > 0);
5176 #endif
5177
5178 return cc;
5179 }
5180
5181 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5182
/* Rebases the register named by the local variable typereg so that it holds
   (type - value) instead of (type - typeoffset), emitting a single ADD or SUB
   when the offset changes, and then records the new offset in typeoffset.
   Expands to several statements and evaluates value more than once, so it
   must be used as a full statement (not as the body of an unbraced if/else)
   and with a side-effect free argument. Requires typereg and typeoffset to be
   in scope. */
5183 #define SET_TYPE_OFFSET(value) \
5184 if ((value) != typeoffset) \
5185 { \
5186 if ((value) < typeoffset) \
5187 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5188 else \
5189 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5190 } \
5191 typeoffset = (value);
5192
/* Same as SET_TYPE_OFFSET, but for the character value kept in TMP1 and the
   local variable charoffset. The same usage restrictions apply. */
5193 #define SET_CHAR_OFFSET(value) \
5194 if ((value) != charoffset) \
5195 { \
5196 if ((value) < charoffset) \
5197 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5198 else \
5199 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5200 } \
5201 charoffset = (value);
5202
5203 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5204
5205 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5206 {
5207 DEFINE_COMPILER;
5208 jump_list *found = NULL;
5209 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5210 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5211 struct sljit_jump *jump = NULL;
5212 pcre_uchar *ccbegin;
5213 int compares, invertcmp, numberofcmps;
5214 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5215 BOOL utf = common->utf;
5216 #endif
5217
5218 #ifdef SUPPORT_UCP
5219 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5220 BOOL charsaved = FALSE;
5221 int typereg = TMP1;
5222 const sljit_ui *other_cases;
5223 sljit_uw typeoffset;
5224 #endif
5225
5226 /* Scanning the necessary info. */
5227 cc++;
5228 ccbegin = cc;
5229 compares = 0;
5230 if (cc[-1] & XCL_MAP)
5231 {
5232 min = 0;
5233 cc += 32 / sizeof(pcre_uchar);
5234 }
5235
5236 while (*cc != XCL_END)
5237 {
5238 compares++;
5239 if (*cc == XCL_SINGLE)
5240 {
5241 cc ++;
5242 GETCHARINCTEST(c, cc);
5243 if (c > max) max = c;
5244 if (c < min) min = c;
5245 #ifdef SUPPORT_UCP
5246 needschar = TRUE;
5247 #endif
5248 }
5249 else if (*cc == XCL_RANGE)
5250 {
5251 cc ++;
5252 GETCHARINCTEST(c, cc);
5253 if (c < min) min = c;
5254 GETCHARINCTEST(c, cc);
5255 if (c > max) max = c;
5256 #ifdef SUPPORT_UCP
5257 needschar = TRUE;
5258 #endif
5259 }
5260 #ifdef SUPPORT_UCP
5261 else
5262 {
5263 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5264 cc++;
5265 if (*cc == PT_CLIST)
5266 {
5267 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5268 while (*other_cases != NOTACHAR)
5269 {
5270 if (*other_cases > max) max = *other_cases;
5271 if (*other_cases < min) min = *other_cases;
5272 other_cases++;
5273 }
5274 }
5275 else
5276 {
5277 max = READ_CHAR_MAX;
5278 min = 0;
5279 }
5280
5281 switch(*cc)
5282 {
5283 case PT_ANY:
5284 /* Any either accepts everything or ignored. */
5285 if (cc[-1] == XCL_PROP)
5286 {
5287 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5288 if (list == backtracks)
5289 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5290 return;
5291 }
5292 break;
5293
5294 case PT_LAMP:
5295 case PT_GC:
5296 case PT_PC:
5297 case PT_ALNUM:
5298 needstype = TRUE;
5299 break;
5300
5301 case PT_SC:
5302 needsscript = TRUE;
5303 break;
5304
5305 case PT_SPACE:
5306 case PT_PXSPACE:
5307 case PT_WORD:
5308 case PT_PXGRAPH:
5309 case PT_PXPRINT:
5310 case PT_PXPUNCT:
5311 needstype = TRUE;
5312 needschar = TRUE;
5313 break;
5314
5315 case PT_CLIST:
5316 case PT_UCNC:
5317 needschar = TRUE;
5318 break;
5319
5320 default:
5321 SLJIT_ASSERT_STOP();
5322 break;
5323 }
5324 cc += 2;
5325 }
5326 #endif
5327 }
5328 SLJIT_ASSERT(compares > 0);
5329
5330 /* We are not necessary in utf mode even in 8 bit mode. */
5331 cc = ccbegin;
5332 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5333
5334 if ((cc[-1] & XCL_HASPROP) == 0)
5335 {
5336 if ((cc[-1] & XCL_MAP) != 0)
5337 {
5338 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5339 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
5340 {
5341 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5342 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5343 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5344 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5345 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5346 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5347 }
5348
5349 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5350 JUMPHERE(jump);
5351
5352 cc += 32 / sizeof(pcre_uchar);
5353 }
5354 else
5355 {
5356 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5357 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5358 }
5359 }
5360 else if ((cc[-1] & XCL_MAP) != 0)
5361 {
5362 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5363 #ifdef SUPPORT_UCP
5364 charsaved = TRUE;
5365 #endif
5366 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
5367 {
5368 #ifdef COMPILE_PCRE8
5369 jump = NULL;
5370 if (common->utf)
5371 #endif
5372 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5373
5374 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5375 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5376 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5377 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5378 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5379 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5380
5381 #ifdef COMPILE_PCRE8
5382 if (common->utf)
5383 #endif
5384 JUMPHERE(jump);
5385 }
5386
5387 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5388 cc += 32 / sizeof(pcre_uchar);
5389 }
5390
5391 #ifdef SUPPORT_UCP
5392 if (needstype || needsscript)
5393 {
5394 if (needschar && !charsaved)
5395 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5396
5397 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5398 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5399 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5400 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5401 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5402 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5403 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5404
5405 /* Before anything else, we deal with scripts. */
5406 if (needsscript)
5407 {
5408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5409 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5410
5411 ccbegin = cc;
5412
5413 while (*cc != XCL_END)
5414 {
5415 if (*cc == XCL_SINGLE)
5416 {
5417 cc ++;
5418 GETCHARINCTEST(c, cc);
5419 }
5420 else if (*cc == XCL_RANGE)
5421 {
5422 cc ++;
5423 GETCHARINCTEST(c, cc);
5424 GETCHARINCTEST(c, cc);
5425 }
5426 else
5427 {
5428 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5429 cc++;
5430 if (*cc == PT_SC)
5431 {
5432 compares--;
5433 invertcmp = (compares == 0 && list != backtracks);
5434 if (cc[-1] == XCL_NOTPROP)
5435 invertcmp ^= 0x1;
5436 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5437 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5438 }
5439 cc += 2;
5440 }
5441 }
5442
5443 cc = ccbegin;
5444 }
5445
5446 if (needschar)
5447 {
5448 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5449 }
5450
5451 if (needstype)
5452 {
5453 if (!needschar)
5454 {
5455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5456 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5457 }
5458 else
5459 {
5460 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5461 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5462 typereg = RETURN_ADDR;
5463 }
5464 }
5465 }
5466 #endif
5467
5468 /* Generating code. */
5469 charoffset = 0;
5470 numberofcmps = 0;
5471 #ifdef SUPPORT_UCP
5472 typeoffset = 0;
5473 #endif
5474
5475 while (*cc != XCL_END)
5476 {
5477 compares--;
5478 invertcmp = (compares == 0 && list != backtracks);
5479 jump = NULL;
5480
5481 if (*cc == XCL_SINGLE)
5482 {
5483 cc ++;
5484 GETCHARINCTEST(c, cc);
5485
5486 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5487 {
5488 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5489 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5490 numberofcmps++;
5491 }
5492 else if (numberofcmps > 0)
5493 {
5494 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5495 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5496 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5497 numberofcmps = 0;
5498 }
5499 else
5500 {
5501 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5502 numberofcmps = 0;
5503 }
5504 }
5505 else if (*cc == XCL_RANGE)
5506 {
5507 cc ++;
5508 GETCHARINCTEST(c, cc);
5509 SET_CHAR_OFFSET(c);
5510 GETCHARINCTEST(c, cc);
5511
5512 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5513 {
5514 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5515 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5516 numberofcmps++;
5517 }
5518 else if (numberofcmps > 0)
5519 {
5520 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5521 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5522 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5523 numberofcmps = 0;
5524 }
5525 else
5526 {
5527 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5528 numberofcmps = 0;
5529 }
5530 }
5531 #ifdef SUPPORT_UCP
5532 else
5533 {
5534 if (*cc == XCL_NOTPROP)
5535 invertcmp ^= 0x1;
5536 cc++;
5537 switch(*cc)
5538 {
5539 case PT_ANY:
5540 if (!invertcmp)
5541 jump = JUMP(SLJIT_JUMP);
5542 break;
5543
5544 case PT_LAMP:
5545 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5546 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5547 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5548 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5549 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5550 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5551 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5552 break;
5553
5554 case PT_GC:
5555 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5556 SET_TYPE_OFFSET(c);
5557 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5558 break;
5559
5560 case PT_PC:
5561 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5562 break;
5563
5564 case PT_SC:
5565 compares++;
5566 /* Do nothing. */
5567 break;
5568
5569 case PT_SPACE:
5570 case PT_PXSPACE:
5571 SET_CHAR_OFFSET(9);
5572 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5573 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5574
5575 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5576 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5577
5578 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5579 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5580
5581 SET_TYPE_OFFSET(ucp_Zl);
5582 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5583 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5584 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5585 break;
5586
5587 case PT_WORD:
5588 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5589 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5590 /* Fall through. */
5591
5592 case PT_ALNUM:
5593 SET_TYPE_OFFSET(ucp_Ll);
5594 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5595 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5596 SET_TYPE_OFFSET(ucp_Nd);
5597 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5598 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5599 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5600 break;
5601
5602 case PT_CLIST:
5603 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5604
5605 /* At least three characters are required.
5606 Otherwise this case would be handled by the normal code path. */
5607 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5608 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5609
5610 /* Optimizing character pairs, if their difference is power of 2. */
5611 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5612 {
5613 if (charoffset == 0)
5614 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5615 else
5616 {
5617 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5618 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5619 }
5620 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5621 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5622 other_cases += 2;
5623 }
5624 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5625 {
5626 if (charoffset == 0)
5627 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5628 else
5629 {
5630 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5631 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5632 }
5633 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5634 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5635
5636 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5637 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5638
5639 other_cases += 3;
5640 }
5641 else
5642 {
5643 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5644 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5645 }
5646
5647 while (*other_cases != NOTACHAR)
5648 {
5649 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5650 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5651 }
5652 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5653 break;
5654
5655 case PT_UCNC:
5656 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5657 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5658 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5659 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5660 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5661 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5662
5663 SET_CHAR_OFFSET(0xa0);
5664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5665 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5666 SET_CHAR_OFFSET(0);
5667 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5668 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5669 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5670 break;
5671
5672 case PT_PXGRAPH:
5673 /* C and Z groups are the farthest two groups. */
5674 SET_TYPE_OFFSET(ucp_Ll);
5675 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5676 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5677
5678 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5679
5680 /* In case of ucp_Cf, we overwrite the result. */
5681 SET_CHAR_OFFSET(0x2066);
5682 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5683 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5684
5685 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5686 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5687
5688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5689 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5690
5691 JUMPHERE(jump);
5692 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5693 break;
5694
5695 case PT_PXPRINT:
5696 /* C and Z groups are the farthest two groups. */
5697 SET_TYPE_OFFSET(ucp_Ll);
5698 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5699 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5700
5701 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5702 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5703
5704 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5705
5706 /* In case of ucp_Cf, we overwrite the result. */
5707 SET_CHAR_OFFSET(0x2066);
5708 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5709 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5710
5711 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5712 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5713
5714 JUMPHERE(jump);
5715 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5716 break;
5717
5718 case PT_PXPUNCT:
5719 SET_TYPE_OFFSET(ucp_Sc);
5720 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5721 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5722
5723 SET_CHAR_OFFSET(0);
5724 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5725 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5726
5727 SET_TYPE_OFFSET(ucp_Pc);
5728 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5729 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5730 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5731 break;
5732
5733 default:
5734 SLJIT_ASSERT_STOP();
5735 break;
5736 }
5737 cc += 2;
5738 }
5739 #endif
5740
5741 if (jump != NULL)
5742 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5743 }
5744
5745 if (found != NULL)
5746 set_jumps(found, LABEL());
5747 }
5748
5749 #undef SET_TYPE_OFFSET
5750 #undef SET_CHAR_OFFSET
5751
5752 #endif
5753
5754 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5755 {
5756 DEFINE_COMPILER;
5757 int length;
5758 struct sljit_jump *jump[4];
5759 #ifdef SUPPORT_UTF
5760 struct sljit_label *label;
5761 #endif /* SUPPORT_UTF */
5762
5763 switch(type)
5764 {
5765 case OP_SOD:
5766 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5768 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5769 return cc;
5770
5771 case OP_SOM:
5772 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5774 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5775 return cc;
5776
5777 case OP_NOT_WORD_BOUNDARY:
5778 case OP_WORD_BOUNDARY:
5779 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5780 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5781 return cc;
5782
5783 case OP_EODN:
5784 /* Requires rather complex checks. */
5785 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5786 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5787 {
5788 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5789 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5790 if (common->mode == JIT_COMPILE)
5791 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5792 else
5793 {
5794 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5795 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5796 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5797 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5798 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5799 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5800 check_partial(common, TRUE);
5801 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5802 JUMPHERE(jump[1]);
5803 }
5804 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5805 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5806 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5807 }
5808 else if (common->nltype == NLTYPE_FIXED)
5809 {
5810 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5811 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5812 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5813 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5814 }
5815 else
5816 {
5817 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5818 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5819 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5820 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5821 jump[2] = JUMP(SLJIT_GREATER);
5822 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5823 /* Equal. */
5824 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5825 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5826 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5827
5828 JUMPHERE(jump[1]);
5829 if (common->nltype == NLTYPE_ANYCRLF)
5830 {
5831 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5832 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5833 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5834 }
5835 else
5836 {
5837 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5838 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5839 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5840 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5841 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5842 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5843 }
5844 JUMPHERE(jump[2]);
5845 JUMPHERE(jump[3]);
5846 }
5847 JUMPHERE(jump[0]);
5848 check_partial(common, FALSE);
5849 return cc;
5850
5851 case OP_EOD:
5852 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5853 check_partial(common, FALSE);
5854 return cc;
5855
5856 case OP_DOLL:
5857 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5858 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5859 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5860
5861 if (!common->endonly)
5862 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
5863 else
5864 {
5865 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5866 check_partial(common, FALSE);
5867 }
5868 return cc;
5869
5870 case OP_DOLLM:
5871 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5872 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5873 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5874 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5875 check_partial(common, FALSE);
5876 jump[0] = JUMP(SLJIT_JUMP);
5877 JUMPHERE(jump[1]);
5878
5879 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5880 {
5881 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5883 if (common->mode == JIT_COMPILE)
5884 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5885 else
5886 {
5887 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5888 /* STR_PTR = STR_END - IN_UCHARS(1) */
5889 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5890 check_partial(common, TRUE);
5891 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5892 JUMPHERE(jump[1]);
5893 }
5894
5895 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5896 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5897 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5898 }
5899 else
5900 {
5901 peek_char(common, common->nlmax);
5902 check_newlinechar(common, common->nltype, backtracks, FALSE);
5903 }
5904 JUMPHERE(jump[0]);
5905 return cc;
5906
5907 case OP_CIRC:
5908 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5909 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5910 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5911 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5912 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5913 return cc;
5914
5915 case OP_CIRCM:
5916 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5918 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5919 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5920 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5921 jump[0] = JUMP(SLJIT_JUMP);
5922 JUMPHERE(jump[1]);
5923
5924 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5925 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5926 {
5927 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5928 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5929 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5930 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5931 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5932 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5933 }
5934 else
5935 {
5936 skip_char_back(common);
5937 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5938 check_newlinechar(common, common->nltype, backtracks, FALSE);
5939 }
5940 JUMPHERE(jump[0]);
5941 return cc;
5942
5943 case OP_REVERSE:
5944 length = GET(cc, 0);
5945 if (length == 0)
5946 return cc + LINK_SIZE;
5947 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5948 #ifdef SUPPORT_UTF
5949 if (common->utf)
5950 {
5951 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5953 label = LABEL();
5954 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5955 skip_char_back(common);
5956 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5957 JUMPTO(SLJIT_NOT_ZERO, label);
5958 }
5959 else
5960 #endif
5961 {
5962 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5963 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5964 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5965 }
5966 check_start_used_ptr(common);
5967 return cc + LINK_SIZE;
5968 }
5969 SLJIT_ASSERT_STOP();
5970 return cc;
5971 }
5972
5973 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
5974 {
5975 DEFINE_COMPILER;
5976 int length;
5977 unsigned int c, oc, bit;
5978 compare_context context;
5979 struct sljit_jump *jump[3];
5980 jump_list *end_list;
5981 #ifdef SUPPORT_UTF
5982 struct sljit_label *label;
5983 #ifdef SUPPORT_UCP
5984 pcre_uchar propdata[5];
5985 #endif
5986 #endif /* SUPPORT_UTF */
5987
5988 switch(type)
5989 {
5990 case OP_NOT_DIGIT:
5991 case OP_DIGIT:
5992 /* Digits are usually 0-9, so it is worth to optimize them. */
5993 if (check_str_ptr)
5994 detect_partial_match(common, backtracks);
5995 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5996 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5997 read_char7_type(common, type == OP_NOT_DIGIT);
5998 else
5999 #endif
6000 read_char8_type(common, type == OP_NOT_DIGIT);
6001 /* Flip the starting bit in the negative case. */
6002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6003 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6004 return cc;
6005
6006 case OP_NOT_WHITESPACE:
6007 case OP_WHITESPACE:
6008 if (check_str_ptr)
6009 detect_partial_match(common, backtracks);
6010 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6011 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
6012 read_char7_type(common, type == OP_NOT_WHITESPACE);
6013 else
6014 #endif
6015 read_char8_type(common, type == OP_NOT_WHITESPACE);
6016 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6017 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6018 return cc;
6019
6020 case OP_NOT_WORDCHAR:
6021 case OP_WORDCHAR:
6022 if (check_str_ptr)
6023 detect_partial_match(common, backtracks);
6024 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6025 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
6026 read_char7_type(common, type == OP_NOT_WORDCHAR);
6027 else
6028 #endif
6029 read_char8_type(common, type == OP_NOT_WORDCHAR);
6030 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6031 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6032 return cc;
6033
6034 case OP_ANY:
6035 if (check_str_ptr)
6036 detect_partial_match(common, backtracks);
6037 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6038 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6039 {
6040 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6041 end_list = NULL;
6042 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6043 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6044 else
6045 check_str_end(common, &end_list);
6046
6047 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6048 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6049 set_jumps(end_list, LABEL());
6050 JUMPHERE(jump[0]);
6051 }
6052 else
6053 check_newlinechar(common, common->nltype, backtracks, TRUE);
6054 return cc;
6055
6056 case OP_ALLANY:
6057 if (check_str_ptr)
6058 detect_partial_match(common, backtracks);
6059 #ifdef SUPPORT_UTF
6060 if (common->utf)
6061 {
6062 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6063 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6064 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6065 #if defined COMPILE_PCRE8
6066 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6067 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6068 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6069 #elif defined COMPILE_PCRE16
6070 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6071 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6072 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6073 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
6074 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6075 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6076 #endif
6077 JUMPHERE(jump[0]);
6078 #endif /* COMPILE_PCRE[8|16] */
6079 return cc;
6080 }
6081 #endif
6082 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6083 return cc;
6084
6085 case OP_ANYBYTE:
6086 if (check_str_ptr)
6087 detect_partial_match(common, backtracks);
6088 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6089 return cc;
6090
6091 #ifdef SUPPORT_UTF
6092 #ifdef SUPPORT_UCP
6093 case OP_NOTPROP:
6094 case OP_PROP:
6095 propdata[0] = XCL_HASPROP;
6096 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6097 propdata[2] = cc[0];
6098 propdata[3] = cc[1];
6099 propdata[4] = XCL_END;
6100 if (check_str_ptr)
6101 detect_partial_match(common, backtracks);
6102 compile_xclass_matchingpath(common, propdata, backtracks);
6103 return cc + 2;
6104 #endif
6105 #endif
6106
6107 case OP_ANYNL:
6108 if (check_str_ptr)
6109 detect_partial_match(common, backtracks);
6110 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6111 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6112 /* We don't need to handle soft partial matching case. */
6113 end_list = NULL;
6114 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6115 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6116 else
6117 check_str_end(common, &end_list);
6118 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6119 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6120 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6121 jump[2] = JUMP(SLJIT_JUMP);
6122 JUMPHERE(jump[0]);
6123 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6124 set_jumps(end_list, LABEL());
6125 JUMPHERE(jump[1]);
6126 JUMPHERE(jump[2]);
6127 return cc;
6128
6129 case OP_NOT_HSPACE:
6130 case OP_HSPACE:
6131 if (check_str_ptr)
6132 detect_partial_match(common, backtracks);
6133 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6134 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6135 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6136 return cc;
6137
6138 case OP_NOT_VSPACE:
6139 case OP_VSPACE:
6140 if (check_str_ptr)
6141 detect_partial_match(common, backtracks);
6142 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6143 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6144 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6145 return cc;
6146
6147 #ifdef SUPPORT_UCP
6148 case OP_EXTUNI:
6149 if (check_str_ptr)
6150 detect_partial_match(common, backtracks);
6151 read_char(common);
6152 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6154 /* Optimize register allocation: use a real register. */
6155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6156 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6157
6158 label = LABEL();
6159 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6160 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6161 read_char(common);
6162 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6163 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6164 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6165
6166 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6167 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6168 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6169 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6170 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6171 JUMPTO(SLJIT_NOT_ZERO, label);
6172
6173 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6174 JUMPHERE(jump[0]);
6175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6176
6177 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6178 {
6179 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6180 /* Since we successfully read a char above, partial matching must occure. */
6181 check_partial(common, TRUE);
6182 JUMPHERE(jump[0]);
6183 }
6184 return cc;
6185 #endif
6186
6187 case OP_CHAR:
6188 case OP_CHARI:
6189 length = 1;
6190 #ifdef SUPPORT_UTF
6191 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6192 #endif
6193 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6194 {
6195 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6196 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6197
6198 context.length = IN_UCHARS(length);
6199 context.sourcereg = -1;
6200 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6201 context.ucharptr = 0;
6202 #endif
6203 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6204 }
6205
6206 if (check_str_ptr)
6207 detect_partial_match(common, backtracks);
6208 #ifdef SUPPORT_UTF
6209 if (common->utf)
6210 {
6211 GETCHAR(c, cc);
6212 }
6213 else
6214 #endif
6215 c = *cc;
6216
6217 if (type == OP_CHAR || !char_has_othercase(common, cc))
6218 {
6219 read_char_range(common, c, c, FALSE);
6220 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6221 return cc + length;
6222 }
6223 oc = char_othercase(common, c);
6224 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6225 bit = c ^ oc;
6226 if (is_powerof2(bit))
6227 {
6228 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6229 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6230 return cc + length;
6231 }
6232 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6233 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6234 JUMPHERE(jump[0]);
6235 return cc + length;
6236
6237 case OP_NOT:
6238 case OP_NOTI:
6239 if (check_str_ptr)
6240 detect_partial_match(common, backtracks);
6241 length = 1;
6242 #ifdef SUPPORT_UTF
6243 if (common->utf)
6244 {
6245 #ifdef COMPILE_PCRE8
6246 c = *cc;
6247 if (c < 128)
6248 {
6249 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6250 if (type == OP_NOT || !char_has_othercase(common, cc))
6251 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6252 else
6253 {
6254 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6255 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6256 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6257 }
6258 /* Skip the variable-length character. */
6259 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6260 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6261 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6262 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6263 JUMPHERE(jump[0]);
6264 return cc + 1;
6265 }
6266 else
6267 #endif /* COMPILE_PCRE8 */
6268 {
6269 GETCHARLEN(c, cc, length);
6270 }
6271 }
6272 else
6273 #endif /* SUPPORT_UTF */
6274 c = *cc;
6275
6276 if (type == OP_NOT || !char_has_othercase(common, cc))
6277 {
6278 read_char_range(common, c, c, TRUE);
6279 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6280 }
6281 else
6282 {
6283 oc = char_othercase(common, c);
6284 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6285 bit = c ^ oc;
6286 if (is_powerof2(bit))
6287 {
6288 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6289 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6290 }
6291 else
6292 {
6293 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6294 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6295 }
6296 }
6297 return cc + length;
6298
6299 case OP_CLASS:
6300 case OP_NCLASS:
6301 if (check_str_ptr)
6302 detect_partial_match(common, backtracks);
6303
6304 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6305 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
6306 read_char_range(common, 0, bit, type == OP_NCLASS);
6307 #else
6308 read_char_range(common, 0, 255, type == OP_NCLASS);
6309 #endif
6310
6311 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
6312 return cc + 32 / sizeof(pcre_uchar);
6313
6314 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6315 jump[0] = NULL;
6316 if (common->utf)
6317 {
6318 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6319 if (type == OP_CLASS)
6320 {
6321 add_jump(compiler, backtracks, jump[0]);
6322 jump[0] = NULL;
6323 }
6324 }
6325 #elif !defined COMPILE_PCRE8
6326 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6327 if (type == OP_CLASS)
6328 {
6329 add_jump(compiler, backtracks, jump[0]);
6330 jump[0] = NULL;
6331 }
6332 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6333
6334 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6335 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6336 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6337 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6338 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6339 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6340
6341 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6342 if (jump[0] != NULL)
6343 JUMPHERE(jump[0]);
6344 #endif
6345 return cc + 32 / sizeof(pcre_uchar);
6346
6347 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6348 case OP_XCLASS:
6349 if (check_str_ptr)
6350 detect_partial_match(common, backtracks);
6351 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6352 return cc + GET(cc, 0) - 1;
6353 #endif
6354 }
6355 SLJIT_ASSERT_STOP();
6356 return cc;
6357 }
6358
6359 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6360 {
6361 /* This function consumes at least one input character. */
6362 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6363 DEFINE_COMPILER;
6364 pcre_uchar *ccbegin = cc;
6365 compare_context context;
6366 int size;
6367
6368 context.length = 0;
6369 do
6370 {
6371 if (cc >= ccend)
6372 break;
6373
6374 if (*cc == OP_CHAR)
6375 {
6376 size = 1;
6377 #ifdef SUPPORT_UTF
6378 if (common->utf && HAS_EXTRALEN(cc[1]))
6379 size += GET_EXTRALEN(cc[1]);
6380 #endif
6381 }
6382 else if (*cc == OP_CHARI)
6383 {
6384 size = 1;
6385 #ifdef SUPPORT_UTF
6386 if (common->utf)
6387 {
6388 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6389 size = 0;
6390 else if (HAS_EXTRALEN(cc[1]))
6391 size += GET_EXTRALEN(cc[1]);
6392 }
6393 else
6394 #endif
6395 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6396 size = 0;
6397 }
6398 else
6399 size = 0;
6400
6401 cc += 1 + size;
6402 context.length += IN_UCHARS(size);
6403 }
6404 while (size > 0 && context.length <= 128);
6405
6406 cc = ccbegin;
6407 if (context.length > 0)
6408 {
6409 /* We have a fixed-length byte sequence. */
6410 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6411 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6412
6413 context.sourcereg = -1;
6414 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6415 context.ucharptr = 0;
6416 #endif
6417 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6418 return cc;
6419 }
6420
6421 /* A non-fixed length character will be checked if length == 0. */
6422 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6423 }
6424
6425 /* Forward declarations. compile_matchingpath() is called by
compile_recurse_matchingpath() below. */
6426 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6427 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6428
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory,
zero-fills it, links it in front of parent->top (the old top is stored in
its `prev` field) and stores `ccstart` in its `cc` field. The expansion site
must have `compiler`, `backtrack` and `parent` in scope. If the compiler
reports an error after the allocation, the enclosing function returns
`error`. */
6429 #define PUSH_BACKTRACK(size, ccstart, error) \
6430 do \
6431 { \
6432 backtrack = sljit_alloc_memory(compiler, (size)); \
6433 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6434 return error; \
6435 memset(backtrack, 0, size); \
6436 backtrack->prev = parent->top; \
6437 backtrack->cc = (ccstart); \
6438 parent->top = backtrack; \
6439 } \
6440 while (0)
6441
/* Same as PUSH_BACKTRACK, for use in functions returning void: allocates a
zero-filled backtrack record of `size` bytes, stores `ccstart` in it and makes
it the new parent->top. On a compiler error the enclosing function simply
returns. Needs `compiler`, `backtrack` and `parent` in scope. */
6442 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
6443 do \
6444 { \
6445 backtrack = sljit_alloc_memory(compiler, (size)); \
6446 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6447 return; \
6448 memset(backtrack, 0, size); \
6449 backtrack->prev = parent->top; \
6450 backtrack->cc = (ccstart); \
6451 parent->top = backtrack; \
6452 } \
6453 while (0)
6454
/* Casts the local `backtrack` pointer to a pointer to the given backtrack
structure type, e.g. BACKTRACK_AS(ref_iterator_backtrack)->matchingpath. */
6455 #define BACKTRACK_AS(type) ((type *)backtrack)
6456
6457 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6458 {
6459 /* The OVECTOR offset goes to TMP2. */
6460 DEFINE_COMPILER;
6461 int count = GET2(cc, 1 + IMM2_SIZE);
6462 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6463 unsigned int offset;
6464 jump_list *found = NULL;
6465
6466 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6467
6468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6469
6470 count--;
6471 while (count-- > 0)
6472 {
6473 offset = GET2(slot, 0) << 1;
6474 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6475 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6476 slot += common->name_entry_size;
6477 }
6478
6479 offset = GET2(slot, 0) << 1;
6480 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6481 if (backtracks != NULL && !common->jscript_compat)
6482 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6483
6484 set_jumps(found, LABEL());
6485 }
6486
6487 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6488 {
6489 DEFINE_COMPILER;
6490 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6491 int offset = 0;
6492 struct sljit_jump *jump = NULL;
6493 struct sljit_jump *partial;
6494 struct sljit_jump *nopartial;
6495
6496 if (ref)
6497 {
6498 offset = GET2(cc, 1) << 1;
6499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6500 /* OVECTOR(1) contains the "string begin - 1" constant. */
6501 if (withchecks && !common->jscript_compat)
6502 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6503 }
6504 else
6505 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6506
6507 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6508 if (common->utf && *cc == OP_REFI)
6509 {
6510 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6511 if (ref)
6512 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6513 else
6514 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6515
6516 if (withchecks)
6517 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6518
6519 /* Needed to save important temporary registers. */
6520 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6521 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6522 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6523 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6524 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6525 if (common->mode == JIT_COMPILE)
6526 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6527 else
6528 {
6529 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6530 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6531 check_partial(common, FALSE);
6532 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6533 JUMPHERE(nopartial);
6534 }
6535 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6536 }
6537 else
6538 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6539 {
6540 if (ref)
6541 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6542 else
6543 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6544
6545 if (withchecks)
6546 jump = JUMP(SLJIT_ZERO);
6547
6548 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6549 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6550 if (common->mode == JIT_COMPILE)
6551 add_jump(compiler, backtracks, partial);
6552
6553 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6554 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6555
6556 if (common->mode != JIT_COMPILE)
6557 {
6558 nopartial = JUMP(SLJIT_JUMP);
6559 JUMPHERE(partial);
6560 /* TMP2 -= STR_END - STR_PTR */
6561 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6562 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6563 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6564 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6565 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6566 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6567 JUMPHERE(partial);
6568 check_partial(common, FALSE);
6569 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6570 JUMPHERE(nopartial);
6571 }
6572 }
6573
6574 if (jump != NULL)
6575 {
6576 if (emptyfail)
6577 add_jump(compiler, backtracks, jump);
6578 else
6579 JUMPHERE(jump);
6580 }
6581 }
6582
6583 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6584 {
6585 DEFINE_COMPILER;
6586 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6587 backtrack_common *backtrack;
6588 pcre_uchar type;
6589 int offset = 0;
6590 struct sljit_label *label;
6591 struct sljit_jump *zerolength;
6592 struct sljit_jump *jump = NULL;
6593 pcre_uchar *ccbegin = cc;
6594 int min = 0, max = 0;
6595 BOOL minimize;
6596
6597 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
6598
6599 if (ref)
6600 offset = GET2(cc, 1) << 1;
6601 else
6602 cc += IMM2_SIZE;
6603 type = cc[1 + IMM2_SIZE];
6604
6605 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6606 minimize = (type & 0x1) != 0;
6607 switch(type)
6608 {
6609 case OP_CRSTAR:
6610 case OP_CRMINSTAR:
6611 min = 0;
6612 max = 0;
6613 cc += 1 + IMM2_SIZE + 1;
6614 break;
6615 case OP_CRPLUS:
6616 case OP_CRMINPLUS:
6617 min = 1;
6618 max = 0;
6619 cc += 1 + IMM2_SIZE + 1;
6620 break;
6621 case OP_CRQUERY:
6622 case OP_CRMINQUERY:
6623 min = 0;
6624 max = 1;
6625 cc += 1 + IMM2_SIZE + 1;
6626 break;
6627 case OP_CRRANGE:
6628 case OP_CRMINRANGE:
6629 min = GET2(cc, 1 + IMM2_SIZE + 1);
6630 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6631 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6632 break;
6633 default:
6634 SLJIT_ASSERT_STOP();
6635 break;
6636 }
6637
6638 if (!minimize)
6639 {
6640 if (min == 0)
6641 {
6642 allocate_stack(common, 2);
6643 if (ref)
6644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6645 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6647 /* Temporary release of STR_PTR. */
6648 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6649 /* Handles both invalid and empty cases. Since the minimum repeat,
6650 is zero the invalid case is basically the same as an empty case. */
6651 if (ref)
6652 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6653 else
6654 {
6655 compile_dnref_search(common, ccbegin, NULL);
6656 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6657 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6658 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6659 }
6660 /* Restore if not zero length. */
6661 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6662 }
6663 else
6664 {
6665 allocate_stack(common, 1);
6666 if (ref)
6667 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6668 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6669 if (ref)
6670 {
6671 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6672 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6673 }
6674 else
6675 {
6676 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6677 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6679 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6680 }
6681 }
6682
6683 if (min > 1 || max > 1)
6684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6685
6686 label = LABEL();
6687 if (!ref)
6688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6689 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6690
6691 if (min > 1 || max > 1)
6692 {
6693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6694 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6696 if (min > 1)
6697 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6698 if (max > 1)
6699 {
6700 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6701 allocate_stack(common, 1);
6702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6703 JUMPTO(SLJIT_JUMP, label);
6704 JUMPHERE(jump);
6705 }
6706 }
6707
6708 if (max == 0)
6709 {
6710 /* Includes min > 1 case as well. */
6711 allocate_stack(common, 1);
6712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6713 JUMPTO(SLJIT_JUMP, label);
6714 }
6715
6716 JUMPHERE(zerolength);
6717 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6718
6719 count_match(common);
6720 return cc;
6721 }
6722
6723 allocate_stack(common, ref ? 2 : 3);
6724 if (ref)
6725 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6727 if (type != OP_CRMINSTAR)
6728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6729
6730 if (min == 0)
6731 {
6732 /* Handles both invalid and empty cases. Since the minimum repeat,
6733 is zero the invalid case is basically the same as an empty case. */
6734 if (ref)
6735 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6736 else
6737 {
6738 compile_dnref_search(common, ccbegin, NULL);
6739 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6741 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6742 }
6743 /* Length is non-zero, we can match real repeats. */
6744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6745 jump = JUMP(SLJIT_JUMP);
6746 }
6747 else
6748 {
6749 if (ref)
6750 {
6751 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6752 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6753 }
6754 else
6755 {
6756 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6757 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6758 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6759 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6760 }
6761 }
6762
6763 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6764 if (max > 0)
6765 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6766
6767 if (!ref)
6768 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6769 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6771
6772 if (min > 1)
6773 {
6774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6775 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6776 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6777 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
6778 }
6779 else if (max > 0)
6780 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6781
6782 if (jump != NULL)
6783 JUMPHERE(jump);
6784 JUMPHERE(zerolength);
6785
6786 count_match(common);
6787 return cc;
6788 }
6789
6790 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6791 {
6792 DEFINE_COMPILER;
6793 backtrack_common *backtrack;
6794 recurse_entry *entry = common->entries;
6795 recurse_entry *prev = NULL;
6796 sljit_sw start = GET(cc, 1);
6797 pcre_uchar *start_cc;
6798 BOOL needs_control_head;
6799
6800 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6801
6802 /* Inlining simple patterns. */
6803 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6804 {
6805 start_cc = common->start + start;
6806 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6807 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;