/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1629 - (show annotations)
Tue Feb 9 14:22:55 2016 UTC (3 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 345457 byte(s)
Error occurred while calculating annotation data.
Add SSE2 support for first character search.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
Short summary of the backtracking mechanism employed by the JIT code generator:
92
The code generator follows the recursive nature of Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) or
vertical (sub-expression) (see struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the generated code: functions of type jit_function
receive a pointer to this structure. The members are ordered so that all
pointers come first, followed by the integer members and finally the
byte-sized flags. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  pcre_uint32 limit_match;
  int real_offset_count;
  int offset_count;
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
179
/* Bookkeeping for compiled code. The three arrays have one slot per JIT
compile mode (JIT_NUMBER_OF_COMPILE_MODES entries each): a code pointer,
the head of the read-only data belonging to it, and its size. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* User supplied callback and the opaque pointer stored with it. */
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  pcre_uint32 limit_match;
} executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194
/* Node of a singly linked list; each node pairs a jump (start) with a
label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
/* Node of a singly linked list; each node pairs a label with a pointer to
an sljit_uw. NOTE(review): presumably the address of the label is written
through update_addr once it is known -- confirm at the point of use. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  struct label_addr_list *next;
} label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably stored in the
framesize members of the backtrack structures, whose comments state that a
value less than 0 means no frame is needed -- confirm at the call sites. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Type tags for control entries. NOTE(review): the code that reads these
values is outside this part of the file; presumably they tag entries of
the control verb chain (see control_head_ptr) -- confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
/* Function pointer type declared with SLJIT_CALL: takes a pointer to a
jit_arguments block and returns an int. NOTE(review): presumably the type
of the generated matching function -- confirm at the call site. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. It must be the first member
of every structure derived from it, so that a pointer to the derived
structure can also be used as a pointer to backtrack_common. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
233
/* Backtrack data for assertions (named after the OP_ASSERT family).
Starts with backtrack_common, as required for all derived structures. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
244
/* Backtrack data for brackets. Starts with backtrack_common, as required
for all derived structures. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. Only one member of the
  union is meaningful at a time; which one depends on the bracket type. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
264
/* Backtrack data for the *POS bracket variants (named after
OP_BRAPOS / OP_CBRAPOS). Starts with backtrack_common, as required for
all derived structures. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
274
/* Backtrack data named after OP_BRAMINZERO: the common part plus one
label on the matching path. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
279
/* Backtrack data for character iterators: the common part plus the label
where the next iteration starts. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} char_iterator_backtrack;
285
/* Backtrack data for reference iterators: the common part plus the label
where the next iteration starts. Same layout as char_iterator_backtrack. */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
291
/* Node of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
301
/* Backtrack data for recursion: the common part plus a flag.
NOTE(review): inlined_pattern presumably tells whether the recursed
pattern was compiled inline rather than called -- confirm where it is set. */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
306
/* Defined as OP_TABLE_LENGTH. NOTE(review): presumably a pseudo opcode
placed just past the real opcode values -- confirm against the opcode
table. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack data for a then trap. Starts with backtrack_common, as
required for all derived structures. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
321
322 #define MAX_RANGE_SIZE 4
323
/* State shared by the code generator functions; they receive it through a
'common' pointer. The sljit_si members whose names end in _ptr or _start
hold offsets into the local space (see OVECTOR() and OVECTOR_PRIV()),
not C pointers. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  pcre_uchar *start;
  /* Maps private data offset to each opcode. Indexed by the distance of
  the opcode from 'start' (see PRIVATE_DATA()). */
  sljit_si *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. Indexed by the
  capturing bracket number. */
  sljit_ub *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_ub *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_si cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_si ovector_start;
  /* Points to the starting character of the current match. */
  sljit_si start_ptr;
  /* Last known position of the requested byte. */
  sljit_si req_char_ptr;
  /* Head of the last recursion. */
  sljit_si recursive_head_ptr;
  /* First inspected character for partial matching.
  (Needed for avoiding zero length partial matches.) */
  sljit_si start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_si hit_start;
  /* End pointer of the first line. */
  sljit_si first_line_end;
  /* Points to the marked string. */
  sljit_si mark_ptr;
  /* Recursive control verb management chain. */
  sljit_si control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_si capture_last_ptr;

  /* Flipped and lower case tables. */
  const sljit_ub *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* NOTE(review): this comment used to read "TRUE, when minlength is
  greater than 0", which contradicts the member name. check_opcode_types
  sets the flag to TRUE when OP_SET_SOM is found. Presumably it is TRUE
  when the match might be empty (minlength is 0) -- confirm against the
  code that initialises it. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  sljit_ui nlmax;
  sljit_ui nlmin;
  int newline;
  int bsr_nltype;
  sljit_ui bsr_nlmax;
  sljit_ui bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  pcre_uchar *name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL jscript_compat;
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
  jump_list *getucd;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
} compiler_common;
436
/* For byte_sequence_compare. */

/* When SLJIT_UNALIGNED is set, the structure also carries two unions
(c and oc) that let up to four bytes' worth of characters be viewed as a
single int, as a 16 bit value, or as individual characters. The number of
characters that fit depends on the character width being compiled.
NOTE(review): presumably c holds the characters to compare and oc their
other-case counterparts -- confirm in byte_sequence_compare. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
470
471 /* Undefine sljit macros. */
472 #undef CMP
473
/* Used for accessing the elements of the stack. STACK(i) is a negative
byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is
-2 * sizeof(sljit_sw), and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
476
/* Symbolic names for the sljit registers used by the code generator
(SLJIT_Rn and SLJIT_Sn; see sljitLir.h for the semantics of the two
register classes). Each name is bound to a distinct register. */
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
487
/* Local space layout. All values are byte offsets; the first five
sljit_sw sized words are fixed, everything after them is placed through
the offsets stored in compiler_common. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. These macros require a variable named 'common' in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset of the opcode at cc; indexed by the distance of cc
from common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
505
/* Select the sljit move opcodes that match the character width of the
library being compiled (COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32).
Compilation fails if none of the three is defined. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif
518
/* Shortcuts. DEFINE_COMPILER declares a local variable named 'compiler'
from common->compiler, so it needs a variable named 'common' in scope.
All the other macros below expand to sljit calls that use that 'compiler'
variable, so a function must use DEFINE_COMPILER (or declare 'compiler'
itself) before using them. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
544
545 #define READ_CHAR_MAX 0x7fffffff
546
547 static pcre_uchar *bracketend(pcre_uchar *cc)
548 {
549 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
550 do cc += GET(cc, 1); while (*cc == OP_ALT);
551 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
552 cc += 1 + LINK_SIZE;
553 return cc;
554 }
555
556 static int no_alternatives(pcre_uchar *cc)
557 {
558 int count = 0;
559 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
560 do
561 {
562 cc += GET(cc, 1);
563 count++;
564 }
565 while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 return count;
568 }
569
/* Number of set bits in each 4 bit value: ones_in_half_byte[v] is the
population count of v for 0 <= v <= 15. The table is a read-only lookup
table, so it is declared const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
574
/* Functions which might need modification for every newly supported opcode:
576 next_opcode
577 check_opcode_types
578 set_private_data_ptrs
579 get_framesize
580 init_frame
581 get_private_data_copy_length
582 copy_private_data
583 compile_matchingpath
584 compile_backtrackingpath
585 */
586
587 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
588 {
589 SLJIT_UNUSED_ARG(common);
590 switch(*cc)
591 {
592 case OP_SOD:
593 case OP_SOM:
594 case OP_SET_SOM:
595 case OP_NOT_WORD_BOUNDARY:
596 case OP_WORD_BOUNDARY:
597 case OP_NOT_DIGIT:
598 case OP_DIGIT:
599 case OP_NOT_WHITESPACE:
600 case OP_WHITESPACE:
601 case OP_NOT_WORDCHAR:
602 case OP_WORDCHAR:
603 case OP_ANY:
604 case OP_ALLANY:
605 case OP_NOTPROP:
606 case OP_PROP:
607 case OP_ANYNL:
608 case OP_NOT_HSPACE:
609 case OP_HSPACE:
610 case OP_NOT_VSPACE:
611 case OP_VSPACE:
612 case OP_EXTUNI:
613 case OP_EODN:
614 case OP_EOD:
615 case OP_CIRC:
616 case OP_CIRCM:
617 case OP_DOLL:
618 case OP_DOLLM:
619 case OP_CRSTAR:
620 case OP_CRMINSTAR:
621 case OP_CRPLUS:
622 case OP_CRMINPLUS:
623 case OP_CRQUERY:
624 case OP_CRMINQUERY:
625 case OP_CRRANGE:
626 case OP_CRMINRANGE:
627 case OP_CRPOSSTAR:
628 case OP_CRPOSPLUS:
629 case OP_CRPOSQUERY:
630 case OP_CRPOSRANGE:
631 case OP_CLASS:
632 case OP_NCLASS:
633 case OP_REF:
634 case OP_REFI:
635 case OP_DNREF:
636 case OP_DNREFI:
637 case OP_RECURSE:
638 case OP_CALLOUT:
639 case OP_ALT:
640 case OP_KET:
641 case OP_KETRMAX:
642 case OP_KETRMIN:
643 case OP_KETRPOS:
644 case OP_REVERSE:
645 case OP_ASSERT:
646 case OP_ASSERT_NOT:
647 case OP_ASSERTBACK:
648 case OP_ASSERTBACK_NOT:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_CBRA:
654 case OP_CBRAPOS:
655 case OP_COND:
656 case OP_SBRA:
657 case OP_SBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 case OP_SCOND:
661 case OP_CREF:
662 case OP_DNCREF:
663 case OP_RREF:
664 case OP_DNRREF:
665 case OP_DEF:
666 case OP_BRAZERO:
667 case OP_BRAMINZERO:
668 case OP_BRAPOSZERO:
669 case OP_PRUNE:
670 case OP_SKIP:
671 case OP_THEN:
672 case OP_COMMIT:
673 case OP_FAIL:
674 case OP_ACCEPT:
675 case OP_ASSERT_ACCEPT:
676 case OP_CLOSE:
677 case OP_SKIPZERO:
678 return cc + PRIV(OP_lengths)[*cc];
679
680 case OP_CHAR:
681 case OP_CHARI:
682 case OP_NOT:
683 case OP_NOTI:
684 case OP_STAR:
685 case OP_MINSTAR:
686 case OP_PLUS:
687 case OP_MINPLUS:
688 case OP_QUERY:
689 case OP_MINQUERY:
690 case OP_UPTO:
691 case OP_MINUPTO:
692 case OP_EXACT:
693 case OP_POSSTAR:
694 case OP_POSPLUS:
695 case OP_POSQUERY:
696 case OP_POSUPTO:
697 case OP_STARI:
698 case OP_MINSTARI:
699 case OP_PLUSI:
700 case OP_MINPLUSI:
701 case OP_QUERYI:
702 case OP_MINQUERYI:
703 case OP_UPTOI:
704 case OP_MINUPTOI:
705 case OP_EXACTI:
706 case OP_POSSTARI:
707 case OP_POSPLUSI:
708 case OP_POSQUERYI:
709 case OP_POSUPTOI:
710 case OP_NOTSTAR:
711 case OP_NOTMINSTAR:
712 case OP_NOTPLUS:
713 case OP_NOTMINPLUS:
714 case OP_NOTQUERY:
715 case OP_NOTMINQUERY:
716 case OP_NOTUPTO:
717 case OP_NOTMINUPTO:
718 case OP_NOTEXACT:
719 case OP_NOTPOSSTAR:
720 case OP_NOTPOSPLUS:
721 case OP_NOTPOSQUERY:
722 case OP_NOTPOSUPTO:
723 case OP_NOTSTARI:
724 case OP_NOTMINSTARI:
725 case OP_NOTPLUSI:
726 case OP_NOTMINPLUSI:
727 case OP_NOTQUERYI:
728 case OP_NOTMINQUERYI:
729 case OP_NOTUPTOI:
730 case OP_NOTMINUPTOI:
731 case OP_NOTEXACTI:
732 case OP_NOTPOSSTARI:
733 case OP_NOTPOSPLUSI:
734 case OP_NOTPOSQUERYI:
735 case OP_NOTPOSUPTOI:
736 cc += PRIV(OP_lengths)[*cc];
737 #ifdef SUPPORT_UTF
738 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
739 #endif
740 return cc;
741
742 /* Special cases. */
743 case OP_TYPESTAR:
744 case OP_TYPEMINSTAR:
745 case OP_TYPEPLUS:
746 case OP_TYPEMINPLUS:
747 case OP_TYPEQUERY:
748 case OP_TYPEMINQUERY:
749 case OP_TYPEUPTO:
750 case OP_TYPEMINUPTO:
751 case OP_TYPEEXACT:
752 case OP_TYPEPOSSTAR:
753 case OP_TYPEPOSPLUS:
754 case OP_TYPEPOSQUERY:
755 case OP_TYPEPOSUPTO:
756 return cc + PRIV(OP_lengths)[*cc] - 1;
757
758 case OP_ANYBYTE:
759 #ifdef SUPPORT_UTF
760 if (common->utf) return NULL;
761 #endif
762 return cc + 1;
763
764 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
765 case OP_XCLASS:
766 return cc + GET(cc, 1);
767 #endif
768
769 case OP_MARK:
770 case OP_PRUNE_ARG:
771 case OP_SKIP_ARG:
772 case OP_THEN_ARG:
773 return cc + 1 + 2 + cc[1];
774
775 default:
776 /* All opcodes are supported now! */
777 SLJIT_ASSERT_STOP();
778 return NULL;
779 }
780 }
781
782 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
783 {
784 int count;
785 pcre_uchar *slot;
786
787 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
788 while (cc < ccend)
789 {
790 switch(*cc)
791 {
792 case OP_SET_SOM:
793 common->has_set_som = TRUE;
794 common->might_be_empty = TRUE;
795 cc += 1;
796 break;
797
798 case OP_REF:
799 case OP_REFI:
800 common->optimized_cbracket[GET2(cc, 1)] = 0;
801 cc += 1 + IMM2_SIZE;
802 break;
803
804 case OP_CBRAPOS:
805 case OP_SCBRAPOS:
806 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
807 cc += 1 + LINK_SIZE + IMM2_SIZE;
808 break;
809
810 case OP_COND:
811 case OP_SCOND:
812 /* Only AUTO_CALLOUT can insert this opcode. We do
813 not intend to support this case. */
814 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
815 return FALSE;
816 cc += 1 + LINK_SIZE;
817 break;
818
819 case OP_CREF:
820 common->optimized_cbracket[GET2(cc, 1)] = 0;
821 cc += 1 + IMM2_SIZE;
822 break;
823
824 case OP_DNREF:
825 case OP_DNREFI:
826 case OP_DNCREF:
827 count = GET2(cc, 1 + IMM2_SIZE);
828 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
829 while (count-- > 0)
830 {
831 common->optimized_cbracket[GET2(slot, 0)] = 0;
832 slot += common->name_entry_size;
833 }
834 cc += 1 + 2 * IMM2_SIZE;
835 break;
836
837 case OP_RECURSE:
838 /* Set its value only once. */
839 if (common->recursive_head_ptr == 0)
840 {
841 common->recursive_head_ptr = common->ovector_start;
842 common->ovector_start += sizeof(sljit_sw);
843 }
844 cc += 1 + LINK_SIZE;
845 break;
846
847 case OP_CALLOUT:
848 if (common->capture_last_ptr == 0)
849 {
850 common->capture_last_ptr = common->ovector_start;
851 common->ovector_start += sizeof(sljit_sw);
852 }
853 cc += 2 + 2 * LINK_SIZE;
854 break;
855
856 case OP_THEN_ARG:
857 common->has_then = TRUE;
858 common->control_head_ptr = 1;
859 /* Fall through. */
860
861 case OP_PRUNE_ARG:
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 cc += 1;
879 break;
880
881 case OP_SKIP_ARG:
882 common->control_head_ptr = 1;
883 common->has_skip_arg = TRUE;
884 cc += 1 + 2 + cc[1];
885 break;
886
887 default:
888 cc = next_opcode(common, cc);
889 if (cc == NULL)
890 return FALSE;
891 break;
892 }
893 }
894 return TRUE;
895 }
896
897 static int get_class_iterator_size(pcre_uchar *cc)
898 {
899 switch(*cc)
900 {
901 case OP_CRSTAR:
902 case OP_CRPLUS:
903 return 2;
904
905 case OP_CRMINSTAR:
906 case OP_CRMINPLUS:
907 case OP_CRQUERY:
908 case OP_CRMINQUERY:
909 return 1;
910
911 case OP_CRRANGE:
912 case OP_CRMINRANGE:
913 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
914 return 0;
915 return 2;
916
917 default:
918 return 0;
919 }
920 }
921
/* Detects a bracket that is followed by verbatim copies of itself in the
byte code and records the repeat in common->private_data_ptrs.

begin must point to a bracket opcode accepted by bracketend(). Returns TRUE
when a repeat has been recorded (by this call or by an earlier one) and
FALSE otherwise.

A repeat is recorded in three consecutive private_data_ptrs slots, starting
at the index just after an OP_KET opcode:
  [0] the distance from the end of that bracket to the end of the copies
      (set_private_data_ptrs adds it to its position to skip the copies),
  [1] the repeat kind: OP_EXACT, OP_UPTO or OP_MINUPTO,
  [2] the repeat count.
NOTE(review): presumably the copies come from the expansion of a counted
repeat by the pattern compiler -- confirm. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
  pcre_uchar *end = bracketend(begin);
  pcre_uchar *next;
  pcre_uchar *next_end;
  pcre_uchar *max_end;
  pcre_uchar type;
  /* Length of one copy, including its closing KET. */
  sljit_sw length = end - begin;
  int min, max, i;

  /* Detect fixed iterations first. Only brackets closed by a plain OP_KET
  are considered. */
  if (end[-(1 + LINK_SIZE)] != OP_KET)
    return FALSE;

  /* Already detected repeat. */
  if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
    return TRUE;

  /* Count the bracket itself plus the identical copies that immediately
  follow it. */
  next = end;
  min = 1;
  while (1)
  {
    if (*next != *begin)
      break;
    next_end = bracketend(next);
    if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    min++;
  }

  /* Exactly two copies are never treated as a repeat. */
  if (min == 2)
    return FALSE;

  /* Look for an optional tail: a chain of nested
  BRAZERO/BRAMINZERO + OP_BRA + copy groups, ended by one
  BRAZERO/BRAMINZERO + copy without the OP_BRA wrapper. */
  max = 0;
  max_end = next;
  if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
    type = *next;
    while (1)
    {
      if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
        break;
      next_end = bracketend(next + 2 + LINK_SIZE);
      if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
        break;
      next = next_end;
      max++;
    }

    if (next[0] == type && next[1] == *begin && max >= 1)
    {
      next_end = bracketend(next + 1);
      if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
        /* Each wrapping OP_BRA opened above must be closed by an OP_KET. */
        for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
          if (*next_end != OP_KET)
            break;

        if (i == max)
        {
          /* Record the optional part at the KET of the last mandatory copy. */
          common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
          common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
          /* +2 the original and the last. */
          common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
          if (min == 1)
            return TRUE;
          /* The last mandatory copy now belongs to the optional repeat, so
          it is removed from the mandatory count and max_end is moved back
          to the start of that copy. */
          min--;
          max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

  /* Record the mandatory copies as an exact repeat at the KET of the
  first bracket. */
  if (min >= 3)
  {
    common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
    common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
    common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
    return TRUE;
  }

  return FALSE;
}
1006
/* Groups of case labels for single character iterators, meant to be placed
inside a switch statement directly before the statements handling the
group. In the part of set_private_data_ptrs visible here, the _1 groups are
given space = 1 and the _2A and _2B groups space = 2; the _2B groups contain
the UPTO style opcodes, and CASE_ITERATOR_PRIVATE_DATA_2B gets a size that
additionally accounts for IMM2_SIZE. */

/* Minimizing star and plus, and both query forms (caseful, caseless and
negated variants). */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Greedy star and plus (caseful, caseless and negated variants). */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Greedy and minimizing upto (caseful, caseless and negated variants). */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* The same three groups for character type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1058
/* Assigns private data slots to the opcodes of the compiled pattern.

Scans the byte code from common->start up to ccend. For every opcode which
gets a slot, the slot offset is stored in common->private_data_ptrs, indexed
by the distance of the opcode from common->start. On entry *private_data_start
holds the first free offset; on return it holds the first free offset after
all assignments made here. The scan is abandoned as soon as the running
offset exceeds SLJIT_MAX_LOCAL_SIZE. */
1059 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1060 {
1061 pcre_uchar *cc = common->start;
1062 pcre_uchar *alternative;
/* While cc is below end, character iterators receive no private slot.
NULL means there is no such restriction at the moment. */
1063 pcre_uchar *end = NULL;
1064 int private_data_ptr = *private_data_start;
1065 int space, size, bracketlen;
/* Cleared for one iteration after OP_BRAZERO / OP_BRAMINZERO / OP_BRAPOSZERO,
so the bracket which follows them is not passed to detect_repeat(). */
1066 BOOL repeat_check = TRUE;
1067
1068 while (cc < ccend)
1069 {
/* space: number of sljit_sw words wanted by an iterator.
size: code units to skip; a negative value is negated first and, in UTF
mode, the extra code units of the last skipped unit are added.
bracketlen: code units occupied by a bracket header. */
1070 space = 0;
1071 size = 0;
1072 bracketlen = 0;
1073 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1074 break;
1075
1076 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1077 {
1078 if (detect_repeat(common, cc))
1079 {
1080 /* These brackets are converted to repeats, so no global
1081 based single character repeat is allowed. */
1082 if (cc >= end)
1083 end = bracketend(cc);
1084 }
1085 }
1086 repeat_check = TRUE;
1087
1088 switch(*cc)
1089 {
1090 case OP_KET:
/* A non-zero entry at cc + 1 acts both as a flag (the ket gets a slot) and as
an additional skip distance. NOTE(review): presumably stored by
detect_repeat(), which is not visible here - confirm. */
1091 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1092 {
1093 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1094 private_data_ptr += sizeof(sljit_sw);
1095 cc += common->private_data_ptrs[cc + 1 - common->start];
1096 }
1097 cc += 1 + LINK_SIZE;
1098 break;
1099
/* Brackets which always get one private word. */
1100 case OP_ASSERT:
1101 case OP_ASSERT_NOT:
1102 case OP_ASSERTBACK:
1103 case OP_ASSERTBACK_NOT:
1104 case OP_ONCE:
1105 case OP_ONCE_NC:
1106 case OP_BRAPOS:
1107 case OP_SBRA:
1108 case OP_SBRAPOS:
1109 case OP_SCOND:
1110 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1111 private_data_ptr += sizeof(sljit_sw);
1112 bracketlen = 1 + LINK_SIZE;
1113 break;
1114
1115 case OP_CBRAPOS:
1116 case OP_SCBRAPOS:
1117 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1118 private_data_ptr += sizeof(sljit_sw);
1119 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1120 break;
1121
1122 case OP_COND:
1123 /* Might be a hidden SCOND. */
1124 alternative = cc + GET(cc, 1);
1125 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1126 {
1127 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1128 private_data_ptr += sizeof(sljit_sw);
1129 }
1130 bracketlen = 1 + LINK_SIZE;
1131 break;
1132
/* The remaining brackets get no slot, only their header is skipped. */
1133 case OP_BRA:
1134 bracketlen = 1 + LINK_SIZE;
1135 break;
1136
1137 case OP_CBRA:
1138 case OP_SCBRA:
1139 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1140 break;
1141
1142 case OP_BRAZERO:
1143 case OP_BRAMINZERO:
1144 case OP_BRAPOSZERO:
1145 repeat_check = FALSE;
1146 size = 1;
1147 break;
1148
1149 CASE_ITERATOR_PRIVATE_DATA_1
1150 space = 1;
1151 size = -2;
1152 break;
1153
1154 CASE_ITERATOR_PRIVATE_DATA_2A
1155 space = 2;
1156 size = -2;
1157 break;
1158
1159 CASE_ITERATOR_PRIVATE_DATA_2B
1160 space = 2;
1161 size = -(2 + IMM2_SIZE);
1162 break;
1163
1164 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1165 space = 1;
1166 size = 1;
1167 break;
1168
1169 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1170 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1171 space = 2;
1172 size = 1;
1173 break;
1174
1175 case OP_TYPEUPTO:
1176 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1177 space = 2;
1178 size = 1 + IMM2_SIZE;
1179 break;
1180
1181 case OP_TYPEMINUPTO:
1182 space = 2;
1183 size = 1 + IMM2_SIZE;
1184 break;
1185
1186 case OP_CLASS:
1187 case OP_NCLASS:
/* size is 0 here, so this sets it to the opcode plus the 32 byte class
bitmap; the iterator, if any, follows the class data. */
1188 size += 1 + 32 / sizeof(pcre_uchar);
1189 space = get_class_iterator_size(cc + size);
1190 break;
1191
1192 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1193 case OP_XCLASS:
1194 size = GET(cc, 1);
1195 space = get_class_iterator_size(cc + size);
1196 break;
1197 #endif
1198
1199 default:
1200 cc = next_opcode(common, cc);
1201 SLJIT_ASSERT(cc != NULL);
1202 break;
1203 }
1204
1205 /* Character iterators, which are not inside a repeated bracket,
1206 gets a private slot instead of allocating it on the stack. */
1207 if (space > 0 && cc >= end)
1208 {
1209 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1210 private_data_ptr += sizeof(sljit_sw) * space;
1211 }
1212
1213 if (size != 0)
1214 {
1215 if (size < 0)
1216 {
1217 cc += -size;
1218 #ifdef SUPPORT_UTF
1219 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1220 #endif
1221 }
1222 else
1223 cc += size;
1224 }
1225
1226 if (bracketlen > 0)
1227 {
/* Outside of any restricted region: the region is extended to the end of
this bracket, unless the bracket is closed by a plain OP_KET, in which case
the restriction is dropped. */
1228 if (cc >= end)
1229 {
1230 end = bracketend(cc);
1231 if (end[-1 - LINK_SIZE] == OP_KET)
1232 end = NULL;
1233 }
1234 cc += bracketlen;
1235 }
1236 }
1237 *private_data_start = private_data_ptr;
1238 }
1239
/* Computes the size of the frame needed for the byte code in [cc, ccend).

When ccend is NULL, cc must point to a bracket and the body of that bracket is
scanned. The cases of the scan correspond to the cases of init_frame(): two
words for the OVECTOR(0), mark and last-capture entries (each counted at most
once) and three words for every capturing bracket. A positive result is the
word count plus one (presumably for the terminating zero word stored by
init_frame()). Otherwise no_frame is returned if any scanned opcode set
stack_restore, and no_stack if none did.

*needs_control_head is set to TRUE when OP_MARK, OP_PRUNE_ARG, OP_THEN_ARG or
OP_THEN is found while common->control_head_ptr is in use (or unconditionally
when DEBUG_FORCE_CONTROL_HEAD is enabled). */
1240 /* Returns with a frame_types (always < 0) if no need for frame. */
1241 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1242 {
1243 int length = 0;
/* Words counted up front for a non-recursive OP_CBRAPOS / OP_SCBRAPOS; if
nothing else is added to length, no frame is reported. */
1244 int possessive = 0;
1245 BOOL stack_restore = FALSE;
/* With recursive set, the OVECTOR(0) and mark entries are never counted. */
1246 BOOL setsom_found = recursive;
1247 BOOL setmark_found = recursive;
1248 /* The last capture is a local variable even for recursions. */
1249 BOOL capture_last_found = FALSE;
1250
1251 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1252 SLJIT_ASSERT(common->control_head_ptr != 0);
1253 *needs_control_head = TRUE;
1254 #else
1255 *needs_control_head = FALSE;
1256 #endif
1257
1258 if (ccend == NULL)
1259 {
1260 ccend = bracketend(cc) - (1 + LINK_SIZE);
1261 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1262 {
1263 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1264 /* This is correct regardless of common->capture_last_ptr. */
1265 capture_last_found = TRUE;
1266 }
1267 cc = next_opcode(common, cc);
1268 }
1269
1270 SLJIT_ASSERT(cc != NULL);
1271 while (cc < ccend)
1272 switch(*cc)
1273 {
1274 case OP_SET_SOM:
1275 SLJIT_ASSERT(common->has_set_som);
1276 stack_restore = TRUE;
1277 if (!setsom_found)
1278 {
1279 length += 2;
1280 setsom_found = TRUE;
1281 }
1282 cc += 1;
1283 break;
1284
1285 case OP_MARK:
1286 case OP_PRUNE_ARG:
1287 case OP_THEN_ARG:
1288 SLJIT_ASSERT(common->mark_ptr != 0);
1289 stack_restore = TRUE;
1290 if (!setmark_found)
1291 {
1292 length += 2;
1293 setmark_found = TRUE;
1294 }
1295 if (common->control_head_ptr != 0)
1296 *needs_control_head = TRUE;
/* Skips the opcode, two further code units and the cc[1] code units of the
argument. */
1297 cc += 1 + 2 + cc[1];
1298 break;
1299
1300 case OP_RECURSE:
/* A recursion may change any of the three tracked values, so every one
which is in use and not counted yet is added. */
1301 stack_restore = TRUE;
1302 if (common->has_set_som && !setsom_found)
1303 {
1304 length += 2;
1305 setsom_found = TRUE;
1306 }
1307 if (common->mark_ptr != 0 && !setmark_found)
1308 {
1309 length += 2;
1310 setmark_found = TRUE;
1311 }
1312 if (common->capture_last_ptr != 0 && !capture_last_found)
1313 {
1314 length += 2;
1315 capture_last_found = TRUE;
1316 }
1317 cc += 1 + LINK_SIZE;
1318 break;
1319
1320 case OP_CBRA:
1321 case OP_CBRAPOS:
1322 case OP_SCBRA:
1323 case OP_SCBRAPOS:
1324 stack_restore = TRUE;
1325 if (common->capture_last_ptr != 0 && !capture_last_found)
1326 {
1327 length += 2;
1328 capture_last_found = TRUE;
1329 }
1330 length += 3;
1331 cc += 1 + LINK_SIZE + IMM2_SIZE;
1332 break;
1333
1334 case OP_THEN:
1335 stack_restore = TRUE;
1336 if (common->control_head_ptr != 0)
1337 *needs_control_head = TRUE;
1338 cc ++;
1339 break;
1340
/* Every opcode which is not listed anywhere in this switch sets
stack_restore; the opcodes listed after the default label are only skipped. */
1341 default:
1342 stack_restore = TRUE;
1343 /* Fall through. */
1344
1345 case OP_NOT_WORD_BOUNDARY:
1346 case OP_WORD_BOUNDARY:
1347 case OP_NOT_DIGIT:
1348 case OP_DIGIT:
1349 case OP_NOT_WHITESPACE:
1350 case OP_WHITESPACE:
1351 case OP_NOT_WORDCHAR:
1352 case OP_WORDCHAR:
1353 case OP_ANY:
1354 case OP_ALLANY:
1355 case OP_ANYBYTE:
1356 case OP_NOTPROP:
1357 case OP_PROP:
1358 case OP_ANYNL:
1359 case OP_NOT_HSPACE:
1360 case OP_HSPACE:
1361 case OP_NOT_VSPACE:
1362 case OP_VSPACE:
1363 case OP_EXTUNI:
1364 case OP_EODN:
1365 case OP_EOD:
1366 case OP_CIRC:
1367 case OP_CIRCM:
1368 case OP_DOLL:
1369 case OP_DOLLM:
1370 case OP_CHAR:
1371 case OP_CHARI:
1372 case OP_NOT:
1373 case OP_NOTI:
1374
1375 case OP_EXACT:
1376 case OP_POSSTAR:
1377 case OP_POSPLUS:
1378 case OP_POSQUERY:
1379 case OP_POSUPTO:
1380
1381 case OP_EXACTI:
1382 case OP_POSSTARI:
1383 case OP_POSPLUSI:
1384 case OP_POSQUERYI:
1385 case OP_POSUPTOI:
1386
1387 case OP_NOTEXACT:
1388 case OP_NOTPOSSTAR:
1389 case OP_NOTPOSPLUS:
1390 case OP_NOTPOSQUERY:
1391 case OP_NOTPOSUPTO:
1392
1393 case OP_NOTEXACTI:
1394 case OP_NOTPOSSTARI:
1395 case OP_NOTPOSPLUSI:
1396 case OP_NOTPOSQUERYI:
1397 case OP_NOTPOSUPTOI:
1398
1399 case OP_TYPEEXACT:
1400 case OP_TYPEPOSSTAR:
1401 case OP_TYPEPOSPLUS:
1402 case OP_TYPEPOSQUERY:
1403 case OP_TYPEPOSUPTO:
1404
1405 case OP_CLASS:
1406 case OP_NCLASS:
1407 case OP_XCLASS:
1408 case OP_CALLOUT:
1409
1410 cc = next_opcode(common, cc);
1411 SLJIT_ASSERT(cc != NULL);
1412 break;
1413 }
1414
1415 /* Possessive quantifiers can use a special case. */
1416 if (SLJIT_UNLIKELY(possessive == length))
1417 return stack_restore ? no_frame : no_stack;
1418
1419 if (length > 0)
1420 return length + 1;
1421 return stack_restore ? no_frame : no_stack;
1422 }
1423
/* Emits the code which stores a frame on the JIT stack for the byte code in
[cc, ccend) (the body of the bracket at cc when ccend is NULL).

Every frame entry consists of an identifying word followed by the saved
value(s):
  - the negated local offset and the current value, for OVECTOR(0)
    (OP_SET_SOM), common->mark_ptr and common->capture_last_ptr;
  - OVECTOR(offset) and the two current ovector values, for a capturing
    bracket.
A zero word closes the frame. Writing starts at STACK(stackpos) and the
closing word must be exactly at STACK(stacktop), which is asserted at the end.
With recursive set, the OVECTOR(0) and mark entries are never emitted. */
1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1425 {
1426 DEFINE_COMPILER;
1427 BOOL setsom_found = recursive;
1428 BOOL setmark_found = recursive;
1429 /* The last capture is a local variable even for recursions. */
1430 BOOL capture_last_found = FALSE;
1431 int offset;
1432
1433 /* >= 1 + shortest item size (2) */
1434 SLJIT_UNUSED_ARG(stacktop);
1435 SLJIT_ASSERT(stackpos >= stacktop + 2);
1436
1437 stackpos = STACK(stackpos);
1438 if (ccend == NULL)
1439 {
1440 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so the loop below
also stores the capture of the bracket itself. */
1441 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1442 cc = next_opcode(common, cc);
1443 }
1444
1445 SLJIT_ASSERT(cc != NULL);
1446 while (cc < ccend)
1447 switch(*cc)
1448 {
1449 case OP_SET_SOM:
1450 SLJIT_ASSERT(common->has_set_som);
1451 if (!setsom_found)
1452 {
1453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1455 stackpos += (int)sizeof(sljit_sw);
1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1457 stackpos += (int)sizeof(sljit_sw);
1458 setsom_found = TRUE;
1459 }
1460 cc += 1;
1461 break;
1462
1463 case OP_MARK:
1464 case OP_PRUNE_ARG:
1465 case OP_THEN_ARG:
1466 SLJIT_ASSERT(common->mark_ptr != 0);
1467 if (!setmark_found)
1468 {
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 setmark_found = TRUE;
1475 }
1476 cc += 1 + 2 + cc[1];
1477 break;
1478
1479 case OP_RECURSE:
/* Saves every tracked value which is in use and was not saved earlier in
this frame. */
1480 if (common->has_set_som && !setsom_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setsom_found = TRUE;
1488 }
1489 if (common->mark_ptr != 0 && !setmark_found)
1490 {
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 setmark_found = TRUE;
1497 }
1498 if (common->capture_last_ptr != 0 && !capture_last_found)
1499 {
1500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1504 stackpos += (int)sizeof(sljit_sw);
1505 capture_last_found = TRUE;
1506 }
1507 cc += 1 + LINK_SIZE;
1508 break;
1509
1510 case OP_CBRA:
1511 case OP_CBRAPOS:
1512 case OP_SCBRA:
1513 case OP_SCBRAPOS:
1514 if (common->capture_last_ptr != 0 && !capture_last_found)
1515 {
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1518 stackpos += (int)sizeof(sljit_sw);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 capture_last_found = TRUE;
1522 }
/* The bracket number is doubled to get the index of its first ovector slot;
the identifying word is OVECTOR(offset) (not negated, unlike the entries
above), followed by both ovector values. */
1523 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1525 stackpos += (int)sizeof(sljit_sw);
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1529 stackpos += (int)sizeof(sljit_sw);
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1531 stackpos += (int)sizeof(sljit_sw);
1532
1533 cc += 1 + LINK_SIZE + IMM2_SIZE;
1534 break;
1535
1536 default:
1537 cc = next_opcode(common, cc);
1538 SLJIT_ASSERT(cc != NULL);
1539 break;
1540 }
1541
/* Frame terminator. */
1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1543 SLJIT_ASSERT(stackpos == STACK(stacktop));
1544 }
1545
/* Returns the number of machine words which are handled by
copy_private_data() for the byte code in [cc, ccend).

The count starts at 2, or at 3 when needs_control_head is set, and grows by
one for every private data word (or private ovector word) which belongs to an
opcode in the range. The opcode cases and skip distances have to stay in sync
with copy_private_data(). The scan must stop exactly at ccend (asserted). */
1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1547 {
1548 int private_data_length = needs_control_head ? 3 : 2;
1549 int size;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
1552 while (cc < ccend)
1553 {
1554 size = 0;
1555 switch(*cc)
1556 {
1557 case OP_KET:
/* A ket with a private slot also stores an extra skip distance in the
private data entry of cc + 1. */
1558 if (PRIVATE_DATA(cc) != 0)
1559 {
1560 private_data_length++;
1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562 cc += PRIVATE_DATA(cc + 1);
1563 }
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_ASSERT:
1568 case OP_ASSERT_NOT:
1569 case OP_ASSERTBACK:
1570 case OP_ASSERTBACK_NOT:
1571 case OP_ONCE:
1572 case OP_ONCE_NC:
1573 case OP_BRAPOS:
1574 case OP_SBRA:
1575 case OP_SBRAPOS:
1576 case OP_SCOND:
1577 private_data_length++;
1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579 cc += 1 + LINK_SIZE;
1580 break;
1581
1582 case OP_CBRA:
1583 case OP_SCBRA:
/* Only brackets whose optimized_cbracket entry is zero contribute a word. */
1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE + IMM2_SIZE;
1587 break;
1588
1589 case OP_CBRAPOS:
1590 case OP_SCBRAPOS:
1591 private_data_length += 2;
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 private_data_length++;
1600 cc += 1 + LINK_SIZE;
1601 break;
1602
/* Iterators only count when a private slot has been assigned to them. */
1603 CASE_ITERATOR_PRIVATE_DATA_1
1604 if (PRIVATE_DATA(cc))
1605 private_data_length++;
1606 cc += 2;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_PRIVATE_DATA_2A
1613 if (PRIVATE_DATA(cc))
1614 private_data_length += 2;
1615 cc += 2;
1616 #ifdef SUPPORT_UTF
1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1618 #endif
1619 break;
1620
1621 CASE_ITERATOR_PRIVATE_DATA_2B
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 2 + IMM2_SIZE;
1625 #ifdef SUPPORT_UTF
1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1627 #endif
1628 break;
1629
1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631 if (PRIVATE_DATA(cc))
1632 private_data_length++;
1633 cc += 1;
1634 break;
1635
1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637 if (PRIVATE_DATA(cc))
1638 private_data_length += 2;
1639 cc += 1;
1640 break;
1641
1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643 if (PRIVATE_DATA(cc))
1644 private_data_length += 2;
1645 cc += 1 + IMM2_SIZE;
1646 break;
1647
1648 case OP_CLASS:
1649 case OP_NCLASS:
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1651 case OP_XCLASS:
1652 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1653 #else
1654 size = 1 + 32 / (int)sizeof(pcre_uchar);
1655 #endif
/* The word count of a class depends on the iterator which follows it. */
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += get_class_iterator_size(cc + size);
1658 cc += size;
1659 break;
1660
1661 default:
1662 cc = next_opcode(common, cc);
1663 SLJIT_ASSERT(cc != NULL);
1664 break;
1665 }
1666 }
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
1669 }
1670
/* Emits the code which saves the private data of the byte code in
[cc, ccend) to the JIT stack (save is TRUE) or restores it from there (save is
FALSE).

The words are moved between the locals (SLJIT_SP based) and the stack area
from STACK(stackptr - 2) up to STACK(stacktop - 1). TMP1 and TMP2 are used
alternately as a two entry pipeline: when saving, a register is flushed to the
stack right before it is reloaded with the next local; when restoring, a
register is written to its local and then refilled from the stack if words
remain.

When saving, the first words are common->recursive_head_ptr and, with
needs_control_head, common->control_head_ptr. When restoring, those leading
words are skipped rather than written back. The opcode cases and skip
distances have to stay in sync with get_private_data_copy_length(). */
1671 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1672 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1673 {
1674 DEFINE_COMPILER;
/* Local offsets of the words produced by the current step (at most two). */
1675 int srcw[2];
1676 int count, size;
/* tmp1next selects the register used for the next word; tmpNempty tells
whether the register currently holds no pending word. */
1677 BOOL tmp1next = TRUE;
1678 BOOL tmp1empty = TRUE;
1679 BOOL tmp2empty = TRUE;
1680 pcre_uchar *alternative;
1681 enum {
1682 start,
1683 loop,
1684 end
1685 } status;
1686
/* The start state (head words) is only visited when saving. */
1687 status = save ? start : loop;
1688 stackptr = STACK(stackptr - 2);
1689 stacktop = STACK(stacktop - 1);
1690
1691 if (!save)
1692 {
/* Skip the saved head word(s), then preload up to two words. */
1693 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1694 if (stackptr < stacktop)
1695 {
1696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1697 stackptr += sizeof(sljit_sw);
1698 tmp1empty = FALSE;
1699 }
1700 if (stackptr < stacktop)
1701 {
1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1703 stackptr += sizeof(sljit_sw);
1704 tmp2empty = FALSE;
1705 }
1706 /* The tmp1next must be TRUE in either way. */
1707 }
1708
1709 do
1710 {
/* Each pass selects zero, one or two local offsets (srcw) which are then
moved by the inner while loop below. */
1711 count = 0;
1712 switch(status)
1713 {
1714 case start:
1715 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1716 count = 1;
1717 srcw[0] = common->recursive_head_ptr;
1718 if (needs_control_head)
1719 {
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1721 count = 2;
1722 srcw[1] = common->control_head_ptr;
1723 }
1724 status = loop;
1725 break;
1726
1727 case loop:
1728 if (cc >= ccend)
1729 {
1730 status = end;
1731 break;
1732 }
1733
1734 switch(*cc)
1735 {
1736 case OP_KET:
1737 if (PRIVATE_DATA(cc) != 0)
1738 {
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1742 cc += PRIVATE_DATA(cc + 1);
1743 }
1744 cc += 1 + LINK_SIZE;
1745 break;
1746
1747 case OP_ASSERT:
1748 case OP_ASSERT_NOT:
1749 case OP_ASSERTBACK:
1750 case OP_ASSERTBACK_NOT:
1751 case OP_ONCE:
1752 case OP_ONCE_NC:
1753 case OP_BRAPOS:
1754 case OP_SBRA:
1755 case OP_SBRAPOS:
1756 case OP_SCOND:
1757 count = 1;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 SLJIT_ASSERT(srcw[0] != 0);
1760 cc += 1 + LINK_SIZE;
1761 break;
1762
1763 case OP_CBRA:
1764 case OP_SCBRA:
1765 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1766 {
1767 count = 1;
1768 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1769 }
1770 cc += 1 + LINK_SIZE + IMM2_SIZE;
1771 break;
1772
1773 case OP_CBRAPOS:
1774 case OP_SCBRAPOS:
1775 count = 2;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1778 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1779 cc += 1 + LINK_SIZE + IMM2_SIZE;
1780 break;
1781
1782 case OP_COND:
1783 /* Might be a hidden SCOND. */
1784 alternative = cc + GET(cc, 1);
1785 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1786 {
1787 count = 1;
1788 srcw[0] = PRIVATE_DATA(cc);
1789 SLJIT_ASSERT(srcw[0] != 0);
1790 }
1791 cc += 1 + LINK_SIZE;
1792 break;
1793
1794 CASE_ITERATOR_PRIVATE_DATA_1
1795 if (PRIVATE_DATA(cc))
1796 {
1797 count = 1;
1798 srcw[0] = PRIVATE_DATA(cc);
1799 }
1800 cc += 2;
1801 #ifdef SUPPORT_UTF
1802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1803 #endif
1804 break;
1805
/* Two word iterators occupy two consecutive private words. */
1806 CASE_ITERATOR_PRIVATE_DATA_2A
1807 if (PRIVATE_DATA(cc))
1808 {
1809 count = 2;
1810 srcw[0] = PRIVATE_DATA(cc);
1811 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1812 }
1813 cc += 2;
1814 #ifdef SUPPORT_UTF
1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1816 #endif
1817 break;
1818
1819 CASE_ITERATOR_PRIVATE_DATA_2B
1820 if (PRIVATE_DATA(cc))
1821 {
1822 count = 2;
1823 srcw[0] = PRIVATE_DATA(cc);
1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1825 }
1826 cc += 2 + IMM2_SIZE;
1827 #ifdef SUPPORT_UTF
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1829 #endif
1830 break;
1831
1832 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1833 if (PRIVATE_DATA(cc))
1834 {
1835 count = 1;
1836 srcw[0] = PRIVATE_DATA(cc);
1837 }
1838 cc += 1;
1839 break;
1840
1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1842 if (PRIVATE_DATA(cc))
1843 {
1844 count = 2;
1845 srcw[0] = PRIVATE_DATA(cc);
1846 srcw[1] = srcw[0] + sizeof(sljit_sw);
1847 }
1848 cc += 1;
1849 break;
1850
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1852 if (PRIVATE_DATA(cc))
1853 {
1854 count = 2;
1855 srcw[0] = PRIVATE_DATA(cc);
1856 srcw[1] = srcw[0] + sizeof(sljit_sw);
1857 }
1858 cc += 1 + IMM2_SIZE;
1859 break;
1860
1861 case OP_CLASS:
1862 case OP_NCLASS:
1863 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1864 case OP_XCLASS:
1865 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1866 #else
1867 size = 1 + 32 / (int)sizeof(pcre_uchar);
1868 #endif
/* A class with a private slot must be followed by a one or two word
iterator; any other size is treated as a bug. */
1869 if (PRIVATE_DATA(cc))
1870 switch(get_class_iterator_size(cc + size))
1871 {
1872 case 1:
1873 count = 1;
1874 srcw[0] = PRIVATE_DATA(cc);
1875 break;
1876
1877 case 2:
1878 count = 2;
1879 srcw[0] = PRIVATE_DATA(cc);
1880 srcw[1] = srcw[0] + sizeof(sljit_sw);
1881 break;
1882
1883 default:
1884 SLJIT_ASSERT_STOP();
1885 break;
1886 }
1887 cc += size;
1888 break;
1889
1890 default:
1891 cc = next_opcode(common, cc);
1892 SLJIT_ASSERT(cc != NULL);
1893 break;
1894 }
1895 break;
1896
1897 case end:
1898 SLJIT_ASSERT_STOP();
1899 break;
1900 }
1901
/* The selected words are processed from the highest srcw index down. */
1902 while (count > 0)
1903 {
1904 count--;
1905 if (save)
1906 {
1907 if (tmp1next)
1908 {
1909 if (!tmp1empty)
1910 {
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1912 stackptr += sizeof(sljit_sw);
1913 }
1914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1915 tmp1empty = FALSE;
1916 tmp1next = FALSE;
1917 }
1918 else
1919 {
1920 if (!tmp2empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1926 tmp2empty = FALSE;
1927 tmp1next = TRUE;
1928 }
1929 }
1930 else
1931 {
1932 if (tmp1next)
1933 {
1934 SLJIT_ASSERT(!tmp1empty);
1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1936 tmp1empty = stackptr >= stacktop;
1937 if (!tmp1empty)
1938 {
1939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1940 stackptr += sizeof(sljit_sw);
1941 }
1942 tmp1next = FALSE;
1943 }
1944 else
1945 {
1946 SLJIT_ASSERT(!tmp2empty);
1947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1948 tmp2empty = stackptr >= stacktop;
1949 if (!tmp2empty)
1950 {
1951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1952 stackptr += sizeof(sljit_sw);
1953 }
1954 tmp1next = TRUE;
1955 }
1956 }
1957 }
1958 }
1959 while (status != end);
1960
1961 if (save)
1962 {
/* Flush the words still held in the registers, oldest first: the register
which would be used next holds the older word. */
1963 if (tmp1next)
1964 {
1965 if (!tmp1empty)
1966 {
1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1968 stackptr += sizeof(sljit_sw);
1969 }
1970 if (!tmp2empty)
1971 {
1972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1973 stackptr += sizeof(sljit_sw);
1974 }
1975 }
1976 else
1977 {
1978 if (!tmp2empty)
1979 {
1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1981 stackptr += sizeof(sljit_sw);
1982 }
1983 if (!tmp1empty)
1984 {
1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1986 stackptr += sizeof(sljit_sw);
1987 }
1988 }
1989 }
1990 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1991 }
1992
1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1994 {
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1997
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000 current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003 has_alternatives = FALSE;
2004
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007 current_offset = common->then_offsets + (cc - common->start);
2008
2009 while (cc < end)
2010 {
2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012 cc = set_then_offsets(common, cc, current_offset);
2013 else
2014 {
2015 if (*cc == OP_ALT && has_alternatives)
2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018 *current_offset = 1;
2019 cc = next_opcode(common, cc);
2020 }
2021 }
2022
2023 return end;
2024 }
2025
/* The case label helper macros are used by the opcode scanning functions
above; they are undefined here so they are not available to later code. */
2026 #undef CASE_ITERATOR_PRIVATE_DATA_1
2027 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2028 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2029 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2030 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2031 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2032
2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2034 {
2035 return (value & (value - 1)) == 0;
2036 }
2037
2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2039 {
2040 while (list)
2041 {
2042 /* sljit_set_label is clever enough to do nothing
2043 if either the jump or the label is NULL. */
2044 SET_LABEL(list->jump, label);
2045 list = list->next;
2046 }
2047 }
2048
2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2050 {
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2052 if (list_item)
2053 {
2054 list_item->next = *list;
2055 list_item->jump = jump;
2056 *list = list_item;
2057 }
2058 }
2059
2060 static void add_stub(compiler_common *common, struct sljit_jump *start)
2061 {
2062 DEFINE_COMPILER;
2063 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2064
2065 if (list_item)
2066 {
2067 list_item->start = start;
2068 list_item->quit = LABEL();
2069 list_item->next = common->stubs;
2070 common->stubs = list_item;
2071 }
2072 }
2073
2074 static void flush_stubs(compiler_common *common)
2075 {
2076 DEFINE_COMPILER;
2077 stub_list *list_item = common->stubs;
2078
2079 while (list_item)
2080 {
2081 JUMPHERE(list_item->start);
2082 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2083 JUMPTO(SLJIT_JUMP, list_item->quit);
2084 list_item = list_item->next;
2085 }
2086 common->stubs = NULL;
2087 }
2088
2089 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2090 {
2091 DEFINE_COMPILER;
2092 label_addr_list *label_addr;
2093
2094 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2095 if (label_addr == NULL)
2096 return;
2097 label_addr->label = LABEL();
2098 label_addr->update_addr = update_addr;
2099 label_addr->next = common->label_addrs;
2100 common->label_addrs = label_addr;
2101 }
2102
2103 static SLJIT_INLINE void count_match(compiler_common *common)
2104 {
2105 DEFINE_COMPILER;
2106
2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2109 }
2110
2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2112 {
2113 /* May destroy all locals and registers except TMP2. */
2114 DEFINE_COMPILER;
2115
2116 SLJIT_ASSERT(size > 0);
2117 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2118 #ifdef DESTROY_REGISTERS
2119 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2120 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2121 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2124 #endif
2125 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2126 }
2127
2128 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2129 {
2130 DEFINE_COMPILER;
2131
2132 SLJIT_ASSERT(size > 0);
2133 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2134 }
2135
2136 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2137 {
2138 DEFINE_COMPILER;
2139 sljit_uw *result;
2140
2141 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2142 return NULL;
2143
2144 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2145 if (SLJIT_UNLIKELY(result == NULL))
2146 {
2147 sljit_set_compiler_memory_error(compiler);
2148 return NULL;
2149 }
2150
2151 *(void**)result = common->read_only_data_head;
2152 common->read_only_data_head = (void *)result;
2153 return result + 1;
2154 }
2155
2156 static void free_read_only_data(void *current, void *allocator_data)
2157 {
2158 void *next;
2159
2160 SLJIT_UNUSED_ARG(allocator_data);
2161
2162 while (current != NULL)
2163 {
2164 next = *(void**)current;
2165 SLJIT_FREE(current, allocator_data);
2166 current = next;
2167 }
2168 }
2169
2170 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2171 {
2172 DEFINE_COMPILER;
2173 struct sljit_label *loop;
2174 int i;
2175
2176 /* At this point we can freely use all temporary registers. */
2177 SLJIT_ASSERT(length > 1);
2178 /* TMP1 returns with begin - 1. */
2179 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2180 if (length < 8)
2181 {
2182 for (i = 1; i < length; i++)
2183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2184 }
2185 else
2186 {
2187 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2188 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2189 loop = LABEL();
2190 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2192 JUMPTO(SLJIT_NOT_ZERO, loop);
2193 }
2194 }
2195
/* Emits code which restores the matcher state before a new match attempt.

The ovector slots from OVECTOR(2) up to OVECTOR(length - 1) are overwritten
with the value of OVECTOR(1); the mark and control head locals are cleared
when they are in use; STACK_TOP is reloaded from the base field of the
sljit_stack referenced by the stack member of the jit_arguments structure;
and TMP1 is loaded from the common->start_ptr local. length must be greater
than 1. */
2196 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2197 {
2198 DEFINE_COMPILER;
2199 struct sljit_label *loop;
2200 int i;
2201
2202 SLJIT_ASSERT(length > 1);
2203 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* With length == 2 there is no slot to overwrite, so the load is skipped. */
2204 if (length > 2)
2205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2206 if (length < 8)
2207 {
2208 for (i = 2; i < length; i++)
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2210 }
2211 else
2212 {
/* STACK_TOP serves as the loop counter here; it is reloaded below. */
2213 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2214 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2215 loop = LABEL();
2216 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2217 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2218 JUMPTO(SLJIT_NOT_ZERO, loop);
2219 }
2220
/* STACK_TOP temporarily holds the arguments pointer, then the sljit_stack
pointer, and finally the stack base. */
2221 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2222 if (common->mark_ptr != 0)
2223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2224 if (common->control_head_ptr != 0)
2225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2226 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2228 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2229 }
2230
2231 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2232 {
2233 while (current != NULL)
2234 {
2235 switch (current[-2])
2236 {
2237 case type_then_trap:
2238 break;
2239
2240 case type_mark:
2241 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2242 return current[-4];
2243 break;
2244
2245 default:
2246 SLJIT_ASSERT_STOP();
2247 break;
2248 }
2249 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2250 current = (sljit_sw*)current[-1];
2251 }
2252 return -1;
2253 }
2254
/* Emits the code which publishes the result of a successful match.

The previous content of OVECTOR(1) is kept in SLJIT_S2 and OVECTOR(1) is
replaced by STR_PTR. If marks are in use, the mark local is copied to the
mark_ptr member of jit_arguments. Then offset_count ovector slots are
converted to offsets relative to the begin member (scaled down by UCHAR_SHIFT
in the 16 and 32 bit libraries) and stored as int values into the offsets
array of jit_arguments.

The return value left in SLJIT_RETURN_REG is 1 when topbracket is not greater
than 1; otherwise it is found by scanning the first slot of each pair
downwards from topbracket until a slot differs from SLJIT_S2. */
2255 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2256 {
2257 DEFINE_COMPILER;
2258 struct sljit_label *loop;
2259 struct sljit_jump *early_quit;
2260
2261 /* At this point we can freely use all registers. */
2262 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2264
2265 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2266 if (common->mark_ptr != 0)
2267 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2268 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2269 if (common->mark_ptr != 0)
2270 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* SLJIT_R2 starts one int before the offsets array, because the store in
the loop addresses SLJIT_R2 + sizeof(int). */
2271 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2272 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2273 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2274 /* Unlikely, but possible */
2275 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2276 loop = LABEL();
/* SLJIT_S1 = slot value - begin; SLJIT_S0 advances to the next slot. */
2277 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2278 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2279 /* Copy the integer value to the output buffer */
2280 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2281 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2282 #endif
2283 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2284 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2285 JUMPTO(SLJIT_NOT_ZERO, loop);
2286 JUMPHERE(early_quit);
2287
2288 /* Calculate the return value, which is the maximum ovector value. */
2289 if (topbracket > 1)
2290 {
/* SLJIT_R0 starts at the pair of bracket topbracket and the load below
reads two words lower; SLJIT_R1 is decremented once per inspected pair. */
2291 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2292 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2293
2294 /* OVECTOR(0) is never equal to SLJIT_S2. */
2295 loop = LABEL();
2296 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2297 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2298 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2299 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2300 }
2301 else
2302 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2303 }
2304
2305 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2306 {
/* Emits the exit sequence of a partial match. SLJIT_RETURN_REG is set to
PCRE_ERROR_PARTIAL. When jit_arguments.real_offset_count is at least 2,
offsets[0] and offsets[1] are filled in (and offsets[2] as well when the
count is at least 3); every stored value is relative to jit_arguments.begin.
The emitted code always ends with a jump to quit. */
2307 DEFINE_COMPILER;
2308 struct sljit_jump *jump;
2309
2310 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2311 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2312 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2313
/* R1 = arguments, R2 = real_offset_count. Nothing is stored when fewer
than two offsets are available. */
2314 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2315 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2316 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2317 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2318
2319 /* Store match begin and end. */
/* S0 = begin, R1 = address of the offsets array. */
2320 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2321 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2322
/* offsets[2] is written only when real_offset_count >= 3. Its source is the
start_ptr local in hard partial mode, otherwise the word following the
hit_start local. */
2323 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2324 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2325 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2326 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2327 #endif
2328 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2329 JUMPHERE(jump);
2330
/* R2 = the start_used_ptr local (hard partial mode) or the hit_start local.
offsets[1] = STR_END - begin. S1 is STR_END (see the compile time assert
above), so STR_END is overwritten by this computation. */
2331 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2332 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2333 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2334 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2335 #endif
2336 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2337
/* offsets[0] = R2 - begin. */
2338 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2339 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2340 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2341 #endif
2342 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2343
2344 JUMPTO(SLJIT_JUMP, quit);
2345 }
2346
2347 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2348 {
2349 /* May destroy TMP1. */
2350 DEFINE_COMPILER;
2351 struct sljit_jump *jump;
2352
2353 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2354 {
2355 /* The value of -1 must be kept for start_used_ptr! */
2356 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2357 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2358 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2359 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2361 JUMPHERE(jump);
2362 }
2363 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2364 {
2365 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2366 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2367 JUMPHERE(jump);
2368 }
2369 }
2370
2371 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2372 {
2373 /* Detects if the character has an othercase. */
2374 unsigned int c;
2375
2376 #ifdef SUPPORT_UTF
2377 if (common->utf)
2378 {
2379 GETCHAR(c, cc);
2380 if (c > 127)
2381 {
2382 #ifdef SUPPORT_UCP
2383 return c != UCD_OTHERCASE(c);
2384 #else
2385 return FALSE;
2386 #endif
2387 }
2388 #ifndef COMPILE_PCRE8
2389 return common->fcc[c] != c;
2390 #endif
2391 }
2392 else
2393 #endif
2394 c = *cc;
2395 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2396 }
2397
2398 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2399 {
2400 /* Returns with the othercase. */
2401 #ifdef SUPPORT_UTF
2402 if (common->utf && c > 127)
2403 {
2404 #ifdef SUPPORT_UCP
2405 return UCD_OTHERCASE(c);
2406 #else
2407 return c;
2408 #endif
2409 }
2410 #endif
2411 return TABLE_GET(c, common->fcc, c);
2412 }
2413
2414 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2415 {
2416 /* Detects if the character and its othercase has only 1 bit difference. */
2417 unsigned int c, oc, bit;
2418 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2419 int n;
2420 #endif
2421
2422 #ifdef SUPPORT_UTF
2423 if (common->utf)
2424 {
2425 GETCHAR(c, cc);
2426 if (c <= 127)
2427 oc = common->fcc[c];
2428 else
2429 {
2430 #ifdef SUPPORT_UCP
2431 oc = UCD_OTHERCASE(c);
2432 #else
2433 oc = c;
2434 #endif
2435 }
2436 }
2437 else
2438 {
2439 c = *cc;
2440 oc = TABLE_GET(c, common->fcc, c);
2441 }
2442 #else
2443 c = *cc;
2444 oc = TABLE_GET(c, common->fcc, c);
2445 #endif
2446
2447 SLJIT_ASSERT(c != oc);
2448
2449 bit = c ^ oc;
2450 /* Optimized for English alphabet. */
2451 if (c <= 127 && bit == 0x20)
2452 return (0 << 8) | 0x20;
2453
2454 /* Since c != oc, they must have at least 1 bit difference. */
2455 if (!is_powerof2(bit))
2456 return 0;
2457
2458 #if defined COMPILE_PCRE8
2459
2460 #ifdef SUPPORT_UTF
2461 if (common->utf && c > 127)
2462 {
2463 n = GET_EXTRALEN(*cc);
2464 while ((bit & 0x3f) == 0)
2465 {
2466 n--;
2467 bit >>= 6;
2468 }
2469 return (n << 8) | bit;
2470 }
2471 #endif /* SUPPORT_UTF */
2472 return (0 << 8) | bit;
2473
2474 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2475
2476 #ifdef SUPPORT_UTF
2477 if (common->utf && c > 65535)
2478 {
2479 if (bit >= (1 << 10))
2480 bit >>= 10;
2481 else
2482 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2483 }
2484 #endif /* SUPPORT_UTF */
2485 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2486
2487 #endif /* COMPILE_PCRE[8|16|32] */
2488 }
2489
2490 static void check_partial(compiler_common *common, BOOL force)
2491 {
2492 /* Checks whether a partial matching is occurred. Does not modify registers. */
2493 DEFINE_COMPILER;
2494 struct sljit_jump *jump = NULL;
2495
2496 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2497
2498 if (common->mode == JIT_COMPILE)
2499 return;
2500
2501 if (!force)
2502 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2503 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2504 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2505
2506 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2508 else
2509 {
2510 if (common->partialmatchlabel != NULL)
2511 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2512 else
2513 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2514 }
2515
2516 if (jump != NULL)
2517 JUMPHERE(jump);
2518 }
2519
2520 static void check_str_end(compiler_common *common, jump_list **end_reached)
2521 {
2522 /* Does not affect registers. Usually used in a tight spot. */
2523 DEFINE_COMPILER;
2524 struct sljit_jump *jump;
2525
2526 if (common->mode == JIT_COMPILE)
2527 {
2528 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2529 return;
2530 }
2531
2532 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2533 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2534 {
2535 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2537 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2538 }
2539 else
2540 {
2541 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2542 if (common->partialmatchlabel != NULL)
2543 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2544 else
2545 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2546 }
2547 JUMPHERE(jump);
2548 }
2549
2550 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2551 {
2552 DEFINE_COMPILER;
2553 struct sljit_jump *jump;
2554
2555 if (common->mode == JIT_COMPILE)
2556 {
2557 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2558 return;
2559 }
2560
2561 /* Partial matching mode. */
2562 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2563 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2564 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2565 {
2566 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2567 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2568 }
2569 else
2570 {
2571 if (common->partialmatchlabel != NULL)
2572 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2573 else
2574 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2575 }
2576 JUMPHERE(jump);
2577 }
2578
2579 static void peek_char(compiler_common *common, sljit_ui max)
2580 {
2581 /* Reads the character into TMP1, keeps STR_PTR.
2582 Does not check STR_END. TMP2 Destroyed. */
2583 DEFINE_COMPILER;
2584 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2585 struct sljit_jump *jump;
2586 #endif
2587
2588 SLJIT_UNUSED_ARG(max);
2589
2590 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2591 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2592 if (common->utf)
2593 {
2594 if (max < 128) return;
2595
2596 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2597 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2598 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2599 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2600 JUMPHERE(jump);
2601 }
2602 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2603
2604 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2605 if (common->utf)
2606 {
2607 if (max < 0xd800) return;
2608
2609 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2610 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2611 /* TMP2 contains the high surrogate. */
2612 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2613 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2614 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2615 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2616 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2617 JUMPHERE(jump);
2618 }
2619 #endif
2620 }
2621
2622 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623
2624 static BOOL is_char7_bitset(const sljit_ub *bitset, BOOL nclass)
2625 {
2626 /* Tells whether the character codes below 128 are enough
2627 to determine a match. */
2628 const sljit_ub value = nclass ? 0xff : 0;
2629 const sljit_ub *end = bitset + 32;
2630
2631 bitset += 16;
2632 do
2633 {
2634 if (*bitset++ != value)
2635 return FALSE;
2636 }
2637 while (bitset < end);
2638 return TRUE;
2639 }
2640
2641 static void read_char7_type(compiler_common *common, BOOL full_read)
2642 {
2643 /* Reads the precise character type of a character into TMP1, if the character
2644 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2645 full_read argument tells whether characters above max are accepted or not. */
2646 DEFINE_COMPILER;
2647 struct sljit_jump *jump;
2648
2649 SLJIT_ASSERT(common->utf);
2650
2651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2652 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2653
2654 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2655
2656 if (full_read)
2657 {
2658 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2659 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2660 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2661 JUMPHERE(jump);
2662 }
2663 }
2664
2665 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2666
2667 static void read_char_range(compiler_common *common, sljit_ui min, sljit_ui max, BOOL update_str_ptr)
2668 {
2669 /* Reads the precise value of a character into TMP1, if the character is
2670 between min and max (c >= min && c <= max). Otherwise it returns with a value
2671 outside the range. Does not check STR_END. */
/* The first code unit is always loaded and STR_PTR is always moved after
it. Where update_str_ptr is tested below, a set value makes the emitted
code skip the remaining code units of the character even when its value
is not decoded. TMP2 is used as a scratch register in the UTF paths, and
RETURN_ADDR as well in some of the UTF-8 paths. */
2672 DEFINE_COMPILER;
2673 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2674 struct sljit_jump *jump;
2675 #endif
2676 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2677 struct sljit_jump *jump2;
2678 #endif
2679
2680 SLJIT_UNUSED_ARG(update_str_ptr);
2681 SLJIT_UNUSED_ARG(min);
2682 SLJIT_UNUSED_ARG(max);
2683 SLJIT_ASSERT(min <= max);
2684
2685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2687
2688 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2689 if (common->utf)
2690 {
/* The first byte is enough when only values below 128 are interesting
and the rest of the character does not need to be skipped. */
2691 if (max < 128 && !update_str_ptr) return;
2692
/* First bytes below 0xc0 are not decoded further. */
2693 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2694 if (min >= 0x10000)
2695 {
/* Only first bytes in the range 0xf0 - 0xf7 are decoded (three more
bytes, six bits each); for any other first byte the decoding is jumped
over. With update_str_ptr the utf8_table4 entry of the first byte is
kept in RETURN_ADDR and added to STR_PTR at the end in both cases. */
2696 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2697 if (update_str_ptr)
2698 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2699 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2700 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2701 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2702 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2703 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2704 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2705 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2706 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2707 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2708 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2709 if (!update_str_ptr)
2710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2711 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2712 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2713 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2714 JUMPHERE(jump2);
2715 if (update_str_ptr)
2716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2717 }
2718 else if (min >= 0x800 && max <= 0xffff)
2719 {
/* Same scheme for first bytes in the range 0xe0 - 0xef (two more
bytes are decoded). */
2720 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2721 if (update_str_ptr)
2722 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2723 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2724 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2725 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2726 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2727 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2728 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2729 if (!update_str_ptr)
2730 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2731 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2732 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2733 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2734 JUMPHERE(jump2);
2735 if (update_str_ptr)
2736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2737 }
/* Other ranges reaching 0x800 or above use one of the shared decoder
routines: utfreadchar16 when max is below 0x10000, utfreadchar otherwise. */
2738 else if (max >= 0x800)
2739 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2740 else if (max < 128)
2741 {
/* Reached only with update_str_ptr set (see the early return above):
nothing is decoded, STR_PTR is moved by the utf8_table4 entry. */
2742 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2743 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2744 }
2745 else
2746 {
/* 128 <= max < 0x800: the first two bytes are combined as a two byte
sequence. */
2747 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2748 if (!update_str_ptr)
2749 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2750 else
2751 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2752 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2753 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2754 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2755 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2756 if (update_str_ptr)
2757 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2758 }
2759 JUMPHERE(jump);
2760 }
2761 #endif
2762
2763 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2764 if (common->utf)
2765 {
2766 if (max >= 0x10000)
2767 {
/* Full decoding: when the first code unit is in the range 0xd800 -
0xdbff it is combined with the following code unit, and STR_PTR is
moved after that unit as well. */
2768 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2769 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2770 /* TMP2 contains the high surrogate. */
2771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2773 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2774 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2775 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2776 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2777 JUMPHERE(jump);
2778 return;
2779 }
2780
/* The first code unit is enough when max is below the surrogate range
and the second code unit does not need to be skipped. */
2781 if (max < 0xd800 && !update_str_ptr) return;
2782
2783 /* Skip low surrogate if necessary. */
/* max is below 0x10000 here, so 0x10000 is a value outside the range; it
replaces the high surrogate in TMP1 when max is 0xd800 or above. */
2784 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2785 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2786 if (update_str_ptr)
2787 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2788 if (max >= 0xd800)
2789 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2790 JUMPHERE(jump);
2791 }
2792 #endif
2793 }
2794
2795 static SLJIT_INLINE void read_char(compiler_common *common)
2796 {
/* Shorthand for read_char_range over the range 0 - READ_CHAR_MAX with
update_str_ptr set: the character is read into TMP1 and STR_PTR is moved
after it. Does not check STR_END. */
2797 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2798 }
2799
2800 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2801 {
2802 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the ctypes table; wherever the character value is
checked against 255 below, the result is zero for larger values. TMP2 is
used as a scratch register. */
2803 DEFINE_COMPILER;
2804 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2805 struct sljit_jump *jump;
2806 #endif
2807 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2808 struct sljit_jump *jump2;
2809 #endif
2810
2811 SLJIT_UNUSED_ARG(update_str_ptr);
2812
2813 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2814 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2815
2816 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2817 if (common->utf)
2818 {
2819 /* This can be an extra read in some situations, but hopefully
2820 it is needed in most cases. */
2821 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* First bytes below 0xc0 are finished: TMP1 already holds the type. */
2822 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2823 if (!update_str_ptr)
2824 {
/* Inline path: only the second byte is consumed. The two bytes are
combined as a two byte sequence and the type is looked up when the
combined value is at most 255, otherwise TMP1 is zero. */
2825 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2827 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2828 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2829 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2830 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2831 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2832 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2833 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2834 JUMPHERE(jump2);
2835 }
2836 else
/* The shared utfreadtype8 routine also skips the rest of the character. */
2837 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2838 JUMPHERE(jump);
2839 return;
2840 }
2841 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2842
2843 #if !defined COMPILE_PCRE8
2844 /* The ctypes array contains only 256 values. */
2845 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2846 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2847 #endif
2848 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2849 #if !defined COMPILE_PCRE8
2850 JUMPHERE(jump);
2851 #endif
2852
2853 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2854 if (common->utf && update_str_ptr)
2855 {
2856 /* Skip low surrogate if necessary. */
/* One more code unit is skipped when the code unit just read is in the
range 0xd800 - 0xdbff. */
2857 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2858 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2859 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2860 JUMPHERE(jump);
2861 }
2862 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2863 }
2864
2865 static void skip_char_back(compiler_common *common)
2866 {
2867 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Outside of the UTF-8 and UTF-16 cases handled below this is a single
subtraction of one code unit from STR_PTR. */
2868 DEFINE_COMPILER;
2869 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2870 #if defined COMPILE_PCRE8
2871 struct sljit_label *label;
2872
2873 if (common->utf)
2874 {
/* Steps back one byte at a time; the loop repeats while the byte just
stepped over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
2875 label = LABEL();
2876 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2877 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2878 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2879 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2880 return;
2881 }
2882 #elif defined COMPILE_PCRE16
2883 if (common->utf)
2884 {
2885 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2886 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2887 /* Skip low surrogate if necessary. */
/* TMP1 receives the result of the equality test ((unit & 0xfc00) ==
0xdc00), presumably as 1 or 0; shifted left by one it is the extra
amount subtracted from STR_PTR, i.e. one more code unit when the unit
stepped over was a low surrogate. */
2888 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2889 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2890 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2891 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2892 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2893 return;
2894 }
2895 #endif /* COMPILE_PCRE[8|16] */
2896 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2897 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2898 }
2899
2900 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2901 {
2902 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2903 DEFINE_COMPILER;
2904 struct sljit_jump *jump;
2905
2906 if (nltype == NLTYPE_ANY)
2907 {
2908 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2909 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2910 }
2911 else if (nltype == NLTYPE_ANYCRLF)
2912 {
2913 if (jumpifmatch)
2914 {
2915 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2916 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2917 }
2918 else
2919 {
2920 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2921 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2922 JUMPHERE(jump);
2923 }
2924 }
2925 else
2926 {
2927 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2928 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2929 }
2930 }
2931
2932 #ifdef SUPPORT_UTF
2933
2934 #if defined COMPILE_PCRE8
2935 static void do_utfreadchar(compiler_common *common)
2936 {
2937 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2938 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emits a routine entered by a fast call. On entry STR_PTR points to the
second byte of the character; on return it points after the last byte.
RETURN_ADDR holds the return address. */
2939 DEFINE_COMPILER;
2940 struct sljit_jump *jump;
2941
2942 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (second byte & 0x3f). */
2943 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2944 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2945 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2946 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2947 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2948
2949 /* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 0x20 of the first byte; when it is clear the
sequence is two bytes long. */
2950 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2951 jump = JUMP(SLJIT_NOT_ZERO);
2952 /* Two byte sequence. */
2953 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2954 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2955 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2956
2957 JUMPHERE(jump);
/* The 0x800 marker bit is removed and the third byte is appended. */
2958 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2959 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2960 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2961 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2962 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2963
/* Bit 0x10000 of TMP1 is bit 0x10 of the first byte; when it is clear the
sequence is three bytes long. */
2964 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2965 jump = JUMP(SLJIT_NOT_ZERO);
2966 /* Three byte sequence. */
2967 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2968 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2969 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2970
2971 /* Four byte sequence. */
2972 JUMPHERE(jump);
/* The 0x10000 marker bit is removed and the fourth byte is appended. */
2973 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2974 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2975 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2976 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2977 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2978 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2979 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2980 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2981 }
2982
2983 static void do_utfreadchar16(compiler_common *common)
2984 {
2985 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2986 of the character (>= 0xc0). Return value in TMP1. */
/* Emits a routine entered by a fast call. On entry STR_PTR points to the
second byte of the character. Only two and three byte sequences are decoded
into their value. TMP2 is used as a scratch register and, unlike
do_utfreadchar, no length is returned in it. */
2987 DEFINE_COMPILER;
2988 struct sljit_jump *jump;
2989
2990 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (second byte & 0x3f). */
2991 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2992 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2993 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2994 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2995 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2996
2997 /* Searching for the first zero. */
/* Bit 0x800 of TMP1 is bit 0x20 of the first byte; when it is clear the
sequence is two bytes long. */
2998 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2999 jump = JUMP(SLJIT_NOT_ZERO);
3000 /* Two byte sequence. */
3001 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3002 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3003
3004 JUMPHERE(jump);
/* Bit 0x400 of TMP1 is bit 0x10 of the first byte. The result of the test
(presumably 1 or 0) is added to STR_PTR, so STR_PTR moves one extra byte
forward when the bit is set. */
3005 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3006 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3007 /* This code runs only in 8 bit mode. No need to shift the value. */
3008 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* The 0x800 marker bit is removed and the next byte is appended. */
3009 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3010 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3011 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3012 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3013 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3014 /* Three byte sequence. */
3015 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3016 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3017 }
3018
3019 static void do_utfreadtype8(compiler_common *common)
3020 {
3021 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3022 of the character (>= 0xc0). Return value in TMP1. */
/* Emits a routine entered by a fast call. On entry STR_PTR points to the
second byte of the character. The returned value is the ctypes entry for
characters up to 255 and zero for everything else. */
3023 DEFINE_COMPILER;
3024 struct sljit_jump *jump;
3025 struct sljit_jump *compare;
3026
3027 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3028
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
3029 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3030 jump = JUMP(SLJIT_NOT_ZERO);
3031 /* Two byte sequence. */
3032 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3033 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3034 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3035 /* The upper 5 bits are known at this point. */
/* When they are greater than 3 the character is 256 or above. */
3036 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3037 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3038 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3039 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3040 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3041 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3042
3043 JUMPHERE(compare);
3044 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3045 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3046
3047 /* We only have types for characters less than 256. */
3048 JUMPHERE(jump);
/* Longer sequences: the type is zero and STR_PTR is moved forward by the
utf8_table4 entry of the first byte. */
3049 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3051 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3052 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3053 }
3054
3055 #endif /* COMPILE_PCRE8 */
3056
3057 #endif /* SUPPORT_UTF */
3058
3059 #ifdef SUPPORT_UCP
3060
3061 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3062 #define UCD_BLOCK_MASK 127
3063 #define UCD_BLOCK_SHIFT 7
3064
3065 static void do_getucd(compiler_common *common)
3066 {
3067 /* Search the UCD record for the character comes in TMP1.
3068 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emits a routine entered by a fast call; RETURN_ADDR holds the return
address. The lookup goes through the ucd_stage1 and ucd_stage2 tables. */
3069 DEFINE_COMPILER;
3070
3071 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3072
3073 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[c >> UCD_BLOCK_SHIFT] (byte load). */
3074 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3075 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP2 << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK), the index into
ucd_stage2. */
3076 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3077 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3078 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (half word load; the trailing 1 is presumably the
shift applied to the index register of SLJIT_MEM2). */
3079 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3080 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype field of ucd_records[TMP2]; the shift of 3 corresponds to
the record size of 8 asserted above. */
3081 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3082 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3083 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3084 }
3085 #endif
3086
3087 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3088 {
3089 DEFINE_COMPILER;
3090 struct sljit_label *mainloop;
3091 struct sljit_label *newlinelabel = NULL;
3092 struct sljit_jump *start;
3093 struct sljit_jump *end = NULL;
3094 struct sljit_jump *nl = NULL;
3095 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3096 struct sljit_jump *singlechar;
3097 #endif
3098 jump_list *newline = NULL;
3099 BOOL newlinecheck = FALSE;
3100 BOOL readuchar = FALSE;
3101
3102 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3103 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3104 newlinecheck = TRUE;
3105
3106 if (firstline)
3107 {
3108 /* Search for the end of the first line. */
3109 SLJIT_ASSERT(common->first_line_end != 0);
3110 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3111
3112 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3113 {
3114 mainloop = LABEL();
3115 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3116 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3117 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3118 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3119 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3120 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3121 JUMPHERE(end);
3122 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3123 }
3124 else
3125 {
3126 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3127 mainloop = LABEL();
3128 /* Continual stores does not cause data dependency. */
3129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3130 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3131 check_newlinechar(common, common->nltype, &newline, TRUE);
3132 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3133 JUMPHERE(end);
3134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3135 set_jumps(newline, LABEL());
3136 }
3137
3138 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3139 }
3140
3141 start = JUMP(SLJIT_JUMP);
3142
3143 if (newlinecheck)
3144 {
3145 newlinelabel = LABEL();
3146 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3147 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3148 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3149 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3150 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3151 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3152 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3153 #endif
3154 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3155 nl = JUMP(SLJIT_JUMP);
3156 }
3157
3158 mainloop = LABEL();
3159
3160 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3161 #ifdef SUPPORT_UTF
3162 if (common->utf) readuchar = TRUE;
3163 #endif
3164 if (newlinecheck) readuchar = TRUE;
3165
3166 if (readuchar)
3167 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3168
3169 if (newlinecheck)
3170 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3171
3172 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3173 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3174 #if defined COMPILE_PCRE8
3175 if (common->utf)
3176 {
3177 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3178 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3179 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3180 JUMPHERE(singlechar);
3181 }
3182 #elif defined COMPILE_PCRE16
3183 if (common->utf)
3184 {
3185 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3186 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3187 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3188 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3189 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3190 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3191 JUMPHERE(singlechar);
3192 }
3193 #endif /* COMPILE_PCRE[8|16] */
3194 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3195 JUMPHERE(start);
3196
3197 if (newlinecheck)
3198 {
3199 JUMPHERE(end);
3200 JUMPHERE(nl);
3201 }
3202
3203 return mainloop;
3204 }
3205
/* Maximum number of prefix positions collected by scan_prefix(); the chars
   table holds two entries (value, mask) for each of them. */
3206 #define MAX_N_CHARS 16
/* Number of slots reserved for each prefix position in the bytes table.
   Slot 0 holds the number of stored byte values (255 marks a set that can no
   longer be used), the following slots hold the distinct values. */
3207 #define MAX_N_BYTES 8
3208
3209 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3210 {
3211 pcre_uint8 len = bytes[0];
3212 int i;
3213
3214 if (len == 255)
3215 return;
3216
3217 if (len == 0)
3218 {
3219 bytes[0] = 1;
3220 bytes[1] = byte;
3221 return;
3222 }
3223
3224 for (i = len; i > 0; i--)
3225 if (bytes[i] == byte)
3226 return;
3227
3228 if (len >= MAX_N_BYTES - 1)
3229 {
3230 bytes[0] = 255;
3231 return;
3232 }
3233
3234 len++;
3235 bytes[len] = byte;
3236 bytes[0] = len;
3237 }
3238
/* Walks the compiled pattern starting at cc and records what the first
   max_chars code units of a match may look like.

   chars:     two entries per position. chars[0] is the expected value with
              the "don't care" bits set, chars[1] is the mask of those bits.
              Both are set to an all-ones mask when the position is produced
              by a class or a character type.
   bytes:     MAX_N_BYTES entries per position, maintained through
              add_prefix_byte(); set to 255 for class / type positions.
   max_chars: maximum number of positions to fill.
   rec_count: work budget shared with the recursive calls. One unit is spent
              on every loop iteration, and 0 is returned once it runs out.

   Returns the number of positions filled by this call. The recursive calls
   made for optional characters and for alternatives use the returned value
   as the new max_chars limit of the caller. */
3239 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3240 {
3241 /* Recursive function, which scans prefix literals. */
3242 BOOL last, any, caseless;
3243 int len, repeat, len_save, consumed = 0;
3244 pcre_uint32 chr, mask;
3245 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case form of the current character. */
3246 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3247 pcre_uchar othercase[8];
3248 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3249 pcre_uchar othercase[2];
3250 #else
3251 pcre_uchar othercase[1];
3252 #endif
3253
3254 repeat = 1;
3255 while (TRUE)
3256 {
/* Give up when the shared work budget is exhausted. */
3257 if (*rec_count == 0)
3258 return 0;
3259 (*rec_count)--;
3260
/* last: stop after this item. any: the item matches a set of characters.
   caseless: the literal must be merged with its other case. */
3261 last = TRUE;
3262 any = FALSE;
3263 caseless = FALSE;
3264
3265 switch (*cc)
3266 {
3267 case OP_CHARI:
3268 caseless = TRUE;
/* Fall through. */
3269 case OP_CHAR:
3270 last = FALSE;
3271 cc++;
3272 break;
3273
3274 case OP_SOD:
3275 case OP_SOM:
3276 case OP_SET_SOM:
3277 case OP_NOT_WORD_BOUNDARY:
3278 case OP_WORD_BOUNDARY:
3279 case OP_EODN:
3280 case OP_EOD:
3281 case OP_CIRC:
3282 case OP_CIRCM:
3283 case OP_DOLL:
3284 case OP_DOLLM:
3285 /* Zero width assertions. */
3286 cc++;
3287 continue;
3288
/* Assertions are skipped as a whole. */
3289 case OP_ASSERT:
3290 case OP_ASSERT_NOT:
3291 case OP_ASSERTBACK:
3292 case OP_ASSERTBACK_NOT:
3293 cc = bracketend(cc);
3294 continue;
3295
3296 case OP_PLUSI:
3297 case OP_MINPLUSI:
3298 case OP_POSPLUSI:
3299 caseless = TRUE;
/* Fall through. */
/* Only one occurrence of the character is recorded and, since "last"
   stays TRUE, the scan stops after it. */
3300 case OP_PLUS:
3301 case OP_MINPLUS:
3302 case OP_POSPLUS:
3303 cc++;
3304 break;
3305
3306 case OP_EXACTI:
3307 caseless = TRUE;
/* Fall through. */
3308 case OP_EXACT:
3309 repeat = GET2(cc, 1);
3310 last = FALSE;
3311 cc += 1 + IMM2_SIZE;
3312 break;
3313
3314 case OP_QUERYI:
3315 case OP_MINQUERYI:
3316 case OP_POSQUERYI:
3317 caseless = TRUE;
/* Fall through. */
/* Optional character: the code following the character is scanned first
   (the path where it is absent), then the character itself is merged in
   below, limited to the number of positions the recursive call filled. */
3318 case OP_QUERY:
3319 case OP_MINQUERY:
3320 case OP_POSQUERY:
3321 len = 1;
3322 cc++;
3323 #ifdef SUPPORT_UTF
3324 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3325 #endif
3326 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3327 if (max_chars == 0)
3328 return consumed;
3329 last = FALSE;
3330 break;
3331
3332 case OP_KET:
3333 cc += 1 + LINK_SIZE;
3334 continue;
3335
/* Reached the end of the current alternative: jump using its link. */
3336 case OP_ALT:
3337 cc += GET(cc, 1);
3338 continue;
3339
/* Groups: every further alternative is scanned recursively into the same
   tables, then the scan continues inside the first alternative. */
3340 case OP_ONCE:
3341 case OP_ONCE_NC:
3342 case OP_BRA:
3343 case OP_BRAPOS:
3344 case OP_CBRA:
3345 case OP_CBRAPOS:
3346 alternative = cc + GET(cc, 1);
3347 while (*alternative == OP_ALT)
3348 {
3349 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3350 if (max_chars == 0)
3351 return consumed;
3352 alternative += GET(alternative, 1);
3353 }
3354
3355 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3356 cc += IMM2_SIZE;
3357 cc += 1 + LINK_SIZE;
3358 continue;
3359
/* NOTE(review): the opcode following a class is only looked at on the next
   iteration, after a position has already been recorded for the class.
   Confirm that classes followed by a repeat which allows zero matches are
   handled as intended. */
3360 case OP_CLASS:
3361 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3362 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3363 #endif
3364 any = TRUE;
3365 cc += 1 + 32 / sizeof(pcre_uchar);
3366 break;
3367
3368 case OP_NCLASS:
3369 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3370 if (common->utf) return consumed;
3371 #endif
3372 any = TRUE;
3373 cc += 1 + 32 / sizeof(pcre_uchar);
3374 break;
3375
3376 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3377 case OP_XCLASS:
3378 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3379 if (common->utf) return consumed;
3380 #endif
3381 any = TRUE;
3382 cc += GET(cc, 1);
3383 break;
3384 #endif
3385
3386 case OP_DIGIT:
3387 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3388 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3389 return consumed;
3390 #endif
3391 any = TRUE;
3392 cc++;
3393 break;
3394
3395 case OP_WHITESPACE:
3396 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3397 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3398 return consumed;
3399 #endif
3400 any = TRUE;
3401 cc++;
3402 break;
3403
3404 case OP_WORDCHAR:
3405 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3406 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3407 return consumed;
3408 #endif
3409 any = TRUE;
3410 cc++;
3411 break;
3412
/* The extra increment skips the code unit which follows OP_NOT / OP_NOTI. */
3413 case OP_NOT:
3414 case OP_NOTI:
3415 cc++;
3416 /* Fall through. */
3417 case OP_NOT_DIGIT:
3418 case OP_NOT_WHITESPACE:
3419 case OP_NOT_WORDCHAR:
3420 case OP_ANY:
3421 case OP_ALLANY:
3422 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3423 if (common->utf) return consumed;
3424 #endif
3425 any = TRUE;
3426 cc++;
3427 break;
3428
3429 #ifdef SUPPORT_UCP
3430 case OP_NOTPROP:
3431 case OP_PROP:
3432 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3433 if (common->utf) return consumed;
3434 #endif
3435 any = TRUE;
3436 cc += 1 + 2;
3437 break;
3438 #endif
3439
/* Only the repeat count is stored here; the character type opcode which
   follows is processed by the next iteration. */
3440 case OP_TYPEEXACT:
3441 repeat = GET2(cc, 1);
3442 cc += 1 + IMM2_SIZE;
3443 continue;
3444
3445 case OP_NOTEXACT:
3446 case OP_NOTEXACTI:
3447 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3448 if (common->utf) return consumed;
3449 #endif
3450 any = TRUE;
3451 repeat = GET2(cc, 1);
3452 cc += 1 + IMM2_SIZE + 1;
3453 break;
3454
/* Every other opcode ends the prefix. */
3455 default:
3456 return consumed;
3457 }
3458
/* Class / type item: "repeat" positions are recorded with an all-ones
   value and mask, and their byte sets are marked as unusable. */
3459 if (any)
3460 {
3461 #if defined COMPILE_PCRE8
3462 mask = 0xff;
3463 #elif defined COMPILE_PCRE16
3464 mask = 0xffff;
3465 #elif defined COMPILE_PCRE32
3466 mask = 0xffffffff;
3467 #else
3468 SLJIT_ASSERT_STOP();
3469 #endif
3470
3471 do
3472 {
3473 chars[0] = mask;
3474 chars[1] = mask;
3475 bytes[0] = 255;
3476
3477 consumed++;
3478 if (--max_chars == 0)
3479 return consumed;
3480 chars += 2;
3481 bytes += MAX_N_BYTES;
3482 }
3483 while (--repeat > 0);
3484
3485 repeat = 1;
3486 continue;
3487 }
3488
/* Literal character: len is its length in code units. */
3489 len = 1;
3490 #ifdef SUPPORT_UTF
3491 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3492 #endif
3493
3494 if (caseless && char_has_othercase(common, cc))
3495 {
3496 #ifdef SUPPORT_UTF
3497 if (common->utf)
3498 {
3499 GETCHAR(chr, cc);
/* Both case forms must occupy the same number of code units. */
3500 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3501 return consumed;
3502 }
3503 else
3504 #endif
3505 {
3506 chr = *cc;
3507 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3508 }
3509 }
3510 else
3511 caseless = FALSE;
3512
/* Saved so that the same character can be recorded "repeat" times. */
3513 len_save = len;
3514 cc_save = cc;
3515 while (TRUE)
3516 {
3517 oc = othercase;
3518 do
3519 {
3520 chr = *cc;
3521 #ifdef COMPILE_PCRE32
3522 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3523 return consumed;
3524 #endif
3525 add_prefix_byte((pcre_uint8)chr, bytes);
3526
3527 mask = 0;
3528 if (caseless)
3529 {
3530 add_prefix_byte((pcre_uint8)*oc, bytes);
/* The bits which differ between the two case forms become don't care bits. */
3531 mask = *cc ^ *oc;
3532 chr |= mask;
3533 }
3534
/* A NOTACHAR value means that nothing has been recorded for this
   position so far. */
3535 #ifdef COMPILE_PCRE32
3536 if (chars[0] == NOTACHAR && chars[1] == 0)
3537 #else
3538 if (chars[0] == NOTACHAR)
3539 #endif
3540 {
3541 chars[0] = chr;
3542 chars[1] = mask;
3543 }
3544 else
3545 {
/* Merge with the value recorded by an earlier path: every bit which
   differs is added to the mask. */
3546 mask |= chars[0] ^ chr;
3547 chr |= mask;
3548 chars[0] = chr;
3549 chars[1] |= mask;
3550 }
3551
3552 len--;
3553 consumed++;
3554 if (--max_chars == 0)
3555 return consumed;
3556 chars += 2;
3557 bytes += MAX_N_BYTES;
3558 cc++;
3559 oc++;
3560 }
3561 while (len > 0);
3562
3563 if (--repeat == 0)
3564 break;
3565
3566 len = len_save;
3567 cc = cc_save;
3568 }
3569
3570 repeat = 1;
3571 if (last)
3572 return consumed;
3573 }
3574 }
3575
3576 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3577
3578 static sljit_si character_to_int32(pcre_uchar chr)
3579 {
3580 sljit_si value = (sljit_si)chr;
3581 #if defined COMPILE_PCRE8
3582 #define SSE2_COMPARE_TYPE_INDEX 0
3583 return (value << 24) | (value << 16) | (value << 8) | value;
3584 #elif defined COMPILE_PCRE16
3585 #define SSE2_COMPARE_TYPE_INDEX 1
3586 return (value << 16) | value;
3587 #elif defined COMPILE_PCRE32
3588 #define SSE2_COMPARE_TYPE_INDEX 2
3589 return value;
3590 #else
3591 #error "Unsupported unit width"
3592 #endif
3593 }
3594
3595 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3596 {
3597 DEFINE_COMPILER;
3598 struct sljit_label *start;
3599 struct sljit_jump *quit[3];
3600 struct sljit_jump *nomatch;
3601 sljit_ub instruction[8];
3602 sljit_si tmp1_ind = sljit_get_register_index(TMP1);
3603 sljit_si tmp2_ind = sljit_get_register_index(TMP2);
3604 sljit_si str_ptr_ind = sljit_get_register_index(STR_PTR);
3605 BOOL load_twice = FALSE;
3606 pcre_uchar bit;
3607
3608 bit = char1 ^ char2;
3609 if (!is_powerof2(bit))
3610 bit = 0;
3611
3612 if ((char1 != char2) && bit == 0)
3613 load_twice = TRUE;
3614
3615 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3616
3617 /* First part (unaligned start) */
3618
3619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3620
3621 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3622
3623 /* MOVD xmm, r/m32 */
3624 instruction[0] = 0x66;
3625 instruction[1] = 0x0f;
3626 instruction[2] = 0x6e;
3627 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3628 sljit_emit_op_custom(compiler, instruction, 4);
3629
3630 if (char1 != char2)
3631 {
3632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3633
3634 /* MOVD xmm, r/m32 */
3635 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3636 sljit_emit_op_custom(compiler, instruction, 4);
3637 }
3638
3639 /* PSHUFD xmm1, xmm2/m128, imm8 */
3640 instruction[2] = 0x70;
3641 instruction[3] = 0xc0 | (2 << 3) | 2;
3642 instruction[4] = 0;
3643 sljit_emit_op_custom(compiler, instruction, 5);
3644
3645 if (char1 != char2)
3646 {
3647 /* PSHUFD xmm1, xmm2/m128, imm8 */
3648 instruction[3] = 0xc0 | (3 << 3) | 3;
3649 instruction[4] = 0;
3650 sljit_emit_op_custom(compiler, instruction, 5);
3651 }
3652
3653 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3654 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3655
3656 /* MOVDQA xmm1, xmm2/m128 */
3657 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3658
3659 if (str_ptr_ind < 8)
3660 {
3661 instruction[2] = 0x6f;
3662 instruction[3] = (0 << 3) | str_ptr_ind;
3663 sljit_emit_op_custom(compiler, instruction, 4);
3664
3665 if (load_twice)
3666 {
3667 instruction[3] = (1 << 3) | str_ptr_ind;
3668 sljit_emit_op_custom(compiler, instruction, 4);
3669 }
3670 }
3671 else
3672 {
3673 instruction[1] = 0x41;
3674 instruction[2] = 0x0f;
3675 instruction[3] = 0x6f;
3676 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3677 sljit_emit_op_custom(compiler, instruction, 5);
3678
3679 if (load_twice)
3680 {
3681 instruction[4] = (1 << 3) | str_ptr_ind;
3682 sljit_emit_op_custom(compiler, instruction, 5);
3683 }
3684 instruction[1] = 0x0f;
3685 }
3686
3687 #else
3688
3689 instruction[2] = 0x6f;
3690 instruction[3] = (0 << 3) | str_ptr_ind;
3691 sljit_emit_op_custom(compiler, instruction, 4);
3692
3693 if (load_twice)
3694 {
3695 instruction[3] = (1 << 3) | str_ptr_ind;
3696 sljit_emit_op_custom(compiler, instruction, 4);
3697 }
3698
3699 #endif
3700
3701 if (bit != 0)
3702 {
3703 /* POR xmm1, xmm2/m128 */
3704 instruction[2] = 0xeb;
3705 instruction[3] = 0xc0 | (0 << 3) | 3;
3706 sljit_emit_op_custom(compiler, instruction, 4);
3707 }
3708
3709 /* PCMPEQB/W/D xmm1, xmm2/m128 */
3710 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3711 instruction[3] = 0xc0 | (0 << 3) | 2;
3712 sljit_emit_op_custom(compiler, instruction, 4);
3713
3714 if (load_twice)
3715 {
3716 instruction[3] = 0xc0 | (1 << 3) | 3;
3717 sljit_emit_op_custom(compiler, instruction, 4);
3718 }
3719
3720 /* PMOVMSKB reg, xmm */
3721 instruction[2] = 0xd7;
3722 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
3723 sljit_emit_op_custom(compiler, instruction, 4);
3724
3725 if (load_twice)
3726 {
3727 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
3728 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
3729 sljit_emit_op_custom(compiler, instruction, 4);
3730
3731 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3732 OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
3733 }
3734
3735 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
3736
3737 /* BSF r32, r/m32 */
3738 instruction[0] = 0x0f;
3739 instruction[1] = 0xbc;
3740 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
3741 sljit_emit_op_custom(compiler, instruction, 3);
3742
3743 nomatch = JUMP(SLJIT_ZERO);
3744
3745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3747 quit[1] = JUMP(SLJIT_JUMP);
3748
3749 JUMPHERE(nomatch);
3750
3751 start = LABEL();
3752 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
3753 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3754
3755 /* Second part (aligned) */
3756
3757 instruction[0] = 0x66;
3758 instruction[1] = 0x0f;
3759
3760 /* MOVDQA xmm1, xmm2/m128 */
3761 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3762
3763 if (str_ptr_ind < 8)
3764 {
3765 instruction[2] = 0x6f;
3766 instruction[3] = (0 << 3) | str_ptr_ind;
3767 sljit_emit_op_custom(compiler, instruction, 4);
3768
3769 if (load_twice)
3770 {
3771 instruction[3] = (1 << 3) | str_ptr_ind;
3772 sljit_emit_op_custom(compiler, instruction, 4);
3773 }
3774 }
3775 else
3776 {
3777 instruction[1] = 0x41;
3778 instruction[2] = 0x0f;
3779 instruction[3] = 0x6f;
3780 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3781 sljit_emit_op_custom(compiler, instruction, 5);
3782
3783 if (load_twice)
3784 {
3785 instruction[4] = (1 << 3) | str_ptr_ind;
3786 sljit_emit_op_custom(compiler, instruction, 5);
3787 }
3788 instruction[1] = 0x0f;
3789 }
3790
3791 #else
3792
3793 instruction[2] = 0x6f;
3794 instruction[3] = (0 << 3) | str_ptr_ind;
3795 sljit_emit_op_custom(compiler, instruction, 4);
3796
3797 if (load_twice)
3798 {
3799 instruction[3] = (1 << 3) | str_ptr_ind;
3800 sljit_emit_op_custom(compiler, instruction, 4);
3801 }
3802
3803 #endif
3804
3805 if (bit != 0)
3806 {
3807 /* POR xmm1, xmm2/m128 */
3808 instruction[2] = 0xeb;
3809 instruction[3] = 0xc0 | (0 << 3) | 3;
3810 sljit_emit_op_custom(compiler, instruction, 4);
3811 }
3812
3813 /* PCMPEQB/W/D xmm1, xmm2/m128 */
3814 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3815 instruction[3] = 0xc0 | (0 << 3) | 2;
3816 sljit_emit_op_custom(compiler, instruction, 4);
3817
3818 if (load_twice)
3819 {
3820 instruction[3] = 0xc0 | (1 << 3) | 3;
3821 sljit_emit_op_custom(compiler, instruction, 4);
3822 }
3823
3824 /* PMOVMSKB reg, xmm */
3825 instruction[2] = 0xd7;
3826 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
3827 sljit_emit_op_custom(compiler, instruction, 4);
3828
3829 if (load_twice)
3830 {
3831 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
3832 sljit_emit_op_custom(compiler, instruction, 4);
3833
3834 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3835 }
3836
3837 /* BSF r32, r/m32 */
3838 instruction[0] = 0x0f;
3839 instruction[1] = 0xbc;
3840 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
3841 sljit_emit_op_custom(compiler, instruction, 3);
3842
3843 JUMPTO(SLJIT_ZERO, start);
3844
3845 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3846
3847 start = LABEL();
3848 SET_LABEL(quit[0], start);
3849 SET_LABEL(quit[1], start);
3850 SET_LABEL(quit[2], start);
3851 }
3852
3853 #undef SSE2_COMPARE_TYPE_INDEX
3854
3855 #endif
3856
3857 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_si offset)
3858 {
3859 DEFINE_COMPILER;
3860 struct sljit_label *start;
3861 struct sljit_jump *quit;
3862 struct sljit_jump *found;
3863 pcre_uchar mask;
3864 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3865 struct sljit_label *utf_start = NULL;
3866 struct sljit_jump *utf_quit = NULL;
3867 #endif
3868 BOOL has_first_line_end = (common->first_line_end != 0);
3869
3870 if (offset > 0)
3871 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
3872
3873 if (has_first_line_end)
3874 {
3875 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3876
3877 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
3878 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3879 if (sljit_x86_is_cmov_available())
3880 {
3881 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
3882 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
3883 }
3884 #endif
3885 {
3886 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
3887 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3888 JUMPHERE(quit);
3889 }
3890 }
3891
3892 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3893 if (common->utf && offset > 0)
3894 utf_start = LABEL();
3895 #endif
3896
3897 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3898
3899 /* SSE2 accelerated first character search. */
3900
3901 if (sljit_x86_is_sse2_available())
3902 {
3903 fast_forward_first_char2_sse2(common, char1, char2);
3904
3905 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
3906 if (common->mode == JIT_COMPILE)
3907 {
3908 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
3909 SLJIT_ASSERT(common->forced_quit_label == NULL);
3910 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
3911 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3912
3913 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3914 if (common->utf && offset > 0)
3915 {
3916 SLJIT_ASSERT(common->mode == JIT_COMPILE);
3917
3918 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
3919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3920 #if defined COMPILE_PCRE8
3921 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3922 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
3923 #elif defined COMPILE_PCRE16
3924 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3925 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
3926 #else
3927 #error "Unknown code width"
3928 #endif
3929 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3930 }
3931 #endif
3932
3933 if (offset > 0)
3934 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
3935 }
3936 else if (sljit_x86_is_cmov_available())
3937 {
3938 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
3939 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_first_line_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_first_line_end ? common->first_line_end : 0);
3940 }
3941 else
3942 {
3943 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3944 OP1(SLJIT_MOV, STR_PTR, 0, has_first_line_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_first_line_end ? common->first_line_end : 0);
3945 JUMPHERE(quit);
3946 }
3947
3948 if (has_first_line_end)
3949 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3950 return;
3951 }
3952
3953 #endif
3954
3955 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3956
3957 start = LABEL();
3958 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3959
3960 if (char1 == char2)
3961 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
3962 else
3963 {
3964 mask = char1 ^ char2;
3965 if (is_powerof2(mask))
3966 {
3967 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
3968 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
3969 }
3970 else
3971 {
3972 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
3973 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3974 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
3975 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3976 found = JUMP(SLJIT_NOT_ZERO);
3977 }
3978 }
3979
3980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3981 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
3982
3983 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3984 if (common->utf && offset > 0)
3985 utf_quit = JUMP(SLJIT_JUMP);
3986 #endif
3987
3988 JUMPHERE(found);
3989
3990 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3991 if (common->utf && offset > 0)
3992 {
3993 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
3994 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3995 #if defined COMPILE_PCRE8
3996 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3997 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
3998 #elif defined COMPILE_PCRE16
3999 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4000 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4001 #else
4002 #error "Unknown code width"
4003 #endif
4004 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4005 JUMPHERE(utf_quit);
4006 }
4007 #endif
4008
4009 JUMPHERE(quit);
4010
4011 if (has_first_line_end)
4012 {
4013 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4014 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4015 if (offset > 0)
4016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4017 JUMPHERE(quit);
4018 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4019 }
4020
4021 if (offset > 0)
4022 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4023 }
4024
/* Emits a loop which skips the subject positions that cannot be the start
   of a match, using the prefix description collected by scan_prefix().

   Two filters may be combined:
     - a 256 entry skip table indexed by one byte of the subject, used when
       a long enough run of prefix positions has usable byte sets;
     - direct compares of up to three prefix positions whose mask has at
       most two bits set.

   firstline: when TRUE, the search is limited by common->first_line_end.

   Returns FALSE when no code was emitted because the prefix is not
   suitable, TRUE otherwise. TRUE is also returned when the skip table
   cannot be allocated. */
4025 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
4026 {
4027 DEFINE_COMPILER;
4028 struct sljit_label *start;
4029 struct sljit_jump *quit;
4030 pcre_uint32 chars[MAX_N_CHARS * 2];
4031 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
4032 pcre_uint8 ones[MAX_N_CHARS];
4033 int offsets[3];
4034 pcre_uint32 mask;
4035 pcre_uint8 *byte_set, *byte_set_end;
4036 int i, max, from;
4037 int range_right = -1, range_len = 3 - 1;
4038 sljit_ub *update_table = NULL;
4039 BOOL in_range;
4040 pcre_uint32 rec_count;
4041
/* Mark every position as unset: NOTACHAR value, empty mask, empty byte set. */
4042 for (i = 0; i < MAX_N_CHARS; i++)
4043 {
4044 chars[i << 1] = NOTACHAR;
4045 chars[(i << 1) + 1] = 0;
4046 bytes[i * MAX_N_BYTES] = 0;
4047 }
4048
/* Work budget shared by all (recursive) scan_prefix() invocations. */
4049 rec_count = 10000;
4050 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
4051
4052 if (max <= 1)
4053 return FALSE;
4054
/* ones[i] is the number of bits set in the mask of position i. */
4055 for (i = 0; i < max; i++)
4056 {
4057 mask = chars[(i << 1) + 1];
4058 ones[i] = ones_in_half_byte[mask & 0xf];
4059 mask >>= 4;
4060 while (mask != 0)
4061 {
4062 ones[i] += ones_in_half_byte[mask & 0xf];
4063 mask >>= 4;
4064 }
4065 }
4066
/* Search for the longest run (longer than the initial range_len of two)
   of consecutive positions with usable byte sets (count below 255) which
   ends at a position having at most four byte values. range_right is the
   index of the last position of that run, range_len is its length. */
4067 in_range = FALSE;
4068 from = 0; /* Prevent compiler "uninitialized" warning */
4069 for (i = 0; i <= max; i++)
4070 {
4071 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
4072 {
4073 range_len = i - from;
4074 range_right = i - 1;
4075 }
4076
4077 if (i < max && bytes[i * MAX_N_BYTES] < 255)
4078 {
4079 if (!in_range)
4080 {
4081 in_range = TRUE;
4082 from = i;
4083 }
4084 }
4085 else if (in_range)
4086 in_range = FALSE;
4087 }
4088
/* Build the skip table: update_table[b] is the number of bytes STR_PTR is
   advanced by when byte b is read at position range_right. It is zero when
   b is possible at range_right itself, and IN_UCHARS(range_len) when b does
   not occur anywhere in the run. */
4089 if (range_right >= 0)
4090 {
4091 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
4092 if (update_table == NULL)
4093 return TRUE;
4094 memset(update_table, IN_UCHARS(range_len), 256);
4095
4096 for (i = 0; i < range_len; i++)
4097 {
4098 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
4099 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
4100 byte_set_end = byte_set + byte_set[0];
4101 byte_set++;
4102 while (byte_set <= byte_set_end)
4103 {
4104 if (update_table[*byte_set] > IN_UCHARS(i))
4105 update_table[*byte_set] = IN_UCHARS(i);
4106 byte_set++;
4107 }
4108 }
4109 }
4110
/* offsets[0]: first position whose mask has at most two bits set. */
4111 offsets[0] = -1;
4112 /* Scan forward. */
4113 for (i = 0; i < max; i++)
4114 if (ones[i] <= 2) {
4115 offsets[0] = i;
4116 break;
4117 }
4118
4119 if (offsets[0] < 0 && range_right < 0)
4120 return FALSE;
4121
4122 if (offsets[0] >= 0)
4123 {
/* offsets[1]: last such position, excluding the one covered by the table. */
4124 /* Scan backward. */
4125 offsets[1] = -1;
4126 for (i = max - 1; i > offsets[0]; i--)
4127 if (ones[i] <= 2 && i != range_right)
4128 {
4129 offsets[1] = i;
4130 break;
4131 }
4132
4133 /* This case is handled better by fast_forward_first_char. */
4134 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
4135 return FALSE;
4136
/* offsets[2]: an optional position between offsets[0] and offsets[1]. */
4137 offsets[2] = -1;
4138 /* We only search for a middle character if there is no range check. */
4139 if (offsets[1] >= 0 && range_right == -1)
4140 {
4141 /* Scan from middle. */
4142 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
4143 if (ones[i] <= 2)
4144 {
4145 offsets[2] = i;
4146 break;
4147 }
4148
4149 if (offsets[2] == -1)
4150 {
4151 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
4152 if (ones[i] <= 2)
4153 {
4154 offsets[2] = i;
4155 break;
4156 }
4157 }
4158 }
4159
4160 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
4161 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
4162
/* The value / mask pairs of the selected positions are moved to the front
   of chars: [0],[1] for offsets[0], [2],[3] for offsets[2] and [4],[5] for
   offsets[1]. */
4163 chars[0] = chars[offsets[0] << 1];
4164 chars[1] = chars[(offsets[0] << 1) + 1];
4165 if (offsets[2] >= 0)
4166 {
4167 chars[2] = chars[offsets[2] << 1];
4168 chars[3] = chars[(offsets[2] << 1) + 1];
4169 }
4170 if (offsets[1] >= 0)
4171 {
4172 chars[4] = chars[offsets[1] << 1];
4173 chars[5] = chars[(offsets[1] << 1) + 1];
4174 }
4175 }
4176
/* STR_END is lowered by max code units while the loop runs (presumably to
   keep the reads at the prefix offsets inside the subject) and is restored
   at the end. With firstline, the original STR_END is saved in TMP3 and the
   limit is additionally capped by first_line_end. */
4177 max -= 1;
4178 if (firstline)
4179 {
4180 SLJIT_ASSERT(common->first_line_end != 0);
4181 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4182 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4183 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4184 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4185 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4186 JUMPHERE(quit);
4187 }
4188 else
4189 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4190
/* Except on 32 bit x86, the table address is kept in RETURN_ADDR. */
4191 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4192 if (range_right >= 0)
4193 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4194 #endif
4195
4196 start = LABEL();
4197 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4198
4199 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
4200
4201 if (range_right >= 0)
4202 {
/* Read one byte of the code unit at range_right; on big endian targets
   with wider code units the last byte of the unit is read instead. */
4203 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4204 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4205 #else
4206 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4207 #endif
4208
4209 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4210 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4211 #else
4212 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4213 #endif
/* Advance by the table value and retry unless the value is zero. */
4214 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4215 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4216 }
4217
4218 if (offsets[0] >= 0)
4219 {
4220 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
4221 if (offsets[1] >= 0)
4222 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
/* STR_PTR is incremented before the compares, so every failing compare
   can jump straight back to start. */
4223 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4224
4225 if (chars[1] != 0)
4226 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
4227 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* The - 1 compensates for the increment of STR_PTR above. */
4228 if (offsets[2] >= 0)
4229 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
4230
4231 if (offsets[1] >= 0)
4232 {
4233 if (chars[5] != 0)
4234 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
4235 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
4236 }
4237
4238 if (offsets[2] >= 0)
4239 {
4240 if (chars[3] != 0)
4241 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
4242 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
4243 }
/* Every compare succeeded: undo the increment. */
4244 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4245 }
4246
4247 JUMPHERE(quit);
4248
/* Restore STR_END. With firstline and a skip table, STR_PTR is also capped
   at first_line_end. */
4249 if (firstline)
4250 {
4251 if (range_right >= 0)
4252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4253 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4254 if (range_right >= 0)
4255 {
4256 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4257 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4258 JUMPHERE(quit);
4259 }
4260 }
4261 else
4262 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4263 return TRUE;
4264 }
4265
4266 #undef MAX_N_CHARS
4267 #undef MAX_N_BYTES
4268
4269 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4270 {
4271 pcre_uchar oc;
4272
4273 oc = first_char;
4274 if (caseless)
4275 {
4276 oc = TABLE_GET(first_char, common->fcc, first_char);
4277 #if defined SUPPORT_UTF && !defined COMPILE_PCRE8
4278 if (first_char > 127 && common->utf)
4279 oc = UCD_OTHERCASE(first_char);
4280 #endif
4281 }
4282
4283 fast_forward_first_char2(common, first_char, oc, 0);
4284 }
4285
/* Emit the start-of-match optimisation that scans the subject for a newline
and leaves STR_PTR just after it.

Nothing is scanned when STR_PTR is at or before jit_arguments.str. When
common->first_line_end is non-zero, STR_END is temporarily replaced by the
value held in that local (the original is parked in TMP3) and restored before
the emitted code falls through.

Two code shapes are generated:
  - a dedicated loop for a fixed two-character newline (NLTYPE_FIXED with
    common->newline > 255; the two characters are the high and low byte of
    common->newline);
  - a generic loop built on read_char_range() and check_newlinechar(), with
    extra CR LF handling for NLTYPE_ANY and NLTYPE_ANYCRLF.

The generic loop label is also published in common->ff_newline_shortcut. */
4286 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4287 {
4288 DEFINE_COMPILER;
4289 struct sljit_label *loop;
4290 struct sljit_jump *lastchar;
4291 struct sljit_jump *firstchar;
4292 struct sljit_jump *quit;
4293 struct sljit_jump *foundcr = NULL;
4294 struct sljit_jump *notfoundnl;
4295 jump_list *newline = NULL;
4296
4297 if (common->first_line_end != 0)
4298 {
4299 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4300 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4301 }
4302
/* Fixed two-character newline. */
4303 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4304 {
/* Skip the scan at or beyond STR_END, and at or before args->str. */
4305 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4306 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4307 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4308 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4309 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4310
/* TMP2 = (STR_PTR >= begin + 2 characters) as a flag value, scaled to the
code unit size; STR_PTR is moved back by that amount before the loop. */
4311 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4312 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4313 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4314 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4315 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4316 #endif
4317 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4318
/* Advance one character at a time until the two characters just before
STR_PTR are the newline pair, or STR_END is reached. */
4319 loop = LABEL();
4320 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4321 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4322 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4323 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4324 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4325 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4326
4327 JUMPHERE(quit);
4328 JUMPHERE(firstchar);
4329 JUMPHERE(lastchar);
4330
4331 if (common->first_line_end != 0)
4332 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4333 return;
4334 }
4335
/* Generic newline types. */
4336 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4338 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4339 skip_char_back(common);
4340
4341 loop = LABEL();
4342 common->ff_newline_shortcut = loop;
4343
4344 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4345 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately so that a following NL can be consumed too. */
4346 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4347 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in newline are bound to the loop label, i.e. they
continue the scan (presumably taken when the character is not a newline). */
4348 check_newlinechar(common, common->nltype, &newline, FALSE);
4349 set_jumps(newline, loop);
4350
4351 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4352 {
4353 quit = JUMP(SLJIT_JUMP);
4354 JUMPHERE(foundcr);
4355 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* After a CR: add one character to STR_PTR when the next one is CHAR_NL. */
4356 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4357 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4358 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4359 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4360 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4361 #endif
4362 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4363 JUMPHERE(notfoundnl);
4364 JUMPHERE(quit);
4365 }
4366 JUMPHERE(lastchar);
4367 JUMPHERE(firstchar);
4368
4369 if (common->first_line_end != 0)
4370 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4371 }
4372
4373 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4374
/* Emit the start-of-match optimisation that advances STR_PTR until the
character at STR_PTR has its bit set in the start_bits map, or STR_END is
reached.

Arguments:
  common      the JIT compiler state
  start_bits  bitmap indexed by character value (byte = c >> 3, bit = c & 7)

check_class_ranges() is tried first; when it cannot express the map as a few
range compares, an explicit bitmap lookup is emitted instead. When
common->first_line_end is non-zero, STR_END is temporarily replaced by the
value held in that local; the original is kept in RETURN_ADDR. */
4375 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_ub *start_bits)
4376 {
4377 DEFINE_COMPILER;
4378 struct sljit_label *start;
4379 struct sljit_jump *quit;
4380 struct sljit_jump *found = NULL;
4381 jump_list *matches = NULL;
4382 #ifndef COMPILE_PCRE8
4383 struct sljit_jump *jump;
4384 #endif
4385
4386 if (common->first_line_end != 0)
4387 {
4388 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4389 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4390 }
4391
4392 start = LABEL();
4393 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4394 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The tests below modify TMP1; in UTF mode the unmodified code unit is kept
in TMP3 because it is needed to skip the rest of the character. */
4395 #ifdef SUPPORT_UTF
4396 if (common->utf)
4397 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4398 #endif
4399
/* invert is TRUE, so the jumps collected in matches are the "found" exits. */
4400 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4401 {
/* Values of 255 and above all use the bit for 255. */
4402 #ifndef COMPILE_PCRE8
4403 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4404 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4405 JUMPHERE(jump);
4406 #endif
/* Test bit (TMP1 & 7) of byte start_bits[TMP1 >> 3]. */
4407 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4408 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4409 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4410 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4411 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4412 found = JUMP(SLJIT_NOT_ZERO);
4413 }
4414
/* No match here: step over the whole character and try again. */
4415 #ifdef SUPPORT_UTF
4416 if (common->utf)
4417 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4418 #endif
4419 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4420 #ifdef SUPPORT_UTF
4421 #if defined COMPILE_PCRE8
4422 if (common->utf)
4423 {
/* Code units of 0xc0 and above: add the value found in utf8_table4. */
4424 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4425 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4426 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4427 }
4428 #elif defined COMPILE_PCRE16
4429 if (common->utf)
4430 {
/* When (unit & 0xfc00) == 0xd800, skip one more 16-bit unit (2 bytes). */
4431 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4432 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4433 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4434 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4435 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4436 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4437 }
4438 #endif /* COMPILE_PCRE[8|16] */
4439 #endif /* SUPPORT_UTF */
4440 JUMPTO(SLJIT_JUMP, start);
/* All exits (bitmap hit, range hit, end of subject) meet here. */
4441 if (found != NULL)
4442 JUMPHERE(found);
4443 if (matches != NULL)
4444 set_jumps(matches, LABEL());
4445 JUMPHERE(quit);
4446
4447 if (common->first_line_end != 0)
4448 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4449 }
4450
/* Emit a forward scan for a required character.

Arguments:
  common         the JIT compiler state
  req_char       the character to look for
  caseless       TRUE when the other case of req_char is acceptable as well
  has_firstchar  TRUE to start the scan one character after STR_PTR

Returns the jump that is taken when the scan reaches STR_END without finding
the character; the caller has to bind it.

The position of the last hit is cached in the local at common->req_char_ptr.
The scan is skipped when STR_PTR is still below the cached position, or when
STR_PTR + REQ_BYTE_MAX is below STR_END. */
4451 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4452 {
4453 DEFINE_COMPILER;
4454 struct sljit_label *loop;
4455 struct sljit_jump *toolong;
4456 struct sljit_jump *alreadyfound;
4457 struct sljit_jump *found;
4458 struct sljit_jump *foundoc = NULL;
4459 struct sljit_jump *notfound;
4460 pcre_uint32 oc, bit;
4461
4462 SLJIT_ASSERT(common->req_char_ptr != 0);
4463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4464 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4465 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4466 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4467
/* TMP1 is the scan pointer. */
4468 if (has_firstchar)
4469 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4470 else
4471 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4472
4473 loop = LABEL();
4474 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4475
4476 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4477 oc = req_char;
4478 if (caseless)
4479 {
4480 oc = TABLE_GET(req_char, common->fcc, req_char);
4481 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4482 if (req_char > 127 && common->utf)
4483 oc = UCD_OTHERCASE(req_char);
4484 #endif
4485 }
4486 if (req_char == oc)
4487 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4488 else
4489 {
4490 bit = req_char ^ oc;
/* When the two cases differ in a single bit, force that bit on and compare
once; otherwise compare against each case separately. */
4491 if (is_powerof2(bit))
4492 {
4493 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4494 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4495 }
4496 else
4497 {
4498 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4499 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4500 }
4501 }
4502 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4503 JUMPTO(SLJIT_JUMP, loop);
4504
/* Found: remember where, so that later calls can skip the scan. */
4505 JUMPHERE(found);
4506 if (foundoc)
4507 JUMPHERE(foundoc);
4508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4509 JUMPHERE(alreadyfound);
4510 JUMPHERE(toolong);
4511 return notfound;
4512 }
4513
/* Emit the fast-call subroutine that walks the records starting at STACK_TOP
and writes the saved values back into the local area.

TMP1 is the read pointer and TMP3 the local base. The first word of each
record (loaded into TMP2) selects the action:
  > 0  it is an offset from the local base; the next two words are stored
       there and the pointer advances by three words;
  = 0  end of the records: return to the caller;
  < 0  its negation is an offset from the local base; the next word is stored
       there and the pointer advances by two words. */
4514 static void do_revertframes(compiler_common *common)
4515 {
4516 DEFINE_COMPILER;
4517 struct sljit_jump *jump;
4518 struct sljit_label *mainloop;
4519
4520 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4521 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4522 GET_LOCAL_BASE(TMP3, 0, 0);
4523
4524 /* Drop frames until we reach STACK_TOP. */
4525 mainloop = LABEL();
4526 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4527 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4528 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4529
4530 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4531 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4532 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4533 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4534 JUMPTO(SLJIT_JUMP, mainloop);
4535
4536 JUMPHERE(jump);
/* No new compare is emitted here: this jump tests the flags produced by the
SUB above (zero falls through to the return, negative takes the jump). */
4537 jump = JUMP(SLJIT_SIG_LESS);
4538 /* End of dropping frames. */
4539 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4540
4541 JUMPHERE(jump);
4542 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4543 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4544 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4545 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4546 JUMPTO(SLJIT_JUMP, mainloop);
4547 }
4548
/* Emit the fast-call subroutine that tests for a word boundary at STR_PTR.

The return address is kept in LOCALS0. LOCALS1 receives 1 when the character
before STR_PTR is a word character (0 otherwise, also when STR_PTR is at or
before jit_arguments.begin). TMP2 receives the same for the character at
STR_PTR (0 when check_str_end() takes its exit). The final XOR of the two sets
the E flag, so the result is non-zero exactly when the two sides differ.

With common->use_ucp a character counts as a word character when it is an
underscore or when the value left in TMP1 by the getucd call (presumably the
character type) lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. Otherwise the
ctype_word bit of the common->ctypes table is used and characters above 255
are never word characters. */
4549 static void check_wordboundary(compiler_common *common)
4550 {
4551 DEFINE_COMPILER;
4552 struct sljit_jump *skipread;
4553 jump_list *skipread_list = NULL;
4554 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4555 struct sljit_jump *jump;
4556 #endif
4557
/* The table code below shifts by 4 to reach the ctype_word bit. */
4558 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4559
4560 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4561 /* Get type of the previous char, and put it to LOCALS1. */
4562 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4565 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4566 skip_char_back(common);
4567 check_start_used_ptr(common);
4568 read_char(common);
4569
4570 /* Testing char type. */
4571 #ifdef SUPPORT_UCP
4572 if (common->use_ucp)
4573 {
/* TMP2 is preset to 1 for the underscore shortcut. */
4574 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4575 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4576 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks: ucp_Ll..ucp_Lu, then ucp_Nd..ucp_No. */
4577 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4578 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4579 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4580 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4581 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4582 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4583 JUMPHERE(jump);
4584 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4585 }
4586 else
4587 #endif
4588 {
4589 #ifndef COMPILE_PCRE8
4590 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4591 #elif defined SUPPORT_UTF
4592 /* Here LOCALS1 has already been zeroed. */
4593 jump = NULL;
4594 if (common->utf)
4595 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4596 #endif /* COMPILE_PCRE8 */
4597 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4598 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4599 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4601 #ifndef COMPILE_PCRE8
4602 JUMPHERE(jump);
4603 #elif defined SUPPORT_UTF
4604 if (jump != NULL)
4605 JUMPHERE(jump);
4606 #endif /* COMPILE_PCRE8 */
4607 }
4608 JUMPHERE(skipread);
4609
/* Now the character at STR_PTR; TMP2 stays 0 when check_str_end() exits. */
4610 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4611 check_str_end(common, &skipread_list);
4612 peek_char(common, READ_CHAR_MAX);
4613
4614 /* Testing char type. This is a code duplication. */
4615 #ifdef SUPPORT_UCP
4616 if (common->use_ucp)
4617 {
4618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4619 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4620 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4621 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4622 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4623 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4624 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4625 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4626 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4627 JUMPHERE(jump);
4628 }
4629 else
4630 #endif
4631 {
4632 #ifndef COMPILE_PCRE8
4633 /* TMP2 may be destroyed by peek_char. */
4634 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4635 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4636 #elif defined SUPPORT_UTF
4637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4638 jump = NULL;
4639 if (common->utf)
4640 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4641 #endif
4642 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4643 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4644 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4645 #ifndef COMPILE_PCRE8
4646 JUMPHERE(jump);
4647 #elif defined SUPPORT_UTF
4648 if (jump != NULL)
4649 JUMPHERE(jump);
4650 #endif /* COMPILE_PCRE8 */
4651 }
4652 set_jumps(skipread_list, LABEL());
4653
/* Flags reflect (next is word) XOR (previous is word). */
4654 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4655 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4656 }
4657
/* Try to compile a 256-bit class bitmap as a few compare instructions on the
character in TMP1 instead of a bitmap lookup.

Arguments:
  common      the JIT compiler state
  bits        the 32-byte bitmap (bit c & 7 of byte c >> 3 describes c)
  nclass      controls whether a closing boundary at 256 is added, depending
              on the value of the last bit in the map
  invert      TRUE to swap the sense of the generated tests
  backtracks  list that receives the generated jumps

Returns FALSE, without emitting any code, when the bitmap needs more
boundaries than can be handled (MAX_RANGE_SIZE, and at most 4); TRUE when the
compares have been emitted.

The bitmap is first reduced to ranges[], the list of character values at which
the bit value changes. With invert FALSE the generated jumps are taken for
characters that are not in the class; with invert TRUE for those that are.
TMP1 may be modified by the emitted code. */
4658 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4659 {
4660 DEFINE_COMPILER;
4661 int ranges[MAX_RANGE_SIZE];
4662 pcre_uint8 bit, cbit, all;
4663 int i, byte, length = 0;
4664
4665 bit = bits[0] & 0x1;
4666 /* All bits will be zero or one (since bit is zero or one). */
4667 all = -bit;
4668
/* Collect the positions where the bit value flips. Whole bytes that match
the current value are skipped eight characters at a time. */
4669 for (i = 0; i < 256; )
4670 {
4671 byte = i >> 3;
4672 if ((i & 0x7) == 0 && bits[byte] == all)
4673 i += 8;
4674 else
4675 {
4676 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4677 if (cbit != bit)
4678 {
4679 if (length >= MAX_RANGE_SIZE)
4680 return FALSE;
4681 ranges[length] = i;
4682 length++;
4683 bit = cbit;
4684 all = -cbit;
4685 }
4686 i++;
4687 }
4688 }
4689
/* The value of the last bit disagrees with what nclass expects for
characters above 255: close the final range at 256. */
4690 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4691 {
4692 if (length >= MAX_RANGE_SIZE)
4693 return FALSE;
4694 ranges[length] = 256;
4695 length++;
4696 }
4697
4698 if (length < 0 || length > 4)
4699 return FALSE;
4700
/* From here on, bit is the (possibly inverted) state of character 0. */
4701 bit = bits[0] & 0x1;
4702 if (invert) bit ^= 0x1;
4703
4704 /* No character is accepted. */
4705 if (length == 0 && bit == 0)
4706 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4707
4708 switch(length)
4709 {
4710 case 0:
4711 /* When bit != 0, all characters are accepted. */
4712 return TRUE;
4713
/* One boundary: a single threshold compare. */
4714 case 1:
4715 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4716 return TRUE;
4717
/* Two boundaries: one range [ranges[0], ranges[1]), tested with a subtract
and a compare, or with an equality test when it is a single character. */
4718 case 2:
4719 if (ranges[0] + 1 != ranges[1])
4720 {
4721 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4722 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4723 }
4724 else
4725 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4726 return TRUE;
4727
/* Three boundaries: a threshold plus one range. */
4728 case 3:
4729 if (bit != 0)
4730 {
4731 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4732 if (ranges[0] + 1 != ranges[1])
4733 {
4734 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4735 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4736 }
4737 else
4738 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4739 return TRUE;
4740 }
4741
4742 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4743 if (ranges[1] + 1 != ranges[2])
4744 {
4745 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4746 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4747 }
4748 else
4749 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4750 return TRUE;
4751
4752 case 4:
/* Two ranges of equal length whose start values differ in exactly one bit:
force that bit on in TMP1, so that both collapse into the second range and a
single range test is enough. */
4753 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4754 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4755 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4756 && is_powerof2(ranges[2] - ranges[0]))
4757 {
4758 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4759 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4760 if (ranges[2] + 1 != ranges[3])
4761 {
4762 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4763 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4764 }
4765 else
4766 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4767 return TRUE;
4768 }
4769
/* Two independent ranges. i remembers how much has already been subtracted
from TMP1 by the first test. */
4770 if (bit != 0)
4771 {
4772 i = 0;
4773 if (ranges[0] + 1 != ranges[1])
4774 {
4775 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4776 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4777 i = ranges[0];
4778 }
4779 else
4780 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4781
4782 if (ranges[2] + 1 != ranges[3])
4783 {
4784 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4785 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4786 }
4787 else
4788 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4789 return TRUE;
4790 }
4791
/* bit == 0: test the outer span [ranges[0], ranges[3]) first, then the gap
[ranges[1], ranges[2]) inside it. TMP1 is already reduced by ranges[0]. */
4792 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4793 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4794 if (ranges[1] + 1 != ranges[2])
4795 {
4796 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4797 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4798 }
4799 else
4800 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4801 return TRUE;
4802
4803 default:
4804 SLJIT_ASSERT_STOP();
4805 return FALSE;
4806 }
4807 }
4808
/* Emit the fast-call subroutine that tests TMP1 against the newline
characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide characters are
possible (UTF mode in the 8-bit library, always in the 16/32-bit libraries). */
4809 static void check_anynewline(compiler_common *common)
4810 {
4811 /* Check whether TMP1 contains a newline character. TMP1 is modified; the
result is accumulated in TMP2 and the final OP_FLAGS also sets the E flag. */
4812 DEFINE_COMPILER;
4813
4814 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4815
/* TMP1 is rebased by 0x0a; every constant below is relative to that. */
4816 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4817 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4818 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4819 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4820 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4821 #ifdef COMPILE_PCRE8
4822 if (common->utf)
4823 {
4824 #endif
4825 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029: one compare covers both. */
4826 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4827 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4828 #ifdef COMPILE_PCRE8
4829 }
4830 #endif
4831 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4832 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4833 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4834 }
4835
/* Emit the fast-call subroutine that tests TMP1 against the horizontal space
characters 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f and 0x3000 when wide characters are possible (UTF mode in the 8-bit
library, always in the 16/32-bit libraries). */
4836 static void check_hspace(compiler_common *common)
4837 {
4838 /* Check whether TMP1 contains a horizontal space character. TMP1 may be
modified; the result is accumulated in TMP2 and the final OP_FLAGS also sets
the E flag. */
4839 DEFINE_COMPILER;
4840
4841 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4842
4843 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4844 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4846 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4847 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4848 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4849 #ifdef COMPILE_PCRE8
4850 if (common->utf)
4851 {
4852 #endif
4853 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4855 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4857 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* TMP1 is rebased by 0x2000; the remaining constants are relative to it. */
4858 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4859 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4860 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4861 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4862 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4863 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4864 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4865 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4866 #ifdef COMPILE_PCRE8
4867 }
4868 #endif
4869 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4870 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4871
4872 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4873 }
4874
/* Emit the fast-call subroutine that tests TMP1 against the vertical space
characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide characters are
possible (UTF mode in the 8-bit library, always in the 16/32-bit libraries). */
4875 static void check_vspace(compiler_common *common)
4876 {
4877 /* Check whether TMP1 contains a vertical space character. TMP1 is modified;
the result is accumulated in TMP2 and the final OP_FLAGS also sets the E
flag. */
4878 DEFINE_COMPILER;
4879
4880 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4881
/* TMP1 is rebased by 0x0a; every constant below is relative to that. */
4882 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4883 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4884 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4885 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4886 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4887 #ifdef COMPILE_PCRE8
4888 if (common->utf)
4889 {
4890 #endif
4891 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029: one compare covers both. */
4892 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4893 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4894 #ifdef COMPILE_PCRE8
4895 }
4896 #endif
4897 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4898 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4899
4900 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4901 }
4902
/* The compare subroutines below borrow STR_END and STACK_TOP as character
registers; both are saved on entry and restored before returning. */
4903 #define CHAR1 STR_END
4904 #define CHAR2 STACK_TOP
4905
/* Emit the fast-call subroutine that compares two character sequences
exactly.

On entry TMP1 points to the first sequence and TMP2 holds the length in bytes;
STR_PTR is first moved back by TMP2 and then addresses the second sequence.
The loop stops at the first differing character or when TMP2 reaches zero, so
TMP2 is zero on exit only if every character matched (presumably this is what
the caller tests; assumes TMP2 is a non-zero multiple of the character size on
entry). */
4906 static void do_casefulcmp(compiler_common *common)
4907 {
4908 DEFINE_COMPILER;
4909 struct sljit_jump *jump;
4910 struct sljit_label *label;
4911
4912 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4913 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4914 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers start one character early because each load in the loop uses
an offset of one character (MOVU_UCHAR presumably also updates the base
register - confirm against its definition). */
4916 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4917 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4918
4919 label = LABEL();
4920 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4921 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4922 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4923 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4924 JUMPTO(SLJIT_NOT_ZERO, label);
4925
4926 JUMPHERE(jump);
/* Undo the initial one-character bias and give the borrowed registers back. */
4927 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4928 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4929 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4930 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4931 }
4932
/* do_caselesscmp() borrows STACK_LIMIT to hold the address of the lower
casing table; it is saved on entry and restored before returning. */
4933 #define LCC_TABLE STACK_LIMIT
4934
/* Emit the fast-call subroutine that compares two character sequences after
mapping each character through the common->lcc table.

Register usage on entry and exit is the same as for do_casefulcmp(): TMP1
points to the first sequence, TMP2 holds the length in bytes, STR_PTR is first
moved back by TMP2, and TMP2 is zero on exit only if every character matched.
In the 16/32-bit libraries characters above 255 are compared without the table
lookup. */
4935 static void do_caselesscmp(compiler_common *common)
4936 {
4937 DEFINE_COMPILER;
4938 struct sljit_jump *jump;
4939 struct sljit_label *label;
4940
4941 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4942 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4943
/* Save the three borrowed registers, then load the table address. */
4944 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4945 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4947 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4948 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4949 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4950
4951 label = LABEL();
4952 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4953 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each character through the table unless it is above 255. */
4954 #ifndef COMPILE_PCRE8
4955 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4956 #endif
4957 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4958 #ifndef COMPILE_PCRE8
4959 JUMPHERE(jump);
4960 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4961 #endif
4962 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4963 #ifndef COMPILE_PCRE8
4964 JUMPHERE(jump);
4965 #endif
4966 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4967 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4968 JUMPTO(SLJIT_NOT_ZERO, label);
4969
4970 JUMPHERE(jump);
/* Undo the initial one-character bias and give the borrowed registers back. */
4971 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4972 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4973 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4974 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4975 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4976 }
4977
4978 #undef LCC_TABLE
4979 #undef CHAR1
4980 #undef CHAR2
4981
4982 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4983
/* Caseless comparison of two UTF character sequences using the Unicode
character database. This is an ordinary C function with the SLJIT_CALL calling
convention.

Arguments:
  src1   start of the first sequence
  args   supplies the second sequence: it starts at args->uchar_ptr and must
         not be read at or beyond args->end
  end1   end of the first sequence

Returns:
  (pcre_uchar*)1  the second sequence ran out before the first was consumed
  NULL            a pair of characters did not match
  otherwise       the position in the second sequence after the last
                  character compared

Two characters match when they are equal, when the first equals the second
plus its other_case offset, or when the first is a member of the caseless set
of the second. */
4984 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4985 {
4986 /* Implementing this at JIT level would be inefficient, so it is plain C. */
4987 pcre_uint32 c1, c2;
4988 const pcre_uchar *src2 = args->uchar_ptr;
4989 const pcre_uchar *end2 = args->end;
4990 const ucd_record *ur;
4991 const pcre_uint32 *pp;
4992
4993 while (src1 < end1)
4994 {
4995 if (src2 >= end2)
4996 return (pcre_uchar*)1;
4997 GETCHARINC(c1, src1);
4998 GETCHARINC(c2, src2);
4999 ur = GET_UCD(c2);
5000 if (c1 != c2 && c1 != c2 + ur->other_case)
5001 {
/* Walk the caseless set of c2. The scan gives up at the first entry larger
than c1, which presumably relies on the set being in ascending order and
ending with a value above every character. */
5002 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5003 for (;;)
5004 {
5005 if (c1 < *pp) return NULL;
5006 if (c1 == *pp++) break;
5007 }
5008 }
5009 }
5010 return src2;
5011 }
5012
5013 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5014
/* Emit the comparison of one pattern character (all of its code units in UTF
mode) with the subject.

Arguments:
  common      the JIT compiler state
  caseless    TRUE for caseless matching
  cc          points to the character in the compiled pattern
  context     state shared by the calls that handle one character run:
              length is the number of bytes still to be compared (the subject
              data is addressed as STR_PTR - length), sourcereg is the
              register that receives the next load (-1 before the first call),
              and ucharptr counts the code units gathered in c / oc
  backtracks  list that receives the "no match" jumps

Returns cc advanced past the character.

Loads and compares are interleaved through TMP1 and TMP2: the next chunk is
loaded into context->sourcereg, the registers are swapped, and the chunk that
was loaded earlier is compared.

Caseless characters are handled by a single bit: othercasebit is OR-ed into
the loaded value and into the constant, and othercasechar is the code unit
that carries the bit. */
5015 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5016 compare_context *context, jump_list **backtracks)
5017 {
5018 DEFINE_COMPILER;
5019 unsigned int othercasebit = 0;
5020 pcre_uchar *othercasechar = NULL;
5021 #ifdef SUPPORT_UTF
5022 int utflength;
5023 #endif
5024
5025 if (caseless && char_has_othercase(common, cc))
5026 {
5027 othercasebit = char_get_othercase_bit(common, cc);
5028 SLJIT_ASSERT(othercasebit);
5029 /* Extracting bit difference info. */
5030 #if defined COMPILE_PCRE8
/* The bits above the low byte give the index of the affected code unit. */
5031 othercasechar = cc + (othercasebit >> 8);
5032 othercasebit &= 0xff;
5033 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5034 /* Note that this code only handles characters in the BMP. If there
5035 ever are characters outside the BMP whose othercase differs in only one
5036 bit from itself (there currently are none), this code will need to be
5037 revised for COMPILE_PCRE32. */
5038 othercasechar = cc + (othercasebit >> 9);
5039 if ((othercasebit & 0x100) != 0)
5040 othercasebit = (othercasebit & 0xff) << 8;
5041 else
5042 othercasebit &= 0xff;
5043 #endif /* COMPILE_PCRE[8|16|32] */
5044 }
5045
/* First call for this context: preload the first chunk into TMP1 and make
TMP2 the target of the next load. */
5046 if (context->sourcereg == -1)
5047 {
5048 #if defined COMPILE_PCRE8
5049 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5050 if (context->length >= 4)
5051 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5052 else if (context->length >= 2)
5053 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5054 else
5055 #endif
5056 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5057 #elif defined COMPILE_PCRE16
5058 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5059 if (context->length >= 4)
5060 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5061 else
5062 #endif
5063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5064 #elif defined COMPILE_PCRE32
5065 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5066 #endif /* COMPILE_PCRE[8|16|32] */
5067 context->sourcereg = TMP2;
5068 }
5069
/* In UTF mode the loop below runs once per code unit of the character. */
5070 #ifdef SUPPORT_UTF
5071 utflength = 1;
5072 if (common->utf && HAS_EXTRALEN(*cc))
5073 utflength += GET_EXTRALEN(*cc);
5074
5075 do
5076 {
5077 #endif
5078
5079 context->length -= IN_UCHARS(1);
5080 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5081
5082 /* Unaligned read is supported. */
/* Gather the code unit (and its case bit, if any) in the context; several
units are compared together as one 4, 2 or 1 byte integer. */
5083 if (othercasebit != 0 && othercasechar == cc)
5084 {
5085 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5086 context->oc.asuchars[context->ucharptr] = othercasebit;
5087 }
5088 else
5089 {
5090 context->c.asuchars[context->ucharptr] = *cc;
5091 context->oc.asuchars[context->ucharptr] = 0;
5092 }
5093 context->ucharptr++;
5094
/* Flush when 4 bytes are gathered, when nothing is left, or (8-bit only) when
2 bytes are gathered and a single byte remains. */
5095 #if defined COMPILE_PCRE8
5096 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5097 #else
5098 if (context->ucharptr >= 2 || context->length == 0)
5099 #endif
5100 {
/* Load the next chunk first, then compare the chunk loaded earlier. */
5101 if (context->length >= 4)
5102 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5103 else if (context->length >= 2)
5104 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5105 #if defined COMPILE_PCRE8
5106 else if (context->length >= 1)
5107 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5108 #endif /* COMPILE_PCRE8 */
5109 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5110
/* The case labels are numbers of code units: 4 and 2 bytes' worth, and one
byte in the 8-bit library. */
5111 switch(context->ucharptr)
5112 {
5113 case 4 / sizeof(pcre_uchar):
5114 if (context->oc.asint != 0)
5115 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5116 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5117 break;
5118
5119 case 2 / sizeof(pcre_uchar):
5120 if (context->oc.asushort != 0)
5121 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5122 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5123 break;
5124
5125 #ifdef COMPILE_PCRE8
5126 case 1:
5127 if (context->oc.asbyte != 0)
5128 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5129 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5130 break;
5131 #endif
5132
5133 default:
5134 SLJIT_ASSERT_STOP();
5135 break;
5136 }
5137 context->ucharptr = 0;
5138 }
5139
5140 #else
5141
5142 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit, swap the registers, compare
the unit loaded earlier. */
5143 if (context->length >= 1)
5144 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5145
5146 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5147
5148 if (othercasebit != 0 && othercasechar == cc)
5149 {
5150 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5151 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5152 }
5153 else
5154 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5155
5156 #endif
5157
5158 cc++;
5159 #ifdef SUPPORT_UTF
5160 utflength--;
5161 }
5162 while (utflength > 0);
5163 #endif
5164
5165 return cc;
5166 }
5167
5168 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5169
/* Rebase the value held in typereg: emit the ADD or SUB that changes the
offset already subtracted from it from typeoffset to (value), then record the
new offset. Nothing is emitted when the offset does not change. Expects
variables named typereg and typeoffset in the scope of the caller.
NOTE(review): the expansion is several statements and is not wrapped in
do { } while (0), so it must not be used as the body of an unbraced if/else. */
5170 #define SET_TYPE_OFFSET(value) \
5171 if ((value) != typeoffset) \
5172 { \
5173 if ((value) < typeoffset) \
5174 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5175 else \
5176 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5177 } \
5178 typeoffset = (value);
5179
/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the
variable charoffset. The same usage restriction applies. */
5180 #define SET_CHAR_OFFSET(value) \
5181 if ((value) != charoffset) \
5182 { \
5183 if ((value) < charoffset) \
5184 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5185 else \
5186 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5187 } \
5188 charoffset = (value);
5189
5190 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5191
5192 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5193 {
5194 DEFINE_COMPILER;
5195 jump_list *found = NULL;
5196 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5197 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5198 struct sljit_jump *jump = NULL;
5199 pcre_uchar *ccbegin;
5200 int compares, invertcmp, numberofcmps;
5201 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5202 BOOL utf = common->utf;
5203 #endif
5204
5205 #ifdef SUPPORT_UCP
5206 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5207 BOOL charsaved = FALSE;
5208 int typereg = TMP1;
5209 const sljit_ui *other_cases;
5210 sljit_uw typeoffset;
5211 #endif
5212
5213 /* Scanning the necessary info. */
5214 cc++;
5215 ccbegin = cc;
5216 compares = 0;
5217 if (cc[-1] & XCL_MAP)
5218 {
5219 min = 0;
5220 cc += 32 / sizeof(pcre_uchar);
5221 }
5222
5223 while (*cc != XCL_END)
5224 {
5225 compares++;
5226 if (*cc == XCL_SINGLE)
5227 {
5228 cc ++;
5229 GETCHARINCTEST(c, cc);
5230 if (c > max) max = c;
5231 if (c < min) min = c;
5232 #ifdef SUPPORT_UCP
5233 needschar = TRUE;
5234 #endif
5235 }
5236 else if (*cc == XCL_RANGE)
5237 {
5238 cc ++;
5239 GETCHARINCTEST(c, cc);
5240 if (c < min) min = c;
5241 GETCHARINCTEST(c, cc);
5242 if (c > max) max = c;
5243 #ifdef SUPPORT_UCP
5244 needschar = TRUE;
5245 #endif
5246 }
5247 #ifdef SUPPORT_UCP
5248 else
5249 {
5250 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5251 cc++;
5252 if (*cc == PT_CLIST)
5253 {
5254 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5255 while (*other_cases != NOTACHAR)
5256 {
5257 if (*other_cases > max) max = *other_cases;
5258 if (*other_cases < min) min = *other_cases;
5259 other_cases++;
5260 }
5261 }
5262 else
5263 {
5264 max = READ_CHAR_MAX;
5265 min = 0;
5266 }
5267
5268 switch(*cc)
5269 {
5270 case PT_ANY:
5271 /* Any either accepts everything or ignored. */
5272 if (cc[-1] == XCL_PROP)
5273 {
5274 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5275 if (list == backtracks)
5276 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5277 return;
5278 }
5279 break;
5280
5281 case PT_LAMP:
5282 case PT_GC:
5283 case PT_PC:
5284 case PT_ALNUM:
5285 needstype = TRUE;
5286 break;
5287
5288 case PT_SC:
5289 needsscript = TRUE;
5290 break;
5291
5292 case PT_SPACE:
5293 case PT_PXSPACE:
5294 case PT_WORD:
5295 case PT_PXGRAPH:
5296 case PT_PXPRINT:
5297 case PT_PXPUNCT:
5298 needstype = TRUE;
5299 needschar = TRUE;
5300 break;
5301
5302 case PT_CLIST:
5303 case PT_UCNC:
5304 needschar = TRUE;
5305 break;
5306
5307 default:
5308 SLJIT_ASSERT_STOP();
5309 break;
5310 }
5311 cc += 2;
5312 }
5313 #endif
5314 }
5315 SLJIT_ASSERT(compares > 0);
5316
5317 /* We are not necessary in utf mode even in 8 bit mode. */
5318 cc = ccbegin;
5319 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5320
5321 if ((cc[-1] & XCL_HASPROP) == 0)
5322 {
5323 if ((cc[-1] & XCL_MAP) != 0)
5324 {
5325 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5326 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
5327 {
5328 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5329 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5330 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5331 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5332 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5333 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5334 }
5335
5336 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5337 JUMPHERE(jump);
5338
5339 cc += 32 / sizeof(pcre_uchar);
5340 }
5341 else
5342 {
5343 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5344 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5345 }
5346 }
5347 else if ((cc[-1] & XCL_MAP) != 0)
5348 {
5349 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5350 #ifdef SUPPORT_UCP
5351 charsaved = TRUE;
5352 #endif
5353 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
5354 {
5355 #ifdef COMPILE_PCRE8
5356 jump = NULL;
5357 if (common->utf)
5358 #endif
5359 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5360
5361 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5362 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5363 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5364 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5365 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5366 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5367
5368 #ifdef COMPILE_PCRE8
5369 if (common->utf)
5370 #endif
5371 JUMPHERE(jump);
5372 }
5373
5374 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5375 cc += 32 / sizeof(pcre_uchar);
5376 }
5377
5378 #ifdef SUPPORT_UCP
5379 if (needstype || needsscript)
5380 {
5381 if (needschar && !charsaved)
5382 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5383
5384 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5385 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5386 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5387 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5388 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5389 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5390 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5391
5392 /* Before anything else, we deal with scripts. */
5393 if (needsscript)
5394 {
5395 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5396 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5397
5398 ccbegin = cc;
5399
5400 while (*cc != XCL_END)
5401 {
5402 if (*cc == XCL_SINGLE)
5403 {
5404 cc ++;
5405 GETCHARINCTEST(c, cc);
5406 }
5407 else if (*cc == XCL_RANGE)
5408 {
5409 cc ++;
5410 GETCHARINCTEST(c, cc);
5411 GETCHARINCTEST(c, cc);
5412 }
5413 else
5414 {
5415 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5416 cc++;
5417 if (*cc == PT_SC)
5418 {
5419 compares--;
5420 invertcmp = (compares == 0 && list != backtracks);
5421 if (cc[-1] == XCL_NOTPROP)
5422 invertcmp ^= 0x1;
5423 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5424 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5425 }
5426 cc += 2;
5427 }
5428 }
5429
5430 cc = ccbegin;
5431 }
5432
5433 if (needschar)
5434 {
5435 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5436 }
5437
5438 if (needstype)
5439 {
5440 if (!needschar)
5441 {
5442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5443 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5444 }
5445 else
5446 {
5447 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5448 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5449 typereg = RETURN_ADDR;
5450 }
5451 }
5452 }
5453 #endif
5454
5455 /* Generating code. */
5456 charoffset = 0;
5457 numberofcmps = 0;
5458 #ifdef SUPPORT_UCP
5459 typeoffset = 0;
5460 #endif
5461
5462 while (*cc != XCL_END)
5463 {
5464 compares--;
5465 invertcmp = (compares == 0 && list != backtracks);
5466 jump = NULL;
5467
5468 if (*cc == XCL_SINGLE)
5469 {
5470 cc ++;
5471 GETCHARINCTEST(c, cc);
5472
5473 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5474 {
5475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5476 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5477 numberofcmps++;
5478 }
5479 else if (numberofcmps > 0)
5480 {
5481 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5482 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5483 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5484 numberofcmps = 0;
5485 }
5486 else
5487 {
5488 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5489 numberofcmps = 0;
5490 }
5491 }
5492 else if (*cc == XCL_RANGE)
5493 {
5494 cc ++;
5495 GETCHARINCTEST(c, cc);
5496 SET_CHAR_OFFSET(c);
5497 GETCHARINCTEST(c, cc);
5498
5499 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5500 {
5501 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5502 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5503 numberofcmps++;
5504 }
5505 else if (numberofcmps > 0)
5506 {
5507 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5508 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5509 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5510 numberofcmps = 0;
5511 }
5512 else
5513 {
5514 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5515 numberofcmps = 0;
5516 }
5517 }
5518 #ifdef SUPPORT_UCP
5519 else
5520 {
5521 if (*cc == XCL_NOTPROP)
5522 invertcmp ^= 0x1;
5523 cc++;
5524 switch(*cc)
5525 {
5526 case PT_ANY:
5527 if (!invertcmp)
5528 jump = JUMP(SLJIT_JUMP);
5529 break;
5530
5531 case PT_LAMP:
5532 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5533 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5534 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5535 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5536 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5537 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5538 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5539 break;
5540
5541 case PT_GC:
5542 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5543 SET_TYPE_OFFSET(c);
5544 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5545 break;
5546
5547 case PT_PC:
5548 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5549 break;
5550
5551 case PT_SC:
5552 compares++;
5553 /* Do nothing. */
5554 break;
5555
5556 case PT_SPACE:
5557 case PT_PXSPACE:
5558 SET_CHAR_OFFSET(9);
5559 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5560 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5561
5562 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5563 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5564
5565 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5566 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5567
5568 SET_TYPE_OFFSET(ucp_Zl);
5569 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5570 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5571 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5572 break;
5573
5574 case PT_WORD:
5575 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5576 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5577 /* Fall through. */
5578
5579 case PT_ALNUM:
5580 SET_TYPE_OFFSET(ucp_Ll);
5581 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5582 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5583 SET_TYPE_OFFSET(ucp_Nd);
5584 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5585 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5586 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5587 break;
5588
5589 case PT_CLIST:
5590 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5591
5592 /* At least three characters are required.
5593 Otherwise this case would be handled by the normal code path. */
5594 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5595 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5596
5597 /* Optimizing character pairs, if their difference is power of 2. */
5598 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5599 {
5600 if (charoffset == 0)
5601 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5602 else
5603 {
5604 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5605 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5606 }
5607 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5608 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5609 other_cases += 2;
5610 }
5611 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5612 {
5613 if (charoffset == 0)
5614 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5615 else
5616 {
5617 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5618 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5619 }
5620 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5621 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5622
5623 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5624 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5625
5626 other_cases += 3;
5627 }
5628 else
5629 {
5630 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5631 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5632 }
5633
5634 while (*other_cases != NOTACHAR)
5635 {
5636 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5637 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5638 }
5639 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5640 break;
5641
5642 case PT_UCNC:
5643 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5644 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5645 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5646 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5647 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5648 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5649
5650 SET_CHAR_OFFSET(0xa0);
5651 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5652 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5653 SET_CHAR_OFFSET(0);
5654 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5655 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5656 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5657 break;
5658
5659 case PT_PXGRAPH:
5660 /* C and Z groups are the farthest two groups. */
5661 SET_TYPE_OFFSET(ucp_Ll);
5662 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5663 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5664
5665 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5666
5667 /* In case of ucp_Cf, we overwrite the result. */
5668 SET_CHAR_OFFSET(0x2066);
5669 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5670 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5671
5672 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5673 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5674
5675 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5676 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5677
5678 JUMPHERE(jump);
5679 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5680 break;
5681
5682 case PT_PXPRINT:
5683 /* C and Z groups are the farthest two groups. */
5684 SET_TYPE_OFFSET(ucp_Ll);
5685 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5686 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5687
5688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5689 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5690
5691 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5692
5693 /* In case of ucp_Cf, we overwrite the result. */
5694 SET_CHAR_OFFSET(0x2066);
5695 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5696 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5697
5698 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5699 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5700
5701 JUMPHERE(jump);
5702 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5703 break;
5704
5705 case PT_PXPUNCT:
5706 SET_TYPE_OFFSET(ucp_Sc);
5707 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5708 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5709
5710 SET_CHAR_OFFSET(0);
5711 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5712 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5713
5714 SET_TYPE_OFFSET(ucp_Pc);
5715 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5716 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5717 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5718 break;
5719
5720 default:
5721 SLJIT_ASSERT_STOP();
5722 break;
5723 }
5724 cc += 2;
5725 }
5726 #endif
5727
5728 if (jump != NULL)
5729 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5730 }
5731
5732 if (found != NULL)
5733 set_jumps(found, LABEL());
5734 }
5735
5736 #undef SET_TYPE_OFFSET
5737 #undef SET_CHAR_OFFSET
5738
5739 #endif
5740
5741 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5742 {
5743 DEFINE_COMPILER;
5744 int length;
5745 struct sljit_jump *jump[4];
5746 #ifdef SUPPORT_UTF
5747 struct sljit_label *label;
5748 #endif /* SUPPORT_UTF */
5749
5750 switch(type)
5751 {
5752 case OP_SOD:
5753 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5754 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5755 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5756 return cc;
5757
5758 case OP_SOM:
5759 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5760 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5761 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5762 return cc;
5763
5764 case OP_NOT_WORD_BOUNDARY:
5765 case OP_WORD_BOUNDARY:
5766 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5767 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5768 return cc;
5769
5770 case OP_EODN:
5771 /* Requires rather complex checks. */
5772 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5773 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5774 {
5775 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5776 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5777 if (common->mode == JIT_COMPILE)
5778 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5779 else
5780 {
5781 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5782 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5783 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5784 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5785 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5786 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5787 check_partial(common, TRUE);
5788 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5789 JUMPHERE(jump[1]);
5790 }
5791 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5792 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5793 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5794 }
5795 else if (common->nltype == NLTYPE_FIXED)
5796 {
5797 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5799 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5800 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5801 }
5802 else
5803 {
5804 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5805 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5806 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5807 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5808 jump[2] = JUMP(SLJIT_GREATER);
5809 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5810 /* Equal. */
5811 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5812 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5813 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5814
5815 JUMPHERE(jump[1]);
5816 if (common->nltype == NLTYPE_ANYCRLF)
5817 {
5818 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5819 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5820 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5821 }
5822 else
5823 {
5824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5825 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5826 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5827 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5828 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5829 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5830 }
5831 JUMPHERE(jump[2]);
5832 JUMPHERE(jump[3]);
5833 }
5834 JUMPHERE(jump[0]);
5835 check_partial(common, FALSE);
5836 return cc;
5837
5838 case OP_EOD:
5839 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5840 check_partial(common, FALSE);
5841 return cc;
5842
5843 case OP_DOLL:
5844 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5845 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5846 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5847
5848 if (!common->endonly)
5849 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
5850 else
5851 {
5852 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5853 check_partial(common, FALSE);
5854 }
5855 return cc;
5856
5857 case OP_DOLLM:
5858 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5859 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5860 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5861 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5862 check_partial(common, FALSE);
5863 jump[0] = JUMP(SLJIT_JUMP);
5864 JUMPHERE(jump[1]);
5865
5866 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5867 {
5868 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5869 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5870 if (common->mode == JIT_COMPILE)
5871 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5872 else
5873 {
5874 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5875 /* STR_PTR = STR_END - IN_UCHARS(1) */
5876 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5877 check_partial(common, TRUE);
5878 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5879 JUMPHERE(jump[1]);
5880 }
5881
5882 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5883 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5884 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5885 }
5886 else
5887 {
5888 peek_char(common, common->nlmax);
5889 check_newlinechar(common, common->nltype, backtracks, FALSE);
5890 }
5891 JUMPHERE(jump[0]);
5892 return cc;
5893
5894 case OP_CIRC:
5895 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5897 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5898 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5899 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5900 return cc;
5901
5902 case OP_CIRCM:
5903 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5905 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5906 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5907 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5908 jump[0] = JUMP(SLJIT_JUMP);
5909 JUMPHERE(jump[1]);
5910
5911 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5912 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5913 {
5914 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5915 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5916 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5917 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5918 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5919 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5920 }
5921 else
5922 {
5923 skip_char_back(common);
5924 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5925 check_newlinechar(common, common->nltype, backtracks, FALSE);
5926 }
5927 JUMPHERE(jump[0]);
5928 return cc;
5929
5930 case OP_REVERSE:
5931 length = GET(cc, 0);
5932 if (length == 0)
5933 return cc + LINK_SIZE;
5934 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5935 #ifdef SUPPORT_UTF
5936 if (common->utf)
5937 {
5938 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5939 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5940 label = LABEL();
5941 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5942 skip_char_back(common);
5943 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5944 JUMPTO(SLJIT_NOT_ZERO, label);
5945 }
5946 else
5947 #endif
5948 {
5949 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5950 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5951 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5952 }
5953 check_start_used_ptr(common);
5954 return cc + LINK_SIZE;
5955 }
5956 SLJIT_ASSERT_STOP();
5957 return cc;
5958 }
5959
5960 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
5961 {
5962 DEFINE_COMPILER;
5963 int length;
5964 unsigned int c, oc, bit;
5965 compare_context context;
5966 struct sljit_jump *jump[3];
5967 jump_list *end_list;
5968 #ifdef SUPPORT_UTF
5969 struct sljit_label *label;
5970 #ifdef SUPPORT_UCP
5971 pcre_uchar propdata[5];
5972 #endif
5973 #endif /* SUPPORT_UTF */
5974
5975 switch(type)
5976 {
5977 case OP_NOT_DIGIT:
5978 case OP_DIGIT:
5979 /* Digits are usually 0-9, so it is worth to optimize them. */
5980 if (check_str_ptr)
5981 detect_partial_match(common, backtracks);
5982 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5983 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5984 read_char7_type(common, type == OP_NOT_DIGIT);
5985 else
5986 #endif
5987 read_char8_type(common, type == OP_NOT_DIGIT);
5988 /* Flip the starting bit in the negative case. */
5989 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5990 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5991 return cc;
5992
5993 case OP_NOT_WHITESPACE:
5994 case OP_WHITESPACE:
5995 if (check_str_ptr)
5996 detect_partial_match(common, backtracks);
5997 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5998 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5999 read_char7_type(common, type == OP_NOT_WHITESPACE);
6000 else
6001 #endif
6002 read_char8_type(common, type == OP_NOT_WHITESPACE);
6003 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6004 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6005 return cc;
6006
6007 case OP_NOT_WORDCHAR:
6008 case OP_WORDCHAR:
6009 if (check_str_ptr)
6010 detect_partial_match(common, backtracks);
6011 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6012 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
6013 read_char7_type(common, type == OP_NOT_WORDCHAR);
6014 else
6015 #endif
6016 read_char8_type(common, type == OP_NOT_WORDCHAR);
6017 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6018 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6019 return cc;
6020
6021 case OP_ANY:
6022 if (check_str_ptr)
6023 detect_partial_match(common, backtracks);
6024 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6025 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6026 {
6027 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6028 end_list = NULL;
6029 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6030 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6031 else
6032 check_str_end(common, &end_list);
6033
6034 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6035 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6036 set_jumps(end_list, LABEL());
6037 JUMPHERE(jump[0]);
6038 }
6039 else
6040 check_newlinechar(common, common->nltype, backtracks, TRUE);
6041 return cc;
6042
6043 case OP_ALLANY:
6044 if (check_str_ptr)
6045 detect_partial_match(common, backtracks);
6046 #ifdef SUPPORT_UTF
6047 if (common->utf)
6048 {
6049 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6050 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6051 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6052 #if defined COMPILE_PCRE8
6053 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6054 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6055 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6056 #elif defined COMPILE_PCRE16
6057 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6058 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6060 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
6061 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6063 #endif
6064 JUMPHERE(jump[0]);
6065 #endif /* COMPILE_PCRE[8|16] */
6066 return cc;
6067 }
6068 #endif
6069 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6070 return cc;
6071
6072 case OP_ANYBYTE:
6073 if (check_str_ptr)
6074 detect_partial_match(common, backtracks);
6075 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6076 return cc;
6077
6078 #ifdef SUPPORT_UTF
6079 #ifdef SUPPORT_UCP
6080 case OP_NOTPROP:
6081 case OP_PROP:
6082 propdata[0] = XCL_HASPROP;
6083 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6084 propdata[2] = cc[0];
6085 propdata[3] = cc[1];
6086 propdata[4] = XCL_END;
6087 if (check_str_ptr)
6088 detect_partial_match(common, backtracks);
6089 compile_xclass_matchingpath(common, propdata, backtracks);
6090 return cc + 2;
6091 #endif
6092 #endif
6093
6094 case OP_ANYNL:
6095 if (check_str_ptr)
6096 detect_partial_match(common, backtracks);
6097 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6098 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6099 /* We don't need to handle soft partial matching case. */
6100 end_list = NULL;
6101 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6102 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6103 else
6104 check_str_end(common, &end_list);
6105 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6106 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6107 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6108 jump[2] = JUMP(SLJIT_JUMP);
6109 JUMPHERE(jump[0]);
6110 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6111 set_jumps(end_list, LABEL());
6112 JUMPHERE(jump[1]);
6113 JUMPHERE(jump[2]);
6114 return cc;
6115
6116 case OP_NOT_HSPACE:
6117 case OP_HSPACE:
6118 if (check_str_ptr)
6119 detect_partial_match(common, backtracks);
6120 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6121 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6122 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6123 return cc;
6124
6125 case OP_NOT_VSPACE:
6126 case OP_VSPACE:
6127 if (check_str_ptr)
6128 detect_partial_match(common, backtracks);
6129 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6130 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6131 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6132 return cc;
6133
6134 #ifdef SUPPORT_UCP
6135 case OP_EXTUNI:
6136 if (check_str_ptr)
6137 detect_partial_match(common, backtracks);
6138 read_char(common);
6139 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6140 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6141 /* Optimize register allocation: use a real register. */
6142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6143 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6144
6145 label = LABEL();
6146 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6147 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6148 read_char(common);
6149 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6150 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6151 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6152
6153 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6154 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6155 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6156 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6157 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6158 JUMPTO(SLJIT_NOT_ZERO, label);
6159
6160 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6161 JUMPHERE(jump[0]);
6162 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6163
6164 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6165 {
6166 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6167 /* Since we successfully read a char above, partial matching must occur. */
6168 check_partial(common, TRUE);
6169 JUMPHERE(jump[0]);
6170 }
6171 return cc;
6172 #endif
6173
6174 case OP_CHAR:
6175 case OP_CHARI:
6176 length = 1;
6177 #ifdef SUPPORT_UTF
6178 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6179 #endif
6180 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6181 {
6182 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6183 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6184
6185 context.length = IN_UCHARS(length);
6186 context.sourcereg = -1;
6187 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6188 context.ucharptr = 0;
6189 #endif
6190 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6191 }
6192
6193 if (check_str_ptr)
6194 detect_partial_match(common, backtracks);
6195 #ifdef SUPPORT_UTF
6196 if (common->utf)
6197 {
6198 GETCHAR(c, cc);
6199 }
6200 else
6201 #endif
6202 c = *cc;
6203
6204 if (type == OP_CHAR || !char_has_othercase(common, cc))
6205 {
6206 read_char_range(common, c, c, FALSE);
6207 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6208 return cc + length;
6209 }
6210 oc = char_othercase(common, c);
6211 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6212 bit = c ^ oc;
6213 if (is_powerof2(bit))
6214 {
6215 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6216 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6217 return cc + length;
6218 }
6219 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6220 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6221 JUMPHERE(jump[0]);
6222 return cc + length;
6223
6224 case OP_NOT:
6225 case OP_NOTI:
6226 if (check_str_ptr)
6227 detect_partial_match(common, backtracks);
6228 length = 1;
6229 #ifdef SUPPORT_UTF
6230 if (common->utf)
6231 {
6232 #ifdef COMPILE_PCRE8
6233 c = *cc;
6234 if (c < 128)
6235 {
6236 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6237 if (type == OP_NOT || !char_has_othercase(common, cc))
6238 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6239 else
6240 {
6241 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6242 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6243 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6244 }
6245 /* Skip the variable-length character. */
6246 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6247 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6248 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6249 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6250 JUMPHERE(jump[0]);
6251 return cc + 1;
6252 }
6253 else
6254 #endif /* COMPILE_PCRE8 */
6255 {
6256 GETCHARLEN(c, cc, length);
6257 }
6258 }
6259 else
6260 #endif /* SUPPORT_UTF */
6261 c = *cc;
6262
6263 if (type == OP_NOT || !char_has_othercase(common, cc))
6264 {
6265 read_char_range(common, c, c, TRUE);
6266 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6267 }
6268 else
6269 {
6270 oc = char_othercase(common, c);
6271 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6272 bit = c ^ oc;
6273 if (is_powerof2(bit))
6274 {
6275 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6276 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6277 }
6278 else
6279 {
6280 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6281 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6282 }
6283 }
6284 return cc + length;
6285
6286 case OP_CLASS:
6287 case OP_NCLASS:
6288 if (check_str_ptr)
6289 detect_partial_match(common, backtracks);
6290
6291 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6292 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
6293 read_char_range(common, 0, bit, type == OP_NCLASS);
6294 #else
6295 read_char_range(common, 0, 255, type == OP_NCLASS);
6296 #endif
6297
6298 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
6299 return cc + 32 / sizeof(pcre_uchar);
6300
6301 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6302 jump[0] = NULL;
6303 if (common->utf)
6304 {
6305 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6306 if (type == OP_CLASS)
6307 {
6308 add_jump(compiler, backtracks, jump[0]);
6309 jump[0] = NULL;
6310 }
6311 }
6312 #elif !defined COMPILE_PCRE8
6313 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6314 if (type == OP_CLASS)
6315 {
6316 add_jump(compiler, backtracks, jump[0]);
6317 jump[0] = NULL;
6318 }
6319 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6320
6321 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6322 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6323 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6324 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6325 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6326 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6327
6328 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6329 if (jump[0] != NULL)
6330 JUMPHERE(jump[0]);
6331 #endif
6332 return cc + 32 / sizeof(pcre_uchar);
6333
6334 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6335 case OP_XCLASS:
6336 if (check_str_ptr)
6337 detect_partial_match(common, backtracks);
6338 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6339 return cc + GET(cc, 0) - 1;
6340 #endif
6341 }
6342 SLJIT_ASSERT_STOP();
6343 return cc;
6344 }
6345
6346 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6347 {
6348 /* This function consumes at least one input character. */
6349 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6350 DEFINE_COMPILER;
6351 pcre_uchar *ccbegin = cc;
6352 compare_context context;
6353 int size;
6354
6355 context.length = 0;
6356 do
6357 {
6358 if (cc >= ccend)
6359 break;
6360
6361 if (*cc == OP_CHAR)
6362 {
6363 size = 1;
6364 #ifdef SUPPORT_UTF
6365 if (common->utf && HAS_EXTRALEN(cc[1]))
6366 size += GET_EXTRALEN(cc[1]);
6367 #endif
6368 }
6369 else if (*cc == OP_CHARI)
6370 {
6371 size = 1;
6372 #ifdef SUPPORT_UTF
6373 if (common->utf)
6374 {
6375 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6376 size = 0;
6377 else if (HAS_EXTRALEN(cc[1]))
6378 size += GET_EXTRALEN(cc[1]);
6379 }
6380 else
6381 #endif
6382 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6383 size = 0;
6384 }
6385 else
6386 size = 0;
6387
6388 cc += 1 + size;
6389 context.length += IN_UCHARS(size);
6390 }
6391 while (size > 0 && context.length <= 128);
6392
6393 cc = ccbegin;
6394 if (context.length > 0)
6395 {
6396 /* We have a fixed-length byte sequence. */
6397 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6398 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6399
6400 context.sourcereg = -1;
6401 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6402 context.ucharptr = 0;
6403 #endif
6404 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6405 return cc;
6406 }
6407
6408 /* A non-fixed length character will be checked if length == 0. */
6409 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6410 }
6411
6412 /* Forward definitions. */
6413 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6414 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6415
/* Allocates a backtrack record of (size) bytes with sljit_alloc_memory(),
stores it in the local variable 'backtrack', zero-fills it, sets its 'cc' field
to (ccstart) and links it in as the new parent->top (the previous top becomes
backtrack->prev).

The expansion relies on 'compiler', 'backtrack' and 'parent' being in scope.
If the compiler reports an error after the allocation, the enclosing function
returns 'error'. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
6428
/* Variant of the backtrack-record push for functions returning void.

Allocates (size) bytes with sljit_alloc_memory() into the local variable
'backtrack', zero-fills the record, sets its 'cc' field to (ccstart) and makes
it the new parent->top (the previous top becomes backtrack->prev). Relies on
'compiler', 'backtrack' and 'parent' being in scope; the enclosing function
returns without a value if the compiler reports an error. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
6441
/* Views the local 'backtrack' pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) \
  ((type *)(backtrack))
6443
6444 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6445 {
6446 /* The OVECTOR offset goes to TMP2. */
6447 DEFINE_COMPILER;
6448 int count = GET2(cc, 1 + IMM2_SIZE);
6449 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6450 unsigned int offset;
6451 jump_list *found = NULL;
6452
6453 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6454
6455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6456
6457 count--;
6458 while (count-- > 0)
6459 {
6460 offset = GET2(slot, 0) << 1;
6461 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6462 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6463 slot += common->name_entry_size;
6464 }
6465
6466 offset = GET2(slot, 0) << 1;
6467 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6468 if (backtracks != NULL && !common->jscript_compat)
6469 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6470
6471 set_jumps(found, LABEL());
6472 }
6473
6474 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6475 {
6476 DEFINE_COMPILER;
6477 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6478 int offset = 0;
6479 struct sljit_jump *jump = NULL;
6480 struct sljit_jump *partial;
6481 struct sljit_jump *nopartial;
6482
6483 if (ref)
6484 {
6485 offset = GET2(cc, 1) << 1;
6486 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6487 /* OVECTOR(1) contains the "string begin - 1" constant. */
6488 if (withchecks && !common->jscript_compat)
6489 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6490 }
6491 else
6492 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6493
6494 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6495 if (common->utf && *cc == OP_REFI)
6496 {
6497 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6498 if (ref)
6499 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6500 else
6501 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6502
6503 if (withchecks)
6504 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6505
6506 /* Needed to save important temporary registers. */
6507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6508 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6510 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6511 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6512 if (common->mode == JIT_COMPILE)
6513 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6514 else
6515 {
6516 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6517 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6518 check_partial(common, FALSE);
6519 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6520 JUMPHERE(nopartial);
6521 }
6522 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6523 }
6524 else
6525 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6526 {
6527 if (ref)
6528 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6529 else
6530 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6531
6532 if (withchecks)
6533 jump = JUMP(SLJIT_ZERO);
6534
6535 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6536 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6537 if (common->mode == JIT_COMPILE)
6538 add_jump(compiler, backtracks, partial);
6539
6540 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6541 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6542
6543 if (common->mode != JIT_COMPILE)
6544 {
6545 nopartial = JUMP(SLJIT_JUMP);
6546 JUMPHERE(partial);
6547 /* TMP2 -= STR_END - STR_PTR */
6548 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6549 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6550 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6551 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6552 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6553 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6554 JUMPHERE(partial);
6555 check_partial(common, FALSE);
6556 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6557 JUMPHERE(nopartial);
6558 }
6559 }
6560
6561 if (jump != NULL)
6562 {
6563 if (emptyfail)
6564 add_jump(compiler, backtracks, jump);
6565 else
6566 JUMPHERE(jump);
6567 }
6568 }
6569
6570 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6571 {
6572 DEFINE_COMPILER;
6573 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6574 backtrack_common *backtrack;
6575 pcre_uchar type;
6576 int offset = 0;
6577 struct sljit_label *label;
6578 struct sljit_jump *zerolength;
6579 struct sljit_jump *jump = NULL;
6580 pcre_uchar *ccbegin = cc;
6581 int min = 0, max = 0;
6582 BOOL minimize;
6583
6584 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
6585
6586 if (ref)
6587 offset = GET2(cc, 1) << 1;
6588 else
6589 cc += IMM2_SIZE;
6590 type = cc[1 + IMM2_SIZE];
6591
6592 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6593 minimize = (type & 0x1) != 0;
6594 switch(type)
6595 {
6596 case OP_CRSTAR:
6597 case OP_CRMINSTAR:
6598 min = 0;
6599 max = 0;
6600 cc += 1 + IMM2_SIZE + 1;
6601 break;
6602 case OP_CRPLUS:
6603 case OP_CRMINPLUS:
6604 min = 1;
6605 max = 0;
6606 cc += 1 + IMM2_SIZE + 1;
6607 break;
6608 case OP_CRQUERY:
6609 case OP_CRMINQUERY:
6610 min = 0;
6611 max = 1;
6612 cc += 1 + IMM2_SIZE + 1;
6613 break;
6614 case OP_CRRANGE:
6615 case OP_CRMINRANGE:
6616 min = GET2(cc, 1 + IMM2_SIZE + 1);
6617 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6618 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6619 break;
6620 default:
6621 SLJIT_ASSERT_STOP();
6622 break;
6623 }
6624
6625 if (!minimize)
6626 {
6627 if (min == 0)
6628 {
6629 allocate_stack(common, 2);
6630 if (ref)
6631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6634 /* Temporary release of STR_PTR. */
6635 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6636 /* Handles both invalid and empty cases. Since the minimum repeat,
6637 is zero the invalid case is basically the same as an empty case. */
6638 if (ref)
6639 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6640 else
6641 {
6642 compile_dnref_search(common, ccbegin, NULL);
6643 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6645 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6646 }
6647 /* Restore if not zero length. */
6648 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6649 }
6650 else
6651 {
6652 allocate_stack(common, 1);
6653 if (ref)
6654 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6656 if (ref)
6657 {
6658 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6659 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6660 }
6661 else
6662 {
6663 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6665 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6666 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6667 }
6668 }
6669
6670 if (min > 1 || max > 1)
6671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6672
6673 label = LABEL();
6674 if (!ref)
6675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6676 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6677
6678 if (min > 1 || max > 1)
6679 {
6680 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6681 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6683 if (min > 1)
6684 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6685 if (max > 1)
6686 {
6687 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6688 allocate_stack(common, 1);
6689 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6690 JUMPTO(SLJIT_JUMP, label);
6691 JUMPHERE(jump);
6692 }
6693 }
6694
6695 if (max == 0)
6696 {
6697 /* Includes min > 1 case as well. */
6698 allocate_stack(common, 1);
6699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6700 JUMPTO(SLJIT_JUMP, label);
6701 }
6702
6703 JUMPHERE(zerolength);
6704 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6705
6706 count_match(common);
6707 return cc;
6708 }
6709
6710 allocate_stack(common, ref ? 2 : 3);
6711 if (ref)
6712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6714 if (type != OP_CRMINSTAR)
6715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6716
6717 if (min == 0)
6718 {
6719 /* Handles both invalid and empty cases. Since the minimum repeat,
6720 is zero the invalid case is basically the same as an empty case. */
6721 if (ref)
6722 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6723 else
6724 {
6725 compile_dnref_search(common, ccbegin, NULL);
6726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6728 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6729 }
6730 /* Length is non-zero, we can match real repeats. */
6731 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6732 jump = JUMP(SLJIT_JUMP);
6733 }
6734 else
6735 {
6736 if (ref)
6737 {
6738 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6739 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6740 }
6741 else
6742 {
6743 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6744 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6746 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6747 }
6748 }
6749
6750 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
6751 if (max > 0)
6752 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6753
6754 if (!ref)
6755 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6756 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6757 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6758
6759 if (min > 1)
6760 {
6761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6762 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6764 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
6765 }
6766 else if (max > 0)
6767 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6768
6769 if (jump != NULL)
6770 JUMPHERE(jump);
6771 JUMPHERE(zerolength);
6772
6773 count_match(common);
6774 return cc;
6775 }
6776
6777 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6778 {
6779 DEFINE_COMPILER;
6780 backtrack_common *backtrack;
6781 recurse_entry *entry = common->entries;
6782 recurse_entry *prev = NULL;
6783 sljit_sw start = GET(cc, 1);
6784 pcre_uchar *start_cc;
6785 BOOL needs_control_head;
6786
6787 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6788
6789 /* Inlining simple patterns. */
6790 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6791 {
6792 start_cc = common->start + start;
6793 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6794 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6795 return cc + 1 + LINK_SIZE;
6796 }
6797
6798 while (entry != NULL)
6799 {
6800 if (entry->start == start)
6801 break;
6802 prev = entry;
6803 entry = entry->next;
6804 }
6805
6806 if (entry == NULL)
6807 {
6808