/[pcre]/code/tags/pcre-8.38/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.38/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1595 - (show annotations)
Mon Aug 24 09:49:06 2015 UTC (4 years, 3 months ago) by zherczeg
Original Path: code/trunk/pcre_jit_compile.c
File MIME type: text/plain
File size: 332063 byte(s)
Fix two assertion fails in JIT found by Karl Skomski.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Defines for debugging purposes. */

/* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
     1 - Use unoptimized capturing brackets.
     2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* DEBUG_FORCE_CONTROL_HEAD:
     1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Size of the regex stack that is allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list  *next;   /* Following node. */
} jump_list;
194
/* Node of a singly linked list of stubs; each node pairs a jump (start)
   with a label (quit). */
typedef struct stub_list {
  struct sljit_jump  *start;
  struct sljit_label *quit;
  struct stub_list   *next;
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values used in place of a frame size. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Kinds of control entries. */
enum control_types {
  type_mark      = 0,
  type_then_trap = 1
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
318 typedef struct compiler_common {
319 /* The sljit ceneric compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
361 /* TRUE, when minlength is greater than 0. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
434 /* For byte_sequence_compare. */
435
/* Context for byte_sequence_compare. When SLJIT_UNALIGNED is enabled it also
   carries two unions (c and oc) that overlay an int, a short and an array of
   character units whose element type depends on the compiled width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
467
468 /* Undefine sljit macros. */
469 #undef CMP
470
471 /* Used for accessing the elements of the stack. */
472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
473
474 #define TMP1 SLJIT_R0
475 #define TMP2 SLJIT_R2
476 #define TMP3 SLJIT_R3
477 #define STR_PTR SLJIT_S0
478 #define STR_END SLJIT_S1
479 #define STACK_TOP SLJIT_R1
480 #define STACK_LIMIT SLJIT_S2
481 #define COUNT_MATCH SLJIT_S3
482 #define ARGUMENTS SLJIT_S4
483 #define RETURN_ADDR SLJIT_R4
484
485 /* Local space layout. */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0 (0 * sizeof(sljit_sw))
488 #define LOCALS1 (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet reached. */
498 #define OVECTOR_START (common->ovector_start)
499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
516 /* Shortcuts. */
517 #define DEFINE_COMPILER \
518 struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
523 #define LABEL() \
524 sljit_emit_label(compiler)
525 #define JUMP(type) \
526 sljit_emit_jump(compiler, (type))
527 #define JUMPTO(type, label) \
528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
529 #define JUMPHERE(jump) \
530 sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532 sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540 sljit_get_local_base(compiler, (dst), (dstw), (offset))
541
542 #define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Number of set bits in each 4-bit value; the index is the value itself.
   The table is a read-only lookup, so it is declared const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
572 /* Functions which might need modification for every newly supported opcode:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
675 return cc + PRIV(OP_lengths)[*cc];
676
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
954 if (min == 2)
955 return FALSE;
956
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
961 type = *next;
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of case labels for single character iterators. The numeric
   suffix matches the "space" value that set_private_data_ptrs assigns
   for the group. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* The same grouping for character type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1059
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067 BOOL repeat_check = TRUE;
1068
1069 while (cc < ccend)
1070 {
1071 space = 0;
1072 size = 0;
1073 bracketlen = 0;
1074 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1075 break;
1076
1077 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1078 {
1079 if (detect_repeat(common, cc))
1080 {
1081 /* These brackets are converted to repeats, so no global
1082 based single character repeat is allowed. */
1083 if (cc >= end)
1084 end = bracketend(cc);
1085 }
1086 }
1087 repeat_check = TRUE;
1088
1089 switch(*cc)
1090 {
1091 case OP_KET:
1092 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1093 {
1094 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1095 private_data_ptr += sizeof(sljit_sw);
1096 cc += common->private_data_ptrs[cc + 1 - common->start];
1097 }
1098 cc += 1 + LINK_SIZE;
1099 break;
1100
1101 case OP_ASSERT:
1102 case OP_ASSERT_NOT:
1103 case OP_ASSERTBACK:
1104 case OP_ASSERTBACK_NOT:
1105 case OP_ONCE:
1106 case OP_ONCE_NC:
1107 case OP_BRAPOS:
1108 case OP_SBRA:
1109 case OP_SBRAPOS:
1110 case OP_SCOND:
1111 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1112 private_data_ptr += sizeof(sljit_sw);
1113 bracketlen = 1 + LINK_SIZE;
1114 break;
1115
1116 case OP_CBRAPOS:
1117 case OP_SCBRAPOS:
1118 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1119 private_data_ptr += sizeof(sljit_sw);
1120 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1121 break;
1122
1123 case OP_COND:
1124 /* Might be a hidden SCOND. */
1125 alternative = cc + GET(cc, 1);
1126 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1127 {
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 }
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_BRA:
1135 bracketlen = 1 + LINK_SIZE;
1136 break;
1137
1138 case OP_CBRA:
1139 case OP_SCBRA:
1140 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1141 break;
1142
1143 case OP_BRAZERO:
1144 case OP_BRAMINZERO:
1145 case OP_BRAPOSZERO:
1146 repeat_check = FALSE;
1147 size = 1;
1148 break;
1149
1150 CASE_ITERATOR_PRIVATE_DATA_1
1151 space = 1;
1152 size = -2;
1153 break;
1154
1155 CASE_ITERATOR_PRIVATE_DATA_2A
1156 space = 2;
1157 size = -2;
1158 break;
1159
1160 CASE_ITERATOR_PRIVATE_DATA_2B
1161 space = 2;
1162 size = -(2 + IMM2_SIZE);
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1166 space = 1;
1167 size = 1;
1168 break;
1169
1170 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1171 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1172 space = 2;
1173 size = 1;
1174 break;
1175
1176 case OP_TYPEUPTO:
1177 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1178 space = 2;
1179 size = 1 + IMM2_SIZE;
1180 break;
1181
1182 case OP_TYPEMINUPTO:
1183 space = 2;
1184 size = 1 + IMM2_SIZE;
1185 break;
1186
1187 case OP_CLASS:
1188 case OP_NCLASS:
1189 size += 1 + 32 / sizeof(pcre_uchar);
1190 space = get_class_iterator_size(cc + size);
1191 break;
1192
1193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1194 case OP_XCLASS:
1195 size = GET(cc, 1);
1196 space = get_class_iterator_size(cc + size);
1197 break;
1198 #endif
1199
1200 default:
1201 cc = next_opcode(common, cc);
1202 SLJIT_ASSERT(cc != NULL);
1203 break;
1204 }
1205
1206 /* Character iterators, which are not inside a repeated bracket,
1207 gets a private slot instead of allocating it on the stack. */
1208 if (space > 0 && cc >= end)
1209 {
1210 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1211 private_data_ptr += sizeof(sljit_sw) * space;
1212 }
1213
1214 if (size != 0)
1215 {
1216 if (size < 0)
1217 {
1218 cc += -size;
1219 #ifdef SUPPORT_UTF
1220 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1221 #endif
1222 }
1223 else
1224 cc += size;
1225 }
1226
1227 if (bracketlen > 0)
1228 {
1229 if (cc >= end)
1230 {
1231 end = bracketend(cc);
1232 if (end[-1 - LINK_SIZE] == OP_KET)
1233 end = NULL;
1234 }
1235 cc += bracketlen;
1236 }
1237 }
1238 *private_data_start = private_data_ptr;
1239 }
1240
1241 /* Returns with a frame_types (always < 0) if no need for frame. */
1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1243 {
1244 int length = 0;
1245 int possessive = 0;
1246 BOOL stack_restore = FALSE;
1247 BOOL setsom_found = recursive;
1248 BOOL setmark_found = recursive;
1249 /* The last capture is a local variable even for recursions. */
1250 BOOL capture_last_found = FALSE;
1251
1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1253 SLJIT_ASSERT(common->control_head_ptr != 0);
1254 *needs_control_head = TRUE;
1255 #else
1256 *needs_control_head = FALSE;
1257 #endif
1258
1259 if (ccend == NULL)
1260 {
1261 ccend = bracketend(cc) - (1 + LINK_SIZE);
1262 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1263 {
1264 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1265 /* This is correct regardless of common->capture_last_ptr. */
1266 capture_last_found = TRUE;
1267 }
1268 cc = next_opcode(common, cc);
1269 }
1270
1271 SLJIT_ASSERT(cc != NULL);
1272 while (cc < ccend)
1273 switch(*cc)
1274 {
1275 case OP_SET_SOM:
1276 SLJIT_ASSERT(common->has_set_som);
1277 stack_restore = TRUE;
1278 if (!setsom_found)
1279 {
1280 length += 2;
1281 setsom_found = TRUE;
1282 }
1283 cc += 1;
1284 break;
1285
1286 case OP_MARK:
1287 case OP_PRUNE_ARG:
1288 case OP_THEN_ARG:
1289 SLJIT_ASSERT(common->mark_ptr != 0);
1290 stack_restore = TRUE;
1291 if (!setmark_found)
1292 {
1293 length += 2;
1294 setmark_found = TRUE;
1295 }
1296 if (common->control_head_ptr != 0)
1297 *needs_control_head = TRUE;
1298 cc += 1 + 2 + cc[1];
1299 break;
1300
1301 case OP_RECURSE:
1302 stack_restore = TRUE;
1303 if (common->has_set_som && !setsom_found)
1304 {
1305 length += 2;
1306 setsom_found = TRUE;
1307 }
1308 if (common->mark_ptr != 0 && !setmark_found)
1309 {
1310 length += 2;
1311 setmark_found = TRUE;
1312 }
1313 if (common->capture_last_ptr != 0 && !capture_last_found)
1314 {
1315 length += 2;
1316 capture_last_found = TRUE;
1317 }
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 case OP_CBRA:
1322 case OP_CBRAPOS:
1323 case OP_SCBRA:
1324 case OP_SCBRAPOS:
1325 stack_restore = TRUE;
1326 if (common->capture_last_ptr != 0 && !capture_last_found)
1327 {
1328 length += 2;
1329 capture_last_found = TRUE;
1330 }
1331 length += 3;
1332 cc += 1 + LINK_SIZE + IMM2_SIZE;
1333 break;
1334
1335 case OP_THEN:
1336 stack_restore = TRUE;
1337 if (common->control_head_ptr != 0)
1338 *needs_control_head = TRUE;
1339 cc ++;
1340 break;
1341
1342 default:
1343 stack_restore = TRUE;
1344 /* Fall through. */
1345
1346 case OP_NOT_WORD_BOUNDARY:
1347 case OP_WORD_BOUNDARY:
1348 case OP_NOT_DIGIT:
1349 case OP_DIGIT:
1350 case OP_NOT_WHITESPACE:
1351 case OP_WHITESPACE:
1352 case OP_NOT_WORDCHAR:
1353 case OP_WORDCHAR:
1354 case OP_ANY:
1355 case OP_ALLANY:
1356 case OP_ANYBYTE:
1357 case OP_NOTPROP:
1358 case OP_PROP:
1359 case OP_ANYNL:
1360 case OP_NOT_HSPACE:
1361 case OP_HSPACE:
1362 case OP_NOT_VSPACE:
1363 case OP_VSPACE:
1364 case OP_EXTUNI:
1365 case OP_EODN:
1366 case OP_EOD:
1367 case OP_CIRC:
1368 case OP_CIRCM:
1369 case OP_DOLL:
1370 case OP_DOLLM:
1371 case OP_CHAR:
1372 case OP_CHARI:
1373 case OP_NOT:
1374 case OP_NOTI:
1375
1376 case OP_EXACT:
1377 case OP_POSSTAR:
1378 case OP_POSPLUS:
1379 case OP_POSQUERY:
1380 case OP_POSUPTO:
1381
1382 case OP_EXACTI:
1383 case OP_POSSTARI:
1384 case OP_POSPLUSI:
1385 case OP_POSQUERYI:
1386 case OP_POSUPTOI:
1387
1388 case OP_NOTEXACT:
1389 case OP_NOTPOSSTAR:
1390 case OP_NOTPOSPLUS:
1391 case OP_NOTPOSQUERY:
1392 case OP_NOTPOSUPTO:
1393
1394 case OP_NOTEXACTI:
1395 case OP_NOTPOSSTARI:
1396 case OP_NOTPOSPLUSI:
1397 case OP_NOTPOSQUERYI:
1398 case OP_NOTPOSUPTOI:
1399
1400 case OP_TYPEEXACT:
1401 case OP_TYPEPOSSTAR:
1402 case OP_TYPEPOSPLUS:
1403 case OP_TYPEPOSQUERY:
1404 case OP_TYPEPOSUPTO:
1405
1406 case OP_CLASS:
1407 case OP_NCLASS:
1408 case OP_XCLASS:
1409
1410 cc = next_opcode(common, cc);
1411 SLJIT_ASSERT(cc != NULL);
1412 break;
1413 }
1414
1415 /* Possessive quantifiers can use a special case. */
1416 if (SLJIT_UNLIKELY(possessive == length))
1417 return stack_restore ? no_frame : no_stack;
1418
1419 if (length > 0)
1420 return length + 1;
1421 return stack_restore ? no_frame : no_stack;
1422 }
1423
1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1425 {
1426 DEFINE_COMPILER;
1427 BOOL setsom_found = recursive;
1428 BOOL setmark_found = recursive;
1429 /* The last capture is a local variable even for recursions. */
1430 BOOL capture_last_found = FALSE;
1431 int offset;
1432
1433 /* >= 1 + shortest item size (2) */
1434 SLJIT_UNUSED_ARG(stacktop);
1435 SLJIT_ASSERT(stackpos >= stacktop + 2);
1436
1437 stackpos = STACK(stackpos);
1438 if (ccend == NULL)
1439 {
1440 ccend = bracketend(cc) - (1 + LINK_SIZE);
1441 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1442 cc = next_opcode(common, cc);
1443 }
1444
1445 SLJIT_ASSERT(cc != NULL);
1446 while (cc < ccend)
1447 switch(*cc)
1448 {
1449 case OP_SET_SOM:
1450 SLJIT_ASSERT(common->has_set_som);
1451 if (!setsom_found)
1452 {
1453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1455 stackpos += (int)sizeof(sljit_sw);
1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1457 stackpos += (int)sizeof(sljit_sw);
1458 setsom_found = TRUE;
1459 }
1460 cc += 1;
1461 break;
1462
1463 case OP_MARK:
1464 case OP_PRUNE_ARG:
1465 case OP_THEN_ARG:
1466 SLJIT_ASSERT(common->mark_ptr != 0);
1467 if (!setmark_found)
1468 {
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 setmark_found = TRUE;
1475 }
1476 cc += 1 + 2 + cc[1];
1477 break;
1478
1479 case OP_RECURSE:
1480 if (common->has_set_som && !setsom_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setsom_found = TRUE;
1488 }
1489 if (common->mark_ptr != 0 && !setmark_found)
1490 {
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 setmark_found = TRUE;
1497 }
1498 if (common->capture_last_ptr != 0 && !capture_last_found)
1499 {
1500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1504 stackpos += (int)sizeof(sljit_sw);
1505 capture_last_found = TRUE;
1506 }
1507 cc += 1 + LINK_SIZE;
1508 break;
1509
1510 case OP_CBRA:
1511 case OP_CBRAPOS:
1512 case OP_SCBRA:
1513 case OP_SCBRAPOS:
1514 if (common->capture_last_ptr != 0 && !capture_last_found)
1515 {
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1518 stackpos += (int)sizeof(sljit_sw);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 capture_last_found = TRUE;
1522 }
1523 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1525 stackpos += (int)sizeof(sljit_sw);
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1529 stackpos += (int)sizeof(sljit_sw);
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1531 stackpos += (int)sizeof(sljit_sw);
1532
1533 cc += 1 + LINK_SIZE + IMM2_SIZE;
1534 break;
1535
1536 default:
1537 cc = next_opcode(common, cc);
1538 SLJIT_ASSERT(cc != NULL);
1539 break;
1540 }
1541
1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1543 SLJIT_ASSERT(stackpos == STACK(stacktop));
1544 }
1545
1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1547 {
1548 int private_data_length = needs_control_head ? 3 : 2;
1549 int size;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
1552 while (cc < ccend)
1553 {
1554 size = 0;
1555 switch(*cc)
1556 {
1557 case OP_KET:
1558 if (PRIVATE_DATA(cc) != 0)
1559 {
1560 private_data_length++;
1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562 cc += PRIVATE_DATA(cc + 1);
1563 }
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_ASSERT:
1568 case OP_ASSERT_NOT:
1569 case OP_ASSERTBACK:
1570 case OP_ASSERTBACK_NOT:
1571 case OP_ONCE:
1572 case OP_ONCE_NC:
1573 case OP_BRAPOS:
1574 case OP_SBRA:
1575 case OP_SBRAPOS:
1576 case OP_SCOND:
1577 private_data_length++;
1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579 cc += 1 + LINK_SIZE;
1580 break;
1581
1582 case OP_CBRA:
1583 case OP_SCBRA:
1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE + IMM2_SIZE;
1587 break;
1588
1589 case OP_CBRAPOS:
1590 case OP_SCBRAPOS:
1591 private_data_length += 2;
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 private_data_length++;
1600 cc += 1 + LINK_SIZE;
1601 break;
1602
1603 CASE_ITERATOR_PRIVATE_DATA_1
1604 if (PRIVATE_DATA(cc))
1605 private_data_length++;
1606 cc += 2;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_PRIVATE_DATA_2A
1613 if (PRIVATE_DATA(cc))
1614 private_data_length += 2;
1615 cc += 2;
1616 #ifdef SUPPORT_UTF
1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1618 #endif
1619 break;
1620
1621 CASE_ITERATOR_PRIVATE_DATA_2B
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 2 + IMM2_SIZE;
1625 #ifdef SUPPORT_UTF
1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1627 #endif
1628 break;
1629
1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631 if (PRIVATE_DATA(cc))
1632 private_data_length++;
1633 cc += 1;
1634 break;
1635
1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637 if (PRIVATE_DATA(cc))
1638 private_data_length += 2;
1639 cc += 1;
1640 break;
1641
1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643 if (PRIVATE_DATA(cc))
1644 private_data_length += 2;
1645 cc += 1 + IMM2_SIZE;
1646 break;
1647
1648 case OP_CLASS:
1649 case OP_NCLASS:
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1651 case OP_XCLASS:
1652 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1653 #else
1654 size = 1 + 32 / (int)sizeof(pcre_uchar);
1655 #endif
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += get_class_iterator_size(cc + size);
1658 cc += size;
1659 break;
1660
1661 default:
1662 cc = next_opcode(common, cc);
1663 SLJIT_ASSERT(cc != NULL);
1664 break;
1665 }
1666 }
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
1669 }
1670
/* Emits the code which saves (save == TRUE) or restores (save == FALSE) the
private data words used by the opcodes between cc and ccend. The words live
in the locals addressed by SLJIT_MEM1(SLJIT_SP) and are copied to / from the
stack area addressed by SLJIT_MEM1(STACK_TOP), between the offsets derived
from stackptr and stacktop. The opcodes are visited in the same way as in
get_private_data_copy_length(), which is expected to have computed the size
of this area (the final assertion checks that exactly stacktop is reached).

Every word travels through TMP1 or TMP2. The two registers are used
alternately (tmp1next selects the next one), and tmp1empty / tmp2empty tell
whether a register currently holds a word which has not been stored yet. */
1671 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1672 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1673 {
1674 DEFINE_COMPILER;
/* Local offsets of the (at most two) words selected for the current opcode. */
1675 int srcw[2];
1676 int count, size;
1677 BOOL tmp1next = TRUE;
1678 BOOL tmp1empty = TRUE;
1679 BOOL tmp2empty = TRUE;
1680 pcre_uchar *alternative;
/* start: select the head pointers (save only), loop: walk the opcodes,
end: nothing is left to select. */
1681 enum {
1682 start,
1683 loop,
1684 end
1685 } status;
1686
1687 status = save ? start : loop;
1688 stackptr = STACK(stackptr - 2);
1689 stacktop = STACK(stacktop - 1);
1690
1691 if (!save)
1692 {
/* The restore path never enters the start state, so the word(s) which the
save path stores for the head pointers are stepped over here. After that up
to two words are preloaded into TMP1 and TMP2. */
1693 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1694 if (stackptr < stacktop)
1695 {
1696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1697 stackptr += sizeof(sljit_sw);
1698 tmp1empty = FALSE;
1699 }
1700 if (stackptr < stacktop)
1701 {
1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1703 stackptr += sizeof(sljit_sw);
1704 tmp2empty = FALSE;
1705 }
1706 /* The tmp1next must be TRUE in either way. */
1707 }
1708
/* Each pass selects zero, one or two local offsets (count, srcw) and then
the inner loop at the bottom emits the transfer for them. */
1709 do
1710 {
1711 count = 0;
1712 switch(status)
1713 {
1714 case start:
1715 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1716 count = 1;
1717 srcw[0] = common->recursive_head_ptr;
1718 if (needs_control_head)
1719 {
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1721 count = 2;
1722 srcw[1] = common->control_head_ptr;
1723 }
1724 status = loop;
1725 break;
1726
1727 case loop:
1728 if (cc >= ccend)
1729 {
1730 status = end;
1731 break;
1732 }
1733
1734 switch(*cc)
1735 {
1736 case OP_KET:
1737 if (PRIVATE_DATA(cc) != 0)
1738 {
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1742 cc += PRIVATE_DATA(cc + 1);
1743 }
1744 cc += 1 + LINK_SIZE;
1745 break;
1746
1747 case OP_ASSERT:
1748 case OP_ASSERT_NOT:
1749 case OP_ASSERTBACK:
1750 case OP_ASSERTBACK_NOT:
1751 case OP_ONCE:
1752 case OP_ONCE_NC:
1753 case OP_BRAPOS:
1754 case OP_SBRA:
1755 case OP_SBRAPOS:
1756 case OP_SCOND:
1757 count = 1;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 SLJIT_ASSERT(srcw[0] != 0);
1760 cc += 1 + LINK_SIZE;
1761 break;
1762
1763 case OP_CBRA:
1764 case OP_SCBRA:
1765 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1766 {
1767 count = 1;
1768 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1769 }
1770 cc += 1 + LINK_SIZE + IMM2_SIZE;
1771 break;
1772
1773 case OP_CBRAPOS:
1774 case OP_SCBRAPOS:
1775 count = 2;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1778 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1779 cc += 1 + LINK_SIZE + IMM2_SIZE;
1780 break;
1781
1782 case OP_COND:
1783 /* Might be a hidden SCOND. */
1784 alternative = cc + GET(cc, 1);
1785 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1786 {
1787 count = 1;
1788 srcw[0] = PRIVATE_DATA(cc);
1789 SLJIT_ASSERT(srcw[0] != 0);
1790 }
1791 cc += 1 + LINK_SIZE;
1792 break;
1793
/* Iterators are only copied when a private data offset was assigned
to them; two word iterators occupy two consecutive words. */
1794 CASE_ITERATOR_PRIVATE_DATA_1
1795 if (PRIVATE_DATA(cc))
1796 {
1797 count = 1;
1798 srcw[0] = PRIVATE_DATA(cc);
1799 }
1800 cc += 2;
1801 #ifdef SUPPORT_UTF
1802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1803 #endif
1804 break;
1805
1806 CASE_ITERATOR_PRIVATE_DATA_2A
1807 if (PRIVATE_DATA(cc))
1808 {
1809 count = 2;
1810 srcw[0] = PRIVATE_DATA(cc);
1811 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1812 }
1813 cc += 2;
1814 #ifdef SUPPORT_UTF
1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1816 #endif
1817 break;
1818
1819 CASE_ITERATOR_PRIVATE_DATA_2B
1820 if (PRIVATE_DATA(cc))
1821 {
1822 count = 2;
1823 srcw[0] = PRIVATE_DATA(cc);
1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1825 }
1826 cc += 2 + IMM2_SIZE;
1827 #ifdef SUPPORT_UTF
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1829 #endif
1830 break;
1831
1832 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1833 if (PRIVATE_DATA(cc))
1834 {
1835 count = 1;
1836 srcw[0] = PRIVATE_DATA(cc);
1837 }
1838 cc += 1;
1839 break;
1840
1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1842 if (PRIVATE_DATA(cc))
1843 {
1844 count = 2;
1845 srcw[0] = PRIVATE_DATA(cc);
1846 srcw[1] = srcw[0] + sizeof(sljit_sw);
1847 }
1848 cc += 1;
1849 break;
1850
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1852 if (PRIVATE_DATA(cc))
1853 {
1854 count = 2;
1855 srcw[0] = PRIVATE_DATA(cc);
1856 srcw[1] = srcw[0] + sizeof(sljit_sw);
1857 }
1858 cc += 1 + IMM2_SIZE;
1859 break;
1860
1861 case OP_CLASS:
1862 case OP_NCLASS:
1863 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1864 case OP_XCLASS:
1865 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1866 #else
1867 size = 1 + 32 / (int)sizeof(pcre_uchar);
1868 #endif
/* The number of words depends on the iterator which follows the class. */
1869 if (PRIVATE_DATA(cc))
1870 switch(get_class_iterator_size(cc + size))
1871 {
1872 case 1:
1873 count = 1;
1874 srcw[0] = PRIVATE_DATA(cc);
1875 break;
1876
1877 case 2:
1878 count = 2;
1879 srcw[0] = PRIVATE_DATA(cc);
1880 srcw[1] = srcw[0] + sizeof(sljit_sw);
1881 break;
1882
1883 default:
1884 SLJIT_ASSERT_STOP();
1885 break;
1886 }
1887 cc += size;
1888 break;
1889
1890 default:
1891 cc = next_opcode(common, cc);
1892 SLJIT_ASSERT(cc != NULL);
1893 break;
1894 }
1895 break;
1896
1897 case end:
1898 SLJIT_ASSERT_STOP();
1899 break;
1900 }
1901
/* Transfer the selected words, from srcw[count - 1] down to srcw[0]. */
1902 while (count > 0)
1903 {
1904 count--;
1905 if (save)
1906 {
/* Save: store the word still held by the chosen register (if any) to
the stack, then load the next local into that register. */
1907 if (tmp1next)
1908 {
1909 if (!tmp1empty)
1910 {
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1912 stackptr += sizeof(sljit_sw);
1913 }
1914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1915 tmp1empty = FALSE;
1916 tmp1next = FALSE;
1917 }
1918 else
1919 {
1920 if (!tmp2empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1926 tmp2empty = FALSE;
1927 tmp1next = TRUE;
1928 }
1929 }
1930 else
1931 {
/* Restore: write the preloaded register to the local, then refill the
register from the stack unless the end of the area is reached. */
1932 if (tmp1next)
1933 {
1934 SLJIT_ASSERT(!tmp1empty);
1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1936 tmp1empty = stackptr >= stacktop;
1937 if (!tmp1empty)
1938 {
1939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1940 stackptr += sizeof(sljit_sw);
1941 }
1942 tmp1next = FALSE;
1943 }
1944 else
1945 {
1946 SLJIT_ASSERT(!tmp2empty);
1947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1948 tmp2empty = stackptr >= stacktop;
1949 if (!tmp2empty)
1950 {
1951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1952 stackptr += sizeof(sljit_sw);
1953 }
1954 tmp1next = TRUE;
1955 }
1956 }
1957 }
1958 }
1959 while (status != end);
1960
1961 if (save)
1962 {
/* Flush the words which are still held in the registers. The register
which would be reused next holds the older word, so it is stored first. */
1963 if (tmp1next)
1964 {
1965 if (!tmp1empty)
1966 {
1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1968 stackptr += sizeof(sljit_sw);
1969 }
1970 if (!tmp2empty)
1971 {
1972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1973 stackptr += sizeof(sljit_sw);
1974 }
1975 }
1976 else
1977 {
1978 if (!tmp2empty)
1979 {
1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1981 stackptr += sizeof(sljit_sw);
1982 }
1983 if (!tmp1empty)
1984 {
1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1986 stackptr += sizeof(sljit_sw);
1987 }
1988 }
1989 }
/* All opcodes were visited, the whole stack area was consumed, and (when
restoring) no loaded word was left unused. */
1990 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1991 }
1992
1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1994 {
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1997
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000 current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003 has_alternatives = FALSE;
2004
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007 current_offset = common->then_offsets + (cc - common->start);
2008
2009 while (cc < end)
2010 {
2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012 cc = set_then_offsets(common, cc, current_offset);
2013 else
2014 {
2015 if (*cc == OP_ALT && has_alternatives)
2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018 *current_offset = 1;
2019 cc = next_opcode(common, cc);
2020 }
2021 }
2022
2023 return end;
2024 }
2025
/* The case label helper macros are not used past this point. */
2026 #undef CASE_ITERATOR_PRIVATE_DATA_1
2027 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2028 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2029 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2030 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2031 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2032
2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2034 {
2035 return (value & (value - 1)) == 0;
2036 }
2037
2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2039 {
2040 while (list)
2041 {
2042 /* sljit_set_label is clever enough to do nothing
2043 if either the jump or the label is NULL. */
2044 SET_LABEL(list->jump, label);
2045 list = list->next;
2046 }
2047 }
2048
2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2050 {
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2052 if (list_item)
2053 {
2054 list_item->next = *list;
2055 list_item->jump = jump;
2056 *list = list_item;
2057 }
2058 }
2059
2060 static void add_stub(compiler_common *common, struct sljit_jump *start)
2061 {
2062 DEFINE_COMPILER;
2063 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2064
2065 if (list_item)
2066 {
2067 list_item->start = start;
2068 list_item->quit = LABEL();
2069 list_item->next = common->stubs;
2070 common->stubs = list_item;
2071 }
2072 }
2073
2074 static void flush_stubs(compiler_common *common)
2075 {
2076 DEFINE_COMPILER;
2077 stub_list *list_item = common->stubs;
2078
2079 while (list_item)
2080 {
2081 JUMPHERE(list_item->start);
2082 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2083 JUMPTO(SLJIT_JUMP, list_item->quit);
2084 list_item = list_item->next;
2085 }
2086 common->stubs = NULL;
2087 }
2088
2089 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2090 {
2091 DEFINE_COMPILER;
2092 label_addr_list *label_addr;
2093
2094 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2095 if (label_addr == NULL)
2096 return;
2097 label_addr->label = LABEL();
2098 label_addr->update_addr = update_addr;
2099 label_addr->next = common->label_addrs;
2100 common->label_addrs = label_addr;
2101 }
2102
/* Emits the code which decrements COUNT_MATCH by one and, when the result is
zero, takes a jump which is queued on the common->calllimit jump list. */
2103 static SLJIT_INLINE void count_match(compiler_common *common)
2104 {
2105 DEFINE_COMPILER;
2106
2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2109 }
2110
2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2112 {
2113 /* May destroy all locals and registers except TMP2. */
2114 DEFINE_COMPILER;
2115
2116 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2117 #ifdef DESTROY_REGISTERS
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2119 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2120 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2123 #endif
2124 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2125 }
2126
2127 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2128 {
2129 DEFINE_COMPILER;
2130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2131 }
2132
2133 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2134 {
2135 DEFINE_COMPILER;
2136 sljit_uw *result;
2137
2138 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2139 return NULL;
2140
2141 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2142 if (SLJIT_UNLIKELY(result == NULL))
2143 {
2144 sljit_set_compiler_memory_error(compiler);
2145 return NULL;
2146 }
2147
2148 *(void**)result = common->read_only_data_head;
2149 common->read_only_data_head = (void *)result;
2150 return result + 1;
2151 }
2152
2153 static void free_read_only_data(void *current, void *allocator_data)
2154 {
2155 void *next;
2156
2157 SLJIT_UNUSED_ARG(allocator_data);
2158
2159 while (current != NULL)
2160 {
2161 next = *(void**)current;
2162 SLJIT_FREE(current, allocator_data);
2163 current = next;
2164 }
2165 }
2166
2167 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2168 {
2169 DEFINE_COMPILER;
2170 struct sljit_label *loop;
2171 int i;
2172
2173 /* At this point we can freely use all temporary registers. */
2174 SLJIT_ASSERT(length > 1);
2175 /* TMP1 returns with begin - 1. */
2176 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2177 if (length < 8)
2178 {
2179 for (i = 1; i < length; i++)
2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2181 }
2182 else
2183 {
2184 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2185 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2186 loop = LABEL();
2187 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2189 JUMPTO(SLJIT_NOT_ZERO, loop);
2190 }
2191 }
2192
2193 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2194 {
2195 DEFINE_COMPILER;
2196 struct sljit_label *loop;
2197 int i;
2198
2199 SLJIT_ASSERT(length > 1);
2200 /* OVECTOR(1) contains the "string begin - 1" constant. */
2201 if (length > 2)
2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2203 if (length < 8)
2204 {
2205 for (i = 2; i < length; i++)
2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2207 }
2208 else
2209 {
2210 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2212 loop = LABEL();
2213 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2214 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2215 JUMPTO(SLJIT_NOT_ZERO, loop);
2216 }
2217
2218 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2219 if (common->mark_ptr != 0)
2220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2221 if (common->control_head_ptr != 0)
2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2226 }
2227
2228 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2229 {
2230 while (current != NULL)
2231 {
2232 switch (current[-2])
2233 {
2234 case type_then_trap:
2235 break;
2236
2237 case type_mark:
2238 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2239 return current[-4];
2240 break;
2241
2242 default:
2243 SLJIT_ASSERT_STOP();
2244 break;
2245 }
2246 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2247 current = (sljit_sw*)current[-1];
2248 }
2249 return -1;
2250 }
2251
/* Emits the code which finishes a successful match: STR_PTR is stored into
OVECTOR(1), the mark (when in use) is copied into the arguments, the values
of the local ovector are converted to offsets relative to the subject begin
and written to the offsets array of the arguments (offset_count items), and
the return value is computed from topbracket. */
2252 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2253 {
2254 DEFINE_COMPILER;
2255 struct sljit_label *loop;
2256 struct sljit_jump *early_quit;
2257
2258 /* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the old OVECTOR(1) value; the second loop below compares
ovector items against it. */
2259 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2261
2262 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2263 if (common->mark_ptr != 0)
2264 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2265 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2266 if (common->mark_ptr != 0)
2267 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 = offsets - sizeof(int); presumably because the SLJIT_MOVU_SI store in
the loop advances the base register before storing - confirm against the
sljit documentation. */
2268 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2269 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2270 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2271 /* Unlikely, but possible */
2272 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop: R1 counts the remaining items, S0 walks the local ovector and
S1 holds the item converted to an offset from the subject begin (R0). */
2273 loop = LABEL();
2274 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2275 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2276 /* Copy the integer value to the output buffer */
2277 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2278 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2279 #endif
2280 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2282 JUMPTO(SLJIT_NOT_ZERO, loop);
2283 JUMPHERE(early_quit);
2284
2285 /* Calculate the return value, which is the maximum ovector value. */
2286 if (topbracket > 1)
2287 {
/* Scan the ovector pairs backwards from topbracket, decrementing R1 for
every visited pair, until an item different from the saved S2 value is
found. */
2288 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2289 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2290
2291 /* OVECTOR(0) is never equal to SLJIT_S2. */
2292 loop = LABEL();
2293 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2294 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2295 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2296 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2297 }
2298 else
2299 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2300 }
2301
2302 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2303 {
2304 DEFINE_COMPILER;
2305 struct sljit_jump *jump;
2306
2307 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2308 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2309 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2310
2311 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2312 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2313 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2314 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2315
2316 /* Store match begin and end. */
2317 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2318 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2319
2320 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2321 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2322 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2323 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2324 #endif
2325 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2326 JUMPHERE(jump);
2327
2328 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2329 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2331 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2332 #endif
2333 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2334
2335 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2336 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2337 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2338 #endif
2339 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2340
2341 JUMPTO(SLJIT_JUMP, quit);
2342 }
2343
2344 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2345 {
2346 /* May destroy TMP1. */
2347 DEFINE_COMPILER;
2348 struct sljit_jump *jump;
2349
2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2351 {
2352 /* The value of -1 must be kept for start_used_ptr! */
2353 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2354 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2355 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2356 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2358 JUMPHERE(jump);
2359 }
2360 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2361 {
2362 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2364 JUMPHERE(jump);
2365 }
2366 }
2367
2368 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2369 {
2370 /* Detects if the character has an othercase. */
2371 unsigned int c;
2372
2373 #ifdef SUPPORT_UTF
2374 if (common->utf)
2375 {
2376 GETCHAR(c, cc);
2377 if (c > 127)
2378 {
2379 #ifdef SUPPORT_UCP
2380 return c != UCD_OTHERCASE(c);
2381 #else
2382 return FALSE;
2383 #endif
2384 }
2385 #ifndef COMPILE_PCRE8
2386 return common->fcc[c] != c;
2387 #endif
2388 }
2389 else
2390 #endif
2391 c = *cc;
2392 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2393 }
2394
2395 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2396 {
2397 /* Returns with the othercase. */
2398 #ifdef SUPPORT_UTF
2399 if (common->utf && c > 127)
2400 {
2401 #ifdef SUPPORT_UCP
2402 return UCD_OTHERCASE(c);
2403 #else
2404 return c;
2405 #endif
2406 }
2407 #endif
2408 return TABLE_GET(c, common->fcc, c);
2409 }
2410
2411 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2412 {
2413 /* Detects if the character and its othercase has only 1 bit difference. */
2414 unsigned int c, oc, bit;
2415 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2416 int n;
2417 #endif
2418
2419 #ifdef SUPPORT_UTF
2420 if (common->utf)
2421 {
2422 GETCHAR(c, cc);
2423 if (c <= 127)
2424 oc = common->fcc[c];
2425 else
2426 {
2427 #ifdef SUPPORT_UCP
2428 oc = UCD_OTHERCASE(c);
2429 #else
2430 oc = c;
2431 #endif
2432 }
2433 }
2434 else
2435 {
2436 c = *cc;
2437 oc = TABLE_GET(c, common->fcc, c);
2438 }
2439 #else
2440 c = *cc;
2441 oc = TABLE_GET(c, common->fcc, c);
2442 #endif
2443
2444 SLJIT_ASSERT(c != oc);
2445
2446 bit = c ^ oc;
2447 /* Optimized for English alphabet. */
2448 if (c <= 127 && bit == 0x20)
2449 return (0 << 8) | 0x20;
2450
2451 /* Since c != oc, they must have at least 1 bit difference. */
2452 if (!is_powerof2(bit))
2453 return 0;
2454
2455 #if defined COMPILE_PCRE8
2456
2457 #ifdef SUPPORT_UTF
2458 if (common->utf && c > 127)
2459 {
2460 n = GET_EXTRALEN(*cc);
2461 while ((bit & 0x3f) == 0)
2462 {
2463 n--;
2464 bit >>= 6;
2465 }
2466 return (n << 8) | bit;
2467 }
2468 #endif /* SUPPORT_UTF */
2469 return (0 << 8) | bit;
2470
2471 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2472
2473 #ifdef SUPPORT_UTF
2474 if (common->utf && c > 65535)
2475 {
2476 if (bit >= (1 << 10))
2477 bit >>= 10;
2478 else
2479 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2480 }
2481 #endif /* SUPPORT_UTF */
2482 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2483
2484 #endif /* COMPILE_PCRE[8|16|32] */
2485 }
2486
2487 static void check_partial(compiler_common *common, BOOL force)
2488 {
2489 /* Checks whether a partial matching is occurred. Does not modify registers. */
2490 DEFINE_COMPILER;
2491 struct sljit_jump *jump = NULL;
2492
2493 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2494
2495 if (common->mode == JIT_COMPILE)
2496 return;
2497
2498 if (!force)
2499 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2500 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2501 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2502
2503 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2505 else
2506 {
2507 if (common->partialmatchlabel != NULL)
2508 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2509 else
2510 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2511 }
2512
2513 if (jump != NULL)
2514 JUMPHERE(jump);
2515 }
2516
2517 static void check_str_end(compiler_common *common, jump_list **end_reached)
2518 {
2519 /* Does not affect registers. Usually used in a tight spot. */
2520 DEFINE_COMPILER;
2521 struct sljit_jump *jump;
2522
2523 if (common->mode == JIT_COMPILE)
2524 {
2525 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2526 return;
2527 }
2528
2529 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2530 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2531 {
2532 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2534 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2535 }
2536 else
2537 {
2538 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2539 if (common->partialmatchlabel != NULL)
2540 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2541 else
2542 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2543 }
2544 JUMPHERE(jump);
2545 }
2546
2547 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2548 {
2549 DEFINE_COMPILER;
2550 struct sljit_jump *jump;
2551
2552 if (common->mode == JIT_COMPILE)
2553 {
2554 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2555 return;
2556 }
2557
2558 /* Partial matching mode. */
2559 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2560 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2561 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2562 {
2563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2564 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2565 }
2566 else
2567 {
2568 if (common->partialmatchlabel != NULL)
2569 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2570 else
2571 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2572 }
2573 JUMPHERE(jump);
2574 }
2575
2576 static void peek_char(compiler_common *common, pcre_uint32 max)
2577 {
2578 /* Reads the character into TMP1, keeps STR_PTR.
2579 Does not check STR_END. TMP2 Destroyed. */
2580 DEFINE_COMPILER;
2581 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2582 struct sljit_jump *jump;
2583 #endif
2584
2585 SLJIT_UNUSED_ARG(max);
2586
2587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2588 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2589 if (common->utf)
2590 {
2591 if (max < 128) return;
2592
2593 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2595 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2596 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2597 JUMPHERE(jump);
2598 }
2599 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2600
2601 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2602 if (common->utf)
2603 {
2604 if (max < 0xd800) return;
2605
2606 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2607 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2608 /* TMP2 contains the high surrogate. */
2609 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2610 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2611 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2612 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2613 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2614 JUMPHERE(jump);
2615 }
2616 #endif
2617 }
2618
2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2620
2621 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2622 {
2623 /* Tells whether the character codes below 128 are enough
2624 to determine a match. */
2625 const pcre_uint8 value = nclass ? 0xff : 0;
2626 const pcre_uint8 *end = bitset + 32;
2627
2628 bitset += 16;
2629 do
2630 {
2631 if (*bitset++ != value)
2632 return FALSE;
2633 }
2634 while (bitset < end);
2635 return TRUE;
2636 }
2637
2638 static void read_char7_type(compiler_common *common, BOOL full_read)
2639 {
2640 /* Reads the precise character type of a character into TMP1, if the character
2641 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2642 full_read argument tells whether characters above max are accepted or not. */
2643 DEFINE_COMPILER;
2644 struct sljit_jump *jump;
2645
2646 SLJIT_ASSERT(common->utf);
2647
2648 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2650
2651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2652
2653 if (full_read)
2654 {
2655 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2656 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2658 JUMPHERE(jump);
2659 }
2660 }
2661
2662 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2663
2664 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2665 {
2666 /* Reads the precise value of a character into TMP1, if the character is
2667 between min and max (c >= min && c <= max). Otherwise it returns with a value
2668 outside the range. Does not check STR_END. */
2669 DEFINE_COMPILER;
2670 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2671 struct sljit_jump *jump;
2672 #endif
2673 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2674 struct sljit_jump *jump2;
2675 #endif
2676
2677 SLJIT_UNUSED_ARG(update_str_ptr);
2678 SLJIT_UNUSED_ARG(min);
2679 SLJIT_UNUSED_ARG(max);
2680 SLJIT_ASSERT(min <= max);
2681
2682 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2684
2685 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2686 if (common->utf)
2687 {
2688 if (max < 128 && !update_str_ptr) return;
2689
2690 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2691 if (min >= 0x10000)
2692 {
2693 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2694 if (update_str_ptr)
2695 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2696 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2697 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2702 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2703 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2704 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2705 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2706 if (!update_str_ptr)
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2708 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2709 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2710 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2711 JUMPHERE(jump2);
2712 if (update_str_ptr)
2713 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2714 }
2715 else if (min >= 0x800 && max <= 0xffff)
2716 {
2717 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2718 if (update_str_ptr)
2719 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2720 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2721 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2722 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2723 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2724 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2726 if (!update_str_ptr)
2727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2728 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2729 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2730 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2731 JUMPHERE(jump2);
2732 if (update_str_ptr)
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2734 }
2735 else if (max >= 0x800)
2736 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2737 else if (max < 128)
2738 {
2739 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2741 }
2742 else
2743 {
2744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2745 if (!update_str_ptr)
2746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2747 else
2748 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2749 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2750 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2751 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2752 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2753 if (update_str_ptr)
2754 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2755 }
2756 JUMPHERE(jump);
2757 }
2758 #endif
2759
2760 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2761 if (common->utf)
2762 {
2763 if (max >= 0x10000)
2764 {
2765 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2766 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2767 /* TMP2 contains the high surrogate. */
2768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2769 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2770 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2774 JUMPHERE(jump);
2775 return;
2776 }
2777
2778 if (max < 0xd800 && !update_str_ptr) return;
2779
2780 /* Skip low surrogate if necessary. */
2781 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2782 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2783 if (update_str_ptr)
2784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2785 if (max >= 0xd800)
2786 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2787 JUMPHERE(jump);
2788 }
2789 #endif
2790 }
2791
2792 static SLJIT_INLINE void read_char(compiler_common *common)
2793 {
2794 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2795 }
2796
2797 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2798 {
2799 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2800 DEFINE_COMPILER;
2801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2802 struct sljit_jump *jump;
2803 #endif
2804 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2805 struct sljit_jump *jump2;
2806 #endif
2807
2808 SLJIT_UNUSED_ARG(update_str_ptr);
2809
2810 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2812
2813 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2814 if (common->utf)
2815 {
2816 /* This can be an extra read in some situations, but hopefully
2817 it is needed in most cases. */
2818 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2819 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2820 if (!update_str_ptr)
2821 {
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2825 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2826 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2827 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2829 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2830 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2831 JUMPHERE(jump2);
2832 }
2833 else
2834 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2835 JUMPHERE(jump);
2836 return;
2837 }
2838 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2839
2840 #if !defined COMPILE_PCRE8
2841 /* The ctypes array contains only 256 values. */
2842 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2843 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2844 #endif
2845 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2846 #if !defined COMPILE_PCRE8
2847 JUMPHERE(jump);
2848 #endif
2849
2850 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2851 if (common->utf && update_str_ptr)
2852 {
2853 /* Skip low surrogate if necessary. */
2854 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2855 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2857 JUMPHERE(jump);
2858 }
2859 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2860 }
2861
2862 static void skip_char_back(compiler_common *common)
2863 {
2864 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2865 DEFINE_COMPILER;
2866 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2867 #if defined COMPILE_PCRE8
2868 struct sljit_label *label;
2869
2870 if (common->utf)
2871 {
2872 label = LABEL();
2873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2874 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2875 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2876 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2877 return;
2878 }
2879 #elif defined COMPILE_PCRE16
2880 if (common->utf)
2881 {
2882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2883 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2884 /* Skip low surrogate if necessary. */
2885 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2886 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2887 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2888 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2889 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2890 return;
2891 }
2892 #endif /* COMPILE_PCRE[8|16] */
2893 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2894 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2895 }
2896
2897 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2898 {
2899 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2900 DEFINE_COMPILER;
2901 struct sljit_jump *jump;
2902
2903 if (nltype == NLTYPE_ANY)
2904 {
2905 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2906 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2907 }
2908 else if (nltype == NLTYPE_ANYCRLF)
2909 {
2910 if (jumpifmatch)
2911 {
2912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2913 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2914 }
2915 else
2916 {
2917 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2918 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2919 JUMPHERE(jump);
2920 }
2921 }
2922 else
2923 {
2924 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2925 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2926 }
2927 }
2928
2929 #ifdef SUPPORT_UTF
2930
2931 #if defined COMPILE_PCRE8
2932 static void do_utfreadchar(compiler_common *common)
2933 {
2934 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2935 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2936 DEFINE_COMPILER;
2937 struct sljit_jump *jump;
2938
2939 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2940 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2941 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2945
2946 /* Searching for the first zero. */
2947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2948 jump = JUMP(SLJIT_NOT_ZERO);
2949 /* Two byte sequence. */
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2952 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2953
2954 JUMPHERE(jump);
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960
2961 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2962 jump = JUMP(SLJIT_NOT_ZERO);
2963 /* Three byte sequence. */
2964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2966 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2967
2968 /* Four byte sequence. */
2969 JUMPHERE(jump);
2970 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2971 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2972 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2977 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2978 }
2979
2980 static void do_utfreadchar16(compiler_common *common)
2981 {
2982 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2983 of the character (>= 0xc0). Return value in TMP1. */
2984 DEFINE_COMPILER;
2985 struct sljit_jump *jump;
2986
2987 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2988 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2989 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2990 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2991 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2992 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2993
2994 /* Searching for the first zero. */
2995 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2996 jump = JUMP(SLJIT_NOT_ZERO);
2997 /* Two byte sequence. */
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3000
3001 JUMPHERE(jump);
3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3003 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3004 /* This code runs only in 8 bit mode. No need to shift the value. */
3005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3006 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3007 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3008 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3009 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3010 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3011 /* Three byte sequence. */
3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3013 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3014 }
3015
3016 static void do_utfreadtype8(compiler_common *common)
3017 {
3018 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3019 of the character (>= 0xc0). Return value in TMP1. */
3020 DEFINE_COMPILER;
3021 struct sljit_jump *jump;
3022 struct sljit_jump *compare;
3023
3024 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3025
3026 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3027 jump = JUMP(SLJIT_NOT_ZERO);
3028 /* Two byte sequence. */
3029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3031 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3032 /* The upper 5 bits are known at this point. */
3033 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3034 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3035 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3036 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3038 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3039
3040 JUMPHERE(compare);
3041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3042 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3043
3044 /* We only have types for characters less than 256. */
3045 JUMPHERE(jump);
3046 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3049 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3050 }
3051
3052 #endif /* COMPILE_PCRE8 */
3053
3054 #endif /* SUPPORT_UTF */
3055
3056 #ifdef SUPPORT_UCP
3057
3058 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3059 #define UCD_BLOCK_MASK 127
3060 #define UCD_BLOCK_SHIFT 7
3061
3062 static void do_getucd(compiler_common *common)
3063 {
3064 /* Search the UCD record for the character comes in TMP1.
3065 Returns chartype in TMP1 and UCD offset in TMP2. */
3066 DEFINE_COMPILER;
3067
3068 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3069
3070 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3071 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3072 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3073 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3074 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3075 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3077 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3079 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3080 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3081 }
3082 #endif
3083
3084 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3085 {
3086 DEFINE_COMPILER;
3087 struct sljit_label *mainloop;
3088 struct sljit_label *newlinelabel = NULL;
3089 struct sljit_jump *start;
3090 struct sljit_jump *end = NULL;
3091 struct sljit_jump *nl = NULL;
3092 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3093 struct sljit_jump *singlechar;
3094 #endif
3095 jump_list *newline = NULL;
3096 BOOL newlinecheck = FALSE;
3097 BOOL readuchar = FALSE;
3098
3099 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3100 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3101 newlinecheck = TRUE;
3102
3103 if (firstline)
3104 {
3105 /* Search for the end of the first line. */
3106 SLJIT_ASSERT(common->first_line_end != 0);
3107 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3108
3109 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3110 {
3111 mainloop = LABEL();
3112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3113 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3115 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3116 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3117 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3118 JUMPHERE(end);
3119 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 }
3121 else
3122 {
3123 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3124 mainloop = LABEL();
3125 /* Continual stores does not cause data dependency. */
3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3127 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3128 check_newlinechar(common, common->nltype, &newline, TRUE);
3129 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3130 JUMPHERE(end);
3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3132 set_jumps(newline, LABEL());
3133 }
3134
3135 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3136 }
3137
3138 start = JUMP(SLJIT_JUMP);
3139
3140 if (newlinecheck)
3141 {
3142 newlinelabel = LABEL();
3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3144 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3147 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3149 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3150 #endif
3151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3152 nl = JUMP(SLJIT_JUMP);
3153 }
3154
3155 mainloop = LABEL();
3156
3157 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3158 #ifdef SUPPORT_UTF
3159 if (common->utf) readuchar = TRUE;
3160 #endif
3161 if (newlinecheck) readuchar = TRUE;
3162
3163 if (readuchar)
3164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3165
3166 if (newlinecheck)
3167 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3168
3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3171 #if defined COMPILE_PCRE8
3172 if (common->utf)
3173 {
3174 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3175 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3177 JUMPHERE(singlechar);
3178 }
3179 #elif defined COMPILE_PCRE16
3180 if (common->utf)
3181 {
3182 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3183 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3185 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3186 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3188 JUMPHERE(singlechar);
3189 }
3190 #endif /* COMPILE_PCRE[8|16] */
3191 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3192 JUMPHERE(start);
3193
3194 if (newlinecheck)
3195 {
3196 JUMPHERE(end);
3197 JUMPHERE(nl);
3198 }
3199
3200 return mainloop;
3201 }
3202
/* Limits of the prefix scanner that follows. MAX_N_CHARS is the number of
   leading character positions that are tracked. MAX_N_BYTES is the number of
   slots reserved per position in the byte-set table: slot 0 holds the element
   count (or 255), so at most MAX_N_BYTES - 1 distinct byte values are kept. */
3203 #define MAX_N_CHARS 16
3204 #define MAX_N_BYTES 8
3205
3206 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3207 {
3208 pcre_uint8 len = bytes[0];
3209 int i;
3210
3211 if (len == 255)
3212 return;
3213
3214 if (len == 0)
3215 {
3216 bytes[0] = 1;
3217 bytes[1] = byte;
3218 return;
3219 }
3220
3221 for (i = len; i > 0; i--)
3222 if (bytes[i] == byte)
3223 return;
3224
3225 if (len >= MAX_N_BYTES - 1)
3226 {
3227 bytes[0] = 255;
3228 return;
3229 }
3230
3231 len++;
3232 bytes[len] = byte;
3233 bytes[0] = len;
3234 }
3235
/* Walks the compiled pattern starting at cc and collects information about
   the characters that can appear at the leading positions of a match.

   chars      two entries per position: chars[0] is the character value with
              the mask bits set, chars[1] accumulates the bits that differ
              between the alternatives seen so far (all ones means that the
              position is unconstrained).
   bytes      MAX_N_BYTES entries per position, maintained by
              add_prefix_byte(); bytes[0] is set to 255 for an unconstrained
              position.
   max_chars  number of positions which may still be filled in.
   rec_count  work budget shared by all nested invocations; it is decremented
              once per loop iteration and the function returns 0 as soon as
              it is exhausted.

   Returns the number of positions filled in by this invocation. The value
   returned by a nested invocation replaces max_chars in the caller. */
3236 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3237 {
3238 /* Recursive function, which scans prefix literals. */
3239 BOOL last, any, caseless;
3240 int len, repeat, len_save, consumed = 0;
3241 pcre_uint32 chr, mask;
3242 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other case of the current character. Its size depends on
   the number of code units a character may need in this build. */
3243 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3244 pcre_uchar othercase[8];
3245 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3246 pcre_uchar othercase[2];
3247 #else
3248 pcre_uchar othercase[1];
3249 #endif
3250
3251 repeat = 1;
3252 while (TRUE)
3253 {
/* Give up completely when the shared budget is used up. */
3254 if (*rec_count == 0)
3255 return 0;
3256 (*rec_count)--;
3257
/* Defaults for this opcode: 'last' stops the scan after the item has been
   recorded, 'any' records an unconstrained position, 'caseless' also records
   the other case of a literal. */
3258 last = TRUE;
3259 any = FALSE;
3260 caseless = FALSE;
3261
3262 switch (*cc)
3263 {
3264 case OP_CHARI:
3265 caseless = TRUE;
/* Fall through. */
3266 case OP_CHAR:
3267 last = FALSE;
3268 cc++;
3269 break;
3270
3271 case OP_SOD:
3272 case OP_SOM:
3273 case OP_SET_SOM:
3274 case OP_NOT_WORD_BOUNDARY:
3275 case OP_WORD_BOUNDARY:
3276 case OP_EODN:
3277 case OP_EOD:
3278 case OP_CIRC:
3279 case OP_CIRCM:
3280 case OP_DOLL:
3281 case OP_DOLLM:
3282 /* Zero width assertions. */
3283 cc++;
3284 continue;
3285
/* Assertion groups are skipped as a whole. */
3286 case OP_ASSERT:
3287 case OP_ASSERT_NOT:
3288 case OP_ASSERTBACK:
3289 case OP_ASSERTBACK_NOT:
3290 cc = bracketend(cc);
3291 continue;
3292
/* One or more: the character is recorded once and, because 'last' is still
   TRUE, the scan stops after it. */
3293 case OP_PLUSI:
3294 case OP_MINPLUSI:
3295 case OP_POSPLUSI:
3296 caseless = TRUE;
/* Fall through. */
3297 case OP_PLUS:
3298 case OP_MINPLUS:
3299 case OP_POSPLUS:
3300 cc++;
3301 break;
3302
3303 case OP_EXACTI:
3304 caseless = TRUE;
/* Fall through. */
3305 case OP_EXACT:
3306 repeat = GET2(cc, 1);
3307 last = FALSE;
3308 cc += 1 + IMM2_SIZE;
3309 break;
3310
/* Optional character: first scan what follows it (the character is absent),
   then fall out of the switch to merge the character itself into the same
   positions. */
3311 case OP_QUERYI:
3312 case OP_MINQUERYI:
3313 case OP_POSQUERYI:
3314 caseless = TRUE;
/* Fall through. */
3315 case OP_QUERY:
3316 case OP_MINQUERY:
3317 case OP_POSQUERY:
3318 len = 1;
3319 cc++;
3320 #ifdef SUPPORT_UTF
3321 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3322 #endif
3323 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3324 if (max_chars == 0)
3325 return consumed;
3326 last = FALSE;
3327 break;
3328
3329 case OP_KET:
3330 cc += 1 + LINK_SIZE;
3331 continue;
3332
/* Reaching an OP_ALT means that the current alternative has ended: follow the
   link stored after the opcode. */
3333 case OP_ALT:
3334 cc += GET(cc, 1);
3335 continue;
3336
/* Groups: every further alternative is scanned by a nested invocation into
   the same positions, then the loop continues with the first alternative. */
3337 case OP_ONCE:
3338 case OP_ONCE_NC:
3339 case OP_BRA:
3340 case OP_BRAPOS:
3341 case OP_CBRA:
3342 case OP_CBRAPOS:
3343 alternative = cc + GET(cc, 1);
3344 while (*alternative == OP_ALT)
3345 {
3346 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3347 if (max_chars == 0)
3348 return consumed;
3349 alternative += GET(alternative, 1);
3350 }
3351
/* Capturing groups carry an extra IMM2_SIZE field after the link. */
3352 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3353 cc += IMM2_SIZE;
3354 cc += 1 + LINK_SIZE;
3355 continue;
3356
/* Classes and character types are recorded as unconstrained positions. In
   the UTF builds guarded below the scan stops instead, unless (8 bit only)
   is_char7_bitset() accepts the bit set. */
3357 case OP_CLASS:
3358 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3359 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3360 #endif
3361 any = TRUE;
3362 cc += 1 + 32 / sizeof(pcre_uchar);
3363 break;
3364
3365 case OP_NCLASS:
3366 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3367 if (common->utf) return consumed;
3368 #endif
3369 any = TRUE;
3370 cc += 1 + 32 / sizeof(pcre_uchar);
3371 break;
3372
3373 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3374 case OP_XCLASS:
3375 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3376 if (common->utf) return consumed;
3377 #endif
3378 any = TRUE;
3379 cc += GET(cc, 1);
3380 break;
3381 #endif
3382
3383 case OP_DIGIT:
3384 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3385 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3386 return consumed;
3387 #endif
3388 any = TRUE;
3389 cc++;
3390 break;
3391
3392 case OP_WHITESPACE:
3393 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3394 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3395 return consumed;
3396 #endif
3397 any = TRUE;
3398 cc++;
3399 break;
3400
3401 case OP_WORDCHAR:
3402 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3403 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3404 return consumed;
3405 #endif
3406 any = TRUE;
3407 cc++;
3408 break;
3409
/* The first increment steps over the opcode, the shared increment below then
   steps over the code unit which follows it. */
3410 case OP_NOT:
3411 case OP_NOTI:
3412 cc++;
3413 /* Fall through. */
3414 case OP_NOT_DIGIT:
3415 case OP_NOT_WHITESPACE:
3416 case OP_NOT_WORDCHAR:
3417 case OP_ANY:
3418 case OP_ALLANY:
3419 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3420 if (common->utf) return consumed;
3421 #endif
3422 any = TRUE;
3423 cc++;
3424 break;
3425
3426 #ifdef SUPPORT_UCP
3427 case OP_NOTPROP:
3428 case OP_PROP:
3429 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3430 if (common->utf) return consumed;
3431 #endif
3432 any = TRUE;
3433 cc += 1 + 2;
3434 break;
3435 #endif
3436
/* Only the repeat count is stored here; it is applied to the item which is
   processed by the next iteration. */
3437 case OP_TYPEEXACT:
3438 repeat = GET2(cc, 1);
3439 cc += 1 + IMM2_SIZE;
3440 continue;
3441
3442 case OP_NOTEXACT:
3443 case OP_NOTEXACTI:
3444 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3445 if (common->utf) return consumed;
3446 #endif
3447 any = TRUE;
3448 repeat = GET2(cc, 1);
3449 cc += 1 + IMM2_SIZE + 1;
3450 break;
3451
/* Any other opcode ends the prefix. */
3452 default:
3453 return consumed;
3454 }
3455
3456 if (any)
3457 {
/* An all ones mask covers every bit of a code unit in this build. */
3458 #if defined COMPILE_PCRE8
3459 mask = 0xff;
3460 #elif defined COMPILE_PCRE16
3461 mask = 0xffff;
3462 #elif defined COMPILE_PCRE32
3463 mask = 0xffffffff;
3464 #else
3465 SLJIT_ASSERT_STOP();
3466 #endif
3467
/* Mark 'repeat' consecutive positions as unconstrained. */
3468 do
3469 {
3470 chars[0] = mask;
3471 chars[1] = mask;
3472 bytes[0] = 255;
3473
3474 consumed++;
3475 if (--max_chars == 0)
3476 return consumed;
3477 chars += 2;
3478 bytes += MAX_N_BYTES;
3479 }
3480 while (--repeat > 0);
3481
3482 repeat = 1;
3483 continue;
3484 }
3485
/* Literal character: cc points to its first code unit and len is the number
   of code units it occupies. */
3486 len = 1;
3487 #ifdef SUPPORT_UTF
3488 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3489 #endif
3490
3491 if (caseless && char_has_othercase(common, cc))
3492 {
3493 #ifdef SUPPORT_UTF
3494 if (common->utf)
3495 {
3496 GETCHAR(chr, cc);
/* The scan stops when the other case is encoded with a different number of
   code units. */
3497 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3498 return consumed;
3499 }
3500 else
3501 #endif
3502 {
3503 chr = *cc;
3504 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3505 }
3506 }
3507 else
3508 caseless = FALSE;
3509
/* Saved so that the same character can be recorded 'repeat' times. */
3510 len_save = len;
3511 cc_save = cc;
3512 while (TRUE)
3513 {
3514 oc = othercase;
/* One position is filled in for each code unit of the character. */
3515 do
3516 {
3517 chr = *cc;
3518 #ifdef COMPILE_PCRE32
3519 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3520 return consumed;
3521 #endif
3522 add_prefix_byte((pcre_uint8)chr, bytes);
3523
3524 mask = 0;
3525 if (caseless)
3526 {
3527 add_prefix_byte((pcre_uint8)*oc, bytes);
/* The bits in which the two cases differ become "don't care" bits. */
3528 mask = *cc ^ *oc;
3529 chr |= mask;
3530 }
3531
/* A position still holding NOTACHAR (with a zero mask in the 32 bit build)
   has not been written yet; otherwise the new value is merged into the
   existing one. */
3532 #ifdef COMPILE_PCRE32
3533 if (chars[0] == NOTACHAR && chars[1] == 0)
3534 #else
3535 if (chars[0] == NOTACHAR)
3536 #endif
3537 {
3538 chars[0] = chr;
3539 chars[1] = mask;
3540 }
3541 else
3542 {
3543 mask |= chars[0] ^ chr;
3544 chr |= mask;
3545 chars[0] = chr;
3546 chars[1] |= mask;
3547 }
3548
3549 len--;
3550 consumed++;
3551 if (--max_chars == 0)
3552 return consumed;
3553 chars += 2;
3554 bytes += MAX_N_BYTES;
3555 cc++;
3556 oc++;
3557 }
3558 while (len > 0);
3559
3560 if (--repeat == 0)
3561 break;
3562
/* Rewind to the start of the character for the next repetition. */
3563 len = len_save;
3564 cc = cc_save;
3565 }
3566
3567 repeat = 1;
3568 if (last)
3569 return consumed;
3570 }
3571 }
3572
/* Generates a start-of-match search based on the first few characters of the
   pattern, as collected by scan_prefix().

   Two kinds of checks may be emitted:
   - a table driven skip which uses a run of positions whose possible byte
     values are known (range_right / update_table),
   - direct comparisons of up to three positions whose mask has at most two
     bits set (offsets[0..2]).

   Returns FALSE when no code was generated and TRUE otherwise. TRUE is also
   returned when the table allocation fails; presumably the failure is
   recorded by allocate_read_only_data() - TODO confirm. */
3573 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3574 {
3575 DEFINE_COMPILER;
3576 struct sljit_label *start;
3577 struct sljit_jump *quit;
3578 pcre_uint32 chars[MAX_N_CHARS * 2];
3579 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3580 pcre_uint8 ones[MAX_N_CHARS];
3581 int offsets[3];
3582 pcre_uint32 mask;
3583 pcre_uint8 *byte_set, *byte_set_end;
3584 int i, max, from;
3585 int range_right = -1, range_len = 3 - 1;
3586 sljit_ub *update_table = NULL;
3587 BOOL in_range;
3588 pcre_uint32 rec_count;
3589
/* Mark every position as "not written yet" with an empty byte set. */
3590 for (i = 0; i < MAX_N_CHARS; i++)
3591 {
3592 chars[i << 1] = NOTACHAR;
3593 chars[(i << 1) + 1] = 0;
3594 bytes[i * MAX_N_BYTES] = 0;
3595 }
3596
3597 rec_count = 10000;
3598 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3599
3600 if (max <= 1)
3601 return FALSE;
3602
/* ones[i] is the number of set bits in the mask of position i, counted four
   bits at a time. */
3603 for (i = 0; i < max; i++)
3604 {
3605 mask = chars[(i << 1) + 1];
3606 ones[i] = ones_in_half_byte[mask & 0xf];
3607 mask >>= 4;
3608 while (mask != 0)
3609 {
3610 ones[i] += ones_in_half_byte[mask & 0xf];
3611 mask >>= 4;
3612 }
3613 }
3614
/* Search for the longest run of positions with a known byte set (count below
   255). A run is accepted only if it is longer than the initial range_len
   (2) and its last position has at most 4 byte values. range_right is the
   last position of the best run, range_len is its length. */
3615 in_range = FALSE;
3616 from = 0; /* Prevent compiler "uninitialized" warning */
3617 for (i = 0; i <= max; i++)
3618 {
3619 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3620 {
3621 range_len = i - from;
3622 range_right = i - 1;
3623 }
3624
3625 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3626 {
3627 if (!in_range)
3628 {
3629 in_range = TRUE;
3630 from = i;
3631 }
3632 }
3633 else if (in_range)
3634 in_range = FALSE;
3635 }
3636
3637 if (range_right >= 0)
3638 {
/* update_table maps a byte value to a skip distance in bytes. The default is
   the whole range length; a byte which may occur i positions before
   range_right lowers its entry to IN_UCHARS(i). */
3639 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3640 if (update_table == NULL)
3641 return TRUE;
3642 memset(update_table, IN_UCHARS(range_len), 256);
3643
3644 for (i = 0; i < range_len; i++)
3645 {
3646 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3647 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3648 byte_set_end = byte_set + byte_set[0];
3649 byte_set++;
3650 while (byte_set <= byte_set_end)
3651 {
3652 if (update_table[*byte_set] > IN_UCHARS(i))
3653 update_table[*byte_set] = IN_UCHARS(i);
3654 byte_set++;
3655 }
3656 }
3657 }
3658
/* offsets[0]: the first position whose mask has at most two bits set. */
3659 offsets[0] = -1;
3660 /* Scan forward. */
3661 for (i = 0; i < max; i++)
3662 if (ones[i] <= 2) {
3663 offsets[0] = i;
3664 break;
3665 }
3666
3667 if (offsets[0] < 0 && range_right < 0)
3668 return FALSE;
3669
3670 if (offsets[0] >= 0)
3671 {
/* offsets[1]: the last such position after offsets[0], excluding the position
   which is already covered by the table lookup. */
3672 /* Scan backward. */
3673 offsets[1] = -1;
3674 for (i = max - 1; i > offsets[0]; i--)
3675 if (ones[i] <= 2 && i != range_right)
3676 {
3677 offsets[1] = i;
3678 break;
3679 }
3680
3681 /* This case is handled better by fast_forward_first_char. */
3682 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3683 return FALSE;
3684
3685 offsets[2] = -1;
3686 /* We only search for a middle character if there is no range check. */
3687 if (offsets[1] >= 0 && range_right == -1)
3688 {
3689 /* Scan from middle. */
3690 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3691 if (ones[i] <= 2)
3692 {
3693 offsets[2] = i;
3694 break;
3695 }
3696
3697 if (offsets[2] == -1)
3698 {
3699 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3700 if (ones[i] <= 2)
3701 {
3702 offsets[2] = i;
3703 break;
3704 }
3705 }
3706 }
3707
3708 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3709 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3710
/* Copy the selected value/mask pairs to fixed slots: chars[0..1] belongs to
   offsets[0], chars[2..3] to offsets[2] and chars[4..5] to offsets[1]. */
3711 chars[0] = chars[offsets[0] << 1];
3712 chars[1] = chars[(offsets[0] << 1) + 1];
3713 if (offsets[2] >= 0)
3714 {
3715 chars[2] = chars[offsets[2] << 1];
3716 chars[3] = chars[(offsets[2] << 1) + 1];
3717 }
3718 if (offsets[1] >= 0)
3719 {
3720 chars[4] = chars[offsets[1] << 1];
3721 chars[5] = chars[(offsets[1] << 1) + 1];
3722 }
3723 }
3724
/* Code generation starts here. STR_END is lowered by max - 1 characters for
   the duration of the search, so the loads at positive offsets from STR_PTR
   stay below the original STR_END. */
3725 max -= 1;
3726 if (firstline)
3727 {
/* The original STR_END is saved in TMP3. The temporary STR_END is the smaller
   of (STR_END - max characters) and the stored first_line_end value. */
3728 SLJIT_ASSERT(common->first_line_end != 0);
3729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3730 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3731 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3732 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3733 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3734 JUMPHERE(quit);
3735 }
3736 else
3737 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3738
/* Except on x86-32, the table address is kept in RETURN_ADDR during the
   search loop. */
3739 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3740 if (range_right >= 0)
3741 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3742 #endif
3743
3744 start = LABEL();
3745 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3746
3747 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3748
3749 if (range_right >= 0)
3750 {
/* Load a single byte of the character at position range_right. The second
   form adjusts the address for builds that are neither 8 bit nor little
   endian. */
3751 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3752 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3753 #else
3754 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3755 #endif
3756
/* Translate the byte to a skip distance, advance STR_PTR by it and restart
   the loop while the distance is not zero. */
3757 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3758 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3759 #else
3760 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3761 #endif
3762 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3763 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3764 }
3765
3766 if (offsets[0] >= 0)
3767 {
/* Both characters are loaded before STR_PTR is advanced by one character, so
   a failed comparison can jump straight back to 'start'. */
3768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3769 if (offsets[1] >= 0)
3770 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3772
/* Setting the mask bits before the comparison accepts every variant of the
   character with a single compare. */
3773 if (chars[1] != 0)
3774 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3775 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been advanced, hence the "- 1". */
3776 if (offsets[2] >= 0)
3777 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3778
3779 if (offsets[1] >= 0)
3780 {
3781 if (chars[5] != 0)
3782 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3783 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3784 }
3785
3786 if (offsets[2] >= 0)
3787 {
3788 if (chars[3] != 0)
3789 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3790 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3791 }
/* Every comparison succeeded: undo the early increment. */
3792 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3793 }
3794
3795 JUMPHERE(quit);
3796
/* Restore STR_END. In firstline mode with a table lookup, STR_PTR is also
   limited to the stored first_line_end value. */
3797 if (firstline)
3798 {
3799 if (range_right >= 0)
3800 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3801 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3802 if (range_right >= 0)
3803 {
3804 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3805 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3806 JUMPHERE(quit);
3807 }
3808 }
3809 else
3810 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3811 return TRUE;
3812 }
3813
/* The prefix scanner limits are removed here, so they are not visible to the
   code that follows. */
3814 #undef MAX_N_CHARS
3815 #undef MAX_N_BYTES
3816
/* Generates a loop which advances STR_PTR one character at a time until the
   character at STR_PTR is first_char (or, when caseless is set, its other
   case), or until STR_PTR reaches STR_END.

   When firstline is set, STR_END is replaced by the stored first_line_end
   value during the search; the original value is kept in TMP3 and restored
   at the end. */
3817 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3818 {
3819 DEFINE_COMPILER;
3820 struct sljit_label *start;
3821 struct sljit_jump *quit;
3822 struct sljit_jump *found;
3823 pcre_uchar oc, bit;
3824
3825 if (firstline)
3826 {
3827 SLJIT_ASSERT(common->first_line_end != 0);
3828 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3829 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3830 }
3831
3832 start = LABEL();
3833 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3835
/* oc is the other case of first_char; it stays equal to first_char when the
   search is case sensitive. */
3836 oc = first_char;
3837 if (caseless)
3838 {
3839 oc = TABLE_GET(first_char, common->fcc, first_char);
/* In non 8 bit builds with UCP support, characters above 127 take their other
   case from the Unicode data in UTF mode. */
3840 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3841 if (first_char > 127 && common->utf)
3842 oc = UCD_OTHERCASE(first_char);
3843 #endif
3844 }
3845 if (first_char == oc)
3846 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3847 else
3848 {
3849 bit = first_char ^ oc;
3850 if (is_powerof2(bit))
3851 {
/* The two cases differ in a single bit: set that bit and compare once. */
3852 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3853 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3854 }
3855 else
3856 {
/* Otherwise compare with both values and combine the two results. */
3857 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3858 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3859 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3860 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3861 found = JUMP(SLJIT_NOT_ZERO);
3862 }
3863 }
3864
/* No match at this position: step to the next character and retry. */
3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3866 JUMPTO(SLJIT_JUMP, start);
3867 JUMPHERE(found);
3868 JUMPHERE(quit);
3869
3870 if (firstline)
3871 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3872 }
3873
/* Generates code which moves STR_PTR forward to the position following the
   next newline. Nothing is skipped when STR_PTR is not above the 'str' field
   of the arguments.

   When firstline is set, STR_END is replaced by the stored first_line_end
   value during the search; the original value is kept in TMP3 and restored
   at the end. */
3874 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3875 {
3876 DEFINE_COMPILER;
3877 struct sljit_label *loop;
3878 struct sljit_jump *lastchar;
3879 struct sljit_jump *firstchar;
3880 struct sljit_jump *quit;
3881 struct sljit_jump *foundcr = NULL;
3882 struct sljit_jump *notfoundnl;
3883 jump_list *newline = NULL;
3884
3885 if (firstline)
3886 {
3887 SLJIT_ASSERT(common->first_line_end != 0);
3888 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3889 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3890 }
3891
/* Fixed newline which consists of two characters (common->newline > 255). */
3892 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3893 {
3894 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3895 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3898 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3899
/* Step back by one character, but only if STR_PTR is at least two characters
   after 'begin'. TMP2 becomes 0 or 1 and is scaled to the character size. */
3900 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3901 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3902 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3905 #endif
3906 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3907
/* Advance until the two characters before STR_PTR are the newline pair, or
   until STR_END is reached. */
3908 loop = LABEL();
3909 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3910 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3911 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3912 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3913 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3914 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3915
3916 JUMPHERE(quit);
3917 JUMPHERE(firstchar);
3918 JUMPHERE(lastchar);
3919
3920 if (firstline)
3921 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3922 return;
3923 }
3924
/* All other newline types. */
3925 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3927 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3928 skip_char_back(common);
3929
/* The loop label is also published in common->ff_newline_shortcut. */
3930 loop = LABEL();
3931 common->ff_newline_shortcut = loop;
3932
3933 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3934 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3935 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3936 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected by check_newlinechar() are bound to the loop label;
   presumably they are taken when the character is not a newline - TODO
   confirm against check_newlinechar(). */
3937 check_newlinechar(common, common->nltype, &newline, FALSE);
3938 set_jumps(newline, loop);
3939
3940 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3941 {
3942 quit = JUMP(SLJIT_JUMP);
3943 JUMPHERE(foundcr);
/* A CR was read: if the next character is NL, step over it as well. TMP1
   becomes 0 or 1 and is scaled to the character size. */
3944 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3945 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3946 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3947 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3948 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3949 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3950 #endif
3951 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3952 JUMPHERE(notfoundnl);
3953 JUMPHERE(quit);
3954 }
3955 JUMPHERE(lastchar);
3956 JUMPHERE(firstchar);
3957
3958 if (firstline)
3959 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3960 }
3961
/* Forward declaration: check_class_ranges() is defined later in this file,
   but it is already called by fast_forward_start_bits() below. */
3962 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3963
/* Generates a loop which advances STR_PTR until the character at STR_PTR has
   its bit set in the start_bits bit map, or until STR_PTR reaches STR_END.

   When firstline is set, STR_END is replaced by the stored first_line_end
   value during the search. The original value is kept in RETURN_ADDR here,
   because TMP3 holds a copy of the character in UTF mode. */
3964 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3965 {
3966 DEFINE_COMPILER;
3967 struct sljit_label *start;
3968 struct sljit_jump *quit;
3969 struct sljit_jump *found = NULL;
3970 jump_list *matches = NULL;
3971 #ifndef COMPILE_PCRE8
3972 struct sljit_jump *jump;
3973 #endif
3974
3975 if (firstline)
3976 {
3977 SLJIT_ASSERT(common->first_line_end != 0);
3978 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3979 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3980 }
3981
3982 start = LABEL();
3983 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3984 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* TMP1 is modified by the tests below, so the character is saved for the UTF
   length computation. */
3985 #ifdef SUPPORT_UTF
3986 if (common->utf)
3987 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3988 #endif
3989
/* First try to express the bit map as a few range checks (called with invert
   set, the jumps are collected in 'matches'). The generic bit test is only
   emitted when that is not possible. */
3990 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3991 {
/* In the 16 and 32 bit builds every character above 254 is tested as 255. */
3992 #ifndef COMPILE_PCRE8
3993 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3995 JUMPHERE(jump);
3996 #endif
/* Test bit (c & 7) of the byte start_bits[c >> 3]. */
3997 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3998 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3999 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4000 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4001 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4002 found = JUMP(SLJIT_NOT_ZERO);
4003 }
4004
/* No match at this position: step to the next character. */
4005 #ifdef SUPPORT_UTF
4006 if (common->utf)
4007 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4008 #endif
4009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4010 #ifdef SUPPORT_UTF
4011 #if defined COMPILE_PCRE8
4012 if (common->utf)
4013 {
/* For a first byte of 0xc0 or above, an extra length is read from
   utf8_table4 and added to STR_PTR. */
4014 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4015 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4017 }
4018 #elif defined COMPILE_PCRE16
4019 if (common->utf)
4020 {
/* When (unit & 0xfc00) is 0xd800, one more 16 bit unit (two bytes) is
   skipped. */
4021 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4023 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4024 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4025 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4027 }
4028 #endif /* COMPILE_PCRE[8|16] */
4029 #endif /* SUPPORT_UTF */
4030 JUMPTO(SLJIT_JUMP, start);
/* Every exit of the loop lands here. */
4031 if (found != NULL)
4032 JUMPHERE(found);
4033 if (matches != NULL)
4034 set_jumps(matches, LABEL());
4035 JUMPHERE(quit);
4036
4037 if (firstline)
4038 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4039 }
4040
/* Generates a forward search for req_char (or its other case when caseless is
   set) between STR_PTR and STR_END.

   The search is skipped when STR_PTR + REQ_BYTE_MAX is below STR_END, or when
   STR_PTR is below the position stored in the req_char_ptr slot. When the
   character is found, its position is stored in that slot.

   has_firstchar  when set, the search starts one character after STR_PTR.

   Returns the jump which is taken when STR_END is reached without finding the
   character; binding it is left to the caller. */
4041 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4042 {
4043 DEFINE_COMPILER;
4044 struct sljit_label *loop;
4045 struct sljit_jump *toolong;
4046 struct sljit_jump *alreadyfound;
4047 struct sljit_jump *found;
4048 struct sljit_jump *foundoc = NULL;
4049 struct sljit_jump *notfound;
4050 pcre_uint32 oc, bit;
4051
4052 SLJIT_ASSERT(common->req_char_ptr != 0);
4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4054 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4055 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4056 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4057
/* TMP1 is the search position from here on. */
4058 if (has_firstchar)
4059 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4060 else
4061 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4062
4063 loop = LABEL();
4064 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4065
4066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the
   search is case sensitive. */
4067 oc = req_char;
4068 if (caseless)
4069 {
4070 oc = TABLE_GET(req_char, common->fcc, req_char);
4071 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4072 if (req_char > 127 && common->utf)
4073 oc = UCD_OTHERCASE(req_char);
4074 #endif
4075 }
4076 if (req_char == oc)
4077 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4078 else
4079 {
4080 bit = req_char ^ oc;
4081 if (is_powerof2(bit))
4082 {
/* The two cases differ in a single bit: set that bit and compare once. */
4083 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4084 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4085 }
4086 else
4087 {
4088 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4089 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4090 }
4091 }
4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4093 JUMPTO(SLJIT_JUMP, loop);
4094
4095 JUMPHERE(found);
4096 if (foundoc)
4097 JUMPHERE(foundoc);
/* Remember where the character was found. */
4098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4099 JUMPHERE(alreadyfound);
4100 JUMPHERE(toolong);
4101 return notfound;
4102 }
4103
/* Generates a helper routine (fast call, return address in RETURN_ADDR) which
   walks the records starting at STACK_TOP and copies the saved values back
   into the local area addressed through TMP3.

   Each record starts with a word which selects its layout:
     > 0  offset of a local slot, followed by two values which are copied to
          that slot and to the next one (three words in total),
     < 0  negated offset of a local slot, followed by one value (two words in
          total),
     = 0  end of the records: the routine returns. */
4104 static void do_revertframes(compiler_common *common)
4105 {
4106 DEFINE_COMPILER;
4107 struct sljit_jump *jump;
4108 struct sljit_label *mainloop;
4109
4110 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4111 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4112 GET_LOCAL_BASE(TMP3, 0, 0);
4113
4114 /* Drop frames until we reach STACK_TOP. */
4115 mainloop = LABEL();
4116 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* The flags of this comparison with zero are used by both conditional jumps
   below. */
4117 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4118 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4119
/* Positive word: restore two consecutive local slots. */
4120 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4121 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4122 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4123 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4124 JUMPTO(SLJIT_JUMP, mainloop);
4125
4126 JUMPHERE(jump);
4127 jump = JUMP(SLJIT_SIG_LESS);
4128 /* End of dropping frames. */
4129 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4130
/* Negative word: restore a single local slot. */
4131 JUMPHERE(jump);
4132 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4133 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4134 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4135 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4136 JUMPTO(SLJIT_JUMP, mainloop);
4137 }
4138
/* Generates a helper routine (fast call, return address kept in LOCALS0)
   which tests whether STR_PTR is at a word boundary.

   The "word character" property of the previous character is stored in
   LOCALS1 (0 when STR_PTR is not above 'begin'), the property of the current
   character is computed in TMP2 (0 when the end-of-subject check jumps out).
   The routine returns with the flags set by XOR-ing the two values: the
   result is non-zero exactly when they differ. */
4139 static void check_wordboundary(compiler_common *common)
4140 {
4141 DEFINE_COMPILER;
4142 struct sljit_jump *skipread;
4143 jump_list *skipread_list = NULL;
4144 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4145 struct sljit_jump *jump;
4146 #endif
4147
/* The code below shifts the character type right by 4 to isolate the
   ctype_word bit, so the constant must be 0x10. */
4148 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4149
4150 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4151 /* Get type of the previous char, and put it to LOCALS1. */
4152 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4155 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back by one character and read it again, which leaves STR_PTR at its
   original position. */
4156 skip_char_back(common);
4157 check_start_used_ptr(common);
4158 read_char(common);
4159
4160 /* Testing char type. */
4161 #ifdef SUPPORT_UCP
4162 if (common->use_ucp)
4163 {
/* TMP2 becomes 1 for an underscore and for characters whose type is in one of
   the two tested ranges (ucp_Ll .. ucp_Lu and ucp_Nd .. ucp_No). */
4164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4165 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4166 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4167 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4168 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4170 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4171 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4172 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4173 JUMPHERE(jump);
4174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4175 }
4176 else
4177 #endif
4178 {
/* Table based test: characters above 255 skip the lookup, so LOCALS1 keeps
   its zero value for them. */
4179 #ifndef COMPILE_PCRE8
4180 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4181 #elif defined SUPPORT_UTF
4182 /* Here LOCALS1 has already been zeroed. */
4183 jump = NULL;
4184 if (common->utf)
4185 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4186 #endif /* COMPILE_PCRE8 */
4187 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4188 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4189 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4191 #ifndef COMPILE_PCRE8
4192 JUMPHERE(jump);
4193 #elif defined SUPPORT_UTF
4194 if (jump != NULL)
4195 JUMPHERE(jump);
4196 #endif /* COMPILE_PCRE8 */
4197 }
4198 JUMPHERE(skipread);
4199
/* Now the current character. TMP2 is preset to 0 for the case when the
   end-of-subject check jumps past the test. */
4200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4201 check_str_end(common, &skipread_list);
4202 peek_char(common, READ_CHAR_MAX);
4203
4204 /* Testing char type. This is a code duplication. */
4205 #ifdef SUPPORT_UCP
4206 if (common->use_ucp)
4207 {
4208 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4209 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4210 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4211 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4212 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4213 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4214 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4216 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4217 JUMPHERE(jump);
4218 }
4219 else
4220 #endif
4221 {
4222 #ifndef COMPILE_PCRE8
4223 /* TMP2 may be destroyed by peek_char. */
4224 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4225 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4226 #elif defined SUPPORT_UTF
4227 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4228 jump = NULL;
4229 if (common->utf)
4230 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4231 #endif
4232 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4233 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4234 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4235 #ifndef COMPILE_PCRE8
4236 JUMPHERE(jump);
4237 #elif defined SUPPORT_UTF
4238 if (jump != NULL)
4239 JUMPHERE(jump);
4240 #endif /* COMPILE_PCRE8 */
4241 }
4242 set_jumps(skipread_list, LABEL());
4243
/* Compare the two properties; the caller examines the resulting flags. */
4244 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4245 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4246 }
4247
4248 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4249 {
4250 DEFINE_COMPILER;
4251 int ranges[MAX_RANGE_SIZE];
4252 pcre_uint8 bit, cbit, all;
4253 int i, byte, length = 0;
4254
4255 bit = bits[0] & 0x1;
4256 /* All bits will be zero or one (since bit is zero or one). */
4257 all = -bit;
4258
4259 for (i = 0; i < 256; )
4260 {
4261 byte = i >> 3;
4262 if ((i & 0x7) == 0 && bits[byte] == all)
4263 i += 8;
4264 else
4265 {
4266 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4267 if (cbit != bit)
4268 {
4269 if (length >= MAX_RANGE_SIZE)
4270 return FALSE;
4271 ranges[length] = i;
4272 length++;
4273 bit = cbit;
4274 all = -cbit;
4275 }
4276 i++;
4277 }
4278 }
4279
4280 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4281 {
4282 if (length >= MAX_RANGE_SIZE)
4283 return FALSE;
4284 ranges[length] = 256;
4285 length++;
4286 }
4287
4288 if (length < 0 || length > 4)
4289 return FALSE;
4290
4291 bit = bits[0] & 0x1;
4292 if (invert) bit ^= 0x1;
4293
4294 /* No character is accepted. */
4295 if (length == 0 && bit == 0)
4296 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4297
4298 switch(length)
4299 {
4300 case 0:
4301 /* When bit != 0, all characters are accepted. */
4302 return TRUE;
4303
4304 case 1:
4305 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4306 return TRUE;
4307
4308 case 2:
4309 if (ranges[0] + 1 != ranges[1])
4310 {
4311 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4312 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4313 }
4314 else
4315 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4316 return TRUE;
4317
4318 case 3:
4319 if (bit != 0)
4320 {
4321 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4322 if (ranges[0] + 1 != ranges[1])
4323 {
4324 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4325 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4326 }
4327 else
4328 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4329 return TRUE;
4330 }
4331
4332 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4333 if (ranges[1] + 1 != ranges[2])
4334 {
4335 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4336 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4337 }
4338 else
4339 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4340 return TRUE;
4341
4342 case 4:
4343 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4344 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4345 && is_powerof2(ranges[2] - ranges[0]))
4346 {
4347 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4348 if (ranges[2] + 1 != ranges[3])
4349 {
4350 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4351 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4352 }
4353 else
4354 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4355 return TRUE;
4356 }
4357
4358 if (bit != 0)
4359 {
4360 i = 0;
4361 if (ranges[0] + 1 != ranges[1])
4362 {
4363 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4364 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4365 i = ranges[0];
4366 }
4367 else
4368 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4369
4370 if (ranges[2] + 1 != ranges[3])
4371 {
4372 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4373 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4374 }
4375 else
4376 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4377 return TRUE;
4378 }
4379
4380 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4381 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4382 if (ranges[1] + 1 != ranges[2])
4383 {
4384 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4385 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4386 }
4387 else
4388 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4389 return TRUE;
4390
4391 default:
4392 SLJIT_ASSERT_STOP();
4393 return FALSE;
4394 }
4395 }
4396
/* Emits a helper routine, entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return, which tests the character in TMP1 against 0x0a-0x0d
and 0x85, and additionally against 0x2028 and 0x2029 when the extra code is
compiled in (and, in the 8 bit library, common->utf is set). The result is
accumulated in TMP2; both TMP1 and TMP2 are overwritten. */
4397 static void check_anynewline(compiler_common *common)
4398 {
4399 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4400 DEFINE_COMPILER;
4401
4402 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4403
/* TMP1 is rebased to 0x0a, so 0x0a-0x0d becomes a single compare with 3. */
4404 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4405 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4406 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
/* Compare with 0x85; the result is merged into TMP2 by a later OP_FLAGS. */
4407 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4408 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4409 #ifdef COMPILE_PCRE8
4410 if (common->utf)
4411 {
4412 #endif
4413 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one
compare covers both characters. */
4414 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4415 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4416 #ifdef COMPILE_PCRE8
4417 }
4418 #endif
4419 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last compare into TMP2. SLJIT_SET_E is requested here,
presumably so that the caller can branch on the flags - confirm at call sites. */
4420 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4421 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4422 }
4423
/* Emits a helper routine, entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return, which tests the character in TMP1 against 0x09, 0x20
and 0xa0. When the extra code is compiled in (and, in the 8 bit library,
common->utf is set) it also tests 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f and 0x3000. The result is accumulated in TMP2. TMP1 is only modified
by the extended part. */
4424 static void check_hspace(compiler_common *common)
4425 {
4426 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
4427 DEFINE_COMPILER;
4428
4429 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4430
4431 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4432 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4433 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4434 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* The result of this compare is merged into TMP2 by the next OP_FLAGS,
which is either the first one of the extended part or the final one. */
4435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4436 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4437 #ifdef COMPILE_PCRE8
4438 if (common->utf)
4439 {
4440 #endif
4441 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4443 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4445 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* TMP1 is rebased to 0x2000 here; the remaining constants are relative to it. */
4446 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4447 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4448 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4449 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4452 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4454 #ifdef COMPILE_PCRE8
4455 }
4456 #endif
4457 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last compare into TMP2. SLJIT_SET_E is requested here,
presumably so that the caller can branch on the flags - confirm at call sites. */
4458 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4459
4460 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4461 }
4462
/* Emits a helper routine, entered with sljit_emit_fast_enter and left with
sljit_emit_fast_return, which tests the character in TMP1 against 0x0a-0x0d
and 0x85, and additionally against 0x2028 and 0x2029 when the extra code is
compiled in (and, in the 8 bit library, common->utf is set). The result is
accumulated in TMP2; both TMP1 and TMP2 are overwritten. The tested set is
the same as in check_anynewline. */
4463 static void check_vspace(compiler_common *common)
4464 {
4465 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
4466 DEFINE_COMPILER;
4467
4468 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4469
/* TMP1 is rebased to 0x0a, so 0x0a-0x0d becomes a single compare with 3. */
4470 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4471 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4472 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4473 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4474 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4475 #ifdef COMPILE_PCRE8
4476 if (common->utf)
4477 {
4478 #endif
/* NOTE(review): unlike check_anynewline, SLJIT_SET_E is requested on this
intermediate OP_FLAGS as well; a SUB with SLJIT_SET_E follows before the
next flag use, so the extra request looks redundant - confirm. */
4479 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one
compare covers both characters. */
4480 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4481 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4482 #ifdef COMPILE_PCRE8
4483 }
4484 #endif
4485 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4486 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4487
4488 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4489 }
4490
/* Inside do_casefulcmp and do_caselesscmp the STR_END and STACK_TOP
registers are used as temporaries for the two characters being compared.
Both helpers save the original register values on entry and restore them
before returning. */
4491 #define CHAR1 STR_END
4492 #define CHAR2 STACK_TOP
4493
/* Emits a helper routine (fast enter / fast return) which compares two
character sequences exactly. On entry TMP1 points to the first sequence,
TMP2 holds the length (it is decremented by IN_UCHARS(1) per character),
and STR_PTR points TMP2 units after the start of the second sequence. The
loop stops at the first difference or when TMP2 reaches zero. CHAR1 and
CHAR2 (STR_END and STACK_TOP) are preserved through TMP3 and LOCALS0. */
4494 static void do_casefulcmp(compiler_common *common)
4495 {
4496 DEFINE_COMPILER;
4497 struct sljit_jump *jump;
4498 struct sljit_label *label;
4499
4500 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Move STR_PTR back to the start of the second sequence. */
4501 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers which are reused as CHAR1 and CHAR2. */
4502 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because the loads in the loop
use an offset of IN_UCHARS(1); MOVU_UCHAR presumably also advances the base
register by that offset - confirm against the sljit documentation. */
4504 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4505 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4506
4507 label = LABEL();
4508 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4509 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop on the first mismatch; TMP2 has not been decremented for
the mismatching character. */
4510 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4511 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4512 JUMPTO(SLJIT_NOT_ZERO, label);
4513
4514 JUMPHERE(jump);
/* Undo the initial one character adjustment of STR_PTR and restore the
saved registers. */
4515 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4516 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4517 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4518 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4519 }
4520
/* do_caselesscmp keeps the address of the lower casing table (common->lcc)
in the STACK_LIMIT register; the original value is saved and restored. */
4521 #define LCC_TABLE STACK_LIMIT
4522
/* Emits a helper routine (fast enter / fast return) which compares two
character sequences after mapping each character through the common->lcc
table. The register usage on entry is the same as in do_casefulcmp: TMP1
points to the first sequence, TMP2 holds the length, and STR_PTR points TMP2
units after the start of the second sequence. In the 16 and 32 bit libraries
characters above 255 are compared without table lookup. LCC_TABLE, CHAR1 and
CHAR2 are preserved through TMP3, LOCALS0 and LOCALS1. */
4523 static void do_caselesscmp(compiler_common *common)
4524 {
4525 DEFINE_COMPILER;
4526 struct sljit_jump *jump;
4527 struct sljit_label *label;
4528
4529 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Move STR_PTR back to the start of the second sequence. */
4530 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4531
/* Save the three registers which are reused by this routine. */
4532 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4535 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one character back because the loads in the loop
use an offset of IN_UCHARS(1). */
4536 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4537 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4538
4539 label = LABEL();
4540 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4541 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4542 #ifndef COMPILE_PCRE8
/* The table lookup is skipped for characters above 255. */
4543 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4544 #endif
4545 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4546 #ifndef COMPILE_PCRE8
4547 JUMPHERE(jump);
4548 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4549 #endif
4550 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4551 #ifndef COMPILE_PCRE8
4552 JUMPHERE(jump);
4553 #endif
/* Leave the loop on the first mismatch; TMP2 has not been decremented for
the mismatching character. */
4554 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4555 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4556 JUMPTO(SLJIT_NOT_ZERO, label);
4557
4558 JUMPHERE(jump);
/* Undo the initial one character adjustment of STR_PTR and restore the
saved registers. */
4559 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4560 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4561 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4562 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4563 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4564 }
4565
/* The register aliases are only meaningful inside the two compare helpers
defined above, so they are removed here. */
4566 #undef LCC_TABLE
4567 #undef CHAR1
4568 #undef CHAR2
4569
4570 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4571
4572 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4573 {
4574 /* This function would be ineffective to do in JIT level. */
4575 pcre_uint32 c1, c2;
4576 const pcre_uchar *src2 = args->uchar_ptr;
4577 const pcre_uchar *end2 = args->end;
4578 const ucd_record *ur;
4579 const pcre_uint32 *pp;
4580
4581 while (src1 < end1)
4582 {
4583 if (src2 >= end2)
4584 return (pcre_uchar*)1;
4585 GETCHARINC(c1, src1);
4586 GETCHARINC(c2, src2);
4587 ur = GET_UCD(c2);
4588 if (c1 != c2 && c1 != c2 + ur->other_case)
4589 {
4590 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4591 for (;;)
4592 {
4593 if (c1 < *pp) return NULL;
4594 if (c1 == *pp++) break;
4595 }
4596 }
4597 }
4598 return src2;
4599 }
4600
4601 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4602
/* Emits code which compares one pattern character (all of its code units
when common->utf is set and the character has extra units) with the subject
text located context->length bytes before STR_PTR.

Arguments:
  common      compiler state
  caseless    TRUE for a caseless compare; it is only honoured when
              char_has_othercase() reports another case, in which case the
              two cases are told apart by the bit returned by
              char_get_othercase_bit()
  cc          points to the pattern character
  context     running state shared by consecutive calls: remaining length,
              the register which receives the next load, and (for unaligned
              reads) the code units collected so far
  backtracks  list which receives the mismatch jumps

Returns:      cc advanced past the compared character

Loads and compares are interleaved: data is loaded into context->sourcereg,
then sourcereg is switched between TMP1 and TMP2 and the register loaded in
the previous step is compared. */
4603 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4604 compare_context *context, jump_list **backtracks)
4605 {
4606 DEFINE_COMPILER;
4607 unsigned int othercasebit = 0;
4608 pcre_uchar *othercasechar = NULL;
4609 #ifdef SUPPORT_UTF
4610 int utflength;
4611 #endif
4612
4613 if (caseless && char_has_othercase(common, cc))
4614 {
4615 othercasebit = char_get_othercase_bit(common, cc);
4616 SLJIT_ASSERT(othercasebit);
4617 /* Extracting bit difference info. */
/* The upper part of the returned value selects the code unit which carries
the differing bit (othercasechar), the low 8 bits are the bit itself. */
4618 #if defined COMPILE_PCRE8
4619 othercasechar = cc + (othercasebit >> 8);
4620 othercasebit &= 0xff;
4621 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4622 /* Note that this code only handles characters in the BMP. If there
4623 ever are characters outside the BMP whose othercase differs in only one
4624 bit from itself (there currently are none), this code will need to be
4625 revised for COMPILE_PCRE32. */
4626 othercasechar = cc + (othercasebit >> 9);
4627 if ((othercasebit & 0x100) != 0)
4628 othercasebit = (othercasebit & 0xff) << 8;
4629 else
4630 othercasebit &= 0xff;
4631 #endif /* COMPILE_PCRE[8|16|32] */
4632 }
4633
/* A sourcereg of -1 marks the first call for this context: the first load
goes to TMP1 and the following one will go to TMP2. */
4634 if (context->sourcereg == -1)
4635 {
4636 #if defined COMPILE_PCRE8
4637 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4638 if (context->length >= 4)
4639 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4640 else if (context->length >= 2)
4641 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4642 else
4643 #endif
4644 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4645 #elif defined COMPILE_PCRE16
4646 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4647 if (context->length >= 4)
4648 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4649 else
4650 #endif
4651 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4652 #elif defined COMPILE_PCRE32
4653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4654 #endif /* COMPILE_PCRE[8|16|32] */
4655 context->sourcereg = TMP2;
4656 }
4657
4658 #ifdef SUPPORT_UTF
4659 utflength = 1;
4660 if (common->utf && HAS_EXTRALEN(*cc))
4661 utflength += GET_EXTRALEN(*cc);
4662
/* One iteration per code unit of the character. Without SUPPORT_UTF the
loop header and footer are compiled out and the body runs once. */
4663 do
4664 {
4665 #endif
4666
4667 context->length -= IN_UCHARS(1);
4668 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4669
4670 /* Unaligned read is supported. */
/* The expected code units are collected in context->c and the case bits in
context->oc, so that several units can be compared by one instruction. */
4671 if (othercasebit != 0 && othercasechar == cc)
4672 {
4673 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4674 context->oc.asuchars[context->ucharptr] = othercasebit;
4675 }
4676 else
4677 {
4678 context->c.asuchars[context->ucharptr] = *cc;
4679 context->oc.asuchars[context->ucharptr] = 0;
4680 }
4681 context->ucharptr++;
4682
/* Emit a compare when a full word is collected, when nothing is left, or
(8 bit only) when two units are collected and a single one remains. */
4683 #if defined COMPILE_PCRE8
4684 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4685 #else
4686 if (context->ucharptr >= 2 || context->length == 0)
4687 #endif
4688 {
/* Load the next chunk first, then switch to the register loaded earlier. */
4689 if (context->length >= 4)
4690 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4691 else if (context->length >= 2)
4692 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4693 #if defined COMPILE_PCRE8
4694 else if (context->length >= 1)
4695 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4696 #endif /* COMPILE_PCRE8 */
4697 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4698
/* The case labels are expressed in code units: 4 or 2 bytes of data. The
case bits are forced on in the subject data before the compare. */
4699 switch(context->ucharptr)
4700 {
4701 case 4 / sizeof(pcre_uchar):
4702 if (context->oc.asint != 0)
4703 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4704 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4705 break;
4706
4707 case 2 / sizeof(pcre_uchar):
4708 if (context->oc.asushort != 0)
4709 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4710 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4711 break;
4712
4713 #ifdef COMPILE_PCRE8
4714 case 1:
4715 if (context->oc.asbyte != 0)
4716 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4717 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4718 break;
4719 #endif
4720
4721 default:
4722 SLJIT_ASSERT_STOP();
4723 break;
4724 }
4725 context->ucharptr = 0;
4726 }
4727
4728 #else
4729
4730 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per step: load the next unit (if any), switch registers,
then compare the unit which was loaded in the previous step. */
4731 if (context->length >= 1)
4732 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4733
4734 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4735
4736 if (othercasebit != 0 && othercasechar == cc)
4737 {
4738 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4739 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4740 }
4741 else
4742 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4743
4744 #endif
4745
4746 cc++;
4747 #ifdef SUPPORT_UTF
4748 utflength--;
4749 }
4750 while (utflength > 0);
4751 #endif
4752
4753 return cc;
4754 }
4755
4756 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4757
/* Rebases the value held in typereg: emits an ADD or SUB of the difference
between the current typeoffset and (value), then records (value) as the new
typeoffset. Nothing is emitted when the offset is unchanged. The macro uses
the typereg and typeoffset variables of the expansion site. It expands to
two statements and evaluates (value) several times, so it must not be used
as the unbraced body of an if or loop, nor with an argument which has side
effects. */
4758 #define SET_TYPE_OFFSET(value) \
4759 if ((value) != typeoffset) \
4760 { \
4761 if ((value) < typeoffset) \
4762 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4763 else \
4764 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4765 } \
4766 typeoffset = (value);
4767
/* Rebases the character held in TMP1: emits an ADD or SUB of the difference
between the current charoffset and (value), then records (value) as the new
charoffset. Nothing is emitted when the offset is unchanged. The macro uses
the charoffset variable of the expansion site. It expands to two statements
and evaluates (value) several times, so it must not be used as the unbraced
body of an if or loop, nor with an argument which has side effects. */
4768 #define SET_CHAR_OFFSET(value) \
4769 if ((value) != charoffset) \
4770 { \
4771 if ((value) < charoffset) \
4772 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4773 else \
4774 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4775 } \
4776 charoffset = (value);
4777
4778 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4779 {
4780 DEFINE_COMPILER;
4781 jump_list *found = NULL;
4782 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4783 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4784 struct sljit_jump *jump = NULL;
4785 pcre_uchar *ccbegin;
4786 int compares, invertcmp, numberofcmps;
4787 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4788 BOOL utf = common->utf;
4789 #endif
4790
4791 #ifdef SUPPORT_UCP
4792 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4793 BOOL charsaved = FALSE;
4794 int typereg = TMP1, scriptreg = TMP1;
4795 const pcre_uint32 *other_cases;
4796 sljit_uw typeoffset;
4797 #endif
4798
4799 /* Scanning the necessary info. */
4800 cc++;
4801 ccbegin = cc;
4802 compares = 0;
4803 if (cc[-1] & XCL_MAP)
4804 {
4805 min = 0;
4806 cc += 32 / sizeof(pcre_uchar);
4807 }
4808
4809 while (*cc != XCL_END)
4810 {
4811 compares++;
4812 if (*cc == XCL_SINGLE)
4813 {
4814 cc ++;
4815 GETCHARINCTEST(c, cc);
4816 if (c > max) max = c;
4817 if (c < min) min = c;
4818 #ifdef SUPPORT_UCP
4819 needschar = TRUE;
4820 #endif
4821 }
4822 else if (*cc == XCL_RANGE)
4823 {
4824 cc ++;
4825 GETCHARINCTEST(c, cc);
4826 if (c < min) min = c;
4827 GETCHARINCTEST(c, cc);
4828 if (c > max) max = c;
4829 #ifdef SUPPORT_UCP
4830 needschar = TRUE;
4831 #endif
4832 }
4833 #ifdef SUPPORT_UCP
4834 else
4835 {
4836 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4837 cc++;
4838 if (*cc == PT_CLIST)
4839 {
4840 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4841 while (*other_cases != NOTACHAR)
4842 {
4843 if (*other_cases > max) max = *other_cases;
4844 if (*other_cases < min) min = *other_cases;
4845 other_cases++;
4846 }
4847 }
4848 else
4849 {
4850 max = READ_CHAR_MAX;
4851 min = 0;
4852 }
4853
4854 switch(*cc)
4855 {
4856 case PT_ANY:
4857 break;
4858
4859 case PT_LAMP:
4860 case PT_GC:
4861 case PT_PC:
4862 case PT_ALNUM:
4863 needstype = TRUE;
4864 break;
4865
4866 case PT_SC:
4867 needsscript = TRUE;
4868 break;
4869
4870 case PT_SPACE:
4871 case PT_PXSPACE:
4872 case PT_WORD:
4873 case PT_PXGRAPH:
4874 case PT_PXPRINT:
4875 case PT_PXPUNCT:
4876 needstype = TRUE;
4877 needschar = TRUE;
4878 break;
4879
4880 case PT_CLIST:
4881 case PT_UCNC:
4882 needschar = TRUE;
4883 break;
4884
4885 default:
4886 SLJIT_ASSERT_STOP();
4887 break;
4888 }
4889 cc += 2;
4890 }
4891 #endif
4892 }
4893
4894 /* We are not necessary in utf mode even in 8 bit mode. */
4895 cc = ccbegin;
4896 detect_partial_match(common, backtracks);
4897 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4898
4899 if ((cc[-1] & XCL_HASPROP) == 0)
4900 {
4901 if ((cc[-1] & XCL_MAP) != 0)
4902 {
4903 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4904 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4905 {
4906 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4907 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4908 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4909 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4910 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4911 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4912 }
4913
4914 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4915 JUMPHERE(jump);
4916
4917 cc += 32 / sizeof(pcre_uchar);
4918 }
4919 else
4920 {
4921 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4922 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4923 }
4924 }
4925 else if ((cc[-1] & XCL_MAP) != 0)
4926 {
4927 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4928 #ifdef SUPPORT_UCP
4929 charsaved = TRUE;
4930 #endif
4931 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4932 {
4933 #ifdef COMPILE_PCRE8
4934 jump = NULL;
4935 if (common->utf)
4936 #endif
4937 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4938
4939 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4940 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4941 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4942 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4943 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4944 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4945
4946 #ifdef COMPILE_PCRE8
4947 if (common->utf)
4948 #endif
4949 JUMPHERE(jump);
4950 }
4951
4952 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4953 cc += 32 / sizeof(pcre_uchar);
4954 }
4955
4956 #ifdef SUPPORT_UCP
4957 /* Simple register allocation. TMP1 is preferred if possible. */
4958 if (needstype || needsscript)
4959 {
4960 if (needschar && !charsaved)
4961 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4962 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4963 if (needschar)
4964 {
4965 if (needstype)
4966 {
4967 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4968 typereg = RETURN_ADDR;
4969 }
4970
4971 if (needsscript)
4972 scriptreg = TMP3;
4973 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4974 }
4975 else if (needstype && needsscript)
4976 scriptreg = TMP3;
4977 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4978
4979 if (needsscript)
4980 {
4981 if (scriptreg == TMP1)
4982 {
4983 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4984 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4985 }
4986 else
4987 {
4988 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4989 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4990 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4991 }
4992 }
4993 }
4994 #endif
4995
4996 /* Generating code. */
4997 charoffset = 0;
4998 numberofcmps = 0;
4999 #ifdef SUPPORT_UCP
5000 typeoffset = 0;
5001 #endif
5002
5003 while (*cc != XCL_END)
5004 {
5005 compares--;
5006 invertcmp = (compares == 0 && list != backtracks);
5007 jump = NULL;
5008
5009 if (*cc == XCL_SINGLE)
5010 {
5011 cc ++;
5012 GETCHARINCTEST(c, cc);
5013
5014 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5015 {
5016 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5017 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5018 numberofcmps++;
5019 }
5020 else if (numberofcmps > 0)
5021 {
5022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5023 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5024 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5025 numberofcmps = 0;
5026 }
5027 else
5028 {
5029 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5030 numberofcmps = 0;
5031 }
5032 }
5033 else if (*cc == XCL_RANGE)
5034 {
5035 cc ++;
5036 GETCHARINCTEST(c, cc);
5037 SET_CHAR_OFFSET(c);
5038 GETCHARINCTEST(c, cc);
5039
5040 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5041 {
5042 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5043 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5044 numberofcmps++;
5045 }
5046 else if (numberofcmps > 0)
5047 {
5048 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5049 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5050 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5051 numberofcmps = 0;
5052 }
5053 else
5054 {
5055 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5056 numberofcmps = 0;
5057 }
5058 }
5059 #ifdef SUPPORT_UCP
5060 else
5061 {
5062 if (*cc == XCL_NOTPROP)
5063 invertcmp ^= 0x1;
5064 cc++;
5065 switch(*cc)
5066 {
5067 case PT_ANY:
5068 if (list != backtracks)
5069 {
5070 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5071 continue;
5072 }
5073 else if (cc[-1] == XCL_NOTPROP)
5074 continue;
5075 jump = JUMP(SLJIT_JUMP);
5076 break;
5077
5078 case PT_LAMP:
5079 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5080 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5082 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5083 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5084 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5085 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5086 break;
5087
5088 case PT_GC:
5089 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5090 SET_TYPE_OFFSET(c);
5091 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5092 break;
5093
5094 case PT_PC:
5095 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5096 break;
5097
5098 case PT_SC:
5099 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5100 break;
5101
5102 case PT_SPACE:
5103 case PT_PXSPACE:
5104 SET_CHAR_OFFSET(9);
5105 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5106 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5107
5108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5109 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5110
5111 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5112 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5113
5114 SET_TYPE_OFFSET(ucp_Zl);
5115 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5116 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5117 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5118 break;
5119
5120 case PT_WORD:
5121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5122 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5123 /* Fall through. */
5124
5125 case PT_ALNUM:
5126 SET_TYPE_OFFSET(ucp_Ll);
5127 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5128 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5129 SET_TYPE_OFFSET(ucp_Nd);
5130 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5131 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5132 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5133 break;
5134
5135 case PT_CLIST:
5136 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5137
5138 /* At least three characters are required.
5139 Otherwise this case would be handled by the normal code path. */
5140 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5141 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5142
5143 /* Optimizing character pairs, if their difference is power of 2. */
5144 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5145 {
5146 if (charoffset == 0)
5147 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5148 else
5149 {
5150 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5151 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5152 }
5153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5154 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5155 other_cases += 2;
5156 }
5157 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5158 {
5159 if (charoffset == 0)
5160 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5161 else
5162 {
5163 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5164 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5165 }
5166 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5168
5169 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5170 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5171
5172 other_cases += 3;
5173 }
5174 else
5175 {
5176 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5177 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5178 }
5179
5180 while (*other_cases != NOTACHAR)
5181 {
5182 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5183 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5184 }
5185 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5186 break;
5187
5188 case PT_UCNC:
5189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5190 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5192 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5194 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5195
5196 SET_CHAR_OFFSET(0xa0);
5197 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5198 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5199 SET_CHAR_OFFSET(0);
5200 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5201 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5202 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5203 break;
5204
5205 case PT_PXGRAPH:
5206 /* C and Z groups are the farthest two groups. */
5207 SET_TYPE_OFFSET(ucp_Ll);
5208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5210
5211 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5212
5213 /* In case of ucp_Cf, we overwrite the result. */
5214 SET_CHAR_OFFSET(0x2066);
5215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5216 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5217
5218 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5219 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5220
5221 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5222 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5223
5224 JUMPHERE(jump);
5225 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5226 break;
5227
5228 case PT_PXPRINT:
5229 /* C and Z groups are the farthest two groups. */
5230 SET_TYPE_OFFSET(ucp_Ll);
5231 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5232 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5233
5234 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5235 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5236
5237 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5238
5239 /* In case of ucp_Cf, we overwrite the result. */
5240 SET_CHAR_OFFSET(0x2066);
5241 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5242 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5243
5244 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5245 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5246
5247 JUMPHERE(jump);
5248 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5249 break;
5250
5251 case PT_PXPUNCT:
5252 SET_TYPE_OFFSET(ucp_Sc);
5253 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5254 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5255
5256 SET_CHAR_OFFSET(0);
5257 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5258 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5259
5260 SET_TYPE_OFFSET(ucp_Pc);
5261 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5262 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5263 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5264 break;
5265 }
5266 cc += 2;
5267 }
5268 #endif
5269
5270 if (jump != NULL)
5271 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5272 }
5273
5274 if (found != NULL)
5275 set_jumps(found, LABEL());
5276 }
5277
5278 #undef SET_TYPE_OFFSET
5279 #undef SET_CHAR_OFFSET
5280
5281 #endif
5282
/* Emits the matching-path code for a single opcode.

   common     - compiler state; supplies the sljit compiler (DEFINE_COMPILER)
                and the utf / nltype / newline / mode / endonly settings that
                are consulted below.
   type       - the opcode to compile.
   cc         - points at the operand data of the opcode (the caller passes
                the address just after the opcode itself).
   backtracks - jump list which receives every jump taken when the item
                fails to match.

   Returns the address just after the operand data consumed by the opcode
   (cc itself for opcodes without operands). An opcode which is not handled
   by the switch ends up in SLJIT_ASSERT_STOP(). */
5283 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5284 {
5285 DEFINE_COMPILER;
5286 int length;
5287 unsigned int c, oc, bit;
5288 compare_context context;
5289 struct sljit_jump *jump[4];
5290 jump_list *end_list;
5291 #ifdef SUPPORT_UTF
5292 struct sljit_label *label;
5293 #ifdef SUPPORT_UCP
5294 pcre_uchar propdata[5];
5295 #endif
5296 #endif /* SUPPORT_UTF */
5297
5298 switch(type)
5299 {
/* Start of data: fail unless STR_PTR equals jit_arguments.begin. */
5300 case OP_SOD:
5301 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5302 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5303 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5304 return cc;
5305
/* Start of match: fail unless STR_PTR equals jit_arguments.str. */
5306 case OP_SOM:
5307 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5308 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5309 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5310 return cc;
5311
/* The check itself lives in the shared routine collected in
   common->wordboundary; only the sense of the conditional jump which
   follows the fast call differs between the two opcodes.
   NOTE(review): the routine presumably reports its verdict through the
   zero flag - confirm against its definition. */
5312 case OP_NOT_WORD_BOUNDARY:
5313 case OP_WORD_BOUNDARY:
5314 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5315 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5316 return cc;
5317
/* The three ctype based classes below share one shape: check for end of
   input, read the type information of the next character into TMP1
   (the 7 bit reader is used in UTF-8 mode when is_char7_bitset() accepts
   the corresponding bitmap), then test a single ctype_* bit. */
5318 case OP_NOT_DIGIT:
5319 case OP_DIGIT:
5320 /* Digits are usually 0-9, so it is worth to optimize them. */
5321 detect_partial_match(common, backtracks);
5322 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5323 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5324 read_char7_type(common, type == OP_NOT_DIGIT);
5325 else
5326 #endif
5327 read_char8_type(common, type == OP_NOT_DIGIT);
5328 /* Flip the starting bit in the negative case. */
5329 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5330 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5331 return cc;
5332
5333 case OP_NOT_WHITESPACE:
5334 case OP_WHITESPACE:
5335 detect_partial_match(common, backtracks);
5336 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5337 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5338 read_char7_type(common, type == OP_NOT_WHITESPACE);
5339 else
5340 #endif
5341 read_char8_type(common, type == OP_NOT_WHITESPACE);
5342 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5343 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5344 return cc;
5345
5346 case OP_NOT_WORDCHAR:
5347 case OP_WORDCHAR:
5348 detect_partial_match(common, backtracks);
5349 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5350 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5351 read_char7_type(common, type == OP_NOT_WORDCHAR);
5352 else
5353 #endif
5354 read_char8_type(common, type == OP_NOT_WORDCHAR);
5355 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5356 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5357 return cc;
5358
/* Any character except a newline. */
5359 case OP_ANY:
5360 detect_partial_match(common, backtracks);
5361 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5362 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5363 {
/* Fixed newline made of two units: the character just read is compared
   with the high byte of common->newline (jump[0] skips the rest when it
   differs). If input remains, the following unit is compared with the
   low byte, and the item fails only when both units match. */
5364 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5365 end_list = NULL;
5366 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5367 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5368 else
5369 check_str_end(common, &end_list);
5370
5371 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5372 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5373 set_jumps(end_list, LABEL());
5374 JUMPHERE(jump[0]);
5375 }
5376 else
5377 check_newlinechar(common, common->nltype, backtracks, TRUE);
5378 return cc;
5379
/* Any character including newlines: the character only has to be skipped,
   its value is not needed. */
5380 case OP_ALLANY:
5381 detect_partial_match(common, backtracks);
5382 #ifdef SUPPORT_UTF
5383 if (common->utf)
5384 {
5385 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5386 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5387 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5388 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no further skipping; otherwise the extra amount
   is fetched from utf8_table4, indexed by (unit - 0xc0). */
5389 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5390 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5392 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no further skipping. Otherwise TMP1 becomes 1
   when (unit & 0xfc00) == 0xd800 and 0 when not; it is shifted left by
   one and added to STR_PTR. */
5393 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5394 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5395 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5396 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5397 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5398 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5399 #endif
5400 JUMPHERE(jump[0]);
5401 #endif /* COMPILE_PCRE[8|16] */
5402 return cc;
5403 }
5404 #endif
5405 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5406 return cc;
5407
/* Exactly one code unit, regardless of the UTF setting. */
5408 case OP_ANYBYTE:
5409 detect_partial_match(common, backtracks);
5410 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5411 return cc;
5412
5413 #ifdef SUPPORT_UTF
5414 #ifdef SUPPORT_UCP
/* A property test is compiled by building a five unit extended class
   description in propdata (XCL_HASPROP, XCL_PROP or XCL_NOTPROP, the two
   operand units, XCL_END) and handing it to the extended class compiler.
   The opcode has two operand units, hence cc + 2. */
5415 case OP_NOTPROP:
5416 case OP_PROP:
5417 propdata[0] = XCL_HASPROP;
5418 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5419 propdata[2] = cc[0];
5420 propdata[3] = cc[1];
5421 propdata[4] = XCL_END;
5422 compile_xclass_matchingpath(common, propdata, backtracks);
5423 return cc + 2;
5424 #endif
5425 #endif
5426
/* Newline sequence. Flow of the emitted code:
     jump[0] - the character is not CR: it is checked by check_newlinechar
               using common->bsr_nltype.
     CR      - if no input remains (end_list) the lone CR is accepted;
               if the next unit is not NL (jump[1]) the lone CR is accepted;
               otherwise the NL is consumed as well (jump[2]). */
5427 case OP_ANYNL:
5428 detect_partial_match(common, backtracks);
5429 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5430 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5431 /* We don't need to handle soft partial matching case. */
5432 end_list = NULL;
5433 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5434 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5435 else
5436 check_str_end(common, &end_list);
5437 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5438 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5439 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5440 jump[2] = JUMP(SLJIT_JUMP);
5441 JUMPHERE(jump[0]);
5442 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5443 set_jumps(end_list, LABEL());
5444 JUMPHERE(jump[1]);
5445 JUMPHERE(jump[2]);
5446 return cc;
5447
/* Horizontal / vertical space: read the character, call the shared
   checker and branch on the flag state after the call. */
5448 case OP_NOT_HSPACE:
5449 case OP_HSPACE:
5450 detect_partial_match(common, backtracks);
5451 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5452 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5453 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5454 return cc;
5455
5456 case OP_NOT_VSPACE:
5457 case OP_VSPACE:
5458 detect_partial_match(common, backtracks);
5459 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5460 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5461 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5462 return cc;
5463
5464 #ifdef SUPPORT_UCP
/* Extended grapheme cluster. The gbprop value of the first character is
   kept in STACK_TOP (whose real value is parked in LOCALS0 meanwhile).
   The loop at `label` reads one more character, loads its gbprop into
   TMP2 and tests bit (1 << TMP2) of the ucp_gbtable entry selected by the
   previous gbprop; while the bit is set the cluster continues. When the
   bit is clear, STR_PTR is moved back to TMP3, the position saved before
   the last read. */
5465 case OP_EXTUNI:
5466 detect_partial_match(common, backtracks);
5467 read_char(common);
5468 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5470 /* Optimize register allocation: use a real register. */
5471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5472 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5473
5474 label = LABEL();
5475 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5476 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5477 read_char(common);
5478 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5479 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5480 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5481
5482 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5483 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5484 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5485 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5486 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5487 JUMPTO(SLJIT_NOT_ZERO, label);
5488
5489 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5490 JUMPHERE(jump[0]);
5491 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5492
5493 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5494 {
5495 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5496 /* Since we successfully read a char above, partial matching must occur. */
5497 check_partial(common, TRUE);
5498 JUMPHERE(jump[0]);
5499 }
5500 return cc;
5501 #endif
5502
/* End of subject, or just before a newline which is the last thing in the
   subject. jump[0] is the "already at STR_END" shortcut; every path which
   succeeds ends at the check_partial(common, FALSE) call below. */
5503 case OP_EODN:
5504 /* Requires rather complex checks. */
5505 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5506 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5507 {
/* Two unit fixed newline: exactly two units must remain (TMP2 == STR_END)
   and they must equal the high and low byte of common->newline. */
5508 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5510 if (common->mode == JIT_COMPILE)
5511 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5512 else
5513 {
/* Partial matching modes: when TMP2 != STR_END, the item fails at once if
   more than two units remain (TMP2 < STR_END) or if the first unit is not
   the high byte of the newline. What is left is a single remaining unit
   which matches the first half of the newline: check_partial(TRUE) is
   invoked before backtracking. */
5514 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5515 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5516 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5517 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5518 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5519 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5520 check_partial(common, TRUE);
5521 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5522 JUMPHERE(jump[1]);
5523 }
5524 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5525 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5526 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5527 }
5528 else if (common->nltype == NLTYPE_FIXED)
5529 {
/* One unit fixed newline: exactly one unit must remain and it must equal
   common->newline. */
5530 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5531 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5532 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5533 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5534 }
5535 else
5536 {
/* Non-fixed newline types. First the CR case:
     jump[1] - the unit is not CR, handled after JUMPHERE(jump[1]);
     jump[2] - STR_PTR + 2 units is beyond STR_END, so CR is the last unit;
     less    - more than two units remain: fail;
     equal   - exactly two units remain: the second must be NL (jump[3]). */
5537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5538 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5539 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5540 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5541 jump[2] = JUMP(SLJIT_GREATER);
5542 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5543 /* Equal. */
5544 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5545 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5546 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5547
5548 JUMPHERE(jump[1]);
5549 if (common->nltype == NLTYPE_ANYCRLF)
5550 {
/* Not CR: it must be the last unit and it must be NL. */
5551 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5552 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5553 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5554 }
5555 else
5556 {
/* STR_PTR is saved in LOCALS1, one character is read, the item fails
   unless that character ends the subject and the anynewline routine
   accepts it; STR_PTR is restored afterwards. */
5557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5558 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5559 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5560 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5561 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5562 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5563 }
5564 JUMPHERE(jump[2]);
5565 JUMPHERE(jump[3]);
5566 }
5567 JUMPHERE(jump[0]);
5568 check_partial(common, FALSE);
5569 return cc;
5570
/* End of subject only. */
5571 case OP_EOD:
5572 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5573 check_partial(common, FALSE);
5574 return cc;
5575
/* Fails when STR_PTR is after jit_arguments.begin or when the notbol
   field of the arguments is nonzero. */
5576 case OP_CIRC:
5577 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5579 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5580 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5581 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5582 return cc;
5583
/* Multiline variant. At jit_arguments.begin only the notbol field is
   tested (jump[0] then skips the rest). After jump[1] (STR_PTR is beyond
   begin) the item fails at STR_END, and otherwise requires the preceding
   character(s) to form a newline. */
5584 case OP_CIRCM:
5585 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5586 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5587 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5588 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5589 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5590 jump[0] = JUMP(SLJIT_JUMP);
5591 JUMPHERE(jump[1]);
5592
5593 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5594 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5595 {
/* Two unit newline: both units must lie at or after begin (TMP1 still
   holds it for the comparison) and match the two newline bytes. */
5596 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5597 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5598 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5599 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5600 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5601 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5602 }
5603 else
5604 {
/* Step back one character, read it again and test it for being a newline. */
5605 skip_char_back(common);
5606 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5607 check_newlinechar(common, common->nltype, backtracks, FALSE);
5608 }
5609 JUMPHERE(jump[0]);
5610 return cc;
5611
/* Fails when the noteol field of the arguments is nonzero. Otherwise it
   behaves like OP_EODN (compiled by a recursive call) unless
   common->endonly is set, in which case only STR_END is accepted. */
5612 case OP_DOLL:
5613 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5614 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5615 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5616
5617 if (!common->endonly)
5618 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5619 else
5620 {
5621 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5622 check_partial(common, FALSE);
5623 }
5624 return cc;
5625
/* Multiline variant. At STR_END: the noteol field must be zero, then
   check_partial and done (jump[0]). Before STR_END (jump[1]): the
   following character(s) must form a newline. */
5626 case OP_DOLLM:
5627 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5628 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5629 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5630 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5631 check_partial(common, FALSE);
5632 jump[0] = JUMP(SLJIT_JUMP);
5633 JUMPHERE(jump[1]);
5634
5635 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5636 {
5637 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5638 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5639 if (common->mode == JIT_COMPILE)
5640 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5641 else
5642 {
/* Partial matching modes: with a single unit left, a unit equal to the
   first newline byte triggers check_partial(TRUE) before backtracking. */
5643 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5644 /* STR_PTR = STR_END - IN_UCHARS(1) */
5645 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5646 check_partial(common, TRUE);
5647 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5648 JUMPHERE(jump[1]);
5649 }
5650
5651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5652 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5653 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5654 }
5655 else
5656 {
5657 peek_char(common, common->nlmax);
5658 check_newlinechar(common, common->nltype, backtracks, FALSE);
5659 }
5660 JUMPHERE(jump[0]);
5661 return cc;
5662
/* Single literal character. `length` is the number of code units the
   character occupies in the pattern. */
5663 case OP_CHAR:
5664 case OP_CHARI:
5665 length = 1;
5666 #ifdef SUPPORT_UTF
5667 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5668 #endif
/* In JIT_COMPILE mode the unit sequence is compared directly by
   byte_sequence_compare when the match is caseful, the character has no
   other case, or char_get_othercase_bit() returns nonzero.
   NOTE(review): a nonzero result presumably means the two cases differ in
   one bit only - confirm against char_get_othercase_bit. */
5669 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5670 {
5671 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5672 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5673
5674 context.length = IN_UCHARS(length);
5675 context.sourcereg = -1;
5676 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5677 context.ucharptr = 0;
5678 #endif
5679 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5680 }
5681
/* General path: decode the pattern character into c, read one character
   from the subject and compare the values. */
5682 detect_partial_match(common, backtracks);
5683 #ifdef SUPPORT_UTF
5684 if (common->utf)
5685 {
5686 GETCHAR(c, cc);
5687 }
5688 else
5689 #endif
5690 c = *cc;
5691
5692 if (type == OP_CHAR || !char_has_othercase(common, cc))
5693 {
5694 read_char_range(common, c, c, FALSE);
5695 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5696 return cc + length;
5697 }
5698 oc = char_othercase(common, c);
5699 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5700 bit = c ^ oc;
5701 if (is_powerof2(bit))
5702 {
/* The two cases differ in a single bit: force that bit on and compare once. */
5703 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5704 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5705 return cc + length;
5706 }
5707 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5708 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5709 JUMPHERE(jump[0]);
5710 return cc + length;
5711
/* Any character except the given literal. */
5712 case OP_NOT:
5713 case OP_NOTI:
5714 detect_partial_match(common, backtracks);
5715 length = 1;
5716 #ifdef SUPPORT_UTF
5717 if (common->utf)
5718 {
5719 #ifdef COMPILE_PCRE8
5720 c = *cc;
5721 if (c < 128)
5722 {
/* The pattern character is a single unit below 128, so only the first
   unit of the subject character is compared; the remaining units of the
   subject character are skipped afterwards without being decoded. */
5723 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5724 if (type == OP_NOT || !char_has_othercase(common, cc))
5725 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5726 else
5727 {
5728 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5729 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5730 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5731 }
5732 /* Skip the variable-length character. */
5733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5734 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5735 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5737 JUMPHERE(jump[0]);
5738 return cc + 1;
5739 }
5740 else
5741 #endif /* COMPILE_PCRE8 */
5742 {
5743 GETCHARLEN(c, cc, length);
5744 }
5745 }
5746 else
5747 #endif /* SUPPORT_UTF */
5748 c = *cc;
5749
/* General path: the jump senses are the inverse of OP_CHAR / OP_CHARI,
   a subject character equal to c (or to its other case) fails. */
5750 if (type == OP_NOT || !char_has_othercase(common, cc))
5751 {
5752 read_char_range(common, c, c, TRUE);
5753 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5754 }
5755 else
5756 {
5757 oc = char_othercase(common, c);
5758 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5759 bit = c ^ oc;
5760 if (is_powerof2(bit))
5761 {
5762 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5763 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5764 }
5765 else
5766 {
5767 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5768 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5769 }
5770 }
5771 return cc + length;
5772
/* Character class stored as a 32 byte bitmap at cc. */
5773 case OP_CLASS:
5774 case OP_NCLASS:
5775 detect_partial_match(common, backtracks);
5776
5777 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5778 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5779 read_char_range(common, 0, bit, type == OP_NCLASS);
5780 #else
5781 read_char_range(common, 0, 255, type == OP_NCLASS);
5782 #endif
5783
/* check_class_ranges() returning nonzero means it has already emitted
   the whole test, so nothing else is generated here. */
5784 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5785 return cc + 32 / sizeof(pcre_uchar);
5786
/* Characters above the bitmap limit cannot be looked up: OP_CLASS fails
   for them, while OP_NCLASS jumps over the bitmap test (jump[0]). */
5787 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5788 jump[0] = NULL;
5789 if (common->utf)
5790 {
5791 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5792 if (type == OP_CLASS)
5793 {
5794 add_jump(compiler, backtracks, jump[0]);
5795 jump[0] = NULL;
5796 }
5797 }
5798 #elif !defined COMPILE_PCRE8
5799 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5800 if (type == OP_CLASS)
5801 {
5802 add_jump(compiler, backtracks, jump[0]);
5803 jump[0] = NULL;
5804 }
5805 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5806
/* Bitmap lookup: TMP2 = 1 << (c & 7), TMP1 = byte at cc + (c >> 3);
   the item fails when the selected bit is clear. */
5807 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5808 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5809 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5810 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5811 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5812 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5813
5814 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5815 if (jump[0] != NULL)
5816 JUMPHERE(jump[0]);
5817 #endif
5818
5819 return cc + 32 / sizeof(pcre_uchar);
5820
5821 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the class data starts LINK_SIZE units after cc, and the
   value stored at cc (read by GET) gives the distance to the next opcode. */
5822 case OP_XCLASS:
5823 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5824 return cc + GET(cc, 0) - 1;
5825 #endif
5826
/* Move STR_PTR backwards by `length` characters; the item fails if that
   would cross jit_arguments.begin. A zero length emits nothing. */
5827 case OP_REVERSE:
5828 length = GET(cc, 0);
5829 if (length == 0)
5830 return cc + LINK_SIZE;
5831 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5832 #ifdef SUPPORT_UTF
5833 if (common->utf)
5834 {
/* Characters have variable width: step back one character at a time,
   using TMP2 as the countdown and TMP3 as the lower limit. */
5835 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5837 label = LABEL();
5838 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5839 skip_char_back(common);
5840 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5841 JUMPTO(SLJIT_NOT_ZERO, label);
5842 }
5843 else
5844 #endif
5845 {
5846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5847 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5848 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5849 }
5850 check_start_used_ptr(common);
5851 return cc + LINK_SIZE;
5852 }
/* Every opcode handled above returns from inside the switch. */
5853 SLJIT_ASSERT_STOP();
5854 return cc;
5855 }
5856
5857 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5858 {
5859 /* This function consumes at least one input character. */
5860 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5861 DEFINE_COMPILER;
5862 pcre_uchar *ccbegin = cc;
5863 compare_context context;
5864 int size;
5865
5866 context.length = 0;
5867 do
5868 {
5869 if (cc >= ccend)
5870 break;
5871
5872 if (*cc == OP_CHAR)
5873 {
5874 size = 1;
5875 #ifdef SUPPORT_UTF
5876 if (common->utf && HAS_EXTRALEN(cc[1]))
5877 size += GET_EXTRALEN(cc[1]);
5878 #endif
5879 }
5880 else if (*cc == OP_CHARI)
5881 {
5882 size = 1;
5883 #ifdef SUPPORT_UTF
5884 if (common->utf)
5885 {
5886 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5887 size = 0;
5888 else if (HAS_EXTRALEN(cc[1]))
5889 size += GET_EXTRALEN(cc[1]);
5890 }
5891 else
5892 #endif
5893 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5894 size = 0;
5895 }
5896 else
5897 size = 0;
5898
5899 cc += 1 + size;
5900 context.length += IN_UCHARS(size);
5901 }
5902 while (size > 0 && context.length <= 128);
5903
5904 cc = ccbegin;
5905 if (context.length > 0)
5906 {
5907 /* We have a fixed-length byte sequence. */
5908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5909 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5910
5911 context.sourcereg = -1;
5912 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5913 context.ucharptr = 0;
5914 #endif
5915 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5916 return cc;
5917 }
5918
5919 /* A non-fixed length character will be checked if length == 0. */
5920 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5921 }
5922
5923 /* Forward declarations. */
5924 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5925 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5926
/* Allocates a zero filled backtrack record of `size` bytes with
sljit_alloc_memory(), stores `ccstart` in its cc field and links it in
front of parent->top. The expansion site must have `compiler`, `parent`
and `backtrack` in scope. When the compiler reports an error after the
allocation, the enclosing function returns `error`. The `size` argument is
parenthesised at both of its uses. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions which return void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the current `backtrack` pointer as a pointer to `type`. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5954
5955 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5956 {
5957 /* The OVECTOR offset goes to TMP2. */
5958 DEFINE_COMPILER;
5959 int count = GET2(cc, 1 + IMM2_SIZE);
5960 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5961 unsigned int offset;
5962 jump_list *found = NULL;
5963
5964 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5965
5966 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5967
5968 count--;
5969 while (count-- > 0)
5970 {
5971 offset = GET2(slot, 0) << 1;
5972 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5973 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5974 slot += common->name_entry_size;
5975 }
5976
5977 offset = GET2(slot, 0) << 1;
5978 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5979 if (backtracks != NULL && !common->jscript_compat)
5980 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5981
5982 set_jumps(found, LABEL());
5983 }
5984
/* Emits the code which matches a back reference against the subject.

   common     - compiler state.
   cc         - points at the reference opcode. For OP_REF / OP_REFI the
                group is taken from GET2(cc, 1) and its OVECTOR pair is
                addressed directly. For any other opcode the pair is read
                through TMP2, which must already point to it
                (NOTE(review): presumably set up by compile_dnref_search -
                confirm at the call sites).
   backtracks - jump list receiving the failure jumps.
   withchecks - when nonzero, an empty reference is detected (see `jump`
                below); for the OP_REF / OP_REFI form, and unless
                jscript_compat is set, a start value equal to OVECTOR(1)
                also fails.
   emptyfail  - selects what the empty reference jump does: nonzero adds it
                to `backtracks`, zero lets it land at the end of the
                emitted code. */
5985 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5986 {
5987 DEFINE_COMPILER;
5988 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5989 int offset = 0;
5990 struct sljit_jump *jump = NULL;
5991 struct sljit_jump *partial;
5992 struct sljit_jump *nopartial;
5993
/* TMP1 = start of the referenced text. */
5994 if (ref)
5995 {
5996 offset = GET2(cc, 1) << 1;
5997 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5998 /* OVECTOR(1) contains the "string begin - 1" constant. */
5999 if (withchecks && !common->jscript_compat)
6000 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6001 }
6002 else
6003 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6004
6005 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6006 if (common->utf && *cc == OP_REFI)
6007 {
/* Caseless comparison in UTF mode is done by calling
   do_utf_caselesscmp. The assertion pins the register assignment the call
   relies on: TMP1 and TMP2 are argument registers R0 and R2, and
   STACK_TOP is R1, which is why it is saved to LOCALS0 before R1 is
   loaded with ARGUMENTS. */
6008 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
/* TMP2 = end of the referenced text. */
6009 if (ref)
6010 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6011 else
6012 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6013
/* Empty reference: start equals end. */
6014 if (withchecks)
6015 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6016
6017 /* Needed to save important temporary registers. */
6018 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6019 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6021 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6022 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return value handling as visible here: 0 always fails; 1 fails in
   JIT_COMPILE mode and goes through check_partial before failing in the
   other modes; any other value becomes the new STR_PTR. */
6023 if (common->mode == JIT_COMPILE)
6024 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6025 else
6026 {
6027 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6028 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6029 check_partial(common, FALSE);
6030 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6031 JUMPHERE(nopartial);
6032 }
6033 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6034 }
6035 else
6036 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6037 {
/* TMP2 = end - start, the size of the referenced text; the subtraction
   also sets the zero flag used for the empty reference jump. */
6038 if (ref)
6039 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6040 else
6041 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6042
6043 if (withchecks)
6044 jump = JUMP(SLJIT_ZERO);
6045
/* Advance STR_PTR by the size first; `partial` is taken when the
   reference would run past STR_END. In JIT_COMPILE mode that is a plain
   failure. */
6046 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6047 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6048 if (common->mode == JIT_COMPILE)
6049 add_jump(compiler, backtracks, partial);
6050
/* Shared comparison routine (caseful for OP_REF, caseless otherwise);
   a nonzero TMP2 after the call means failure. */
6051 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6052 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6053
6054 if (common->mode != JIT_COMPILE)
6055 {
/* Partial matching modes: when the reference is longer than the rest of
   the subject, only the part which fits is compared (TMP2 is reduced by
   the overshoot and STR_PTR is set to STR_END). If that part is empty the
   comparison is skipped. check_partial is then invoked before failing. */
6056 nopartial = JUMP(SLJIT_JUMP);
6057 JUMPHERE(partial);
6058 /* TMP2 -= STR_END - STR_PTR */
6059 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6060 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6061 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6062 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6063 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6064 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6065 JUMPHERE(partial);
6066 check_partial(common, FALSE);
6067 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6068 JUMPHERE(nopartial);
6069 }
6070 }
6071
/* Resolve the empty reference jump (only created when withchecks is set). */
6072 if (jump != NULL)
6073 {
6074 if (emptyfail)
6075 add_jump(compiler, backtracks, jump);
6076 else
6077 JUMPHERE(jump);
6078 }
6079 }
6080
/* Emits the matching path of a back reference which is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

  cc      points to the reference opcode; the repeat opcode follows it.
  parent  backtrack node under which the new iterator_backtrack is pushed.

Returns the address of the first opcode after the repeat. PUSH_BACKTRACK is
invoked with NULL as its error value, so NULL is presumably returned when the
backtrack node cannot be allocated (the macro is defined elsewhere). */
6081 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6082 {
6083 DEFINE_COMPILER;
6084 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6085 backtrack_common *backtrack;
6086 pcre_uchar type;
6087 int offset = 0;
6088 struct sljit_label *label;
6089 struct sljit_jump *zerolength;
6090 struct sljit_jump *jump = NULL;
6091 pcre_uchar *ccbegin = cc;
6092 int min = 0, max = 0;
6093 BOOL minimize;
6094
6095 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6096
/* Opcodes other than OP_REF / OP_REFI are IMM2_SIZE longer. cc is advanced
by that amount so the repeat type is found at the same relative position. */
6097 if (ref)
6098 offset = GET2(cc, 1) << 1;
6099 else
6100 cc += IMM2_SIZE;
6101 type = cc[1 + IMM2_SIZE];
6102
/* OP_CRSTAR is even, so an odd repeat opcode selects the minimizing form. */
6103 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6104 minimize = (type & 0x1) != 0;
/* Decode the repeat limits. A max value of 0 means no upper limit. */
6105 switch(type)
6106 {
6107 case OP_CRSTAR:
6108 case OP_CRMINSTAR:
6109 min = 0;
6110 max = 0;
6111 cc += 1 + IMM2_SIZE + 1;
6112 break;
6113 case OP_CRPLUS:
6114 case OP_CRMINPLUS:
6115 min = 1;
6116 max = 0;
6117 cc += 1 + IMM2_SIZE + 1;
6118 break;
6119 case OP_CRQUERY:
6120 case OP_CRMINQUERY:
6121 min = 0;
6122 max = 1;
6123 cc += 1 + IMM2_SIZE + 1;
6124 break;
6125 case OP_CRRANGE:
6126 case OP_CRMINRANGE:
6127 min = GET2(cc, 1 + IMM2_SIZE + 1);
6128 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6129 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6130 break;
6131 default:
6132 SLJIT_ASSERT_STOP();
6133 break;
6134 }
6135
/* Non-minimizing case: the reference is matched in a loop and the subject
position reached after each repeat is pushed onto the stack. */
6136 if (!minimize)
6137 {
6138 if (min == 0)
6139 {
6140 allocate_stack(common, 2);
6141 if (ref)
6142 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6145 /* Temporary release of STR_PTR. */
6146 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6147 /* Handles both invalid and empty cases. Since the minimum repeat,
6148 is zero the invalid case is basically the same as an empty case. */
6149 if (ref)
6150 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6151 else
6152 {
/* The slot address left in TMP2 by compile_dnref_search is kept in
POSSESSIVE1; it is reloaded before every comparison in the loop. */
6153 compile_dnref_search(common, ccbegin, NULL);
6154 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6156 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6157 }
6158 /* Restore if not zero length. */
6159 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6160 }
6161 else
6162 {
/* min >= 1: an unset group is a failure, so it is added to the
backtrack list instead of being treated as an empty match. */
6163 allocate_stack(common, 1);
6164 if (ref)
6165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6167 if (ref)
6168 {
6169 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6170 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6171 }
6172 else
6173 {
6174 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6177 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6178 }
6179 }
6180
/* POSSESSIVE0 is used as the repeat counter when a limit above 1 exists. */
6181 if (min > 1 || max > 1)
6182 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6183
/* Start of the repeat loop. */
6184 label = LABEL();
6185 if (!ref)
6186 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6187 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6188
6189 if (min > 1 || max > 1)
6190 {
/* Increase the counter. Below min the loop continues without pushing
anything. Below max the position is pushed and the loop continues. */
6191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6192 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6193 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6194 if (min > 1)
6195 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6196 if (max > 1)
6197 {
6198 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6199 allocate_stack(common, 1);
6200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6201 JUMPTO(SLJIT_JUMP, label);
6202 JUMPHERE(jump);
6203 }
6204 }
6205
6206 if (max == 0)
6207 {
6208 /* Includes min > 1 case as well. */
6209 allocate_stack(common, 1);
6210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6211 JUMPTO(SLJIT_JUMP, label);
6212 }
6213
/* An empty (or, when min is 0, unset) capture continues from here. */
6214 JUMPHERE(zerolength);
6215 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6216
6217 count_match(common);
6218 return cc;
6219 }
6220
/* Minimizing case. Stack slots used below: STACK(0) holds 0 or the saved
STR_PTR, STACK(1) is the repeat counter (not initialized for OP_CRMINSTAR)
and STACK(2) keeps the slot address found by compile_dnref_search for
opcodes other than OP_REF / OP_REFI. */
6221 allocate_stack(common, ref ? 2 : 3);
6222 if (ref)
6223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6225 if (type != OP_CRMINSTAR)
6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6227
6228 if (min == 0)
6229 {
6230 /* Handles both invalid and empty cases. Since the minimum repeat,
6231 is zero the invalid case is basically the same as an empty case. */
6232 if (ref)
6233 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6234 else
6235 {
6236 compile_dnref_search(common, ccbegin, NULL);
6237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6239 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6240 }
6241 /* Length is non-zero, we can match real repeats. */
6242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 no repeat is matched now: jump over the code below. */
6243 jump = JUMP(SLJIT_JUMP);
6244 }
6245 else
6246 {
6247 if (ref)
6248 {
6249 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6250 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6251 }
6252 else
6253 {
6254 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6257 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6258 }
6259 }
6260
/* The code from this label matches one more repeat. The label is stored in
the backtrack node and is also the target of the min loop below. */
6261 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6262 if (max > 0)
6263 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6264
6265 if (!ref)
6266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6267 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6269
/* Update the counter in STACK(1); repeat immediately until min is reached. */
6270 if (min > 1)
6271 {
6272 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6273 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6274 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6275 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6276 }
6277 else if (max > 0)
6278 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6279
6280 if (jump != NULL)
6281 JUMPHERE(jump);
6282 JUMPHERE(zerolength);
6283
6284 count_match(common);
6285 return cc;
6286 }
6287
/* Emits the matching path of a recursion opcode.

  cc      points to the recursion opcode; GET(cc, 1) is the offset of the
          referenced group, relative to common->start.
  parent  backtrack node under which the new recurse_backtrack is pushed.

Returns cc + 1 + LINK_SIZE. NULL is returned when the allocation of a new
recurse_entry reports a compiler error; PUSH_BACKTRACK is also invoked with
NULL as its error value (the macro is defined elsewhere). */
6288 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6289 {
6290 DEFINE_COMPILER;
6291 backtrack_common *backtrack;
6292 recurse_entry *entry = common->entries;
6293 recurse_entry *prev = NULL;
6294 sljit_sw start = GET(cc, 1);
6295 pcre_uchar *start_cc;
6296 BOOL needs_control_head;
6297
6298 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6299
6300 /* Inlining simple patterns. */
/* When get_framesize() reports no_stack the body of the referenced group is
compiled in place and no call is emitted. */
6301 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6302 {
6303 start_cc = common->start + start;
6304 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6305 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6306 return cc + 1 + LINK_SIZE;
6307 }
6308
/* Search the list of known recursion entries for the same start offset. */
6309 while (entry != NULL)
6310 {
6311 if (entry->start == start)
6312 break;
6313 prev = entry;
6314 entry = entry->next;
6315 }
6316
/* Not found: create a new entry and append it to the end of the list. */
6317 if (entry == NULL)
6318 {
6319 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6320 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6321 return NULL;
6322 entry->next = NULL;
6323 entry->entry = NULL;
6324 entry->calls = NULL;
6325 entry->start = start;
6326
6327 if (prev != NULL)
6328 prev->next = entry;
6329 else
6330 common->entries = entry;
6331 }
6332
/* Push OVECTOR(0) and / or the value stored at mark_ptr onto the stack
before the call, depending on which of them the pattern uses. */
6333 if (common->has_set_som && common->mark_ptr != 0)
6334 {
6335 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6336 allocate_stack(common, 2);
6337 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6340 }
6341 else if (common->has_set_som || common->mark_ptr != 0)
6342 {
6343 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6344 allocate_stack(common, 1);
6345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6346 }
6347
/* Call the recursion. When its label does not exist yet, the call is
recorded in entry->calls (presumably resolved once the entry is compiled). */
6348 if (entry->entry == NULL)
6349 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6350 else
6351 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6352 /* Leave if the match is failed. */
/* A zero TMP1 after the call is treated as failure. */
6353 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6354 return cc + 1 + LINK_SIZE;
6355 }
6356
6357 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6358 {
6359 const pcre_uchar *begin = arguments->begin;
6360 int *offset_vector = arguments->offsets;
6361 int offset_count = arguments->offset_count;
6362 int i;
6363
6364 if (PUBL(callout) == NULL)
6365 return 0;
6366
6367 callout_block->version = 2;
6368 callout_block->callout_data = arguments->callout_data;
6369
6370 /* Offsets in subject. */
6371 callout_block->subject_length = arguments->end - arguments->begin;
6372 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6373 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6374 #if defined COMPILE_PCRE8
6375 callout_block->subject = (PCRE_SPTR)begin;
6376 #elif defined COMPILE_PCRE16
6377 callout_block->subject = (PCRE_SPTR16)begin;
6378 #elif defined COMPILE_PCRE32
6379 callout_block->subject = (PCRE_SPTR32)begin;
6380 #endif
6381
6382 /* Convert and copy the JIT offset vector to the offset_vector array. */
6383 callout_block->capture_top = 0;
6384 callout_block->offset_vector = offset_vector;
6385 for (i = 2; i < offset_count; i += 2)
6386 {
6387 offset_vector[i] = jit_ovector[i] - begin;
6388 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6389 if (jit_ovector[i] >= begin)
6390 callout_block->capture_top = i;
6391 }
6392
6393 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6394 if (offset_count > 0)
6395 offset_vector[0] = -1;
6396 if (offset_count > 1)
6397 offset_vector[1] = -1;
6398 return (*PUBL(callout))(callout_block);
6399 }
6400
6401 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
6402 #define CALLOUT_ARG_SIZE \
6403 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6404
/* Offset of a callout block field relative to STACK_TOP. The block starts
CALLOUT_ARG_SIZE bytes below STACK_TOP (see the address passed to do_callout). */
6405 #define CALLOUT_ARG_OFFSET(arg) \
6406 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6407
/* Emits the matching path of a callout opcode: a callout block is built on
the stack, do_callout() is called and its return value is checked.

  cc      points to the callout opcode. cc[1] is the callout number,
          GET(cc, 2) the pattern position and GET(cc, 2 + LINK_SIZE) the
          length of the next item.
  parent  backtrack node under which the new backtrack_common is pushed.

Returns cc + 2 + 2 * LINK_SIZE. PUSH_BACKTRACK is invoked with NULL as its
error value (the macro is defined elsewhere). */
6408 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6409 {
6410 DEFINE_COMPILER;
6411 backtrack_common *backtrack;
6412
6413 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6414
/* Reserve stack space for the callout block. */
6415 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6416
6417 SLJIT_ASSERT(common->capture_last_ptr != 0);
6418 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6419 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6420 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6421 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6422
6423 /* These pointer sized fields temporarily store internal variables:
offset_vector holds STR_PTR and subject holds OVECTOR(0). do_callout()
converts them to offsets before setting the real field values. */
6424 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6427
/* The mark field is loaded from the jit_arguments structure when the
pattern uses marks, otherwise it is set to 0. */
6428 if (common->mark_ptr != 0)
6429 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6430 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6431 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6433
6434 /* Needed to save important temporary registers. */
6435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
/* R1 is the address of the callout block and R2 the address of the JIT
offset vector. TMP1, which holds ARGUMENTS, presumably serves as the
first argument (compile_ref_matchingpath asserts TMP1 == SLJIT_R0). */
6436 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6437 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6438 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably widens the int return value to a full
register - confirm against the sljit documentation. */
6439 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6440 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6441 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6442
6443 /* Check return value. */
/* A positive value backtracks, a negative value jumps to the forced quit
code and zero continues the match. */
6444 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6445 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6446 if (common->forced_quit_label == NULL)
6447 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6448 else
6449 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6450 return cc + 2 + 2 * LINK_SIZE;
6451 }
6452
6453 #undef CALLOUT_ARG_SIZE
6454 #undef CALLOUT_ARG_OFFSET
6455
6456 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6457 {
6458 DEFINE_COMPILER;
6459 int framesize;
6460 int extrasize;
6461 BOOL needs_control_head;
6462 int private_data_ptr;
6463 backtrack_common altbacktrack;
6464 pcre_uchar *ccbegin;
6465 pcre_uchar opcode;
6466 pcre_uchar bra = OP_BRA;
6467 jump_list *tmp = NULL;
6468 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6469 jump_list **found;
6470 /* Saving previous accept variables. */
6471 BOOL save_local_exit = common->local_exit;
6472 BOOL save_positive_assert = common->positive_assert;
6473 then_trap_backtrack *save_then_trap = common->then_trap;
6474 struct sljit_label *save_quit_label = common->quit_label;
6475 struct sljit_label *save_accept_label = common->accept_label;
6476 jump_list *save_quit = common->quit;
6477 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6478 jump_list *save_accept = common->accept;
6479 struct sljit_jump *jump;
6480 struct sljit_jump *brajump = NULL;
6481
6482 /* Assert captures then. */
6483 common->then_trap = NULL;
6484
6485 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6486 {
6487 SLJIT_ASSERT(!conditional);
6488 bra = *cc;
6489 cc++;
6490 }
6491 private_data_ptr = PRIVATE_DATA(cc);
6492 SLJIT_ASSERT(private_data_ptr != 0);
6493 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6494 backtrack->framesize = framesize;
6495 backtrack->private_data_ptr = private_data_ptr;
6496 opcode = *cc;
6497 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6498 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6499 ccbegin = cc;
6500 cc += GET(cc, 1);
6501
6502 if (bra == OP_BRAMINZERO)
6503 {
6504 /* This is a braminzero backtrack path. */
6505 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6506 free_stack(common, 1);
6507 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6508 }
6509
6510 if (framesize < 0)
6511 {
6512 extrasize = needs_control_head ? 2 : 1;
6513 if (framesize == no_frame)
6514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6515 allocate_stack(common, extrasize);
6516 if (needs_control_head)
6517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6519 if (needs_control_head)
6520 {
6521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6522 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6523 }
6524 }
6525 else
6526 {
6527 extrasize = needs_control_head ? 3 : 2;
6528 allocate_stack(common, framesize + extrasize);
6529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6530 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6532 if (needs_control_head)
6533 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6535 if (needs_control_head)
6536 {
6537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6538 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6539 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6540 }
6541 else
6542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6543 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6544 }
6545
6546 memset(&altbacktrack, 0, sizeof(backtrack_common));
6547 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6548 {
6549 /* Negative assert is stronger than positive assert. */
6550 common->local_exit = TRUE;
6551 common->quit_label = NULL;
6552 common->quit = NULL;
6553 common->positive_assert = FALSE;
6554 }
6555 else
6556 common->positive_assert = TRUE;
6557 common->positive_assert_quit = NULL;
6558
6559 while (1)
6560 {
6561 common->accept_label = NULL;
6562 common->accept = NULL;
6563 altbacktrack.top = NULL;
6564 altbacktrack.topbacktracks = NULL;
6565
6566 if (*ccbegin == OP_ALT)
6567 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6568
6569 altbacktrack.cc = ccbegin;
6570 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6571 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6572 {
6573 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6574 {
6575 common->local_exit = save_local_exit;
6576 common->quit_label = save_quit_label;
6577 common->quit = save_quit;
6578 }
6579 common->positive_assert = save_positive_assert;
6580 common->then_trap = save_then_trap;
6581 common->accept_label = save_accept_label;
6582 common->positive_assert_quit = save_positive_assert_quit;
6583 common->accept = save_accept;
6584 return NULL;
6585 }
6586 common->accept_label = LABEL();
6587 if (common->accept != NULL)
6588 set_jumps(common->accept, common->accept_label);
6589
6590 /* Reset stack. */
6591 if (framesize < 0)
6592 {
6593 if (framesize == no_frame)
6594 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6595 else
6596 free_stack(common, extrasize);
6597 if (needs_control_head)
6598 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6599 }
6600 else
6601 {
6602 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6603 {
6604 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6605 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6606 if (needs_control_head)
6607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6608 }
6609 else
6610 {
6611 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6612 if (needs_control_head)
6613 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6614 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6615 }
6616 }
6617
6618 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6619 {
6620 /* We know that STR_PTR was stored on the top of the stack. */
6621 if (conditional)
6622 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6623 else if (bra == OP_BRAZERO)
6624 {
6625 if (framesize < 0)
6626 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6627 else
6628 {
6629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6630 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6632 }
6633 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6634 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6635 }
6636 else if (framesize >= 0)
6637 {
6638 /* For OP_BRA and OP_BRAMINZERO. */
6639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6640 }
6641 }
6642 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6643
6644 compile_backtrackingpath(common, altbacktrack.top);
6645 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6646 {
6647 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6648 {
6649 common->local_exit = save_local_exit;
6650 common->quit_label = save_quit_label;
6651 common->quit = save_quit;
6652 }
6653 common->positive_assert = save_positive_assert;
6654 common->then_trap = save_then_trap;
6655 common->accept_label = save_accept_label;
6656 common->positive_assert_quit = save_positive_assert_quit;
6657 common->accept = save_accept;
6658 return NULL;
6659 }
6660 set_jumps(altbacktrack.topbacktracks, LABEL());
6661
6662 if (*cc != OP_ALT)
6663 break;
6664
6665 ccbegin = cc;
6666 cc += GET(cc, 1);
6667 }
6668
6669 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6670 {
6671 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6672 /* Makes the check less complicated below. */
6673 common->positive_assert_quit = common->quit;
6674 }
6675
6676 /* None of them matched. */
6677 if (common->positive_assert_quit != NULL)
6678 {
6679 jump = JUMP(SLJIT_JUMP);
6680 set_jumps(common->positive_assert_quit, LABEL());
6681 SLJIT_ASSERT(framesize != no_stack);
6682 if (framesize < 0)
6683 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6684 else
6685 {
6686 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6687 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6688 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6689 }
6690 JUMPHERE(jump);
6691 }
6692
6693 if (needs_control_head)
6694 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6695
6696 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6697 {
6698 /* Assert is failed. */
6699 if (conditional || bra == OP_BRAZERO)
6700 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6701
6702 if (framesize < 0)
6703 {
6704 /* The topmost item should be 0. */
6705 if (bra == OP_BRAZERO)
6706 {
6707 if (extrasize == 2)
6708 free_stack(common, 1);
6709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6710 }
6711 else
6712 free_stack(common, extrasize);
6713 }
6714 else
6715 {
6716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6717 /* The topmost item should be 0. */
6718 if (bra == OP_BRAZERO)
6719 {
6720 free_stack(common, framesize + extrasize - 1);
6721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6722 }
6723 else
6724 free_stack(common, framesize + extrasize);
6725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6726 }
6727 jump = JUMP(SLJIT_JUMP);
6728 if (bra != OP_BRAZERO)
6729 add_jump(compiler, target, jump);
6730
6731 /* Assert is successful. */
6732 set_jumps(tmp, LABEL());
6733 if (framesize < 0)
6734 {
6735 /* We know that STR_PTR was stored on the top of the stack. */
6736 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6737 /* Keep the STR_PTR on the top of the stack. */
6738 if (bra == OP_BRAZERO)
6739 {
6740 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6741 if (extrasize == 2)
6742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6743 }
6744 else if (bra == OP_BRAMINZERO)
6745 {
6746 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6747 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6748 }
6749 }
6750 else
6751 {
6752 if (bra == OP_BRA)
6753 {
6754 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6755 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6756 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6757 }
6758 else
6759 {
6760 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6761 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6762 if (extrasize == 2)
6763 {
6764 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6765 if (bra == OP_BRAMINZERO)
6766 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6767 }
6768 else
6769 {
6770 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6771 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6772 }
6773 }
6774 }
6775
6776 if (bra == OP_BRAZERO)
6777 {
6778 backtrack->matchingpath = LABEL();