/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1623 - (show annotations)
Thu Feb 4 17:13:10 2016 UTC (3 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 333208 byte(s)
JIT code generator for assertion matching is refactored to a separate function.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Defines for debugging purposes. Both switches below are left disabled. */

/* 1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
   Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
   Only active when SLJIT_DEBUG is defined to a non-zero value. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
Short summary about the backtracking mechanism employed by the jit code generator:
92
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;

/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values used where a frame size would otherwise appear. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};

/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
/* NOTE(review): presumably the largest number of range boundaries handled
by the range checks - confirm at the point of use. */
#define MAX_RANGE_SIZE 4
317
318 typedef struct compiler_common {
319 /* The sljit ceneric compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
334 int cbra_ptr;
335 /* Output vector starting point. Must be divisible by 2. */
336 int ovector_start;
337 /* Last known position of the requested byte. */
338 int req_char_ptr;
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
342 int start_used_ptr;
343 /* Starting pointer for partial soft matches. */
344 int hit_start;
345 /* End pointer of the first line. */
346 int first_line_end;
347 /* Points to the marked string. */
348 int mark_ptr;
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
354 int start_ptr;
355
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
358 sljit_sw lcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
360 int mode;
361 /* TRUE, when minlength is greater than 0. */
362 BOOL might_be_empty;
363 /* \K is found in the pattern. */
364 BOOL has_set_som;
365 /* (*SKIP:arg) is found in the pattern. */
366 BOOL has_skip_arg;
367 /* (*THEN) is found in the pattern. */
368 BOOL has_then;
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
372 BOOL local_exit;
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
376 int nltype;
377 pcre_uint32 nlmax;
378 pcre_uint32 nlmin;
379 int newline;
380 int bsr_nltype;
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
384 int endonly;
385 /* Tables. */
386 sljit_sw ctypes;
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
389 sljit_sw name_count;
390 sljit_sw name_entry_size;
391
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
398 stub_list *stubs;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
403 jump_list *quit;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
406 jump_list *accept;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
412 jump_list *hspace;
413 jump_list *vspace;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
417 BOOL jscript_compat;
418 #ifdef SUPPORT_UTF
419 BOOL utf;
420 #ifdef SUPPORT_UCP
421 BOOL use_ucp;
422 #endif
423 #ifdef COMPILE_PCRE8
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
427 #endif
428 #endif /* SUPPORT_UTF */
429 #ifdef SUPPORT_UCP
430 jump_list *getucd;
431 #endif
432 } compiler_common;
433
/* For byte_sequence_compare. */

/* The two unions (c and oc) have the same layout: one machine word that can
be viewed as an int, a short, or an array of code units whose element type
depends on the PCRE code unit width. They exist only when SLJIT_UNALIGNED
is defined to a non-zero value. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
467
/* Undefine sljit macros. CMP is redefined as a shortcut macro later in
this file. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw). */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register assignment: symbolic names for the sljit registers. */
#define TMP1          SLJIT_R0
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Local space layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. These macros expect a variable named "common" in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
502
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
512 #else
513 #error Unsupported compiling mode
514 #endif
515
/* Shortcuts. DEFINE_COMPILER declares a local "compiler" taken from
common->compiler; every other macro here passes that local to the
corresponding sljit_* function, so DEFINE_COMPILER (or an equivalent
declaration) must come first in any function that uses them. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* NOTE(review): presumably the "no upper bound" value for character reads -
confirm at the point of use. */
#define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Number of set bits in each 4-bit value: entry v holds the population
count of v for v in 0..15. The table is read-only, hence const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
/* Functions which might need modification for every newly supported opcode:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
675 return cc + PRIV(OP_lengths)[*cc];
676
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
924 {
925 pcre_uchar *end = bracketend(begin);
926 pcre_uchar *next;
927 pcre_uchar *next_end;
928 pcre_uchar *max_end;
929 pcre_uchar type;
930 sljit_sw length = end - begin;
931 int min, max, i;
932
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
935 return FALSE;
936
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
939 return TRUE;
940
941 next = end;
942 min = 1;
943 while (1)
944 {
945 if (*next != *begin)
946 break;
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
949 break;
950 next = next_end;
951 min++;
952 }
953
954 if (min == 2)
955 return FALSE;
956
957 max = 0;
958 max_end = next;
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
960 {
961 type = *next;
962 while (1)
963 {
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
965 break;
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
968 break;
969 next = next_end;
970 max++;
971 }
972
973 if (next[0] == type && next[1] == *begin && max >= 1)
974 {
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
977 {
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
980 break;
981
982 if (i == max)
983 {
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
988 if (min == 1)
989 return TRUE;
990 min--;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
992 }
993 }
994 }
995 }
996
997 if (min >= 3)
998 {
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1002 return TRUE;
1003 }
1004
1005 return FALSE;
1006 }
1007
/* Groups of case labels for switch statements over the single character
and character type repeat opcodes. Each macro expands to a run of "case"
labels and is followed directly by the statements for that group.
NOTE(review): the _1 / _2A / _2B suffixes presumably reflect how many
private data words the group needs - confirm at the points of use. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:

1059
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067 BOOL repeat_check = TRUE;
1068
1069 while (cc < ccend)
1070 {
1071 space = 0;
1072 size = 0;
1073 bracketlen = 0;
1074 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1075 break;
1076
1077 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1078 {
1079 if (detect_repeat(common, cc))
1080 {
1081 /* These brackets are converted to repeats, so no global
1082 based single character repeat is allowed. */
1083 if (cc >= end)
1084 end = bracketend(cc);
1085 }
1086 }
1087 repeat_check = TRUE;
1088
1089 switch(*cc)
1090 {
1091 case OP_KET:
1092 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1093 {
1094 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1095 private_data_ptr += sizeof(sljit_sw);
1096 cc += common->private_data_ptrs[cc + 1 - common->start];
1097 }
1098 cc += 1 + LINK_SIZE;
1099 break;
1100
1101 case OP_ASSERT:
1102 case OP_ASSERT_NOT:
1103 case OP_ASSERTBACK:
1104 case OP_ASSERTBACK_NOT:
1105 case OP_ONCE:
1106 case OP_ONCE_NC:
1107 case OP_BRAPOS:
1108 case OP_SBRA:
1109 case OP_SBRAPOS:
1110 case OP_SCOND:
1111 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1112 private_data_ptr += sizeof(sljit_sw);
1113 bracketlen = 1 + LINK_SIZE;
1114 break;
1115
1116 case OP_CBRAPOS:
1117 case OP_SCBRAPOS:
1118 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1119 private_data_ptr += sizeof(sljit_sw);
1120 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1121 break;
1122
1123 case OP_COND:
1124 /* Might be a hidden SCOND. */
1125 alternative = cc + GET(cc, 1);
1126 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1127 {
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 }
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_BRA:
1135 bracketlen = 1 + LINK_SIZE;
1136 break;
1137
1138 case OP_CBRA:
1139 case OP_SCBRA:
1140 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1141 break;
1142
1143 case OP_BRAZERO:
1144 case OP_BRAMINZERO:
1145 case OP_BRAPOSZERO:
1146 repeat_check = FALSE;
1147 size = 1;
1148 break;
1149
1150 CASE_ITERATOR_PRIVATE_DATA_1
1151 space = 1;
1152 size = -2;
1153 break;
1154
1155 CASE_ITERATOR_PRIVATE_DATA_2A
1156 space = 2;
1157 size = -2;
1158 break;
1159
1160 CASE_ITERATOR_PRIVATE_DATA_2B
1161 space = 2;
1162 size = -(2 + IMM2_SIZE);
1163 break;
1164
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1166 space = 1;
1167 size = 1;
1168 break;
1169
1170 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1171 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1172 space = 2;
1173 size = 1;
1174 break;
1175
1176 case OP_TYPEUPTO:
1177 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1178 space = 2;
1179 size = 1 + IMM2_SIZE;
1180 break;
1181
1182 case OP_TYPEMINUPTO:
1183 space = 2;
1184 size = 1 + IMM2_SIZE;
1185 break;
1186
1187 case OP_CLASS:
1188 case OP_NCLASS:
1189 size += 1 + 32 / sizeof(pcre_uchar);
1190 space = get_class_iterator_size(cc + size);
1191 break;
1192
1193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1194 case OP_XCLASS:
1195 size = GET(cc, 1);
1196 space = get_class_iterator_size(cc + size);
1197 break;
1198 #endif
1199
1200 default:
1201 cc = next_opcode(common, cc);
1202 SLJIT_ASSERT(cc != NULL);
1203 break;
1204 }
1205
1206 /* Character iterators, which are not inside a repeated bracket,
1207 gets a private slot instead of allocating it on the stack. */
1208 if (space > 0 && cc >= end)
1209 {
1210 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1211 private_data_ptr += sizeof(sljit_sw) * space;
1212 }
1213
1214 if (size != 0)
1215 {
1216 if (size < 0)
1217 {
1218 cc += -size;
1219 #ifdef SUPPORT_UTF
1220 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1221 #endif
1222 }
1223 else
1224 cc += size;
1225 }
1226
1227 if (bracketlen > 0)
1228 {
1229 if (cc >= end)
1230 {
1231 end = bracketend(cc);
1232 if (end[-1 - LINK_SIZE] == OP_KET)
1233 end = NULL;
1234 }
1235 cc += bracketlen;
1236 }
1237 }
1238 *private_data_start = private_data_ptr;
1239 }
1240
1241 /* Returns with a frame_types (always < 0) if no need for frame. */
1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1243 {
1244 int length = 0;
1245 int possessive = 0;
1246 BOOL stack_restore = FALSE;
1247 BOOL setsom_found = recursive;
1248 BOOL setmark_found = recursive;
1249 /* The last capture is a local variable even for recursions. */
1250 BOOL capture_last_found = FALSE;
1251
1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1253 SLJIT_ASSERT(common->control_head_ptr != 0);
1254 *needs_control_head = TRUE;
1255 #else
1256 *needs_control_head = FALSE;
1257 #endif
1258
1259 if (ccend == NULL)
1260 {
1261 ccend = bracketend(cc) - (1 + LINK_SIZE);
1262 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1263 {
1264 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1265 /* This is correct regardless of common->capture_last_ptr. */
1266 capture_last_found = TRUE;
1267 }
1268 cc = next_opcode(common, cc);
1269 }
1270
1271 SLJIT_ASSERT(cc != NULL);
1272 while (cc < ccend)
1273 switch(*cc)
1274 {
1275 case OP_SET_SOM:
1276 SLJIT_ASSERT(common->has_set_som);
1277 stack_restore = TRUE;
1278 if (!setsom_found)
1279 {
1280 length += 2;
1281 setsom_found = TRUE;
1282 }
1283 cc += 1;
1284 break;
1285
1286 case OP_MARK:
1287 case OP_PRUNE_ARG:
1288 case OP_THEN_ARG:
1289 SLJIT_ASSERT(common->mark_ptr != 0);
1290 stack_restore = TRUE;
1291 if (!setmark_found)
1292 {
1293 length += 2;
1294 setmark_found = TRUE;
1295 }
1296 if (common->control_head_ptr != 0)
1297 *needs_control_head = TRUE;
1298 cc += 1 + 2 + cc[1];
1299 break;
1300
1301 case OP_RECURSE:
1302 stack_restore = TRUE;
1303 if (common->has_set_som && !setsom_found)
1304 {
1305 length += 2;
1306 setsom_found = TRUE;
1307 }
1308 if (common->mark_ptr != 0 && !setmark_found)
1309 {
1310 length += 2;
1311 setmark_found = TRUE;
1312 }
1313 if (common->capture_last_ptr != 0 && !capture_last_found)
1314 {
1315 length += 2;
1316 capture_last_found = TRUE;
1317 }
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 case OP_CBRA:
1322 case OP_CBRAPOS:
1323 case OP_SCBRA:
1324 case OP_SCBRAPOS:
1325 stack_restore = TRUE;
1326 if (common->capture_last_ptr != 0 && !capture_last_found)
1327 {
1328 length += 2;
1329 capture_last_found = TRUE;
1330 }
1331 length += 3;
1332 cc += 1 + LINK_SIZE + IMM2_SIZE;
1333 break;
1334
1335 case OP_THEN:
1336 stack_restore = TRUE;
1337 if (common->control_head_ptr != 0)
1338 *needs_control_head = TRUE;
1339 cc ++;
1340 break;
1341
1342 default:
1343 stack_restore = TRUE;
1344 /* Fall through. */
1345
1346 case OP_NOT_WORD_BOUNDARY:
1347 case OP_WORD_BOUNDARY:
1348 case OP_NOT_DIGIT:
1349 case OP_DIGIT:
1350 case OP_NOT_WHITESPACE:
1351 case OP_WHITESPACE:
1352 case OP_NOT_WORDCHAR:
1353 case OP_WORDCHAR:
1354 case OP_ANY:
1355 case OP_ALLANY:
1356 case OP_ANYBYTE:
1357 case OP_NOTPROP:
1358 case OP_PROP:
1359 case OP_ANYNL:
1360 case OP_NOT_HSPACE:
1361 case OP_HSPACE:
1362 case OP_NOT_VSPACE:
1363 case OP_VSPACE:
1364 case OP_EXTUNI:
1365 case OP_EODN:
1366 case OP_EOD:
1367 case OP_CIRC:
1368 case OP_CIRCM:
1369 case OP_DOLL:
1370 case OP_DOLLM:
1371 case OP_CHAR:
1372 case OP_CHARI:
1373 case OP_NOT:
1374 case OP_NOTI:
1375
1376 case OP_EXACT:
1377 case OP_POSSTAR:
1378 case OP_POSPLUS:
1379 case OP_POSQUERY:
1380 case OP_POSUPTO:
1381
1382 case OP_EXACTI:
1383 case OP_POSSTARI:
1384 case OP_POSPLUSI:
1385 case OP_POSQUERYI:
1386 case OP_POSUPTOI:
1387
1388 case OP_NOTEXACT:
1389 case OP_NOTPOSSTAR:
1390 case OP_NOTPOSPLUS:
1391 case OP_NOTPOSQUERY:
1392 case OP_NOTPOSUPTO:
1393
1394 case OP_NOTEXACTI:
1395 case OP_NOTPOSSTARI:
1396 case OP_NOTPOSPLUSI:
1397 case OP_NOTPOSQUERYI:
1398 case OP_NOTPOSUPTOI:
1399
1400 case OP_TYPEEXACT:
1401 case OP_TYPEPOSSTAR:
1402 case OP_TYPEPOSPLUS:
1403 case OP_TYPEPOSQUERY:
1404 case OP_TYPEPOSUPTO:
1405
1406 case OP_CLASS:
1407 case OP_NCLASS:
1408 case OP_XCLASS:
1409
1410 cc = next_opcode(common, cc);
1411 SLJIT_ASSERT(cc != NULL);
1412 break;
1413 }
1414
1415 /* Possessive quantifiers can use a special case. */
1416 if (SLJIT_UNLIKELY(possessive == length))
1417 return stack_restore ? no_frame : no_stack;
1418
1419 if (length > 0)
1420 return length + 1;
1421 return stack_restore ? no_frame : no_stack;
1422 }
1423
1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1425 {
1426 DEFINE_COMPILER;
1427 BOOL setsom_found = recursive;
1428 BOOL setmark_found = recursive;
1429 /* The last capture is a local variable even for recursions. */
1430 BOOL capture_last_found = FALSE;
1431 int offset;
1432
1433 /* >= 1 + shortest item size (2) */
1434 SLJIT_UNUSED_ARG(stacktop);
1435 SLJIT_ASSERT(stackpos >= stacktop + 2);
1436
1437 stackpos = STACK(stackpos);
1438 if (ccend == NULL)
1439 {
1440 ccend = bracketend(cc) - (1 + LINK_SIZE);
1441 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1442 cc = next_opcode(common, cc);
1443 }
1444
1445 SLJIT_ASSERT(cc != NULL);
1446 while (cc < ccend)
1447 switch(*cc)
1448 {
1449 case OP_SET_SOM:
1450 SLJIT_ASSERT(common->has_set_som);
1451 if (!setsom_found)
1452 {
1453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1455 stackpos += (int)sizeof(sljit_sw);
1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1457 stackpos += (int)sizeof(sljit_sw);
1458 setsom_found = TRUE;
1459 }
1460 cc += 1;
1461 break;
1462
1463 case OP_MARK:
1464 case OP_PRUNE_ARG:
1465 case OP_THEN_ARG:
1466 SLJIT_ASSERT(common->mark_ptr != 0);
1467 if (!setmark_found)
1468 {
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 setmark_found = TRUE;
1475 }
1476 cc += 1 + 2 + cc[1];
1477 break;
1478
1479 case OP_RECURSE:
1480 if (common->has_set_som && !setsom_found)
1481 {
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setsom_found = TRUE;
1488 }
1489 if (common->mark_ptr != 0 && !setmark_found)
1490 {
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 setmark_found = TRUE;
1497 }
1498 if (common->capture_last_ptr != 0 && !capture_last_found)
1499 {
1500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1504 stackpos += (int)sizeof(sljit_sw);
1505 capture_last_found = TRUE;
1506 }
1507 cc += 1 + LINK_SIZE;
1508 break;
1509
1510 case OP_CBRA:
1511 case OP_CBRAPOS:
1512 case OP_SCBRA:
1513 case OP_SCBRAPOS:
1514 if (common->capture_last_ptr != 0 && !capture_last_found)
1515 {
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1518 stackpos += (int)sizeof(sljit_sw);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 capture_last_found = TRUE;
1522 }
1523 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1525 stackpos += (int)sizeof(sljit_sw);
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1529 stackpos += (int)sizeof(sljit_sw);
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1531 stackpos += (int)sizeof(sljit_sw);
1532
1533 cc += 1 + LINK_SIZE + IMM2_SIZE;
1534 break;
1535
1536 default:
1537 cc = next_opcode(common, cc);
1538 SLJIT_ASSERT(cc != NULL);
1539 break;
1540 }
1541
1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1543 SLJIT_ASSERT(stackpos == STACK(stacktop));
1544 }
1545
1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1547 {
1548 int private_data_length = needs_control_head ? 3 : 2;
1549 int size;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
1552 while (cc < ccend)
1553 {
1554 size = 0;
1555 switch(*cc)
1556 {
1557 case OP_KET:
1558 if (PRIVATE_DATA(cc) != 0)
1559 {
1560 private_data_length++;
1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562 cc += PRIVATE_DATA(cc + 1);
1563 }
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_ASSERT:
1568 case OP_ASSERT_NOT:
1569 case OP_ASSERTBACK:
1570 case OP_ASSERTBACK_NOT:
1571 case OP_ONCE:
1572 case OP_ONCE_NC:
1573 case OP_BRAPOS:
1574 case OP_SBRA:
1575 case OP_SBRAPOS:
1576 case OP_SCOND:
1577 private_data_length++;
1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579 cc += 1 + LINK_SIZE;
1580 break;
1581
1582 case OP_CBRA:
1583 case OP_SCBRA:
1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE + IMM2_SIZE;
1587 break;
1588
1589 case OP_CBRAPOS:
1590 case OP_SCBRAPOS:
1591 private_data_length += 2;
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 private_data_length++;
1600 cc += 1 + LINK_SIZE;
1601 break;
1602
1603 CASE_ITERATOR_PRIVATE_DATA_1
1604 if (PRIVATE_DATA(cc))
1605 private_data_length++;
1606 cc += 2;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_PRIVATE_DATA_2A
1613 if (PRIVATE_DATA(cc))
1614 private_data_length += 2;
1615 cc += 2;
1616 #ifdef SUPPORT_UTF
1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1618 #endif
1619 break;
1620
1621 CASE_ITERATOR_PRIVATE_DATA_2B
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 2 + IMM2_SIZE;
1625 #ifdef SUPPORT_UTF
1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1627 #endif
1628 break;
1629
1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631 if (PRIVATE_DATA(cc))
1632 private_data_length++;
1633 cc += 1;
1634 break;
1635
1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637 if (PRIVATE_DATA(cc))
1638 private_data_length += 2;
1639 cc += 1;
1640 break;
1641
1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643 if (PRIVATE_DATA(cc))
1644 private_data_length += 2;
1645 cc += 1 + IMM2_SIZE;
1646 break;
1647
1648 case OP_CLASS:
1649 case OP_NCLASS:
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1651 case OP_XCLASS:
1652 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1653 #else
1654 size = 1 + 32 / (int)sizeof(pcre_uchar);
1655 #endif
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += get_class_iterator_size(cc + size);
1658 cc += size;
1659 break;
1660
1661 default:
1662 cc = next_opcode(common, cc);
1663 SLJIT_ASSERT(cc != NULL);
1664 break;
1665 }
1666 }
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
1669 }
1670
1671 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1672 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1673 {
1674 DEFINE_COMPILER;
1675 int srcw[2];
1676 int count, size;
1677 BOOL tmp1next = TRUE;
1678 BOOL tmp1empty = TRUE;
1679 BOOL tmp2empty = TRUE;
1680 pcre_uchar *alternative;
1681 enum {
1682 start,
1683 loop,
1684 end
1685 } status;
1686
1687 status = save ? start : loop;
1688 stackptr = STACK(stackptr - 2);
1689 stacktop = STACK(stacktop - 1);
1690
1691 if (!save)
1692 {
1693 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1694 if (stackptr < stacktop)
1695 {
1696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1697 stackptr += sizeof(sljit_sw);
1698 tmp1empty = FALSE;
1699 }
1700 if (stackptr < stacktop)
1701 {
1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1703 stackptr += sizeof(sljit_sw);
1704 tmp2empty = FALSE;
1705 }
1706 /* The tmp1next must be TRUE in either way. */
1707 }
1708
1709 do
1710 {
1711 count = 0;
1712 switch(status)
1713 {
1714 case start:
1715 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1716 count = 1;
1717 srcw[0] = common->recursive_head_ptr;
1718 if (needs_control_head)
1719 {
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1721 count = 2;
1722 srcw[1] = common->control_head_ptr;
1723 }
1724 status = loop;
1725 break;
1726
1727 case loop:
1728 if (cc >= ccend)
1729 {
1730 status = end;
1731 break;
1732 }
1733
1734 switch(*cc)
1735 {
1736 case OP_KET:
1737 if (PRIVATE_DATA(cc) != 0)
1738 {
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1742 cc += PRIVATE_DATA(cc + 1);
1743 }
1744 cc += 1 + LINK_SIZE;
1745 break;
1746
1747 case OP_ASSERT:
1748 case OP_ASSERT_NOT:
1749 case OP_ASSERTBACK:
1750 case OP_ASSERTBACK_NOT:
1751 case OP_ONCE:
1752 case OP_ONCE_NC:
1753 case OP_BRAPOS:
1754 case OP_SBRA:
1755 case OP_SBRAPOS:
1756 case OP_SCOND:
1757 count = 1;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 SLJIT_ASSERT(srcw[0] != 0);
1760 cc += 1 + LINK_SIZE;
1761 break;
1762
1763 case OP_CBRA:
1764 case OP_SCBRA:
1765 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1766 {
1767 count = 1;
1768 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1769 }
1770 cc += 1 + LINK_SIZE + IMM2_SIZE;
1771 break;
1772
1773 case OP_CBRAPOS:
1774 case OP_SCBRAPOS:
1775 count = 2;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1778 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1779 cc += 1 + LINK_SIZE + IMM2_SIZE;
1780 break;
1781
1782 case OP_COND:
1783 /* Might be a hidden SCOND. */
1784 alternative = cc + GET(cc, 1);
1785 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1786 {
1787 count = 1;
1788 srcw[0] = PRIVATE_DATA(cc);
1789 SLJIT_ASSERT(srcw[0] != 0);
1790 }
1791 cc += 1 + LINK_SIZE;
1792 break;
1793
1794 CASE_ITERATOR_PRIVATE_DATA_1
1795 if (PRIVATE_DATA(cc))
1796 {
1797 count = 1;
1798 srcw[0] = PRIVATE_DATA(cc);
1799 }
1800 cc += 2;
1801 #ifdef SUPPORT_UTF
1802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1803 #endif
1804 break;
1805
1806 CASE_ITERATOR_PRIVATE_DATA_2A
1807 if (PRIVATE_DATA(cc))
1808 {
1809 count = 2;
1810 srcw[0] = PRIVATE_DATA(cc);
1811 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1812 }
1813 cc += 2;
1814 #ifdef SUPPORT_UTF
1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1816 #endif
1817 break;
1818
1819 CASE_ITERATOR_PRIVATE_DATA_2B
1820 if (PRIVATE_DATA(cc))
1821 {
1822 count = 2;
1823 srcw[0] = PRIVATE_DATA(cc);
1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1825 }
1826 cc += 2 + IMM2_SIZE;
1827 #ifdef SUPPORT_UTF
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1829 #endif
1830 break;
1831
1832 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1833 if (PRIVATE_DATA(cc))
1834 {
1835 count = 1;
1836 srcw[0] = PRIVATE_DATA(cc);
1837 }
1838 cc += 1;
1839 break;
1840
1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1842 if (PRIVATE_DATA(cc))
1843 {
1844 count = 2;
1845 srcw[0] = PRIVATE_DATA(cc);
1846 srcw[1] = srcw[0] + sizeof(sljit_sw);
1847 }
1848 cc += 1;
1849 break;
1850
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1852 if (PRIVATE_DATA(cc))
1853 {
1854 count = 2;
1855 srcw[0] = PRIVATE_DATA(cc);
1856 srcw[1] = srcw[0] + sizeof(sljit_sw);
1857 }
1858 cc += 1 + IMM2_SIZE;
1859 break;
1860
1861 case OP_CLASS:
1862 case OP_NCLASS:
1863 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1864 case OP_XCLASS:
1865 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1866 #else
1867 size = 1 + 32 / (int)sizeof(pcre_uchar);
1868 #endif
1869 if (PRIVATE_DATA(cc))
1870 switch(get_class_iterator_size(cc + size))
1871 {
1872 case 1:
1873 count = 1;
1874 srcw[0] = PRIVATE_DATA(cc);
1875 break;
1876
1877 case 2:
1878 count = 2;
1879 srcw[0] = PRIVATE_DATA(cc);
1880 srcw[1] = srcw[0] + sizeof(sljit_sw);
1881 break;
1882
1883 default:
1884 SLJIT_ASSERT_STOP();
1885 break;
1886 }
1887 cc += size;
1888 break;
1889
1890 default:
1891 cc = next_opcode(common, cc);
1892 SLJIT_ASSERT(cc != NULL);
1893 break;
1894 }
1895 break;
1896
1897 case end:
1898 SLJIT_ASSERT_STOP();
1899 break;
1900 }
1901
1902 while (count > 0)
1903 {
1904 count--;
1905 if (save)
1906 {
1907 if (tmp1next)
1908 {
1909 if (!tmp1empty)
1910 {
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1912 stackptr += sizeof(sljit_sw);
1913 }
1914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1915 tmp1empty = FALSE;
1916 tmp1next = FALSE;
1917 }
1918 else
1919 {
1920 if (!tmp2empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1926 tmp2empty = FALSE;
1927 tmp1next = TRUE;
1928 }
1929 }
1930 else
1931 {
1932 if (tmp1next)
1933 {
1934 SLJIT_ASSERT(!tmp1empty);
1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1936 tmp1empty = stackptr >= stacktop;
1937 if (!tmp1empty)
1938 {
1939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1940 stackptr += sizeof(sljit_sw);
1941 }
1942 tmp1next = FALSE;
1943 }
1944 else
1945 {
1946 SLJIT_ASSERT(!tmp2empty);
1947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1948 tmp2empty = stackptr >= stacktop;
1949 if (!tmp2empty)
1950 {
1951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1952 stackptr += sizeof(sljit_sw);
1953 }
1954 tmp1next = TRUE;
1955 }
1956 }
1957 }
1958 }
1959 while (status != end);
1960
1961 if (save)
1962 {
1963 if (tmp1next)
1964 {
1965 if (!tmp1empty)
1966 {
1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1968 stackptr += sizeof(sljit_sw);
1969 }
1970 if (!tmp2empty)
1971 {
1972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1973 stackptr += sizeof(sljit_sw);
1974 }
1975 }
1976 else
1977 {
1978 if (!tmp2empty)
1979 {
1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1981 stackptr += sizeof(sljit_sw);
1982 }
1983 if (!tmp1empty)
1984 {
1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1986 stackptr += sizeof(sljit_sw);
1987 }
1988 }
1989 }
1990 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1991 }
1992
1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1994 {
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1997
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000 current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003 has_alternatives = FALSE;
2004
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007 current_offset = common->then_offsets + (cc - common->start);
2008
2009 while (cc < end)
2010 {
2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012 cc = set_then_offsets(common, cc, current_offset);
2013 else
2014 {
2015 if (*cc == OP_ALT && has_alternatives)
2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018 *current_offset = 1;
2019 cc = next_opcode(common, cc);
2020 }
2021 }
2022
2023 return end;
2024 }
2025
/* The case label groups are not used beyond this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2032
2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2034 {
2035 return (value & (value - 1)) == 0;
2036 }
2037
2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2039 {
2040 while (list)
2041 {
2042 /* sljit_set_label is clever enough to do nothing
2043 if either the jump or the label is NULL. */
2044 SET_LABEL(list->jump, label);
2045 list = list->next;
2046 }
2047 }
2048
2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2050 {
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2052 if (list_item)
2053 {
2054 list_item->next = *list;
2055 list_item->jump = jump;
2056 *list = list_item;
2057 }
2058 }
2059
2060 static void add_stub(compiler_common *common, struct sljit_jump *start)
2061 {
2062 DEFINE_COMPILER;
2063 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2064
2065 if (list_item)
2066 {
2067 list_item->start = start;
2068 list_item->quit = LABEL();
2069 list_item->next = common->stubs;
2070 common->stubs = list_item;
2071 }
2072 }
2073
2074 static void flush_stubs(compiler_common *common)
2075 {
2076 DEFINE_COMPILER;
2077 stub_list *list_item = common->stubs;
2078
2079 while (list_item)
2080 {
2081 JUMPHERE(list_item->start);
2082 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2083 JUMPTO(SLJIT_JUMP, list_item->quit);
2084 list_item = list_item->next;
2085 }
2086 common->stubs = NULL;
2087 }
2088
2089 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2090 {
2091 DEFINE_COMPILER;
2092 label_addr_list *label_addr;
2093
2094 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2095 if (label_addr == NULL)
2096 return;
2097 label_addr->label = LABEL();
2098 label_addr->update_addr = update_addr;
2099 label_addr->next = common->label_addrs;
2100 common->label_addrs = label_addr;
2101 }
2102
2103 static SLJIT_INLINE void count_match(compiler_common *common)
2104 {
2105 DEFINE_COMPILER;
2106
2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2109 }
2110
2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2112 {
2113 /* May destroy all locals and registers except TMP2. */
2114 DEFINE_COMPILER;
2115
2116 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2117 #ifdef DESTROY_REGISTERS
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2119 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2120 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2123 #endif
2124 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2125 }
2126
2127 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2128 {
2129 DEFINE_COMPILER;
2130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2131 }
2132
2133 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2134 {
2135 DEFINE_COMPILER;
2136 sljit_uw *result;
2137
2138 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2139 return NULL;
2140
2141 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2142 if (SLJIT_UNLIKELY(result == NULL))
2143 {
2144 sljit_set_compiler_memory_error(compiler);
2145 return NULL;
2146 }
2147
2148 *(void**)result = common->read_only_data_head;
2149 common->read_only_data_head = (void *)result;
2150 return result + 1;
2151 }
2152
2153 static void free_read_only_data(void *current, void *allocator_data)
2154 {
2155 void *next;
2156
2157 SLJIT_UNUSED_ARG(allocator_data);
2158
2159 while (current != NULL)
2160 {
2161 next = *(void**)current;
2162 SLJIT_FREE(current, allocator_data);
2163 current = next;
2164 }
2165 }
2166
2167 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2168 {
2169 DEFINE_COMPILER;
2170 struct sljit_label *loop;
2171 int i;
2172
2173 /* At this point we can freely use all temporary registers. */
2174 SLJIT_ASSERT(length > 1);
2175 /* TMP1 returns with begin - 1. */
2176 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2177 if (length < 8)
2178 {
2179 for (i = 1; i < length; i++)
2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2181 }
2182 else
2183 {
2184 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2185 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2186 loop = LABEL();
2187 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2189 JUMPTO(SLJIT_NOT_ZERO, loop);
2190 }
2191 }
2192
2193 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2194 {
2195 DEFINE_COMPILER;
2196 struct sljit_label *loop;
2197 int i;
2198
2199 SLJIT_ASSERT(length > 1);
2200 /* OVECTOR(1) contains the "string begin - 1" constant. */
2201 if (length > 2)
2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2203 if (length < 8)
2204 {
2205 for (i = 2; i < length; i++)
2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2207 }
2208 else
2209 {
2210 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2212 loop = LABEL();
2213 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2214 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2215 JUMPTO(SLJIT_NOT_ZERO, loop);
2216 }
2217
2218 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2219 if (common->mark_ptr != 0)
2220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2221 if (common->control_head_ptr != 0)
2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2226 }
2227
2228 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2229 {
2230 while (current != NULL)
2231 {
2232 switch (current[-2])
2233 {
2234 case type_then_trap:
2235 break;
2236
2237 case type_mark:
2238 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2239 return current[-4];
2240 break;
2241
2242 default:
2243 SLJIT_ASSERT_STOP();
2244 break;
2245 }
2246 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2247 current = (sljit_sw*)current[-1];
2248 }
2249 return -1;
2250 }
2251
/* Emits the code that hands a successful match back to the caller:
   - the current mark (only when common->mark_ptr is in use) is stored into
     the mark_ptr field of jit_arguments;
   - offset_count entries of the local ovector area (starting at
     OVECTOR_START) are made relative to the 'begin' field of jit_arguments,
     converted from bytes to code units in the 16/32 bit libraries, and
     written as ints into the 'offsets' array of jit_arguments;
   - the value to be returned is left in SLJIT_RETURN_REG.
   topbracket is used only for computing the return value. */
2252 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2253 {
2254 DEFINE_COMPILER;
2255 struct sljit_label *loop;
2256 struct sljit_jump *early_quit;
2257
2258 /* At this point we can freely use all registers. */
/* S2 keeps the previous content of OVECTOR(1); the return value computation
   below compares ovector entries against it (presumably it is the "unset"
   marker - confirm). OVECTOR(1) itself receives the current STR_PTR. */
2259 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2261
/* R0 = jit_arguments, R1 = offset_count (loop counter). The mark is moved
   through R2 before R2 is reused as the output pointer. */
2262 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2263 if (common->mark_ptr != 0)
2264 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2265 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2266 if (common->mark_ptr != 0)
2267 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 starts one int before 'offsets' because the store in the loop uses a
   displacement of sizeof(int) (MOVU is the base-updating move of sljit, so
   R2 is expected to advance on every store - see the sljit API). */
2268 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* R0 = begin, S0 = address of the first local ovector entry. */
2269 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2270 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2271 /* Unlikely, but possible */
2272 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop: S1 = *S0 - begin; S0 steps over one sljit_sw entry. */
2273 loop = LABEL();
2274 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2275 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2276 /* Copy the integer value to the output buffer */
2277 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2278 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2279 #endif
2280 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2282 JUMPTO(SLJIT_NOT_ZERO, loop);
2283 JUMPHERE(early_quit);
2284
2285 /* Calculate the return value, which is the maximum ovector value. */
2286 if (topbracket > 1)
2287 {
/* R0 points just past the pair that belongs to topbracket and R1 starts at
   topbracket + 1. Each iteration loads the entry two sljit_sw slots lower
   and decrements R1; the scan stops at the first entry that differs
   from S2. */
2288 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2289 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2290
2291 /* OVECTOR(0) is never equal to SLJIT_S2. */
2292 loop = LABEL();
2293 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2294 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2295 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2296 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2297 }
2298 else
/* With topbracket <= 1 the return value is the constant 1. */
2299 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2300 }
2301
/* Emits the code that returns PCRE_ERROR_PARTIAL. The return value is set
   first; when real_offset_count of jit_arguments is below 2 the code jumps to
   'quit' immediately. Otherwise the 'offsets' array is filled (all values are
   relative to 'begin' and converted to code units in the 16/32 bit libraries):
     offsets[0] = start_used_ptr (hard partial mode) or hit_start (soft mode)
     offsets[1] = STR_END
     offsets[2] = start_ptr (hard mode) or the slot following hit_start
                  (soft mode); stored only if real_offset_count >= 3
   and the code jumps to 'quit'. */
2302 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2303 {
2304 DEFINE_COMPILER;
2305 struct sljit_jump *jump;
2306
/* STR_END is overwritten (through its S1 alias) further below. */
2307 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2308 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2309 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2310
/* R1 = jit_arguments, R2 = real_offset_count. */
2311 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2312 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2313 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2314 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2315
2316 /* Store match begin and end. */
/* S0 = begin, R1 = offsets (the jit_arguments pointer is not needed later). */
2317 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2318 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2319
/* offsets[2] is written only when there is room for a third entry. */
2320 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2321 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2322 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2323 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2324 #endif
2325 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2326 JUMPHERE(jump);
2327
/* R2 = start of the partial match; offsets[1] = STR_END - begin. */
2328 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2329 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2331 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2332 #endif
2333 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2334
/* offsets[0] = R2 - begin. */
2335 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2336 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2337 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2338 #endif
2339 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2340
2341 JUMPTO(SLJIT_JUMP, quit);
2342 }
2343
2344 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2345 {
2346 /* May destroy TMP1. */
2347 DEFINE_COMPILER;
2348 struct sljit_jump *jump;
2349
2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2351 {
2352 /* The value of -1 must be kept for start_used_ptr! */
2353 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2354 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2355 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2356 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2358 JUMPHERE(jump);
2359 }
2360 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2361 {
2362 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2364 JUMPHERE(jump);
2365 }
2366 }
2367
2368 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2369 {
2370 /* Detects if the character has an othercase. */
2371 unsigned int c;
2372
2373 #ifdef SUPPORT_UTF
2374 if (common->utf)
2375 {
2376 GETCHAR(c, cc);
2377 if (c > 127)
2378 {
2379 #ifdef SUPPORT_UCP
2380 return c != UCD_OTHERCASE(c);
2381 #else
2382 return FALSE;
2383 #endif
2384 }
2385 #ifndef COMPILE_PCRE8
2386 return common->fcc[c] != c;
2387 #endif
2388 }
2389 else
2390 #endif
2391 c = *cc;
2392 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2393 }
2394
2395 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2396 {
2397 /* Returns with the othercase. */
2398 #ifdef SUPPORT_UTF
2399 if (common->utf && c > 127)
2400 {
2401 #ifdef SUPPORT_UCP
2402 return UCD_OTHERCASE(c);
2403 #else
2404 return c;
2405 #endif
2406 }
2407 #endif
2408 return TABLE_GET(c, common->fcc, c);
2409 }
2410
2411 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2412 {
2413 /* Detects if the character and its othercase has only 1 bit difference. */
2414 unsigned int c, oc, bit;
2415 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2416 int n;
2417 #endif
2418
2419 #ifdef SUPPORT_UTF
2420 if (common->utf)
2421 {
2422 GETCHAR(c, cc);
2423 if (c <= 127)
2424 oc = common->fcc[c];
2425 else
2426 {
2427 #ifdef SUPPORT_UCP
2428 oc = UCD_OTHERCASE(c);
2429 #else
2430 oc = c;
2431 #endif
2432 }
2433 }
2434 else
2435 {
2436 c = *cc;
2437 oc = TABLE_GET(c, common->fcc, c);
2438 }
2439 #else
2440 c = *cc;
2441 oc = TABLE_GET(c, common->fcc, c);
2442 #endif
2443
2444 SLJIT_ASSERT(c != oc);
2445
2446 bit = c ^ oc;
2447 /* Optimized for English alphabet. */
2448 if (c <= 127 && bit == 0x20)
2449 return (0 << 8) | 0x20;
2450
2451 /* Since c != oc, they must have at least 1 bit difference. */
2452 if (!is_powerof2(bit))
2453 return 0;
2454
2455 #if defined COMPILE_PCRE8
2456
2457 #ifdef SUPPORT_UTF
2458 if (common->utf && c > 127)
2459 {
2460 n = GET_EXTRALEN(*cc);
2461 while ((bit & 0x3f) == 0)
2462 {
2463 n--;
2464 bit >>= 6;
2465 }
2466 return (n << 8) | bit;
2467 }
2468 #endif /* SUPPORT_UTF */
2469 return (0 << 8) | bit;
2470
2471 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2472
2473 #ifdef SUPPORT_UTF
2474 if (common->utf && c > 65535)
2475 {
2476 if (bit >= (1 << 10))
2477 bit >>= 10;
2478 else
2479 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2480 }
2481 #endif /* SUPPORT_UTF */
2482 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2483
2484 #endif /* COMPILE_PCRE[8|16|32] */
2485 }
2486
2487 static void check_partial(compiler_common *common, BOOL force)
2488 {
2489 /* Checks whether a partial matching is occurred. Does not modify registers. */
2490 DEFINE_COMPILER;
2491 struct sljit_jump *jump = NULL;
2492
2493 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2494
2495 if (common->mode == JIT_COMPILE)
2496 return;
2497
2498 if (!force)
2499 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2500 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2501 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2502
2503 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2505 else
2506 {
2507 if (common->partialmatchlabel != NULL)
2508 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2509 else
2510 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2511 }
2512
2513 if (jump != NULL)
2514 JUMPHERE(jump);
2515 }
2516
2517 static void check_str_end(compiler_common *common, jump_list **end_reached)
2518 {
2519 /* Does not affect registers. Usually used in a tight spot. */
2520 DEFINE_COMPILER;
2521 struct sljit_jump *jump;
2522
2523 if (common->mode == JIT_COMPILE)
2524 {
2525 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2526 return;
2527 }
2528
2529 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2530 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2531 {
2532 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2534 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2535 }
2536 else
2537 {
2538 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2539 if (common->partialmatchlabel != NULL)
2540 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2541 else
2542 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2543 }
2544 JUMPHERE(jump);
2545 }
2546
2547 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2548 {
2549 DEFINE_COMPILER;
2550 struct sljit_jump *jump;
2551
2552 if (common->mode == JIT_COMPILE)
2553 {
2554 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2555 return;
2556 }
2557
2558 /* Partial matching mode. */
2559 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2560 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2561 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2562 {
2563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2564 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2565 }
2566 else
2567 {
2568 if (common->partialmatchlabel != NULL)
2569 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2570 else
2571 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2572 }
2573 JUMPHERE(jump);
2574 }
2575
2576 static void peek_char(compiler_common *common, pcre_uint32 max)
2577 {
2578 /* Reads the character into TMP1, keeps STR_PTR.
2579 Does not check STR_END. TMP2 Destroyed. */
2580 DEFINE_COMPILER;
2581 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2582 struct sljit_jump *jump;
2583 #endif
2584
2585 SLJIT_UNUSED_ARG(max);
2586
2587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2588 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2589 if (common->utf)
2590 {
2591 if (max < 128) return;
2592
2593 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2595 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2596 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2597 JUMPHERE(jump);
2598 }
2599 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2600
2601 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2602 if (common->utf)
2603 {
2604 if (max < 0xd800) return;
2605
2606 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2607 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2608 /* TMP2 contains the high surrogate. */
2609 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2610 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2611 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2612 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2613 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2614 JUMPHERE(jump);
2615 }
2616 #endif
2617 }
2618
2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2620
2621 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2622 {
2623 /* Tells whether the character codes below 128 are enough
2624 to determine a match. */
2625 const pcre_uint8 value = nclass ? 0xff : 0;
2626 const pcre_uint8 *end = bitset + 32;
2627
2628 bitset += 16;
2629 do
2630 {
2631 if (*bitset++ != value)
2632 return FALSE;
2633 }
2634 while (bitset < end);
2635 return TRUE;
2636 }
2637
/* Emits code for the UTF-8 fast path that needs the character type only for
   characters below 128. The caller must be compiling in UTF mode (asserted). */
2638 static void read_char7_type(compiler_common *common, BOOL full_read)
2639 {
2640 /* Reads the precise character type of a character into TMP1, if the character
2641 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2642 full_read argument tells whether characters above max are accepted or not. */
2643 DEFINE_COMPILER;
2644 struct sljit_jump *jump;
2645
2646 SLJIT_ASSERT(common->utf);
2647
/* TMP2 = first byte; STR_PTR is advanced past it. */
2648 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2650
/* TMP1 = ctypes[TMP2]. The load is emitted for every first byte.
   NOTE(review): the "zero" result for bytes >= 128 presumably comes from
   the table contents - confirm. */
2651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2652
2653 if (full_read)
2654 {
/* For first bytes >= 0xc0 the rest of the character is skipped: the value
   found in utf8_table4 at index (byte - 0xc0) is added to STR_PTR. */
2655 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2656 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2658 JUMPHERE(jump);
2659 }
2660 }
2661
2662 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2663
/* Emits code that reads one character and advances STR_PTR. min and max
   describe the range of values the caller cares about; they only select how
   much UTF decoding code is generated. update_str_ptr tells whether STR_PTR
   must end up after the whole character even when its value is outside the
   range. In non-UTF mode (and always in the 32 bit library) the generated
   code is just the load of one code unit plus the pointer increment. */
2664 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2665 {
2666 /* Reads the precise value of a character into TMP1, if the character is
2667 between min and max (c >= min && c <= max). Otherwise it returns with a value
2668 outside the range. Does not check STR_END. */
2669 DEFINE_COMPILER;
2670 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2671 struct sljit_jump *jump;
2672 #endif
2673 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2674 struct sljit_jump *jump2;
2675 #endif
2676
2677 SLJIT_UNUSED_ARG(update_str_ptr);
2678 SLJIT_UNUSED_ARG(min);
2679 SLJIT_UNUSED_ARG(max);
2680 SLJIT_ASSERT(min <= max);
2681
/* TMP1 = first code unit; STR_PTR now points after it. */
2682 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2684
2685 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2686 if (common->utf)
2687 {
/* Only single byte values are of interest and the pointer need not be
   exact: the first byte is enough. */
2688 if (max < 128 && !update_str_ptr) return;
2689
/* First bytes below 0xc0 skip all of the decoding code below. */
2690 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2691 if (min >= 0x10000)
2692 {
/* Only first bytes 0xf0..0xf7 are decoded (three more bytes are merged
   into TMP1). When update_str_ptr is set, RETURN_ADDR is used as a scratch
   register holding the utf8_table4 entry of the first byte, which is added
   to STR_PTR at the end for every first byte >= 0xc0. */
2693 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2694 if (update_str_ptr)
2695 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2696 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2697 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2702 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2703 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2704 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2705 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2706 if (!update_str_ptr)
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2708 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2709 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2710 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2711 JUMPHERE(jump2);
2712 if (update_str_ptr)
2713 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2714 }
2715 else if (min >= 0x800 && max <= 0xffff)
2716 {
/* Same scheme for first bytes 0xe0..0xef: two more bytes are merged. */
2717 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2718 if (update_str_ptr)
2719 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2720 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2721 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2722 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2723 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2724 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2726 if (!update_str_ptr)
2727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2728 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2729 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2730 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2731 JUMPHERE(jump2);
2732 if (update_str_ptr)
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2734 }
2735 else if (max >= 0x800)
/* General case: call a shared decoder subroutine; the utfreadchar16 list is
   chosen when max is below 0x10000. */
2736 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2737 else if (max < 128)
2738 {
/* Reached only with update_str_ptr set (see the early return above): the
   value is not decoded, STR_PTR is advanced by the utf8_table4 entry. */
2739 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2741 }
2742 else
2743 {
/* 128 <= max < 0x800: the first byte and the following byte are merged as
   a two byte sequence. */
2744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2745 if (!update_str_ptr)
2746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2747 else
2748 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2749 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2750 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2751 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2752 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2753 if (update_str_ptr)
2754 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2755 }
2756 JUMPHERE(jump);
2757 }
2758 #endif
2759
2760 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2761 if (common->utf)
2762 {
2763 if (max >= 0x10000)
2764 {
/* Full decoding: a unit in 0xd800..0xdbff is combined with the following
   code unit (STR_PTR already points at it) and STR_PTR is advanced once
   more. */
2765 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2766 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2767 /* TMP2 contains the high surrogate. */
2768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2769 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2770 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2774 JUMPHERE(jump);
2775 return;
2776 }
2777
2778 if (max < 0xd800 && !update_str_ptr) return;
2779
2780 /* Skip low surrogate if necessary. */
/* For a unit in 0xd800..0xdbff: STR_PTR is advanced once more when
   update_str_ptr is set, and TMP1 is replaced by 0x10000 when max >= 0xd800
   (max is below 0x10000 here, so that value is outside the range). */
2781 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2782 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2783 if (update_str_ptr)
2784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2785 if (max >= 0xd800)
2786 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2787 JUMPHERE(jump);
2788 }
2789 #endif
2790 }
2791
/* Emits code that reads a character without range restriction: the call
   below covers 0..READ_CHAR_MAX and requests that STR_PTR is always
   updated. See read_char_range() for the register usage. */
2792 static SLJIT_INLINE void read_char(compiler_common *common)
2793 {
2794 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2795 }
2796
/* Emits code that loads the ctypes table entry of the next character into
   TMP1. The table is consulted only for values up to 255; where the code
   detects a larger value TMP1 is set to 0. update_str_ptr tells whether
   STR_PTR must end up after the whole character in UTF mode. TMP2 is used
   as a work register. */
2797 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2798 {
2799 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2800 DEFINE_COMPILER;
2801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2802 struct sljit_jump *jump;
2803 #endif
2804 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2805 struct sljit_jump *jump2;
2806 #endif
2807
2808 SLJIT_UNUSED_ARG(update_str_ptr);
2809
/* TMP2 = first code unit; STR_PTR now points after it. */
2810 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2812
2813 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2814 if (common->utf)
2815 {
2816 /* This can be an extra read in some situations, but hopefully
2817 it is needed in most cases. */
2818 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2819 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2820 if (!update_str_ptr)
2821 {
/* Inline path: the first byte and the next byte are merged as a two byte
   sequence (STR_PTR moves over the second byte); the table is used only
   if the merged value is at most 255, otherwise TMP1 stays 0. */
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2825 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2826 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2827 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2829 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2830 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2831 JUMPHERE(jump2);
2832 }
2833 else
/* Otherwise the shared utfreadtype8 subroutine is called. */
2834 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2835 JUMPHERE(jump);
2836 return;
2837 }
2838 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2839
2840 #if !defined COMPILE_PCRE8
2841 /* The ctypes array contains only 256 values. */
2842 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2843 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2844 #endif
2845 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2846 #if !defined COMPILE_PCRE8
2847 JUMPHERE(jump);
2848 #endif
2849
2850 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2851 if (common->utf && update_str_ptr)
2852 {
2853 /* Skip low surrogate if necessary. */
/* TMP2 still holds the code unit read above; a unit in 0xd800..0xdbff
   makes STR_PTR advance by one more unit. */
2854 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2855 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2857 JUMPHERE(jump);
2858 }
2859 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2860 }
2861
/* Emits code that moves STR_PTR back by one character. Outside UTF mode
   (and always in the 32 bit library) this is a single subtraction of one
   code unit. */
2862 static void skip_char_back(compiler_common *common)
2863 {
2864 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2865 DEFINE_COMPILER;
2866 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2867 #if defined COMPILE_PCRE8
2868 struct sljit_label *label;
2869
2870 if (common->utf)
2871 {
/* UTF-8: step back one byte at a time and repeat while the byte stepped
   over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
2872 label = LABEL();
2873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2874 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2875 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2876 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2877 return;
2878 }
2879 #elif defined COMPILE_PCRE16
2880 if (common->utf)
2881 {
/* UTF-16: step back one code unit, then one more if that unit satisfies
   (unit & 0xfc00) == 0xdc00. The result of the comparison is materialized
   in TMP1 by OP_FLAGS (presumably as 0 or 1 - confirm against the sljit
   API) and shifted left by one to give the byte distance. */
2882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2883 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2884 /* Skip low surrogate if necessary. */
2885 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2886 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2887 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2888 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2889 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2890 return;
2891 }
2892 #endif /* COMPILE_PCRE[8|16] */
2893 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2894 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2895 }
2896
2897 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2898 {
2899 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2900 DEFINE_COMPILER;
2901 struct sljit_jump *jump;
2902
2903 if (nltype == NLTYPE_ANY)
2904 {
2905 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2906 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2907 }
2908 else if (nltype == NLTYPE_ANYCRLF)
2909 {
2910 if (jumpifmatch)
2911 {
2912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2913 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2914 }
2915 else
2916 {
2917 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2918 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2919 JUMPHERE(jump);
2920 }
2921 }
2922 else
2923 {
2924 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2925 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2926 }
2927 }
2928
2929 #ifdef SUPPORT_UTF
2930
2931 #if defined COMPILE_PCRE8
/* Emits the body of a subroutine that is entered with a fast call (the
   return address is kept in RETURN_ADDR). On entry STR_PTR points to the
   code unit that follows the first byte; on return it points after the whole
   character. */
2932 static void do_utfreadchar(compiler_common *common)
2933 {
2934 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2935 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2936 DEFINE_COMPILER;
2937 struct sljit_jump *jump;
2938
2939 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first byte with those of the second. */
2940 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2941 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2945
2946 /* Searching for the first zero. */
/* After the shift, bit 0x800 of TMP1 is bit 0x20 of the first byte. */
2947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2948 jump = JUMP(SLJIT_NOT_ZERO);
2949 /* Two byte sequence. */
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2952 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2953
2954 JUMPHERE(jump);
/* Remove the marker bit and merge the third byte. */
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960
/* Bit 0x10000 of TMP1 is now bit 0x10 of the first byte. */
2961 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2962 jump = JUMP(SLJIT_NOT_ZERO);
2963 /* Three byte sequence. */
2964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2966 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2967
2968 /* Four byte sequence. */
2969 JUMPHERE(jump);
2970 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2971 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2972 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2977 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2978 }
2979
/* Emits the body of a fast-call subroutine (return address in RETURN_ADDR)
   that merges at most three bytes of a UTF-8 character. read_char_range()
   calls it when the largest value of interest is below 0x10000. On entry
   STR_PTR points to the code unit that follows the first byte. TMP2 is used
   as a work register. */
2980 static void do_utfreadchar16(compiler_common *common)
2981 {
2982 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2983 of the character (>= 0xc0). Return value in TMP1. */
2984 DEFINE_COMPILER;
2985 struct sljit_jump *jump;
2986
2987 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first byte with those of the second. */
2988 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2989 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2990 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2991 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2992 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2993
2994 /* Searching for the first zero. */
/* After the shift, bit 0x800 of TMP1 is bit 0x20 of the first byte. */
2995 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2996 jump = JUMP(SLJIT_NOT_ZERO);
2997 /* Two byte sequence. */
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3000
3001 JUMPHERE(jump);
/* Bit 0x400 of TMP1 is bit 0x10 of the first byte. The flag value stored in
   TMP2 is added to STR_PTR, so a first byte with that bit set moves the
   pointer by one extra unit. NOTE(review): for such a first byte the value
   left in TMP1 is built from three bytes only - presumably acceptable
   because it is outside the caller's range; confirm. */
3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3003 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3004 /* This code runs only in 8 bit mode. No need to shift the value. */
3005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3006 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3007 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3008 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3009 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3010 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3011 /* Three byte sequence. */
3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3013 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3014 }
3015
/* Emits the body of a fast-call subroutine (return address in RETURN_ADDR)
   that produces the ctypes entry of a multi-byte UTF-8 character. On entry
   STR_PTR points to the code unit that follows the first byte; on return it
   has been moved over the remaining bytes. TMP1 is 0 for every character
   that is not looked up in the table. */
3016 static void do_utfreadtype8(compiler_common *common)
3017 {
3018 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3019 of the character (>= 0xc0). Return value in TMP1. */
3020 DEFINE_COMPILER;
3021 struct sljit_jump *jump;
3022 struct sljit_jump *compare;
3023
3024 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3025
/* Bit 0x20 of the first byte separates two byte sequences from longer
   ones. */
3026 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3027 jump = JUMP(SLJIT_NOT_ZERO);
3028 /* Two byte sequence. */
3029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3031 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3032 /* The upper 5 bits are known at this point. */
/* An upper part above 3 means a value above 255: no table entry exists. */
3033 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3034 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3035 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3036 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3038 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3039
3040 JUMPHERE(compare);
3041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3042 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3043
3044 /* We only have types for characters less than 256. */
/* Longer sequences: STR_PTR is advanced by the utf8_table4 entry of the
   first byte (index: byte - 0xc0) and 0 is returned. */
3045 JUMPHERE(jump);
3046 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3049 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3050 }
3051
3052 #endif /* COMPILE_PCRE8 */
3053
3054 #endif /* SUPPORT_UTF */
3055
3056 #ifdef SUPPORT_UCP
3057
3058 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3059 #define UCD_BLOCK_MASK 127
3060 #define UCD_BLOCK_SHIFT 7
3061
/* Emits the body of a fast-call subroutine (return address in RETURN_ADDR)
   that performs the two stage Unicode property table lookup for the
   character in TMP1. */
3062 static void do_getucd(compiler_common *common)
3063 {
3064 /* Search the UCD record for the character comes in TMP1.
3065 Returns chartype in TMP1 and UCD offset in TMP2. */
3066 DEFINE_COMPILER;
3067
/* The shift amounts used below depend on both sizes. */
3068 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3069
3070 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (byte load). */
3071 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3072 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP2 << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
3073 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3074 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3075 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1]: a 16 bit load, the index is scaled by the
   shift of 1. */
3076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3077 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype field of ucd_records[TMP2]: the shift of 3 scales the
   index by the record size of 8 asserted above. */
3078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3079 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3080 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3081 }
3082 #endif
3083
/* Emits the code at the entry of the main matching loop and returns the
   label of the loop. The code placed after that label advances STR_PTR by
   one character; on the way in it is bypassed through the 'start' jump,
   which lands at the end of the emitted code. When 'firstline' is set, code
   that locates the end of the first line and stores it in
   common->first_line_end is emitted before anything else. */
3084 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3085 {
3086 DEFINE_COMPILER;
3087 struct sljit_label *mainloop;
3088 struct sljit_label *newlinelabel = NULL;
3089 struct sljit_jump *start;
3090 struct sljit_jump *end = NULL;
3091 struct sljit_jump *nl = NULL;
3092 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3093 struct sljit_jump *singlechar;
3094 #endif
3095 jump_list *newline = NULL;
3096 BOOL newlinecheck = FALSE;
3097 BOOL readuchar = FALSE;
3098
/* The extra newline handling in the advance code is generated only when
   neither hascrorlf nor firstline is set and the newline type is ANY,
   ANYCRLF or a value above 255. */
3099 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3100 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3101 newlinecheck = TRUE;
3102
3103 if (firstline)
3104 {
3105 /* Search for the end of the first line. */
3106 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 and restored after the scan. */
3107 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3108
3109 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3110 {
/* Two unit newline: advance until the unit before STR_PTR and the unit at
   STR_PTR equal the high and low byte of common->newline, or until
   STR_END is reached; first_line_end = STR_PTR - 1 unit. */
3111 mainloop = LABEL();
3112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3113 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3115 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3116 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3117 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3118 JUMPHERE(end);
3119 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 }
3121 else
3122 {
/* Other newline types: first_line_end is stored before each character is
   read, so it holds the position of the newline when one is found (the
   'newline' jumps bypass the final store); at the end of the subject it
   is set to STR_PTR. */
3123 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3124 mainloop = LABEL();
3125 /* Continual stores does not cause data dependency. */
3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3127 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3128 check_newlinechar(common, common->nltype, &newline, TRUE);
3129 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3130 JUMPHERE(end);
3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3132 set_jumps(newline, LABEL());
3133 }
3134
3135 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3136 }
3137
/* Bypass the advance code on the way in. */
3138 start = JUMP(SLJIT_JUMP);
3139
3140 if (newlinecheck)
3141 {
/* Out of line part of the advance code, entered from the main loop when the
   current unit equals the high byte of common->newline: STR_PTR moves one
   unit forward and, unless STR_END is reached, one more unit if the next
   unit equals the low byte of common->newline. */
3142 newlinelabel = LABEL();
3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3144 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3147 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3149 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3150 #endif
3151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3152 nl = JUMP(SLJIT_JUMP);
3153 }
3154
/* This is the label handed back to the caller. */
3155 mainloop = LABEL();
3156
3157 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3158 #ifdef SUPPORT_UTF
3159 if (common->utf) readuchar = TRUE;
3160 #endif
3161 if (newlinecheck) readuchar = TRUE;
3162
/* The current unit is loaded only if the code below inspects it. */
3163 if (readuchar)
3164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3165
3166 if (newlinecheck)
3167 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3168
3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3171 #if defined COMPILE_PCRE8
3172 if (common->utf)
3173 {
/* UTF-8: for first bytes >= 0xc0 the utf8_table4 entry (index: byte - 0xc0)
   is added to STR_PTR. */
3174 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3175 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3177 JUMPHERE(singlechar);
3178 }
3179 #elif defined COMPILE_PCRE16
3180 if (common->utf)
3181 {
/* UTF-16: units with (unit & 0xfc00) == 0xd800 move STR_PTR by one more
   unit (the flag value shifted left by one is the byte distance). */
3182 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3183 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3185 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3186 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3188 JUMPHERE(singlechar);
3189 }
3190 #endif /* COMPILE_PCRE[8|16] */
3191 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Everything joins here: the initial bypass and the two exits of the
   newline part. */
3192 JUMPHERE(start);
3193
3194 if (newlinecheck)
3195 {
3196 JUMPHERE(end);
3197 JUMPHERE(nl);
3198 }
3199
3200 return mainloop;
3201 }
3202
3203 #define MAX_N_CHARS 16
3204 #define MAX_N_BYTES 8
3205
3206 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3207 {
3208 pcre_uint8 len = bytes[0];
3209 int i;
3210
3211 if (len == 255)
3212 return;
3213
3214 if (len == 0)
3215 {
3216 bytes[0] = 1;
3217 bytes[1] = byte;
3218 return;
3219 }
3220
3221 for (i = len; i > 0; i--)
3222 if (bytes[i] == byte)
3223 return;
3224
3225 if (len >= MAX_N_BYTES - 1)
3226 {
3227 bytes[0] = 255;
3228 return;
3229 }
3230
3231 len++;
3232 bytes[len] = byte;
3233 bytes[0] = len;
3234 }
3235
/* Recursive scanner for the leading characters of a compiled pattern.

Starting at cc, walks the compiled pattern and records which code units can
occur at each of the next character positions, for at most max_chars
positions.

chars      Two entries per position. While a position still holds the
           NOTACHAR marker, chars[0] receives the code unit (with the
           case-difference bits set for caseless characters) and chars[1]
           the mask of bits that may differ. Any further value seen for the
           same position is merged in by OR-ing the differing bits into both
           entries. Positions that are treated as "any character" get the
           all-ones code unit mask in both entries.
bytes      MAX_N_BYTES entries per position, maintained by add_prefix_byte()
           from the low 8 bits of each code unit; the first entry is set to
           255 for "any character" positions.
rec_count  Remaining step budget. It is shared by all recursive calls and
           decremented once per iteration of the main loop.

Returns the number of positions filled in by this call, or 0 as soon as the
step budget is exhausted. The recursive calls made below use the returned
value as the new max_chars limit. */
3236 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3237 {
3238 /* Recursive function, which scans prefix literals. */
3239 BOOL last, any, caseless;
3240 int len, repeat, len_save, consumed = 0;
3241 pcre_uint32 chr, mask;
3242 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other-case form of one character; its size depends on how
many code units a character may need in the configured mode. */
3243 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3244 pcre_uchar othercase[8];
3245 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3246 pcre_uchar othercase[2];
3247 #else
3248 pcre_uchar othercase[1];
3249 #endif
3250
/* repeat is only reset after a position has been recorded, so a count set by
OP_TYPEEXACT survives the "continue" and applies to the next opcode. */
3251 repeat = 1;
3252 while (TRUE)
3253 {
/* Give up once the shared step budget is used up. */
3254 if (*rec_count == 0)
3255 return 0;
3256 (*rec_count)--;
3257
/* Per-opcode flags: last = stop scanning after this item, any = the item is
recorded as "any character", caseless = also record the other case. */
3258 last = TRUE;
3259 any = FALSE;
3260 caseless = FALSE;
3261
3262 switch (*cc)
3263 {
3264 case OP_CHARI:
3265 caseless = TRUE;
/* Fall through. */
3266 case OP_CHAR:
3267 last = FALSE;
3268 cc++;
3269 break;
3270
3271 case OP_SOD:
3272 case OP_SOM:
3273 case OP_SET_SOM:
3274 case OP_NOT_WORD_BOUNDARY:
3275 case OP_WORD_BOUNDARY:
3276 case OP_EODN:
3277 case OP_EOD:
3278 case OP_CIRC:
3279 case OP_CIRCM:
3280 case OP_DOLL:
3281 case OP_DOLLM:
3282 /* Zero width assertions. */
3283 cc++;
3284 continue;
3285
/* Lookaround groups are stepped over as a whole using bracketend(). */
3286 case OP_ASSERT:
3287 case OP_ASSERT_NOT:
3288 case OP_ASSERTBACK:
3289 case OP_ASSERTBACK_NOT:
3290 cc = bracketend(cc);
3291 continue;
3292
/* "One or more" repeats: one occurrence is recorded and, because last stays
TRUE, scanning stops right after it. */
3293 case OP_PLUSI:
3294 case OP_MINPLUSI:
3295 case OP_POSPLUSI:
3296 caseless = TRUE;
/* Fall through. */
3297 case OP_PLUS:
3298 case OP_MINPLUS:
3299 case OP_POSPLUS:
3300 cc++;
3301 break;
3302
/* Exact repeats: the character is recorded "repeat" times in a row. */
3303 case OP_EXACTI:
3304 caseless = TRUE;
/* Fall through. */
3305 case OP_EXACT:
3306 repeat = GET2(cc, 1);
3307 last = FALSE;
3308 cc += 1 + IMM2_SIZE;
3309 break;
3310
/* Optional character: first scan the code that follows the character into the
same positions, then record the character itself below. The number of positions
reached by the recursive scan becomes the new limit. */
3311 case OP_QUERYI:
3312 case OP_MINQUERYI:
3313 case OP_POSQUERYI:
3314 caseless = TRUE;
/* Fall through. */
3315 case OP_QUERY:
3316 case OP_MINQUERY:
3317 case OP_POSQUERY:
3318 len = 1;
3319 cc++;
3320 #ifdef SUPPORT_UTF
3321 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3322 #endif
3323 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3324 if (max_chars == 0)
3325 return consumed;
3326 last = FALSE;
3327 break;
3328
/* Step over the closing opcode of a group. */
3329 case OP_KET:
3330 cc += 1 + LINK_SIZE;
3331 continue;
3332
/* Follow the link stored in an OP_ALT item. */
3333 case OP_ALT:
3334 cc += GET(cc, 1);
3335 continue;
3336
/* Groups: every alternative introduced by OP_ALT is scanned recursively into
the same positions; the code right after the group header is then processed by
this loop itself. */
3337 case OP_ONCE:
3338 case OP_ONCE_NC:
3339 case OP_BRA:
3340 case OP_BRAPOS:
3341 case OP_CBRA:
3342 case OP_CBRAPOS:
3343 alternative = cc + GET(cc, 1);
3344 while (*alternative == OP_ALT)
3345 {
3346 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3347 if (max_chars == 0)
3348 return consumed;
3349 alternative += GET(alternative, 1);
3350 }
3351
/* Capturing groups carry an extra IMM2_SIZE field in their header. */
3352 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3353 cc += IMM2_SIZE;
3354 cc += 1 + LINK_SIZE;
3355 continue;
3356
/* Character classes and character types are recorded as "any character". In
the UTF configurations guarded below the scan stops instead, either always or
when is_char7_bitset() rejects the bitmap. */
3357 case OP_CLASS:
3358 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3359 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3360 #endif
3361 any = TRUE;
3362 cc += 1 + 32 / sizeof(pcre_uchar);
3363 break;
3364
3365 case OP_NCLASS:
3366 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3367 if (common->utf) return consumed;
3368 #endif
3369 any = TRUE;
3370 cc += 1 + 32 / sizeof(pcre_uchar);
3371 break;
3372
3373 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3374 case OP_XCLASS:
3375 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3376 if (common->utf) return consumed;
3377 #endif
3378 any = TRUE;
3379 cc += GET(cc, 1);
3380 break;
3381 #endif
3382
3383 case OP_DIGIT:
3384 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3385 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3386 return consumed;
3387 #endif
3388 any = TRUE;
3389 cc++;
3390 break;
3391
3392 case OP_WHITESPACE:
3393 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3394 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3395 return consumed;
3396 #endif
3397 any = TRUE;
3398 cc++;
3399 break;
3400
3401 case OP_WORDCHAR:
3402 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3403 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3404 return consumed;
3405 #endif
3406 any = TRUE;
3407 cc++;
3408 break;
3409
/* The extra increment steps over the opcode; the shared code below then steps
over one more code unit (the negated character). */
3410 case OP_NOT:
3411 case OP_NOTI:
3412 cc++;
3413 /* Fall through. */
3414 case OP_NOT_DIGIT:
3415 case OP_NOT_WHITESPACE:
3416 case OP_NOT_WORDCHAR:
3417 case OP_ANY:
3418 case OP_ALLANY:
3419 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3420 if (common->utf) return consumed;
3421 #endif
3422 any = TRUE;
3423 cc++;
3424 break;
3425
3426 #ifdef SUPPORT_UCP
3427 case OP_NOTPROP:
3428 case OP_PROP:
3429 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3430 if (common->utf) return consumed;
3431 #endif
3432 any = TRUE;
3433 cc += 1 + 2;
3434 break;
3435 #endif
3436
/* Only stores the count; the character type opcode that follows is handled by
the next iteration, which still sees this repeat value. */
3437 case OP_TYPEEXACT:
3438 repeat = GET2(cc, 1);
3439 cc += 1 + IMM2_SIZE;
3440 continue;
3441
3442 case OP_NOTEXACT:
3443 case OP_NOTEXACTI:
3444 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3445 if (common->utf) return consumed;
3446 #endif
3447 any = TRUE;
3448 repeat = GET2(cc, 1);
3449 cc += 1 + IMM2_SIZE + 1;
3450 break;
3451
/* Any other opcode ends the scan. */
3452 default:
3453 return consumed;
3454 }
3455
/* "Any character" items: mark "repeat" consecutive positions as fully
variable (all code unit bits set in both chars entries, byte set overflowed). */
3456 if (any)
3457 {
3458 #if defined COMPILE_PCRE8
3459 mask = 0xff;
3460 #elif defined COMPILE_PCRE16
3461 mask = 0xffff;
3462 #elif defined COMPILE_PCRE32
3463 mask = 0xffffffff;
3464 #else
3465 SLJIT_ASSERT_STOP();
3466 #endif
3467
3468 do
3469 {
3470 chars[0] = mask;
3471 chars[1] = mask;
3472 bytes[0] = 255;
3473
3474 consumed++;
3475 if (--max_chars == 0)
3476 return consumed;
3477 chars += 2;
3478 bytes += MAX_N_BYTES;
3479 }
3480 while (--repeat > 0);
3481
3482 repeat = 1;
3483 continue;
3484 }
3485
/* Literal character: cc now points at its first code unit and len is set to
the number of code units it occupies. */
3486 len = 1;
3487 #ifdef SUPPORT_UTF
3488 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3489 #endif
3490
/* Prepare the other-case form. In UTF mode the scan stops when the other case
does not encode to the same number of code units. */
3491 if (caseless && char_has_othercase(common, cc))
3492 {
3493 #ifdef SUPPORT_UTF
3494 if (common->utf)
3495 {
3496 GETCHAR(chr, cc);
3497 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3498 return consumed;
3499 }
3500 else
3501 #endif
3502 {
3503 chr = *cc;
3504 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3505 }
3506 }
3507 else
3508 caseless = FALSE;
3509
/* Record the character "repeat" times; every pass restarts from the saved
pointer and length. */
3510 len_save = len;
3511 cc_save = cc;
3512 while (TRUE)
3513 {
3514 oc = othercase;
/* One position is filled per code unit of the character. */
3515 do
3516 {
3517 chr = *cc;
3518 #ifdef COMPILE_PCRE32
3519 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3520 return consumed;
3521 #endif
3522 add_prefix_byte((pcre_uint8)chr, bytes);
3523
/* For caseless characters the bits that differ between the two cases become
the mask and are set in the stored value. */
3524 mask = 0;
3525 if (caseless)
3526 {
3527 add_prefix_byte((pcre_uint8)*oc, bytes);
3528 mask = *cc ^ *oc;
3529 chr |= mask;
3530 }
3531
/* A position still holding the NOTACHAR marker takes the value as is;
otherwise the new value is merged with what an earlier scan stored there. */
3532 #ifdef COMPILE_PCRE32
3533 if (chars[0] == NOTACHAR && chars[1] == 0)
3534 #else
3535 if (chars[0] == NOTACHAR)
3536 #endif
3537 {
3538 chars[0] = chr;
3539 chars[1] = mask;
3540 }
3541 else
3542 {
3543 mask |= chars[0] ^ chr;
3544 chr |= mask;
3545 chars[0] = chr;
3546 chars[1] |= mask;
3547 }
3548
3549 len--;
3550 consumed++;
3551 if (--max_chars == 0)
3552 return consumed;
3553 chars += 2;
3554 bytes += MAX_N_BYTES;
3555 cc++;
3556 oc++;
3557 }
3558 while (len > 0);
3559
3560 if (--repeat == 0)
3561 break;
3562
3563 len = len_save;
3564 cc = cc_save;
3565 }
3566
3567 repeat = 1;
/* Items that left last set to TRUE end the scan here. */
3568 if (last)
3569 return consumed;
3570 }
3571 }
3572
/* Tries to emit a fast-forward loop based on the leading characters that
scan_prefix() reports for the pattern.

Steps performed while generating the code:
  1. Scan up to MAX_N_CHARS positions with a budget of 10000 scan steps.
  2. Count the variable bits of every position into ones[].
  3. Look for a run of more than two consecutive positions with a usable byte
     set whose last position has at most 4 candidate bytes. For such a run a
     256 entry skip table is built.
  4. Pick up to three positions with at most two variable bits to compare
     directly: offsets[0] (first), offsets[1] (last), offsets[2] (middle).
  5. Emit the search loop.

firstline selects the variant that takes the value stored at
common->first_line_end into account.

Returns FALSE when no code was emitted (too few positions, nothing usable, or
a case the in-code comment leaves to fast_forward_first_char). Returns TRUE
otherwise, including when the skip table cannot be allocated.
NOTE(review): presumably the allocator records the failure itself; confirm. */
3573 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3574 {
3575 DEFINE_COMPILER;
3576 struct sljit_label *start;
3577 struct sljit_jump *quit;
3578 pcre_uint32 chars[MAX_N_CHARS * 2];
3579 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3580 pcre_uint8 ones[MAX_N_CHARS];
3581 int offsets[3];
3582 pcre_uint32 mask;
3583 pcre_uint8 *byte_set, *byte_set_end;
3584 int i, max, from;
3585 int range_right = -1, range_len = 3 - 1;
3586 sljit_ub *update_table = NULL;
3587 BOOL in_range;
3588 pcre_uint32 rec_count;
3589
/* Mark every position as unset: NOTACHAR value, empty mask, empty byte set. */
3590 for (i = 0; i < MAX_N_CHARS; i++)
3591 {
3592 chars[i << 1] = NOTACHAR;
3593 chars[(i << 1) + 1] = 0;
3594 bytes[i * MAX_N_BYTES] = 0;
3595 }
3596
3597 rec_count = 10000;
3598 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3599
3600 if (max <= 1)
3601 return FALSE;
3602
/* ones[i] accumulates ones_in_half_byte[] over every 4-bit group of the mask
of position i. The table is defined elsewhere; presumably it holds the number
of set bits of a 4-bit value, making ones[i] the number of variable bits. */
3603 for (i = 0; i < max; i++)
3604 {
3605 mask = chars[(i << 1) + 1];
3606 ones[i] = ones_in_half_byte[mask & 0xf];
3607 mask >>= 4;
3608 while (mask != 0)
3609 {
3610 ones[i] += ones_in_half_byte[mask & 0xf];
3611 mask >>= 4;
3612 }
3613 }
3614
/* Search for the longest run of positions whose byte set has not overflowed
(count below 255). A run is only accepted when it is longer than the current
best (initially 2) and its last position has at most 4 candidate bytes. The
loop runs one step past the last position so that a run reaching the end is
also evaluated. */
3615 in_range = FALSE;
3616 from = 0; /* Prevent compiler "uninitialized" warning */
3617 for (i = 0; i <= max; i++)
3618 {
3619 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3620 {
3621 range_len = i - from;
3622 range_right = i - 1;
3623 }
3624
3625 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3626 {
3627 if (!in_range)
3628 {
3629 in_range = TRUE;
3630 from = i;
3631 }
3632 }
3633 else if (in_range)
3634 in_range = FALSE;
3635 }
3636
/* Build the skip table for the selected run. Every entry starts at
IN_UCHARS(range_len); for each byte that can occur i positions to the left of
range_right the entry is lowered to IN_UCHARS(i). Bytes that can occur at
range_right itself therefore get 0. */
3637 if (range_right >= 0)
3638 {
3639 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3640 if (update_table == NULL)
3641 return TRUE;
3642 memset(update_table, IN_UCHARS(range_len), 256);
3643
3644 for (i = 0; i < range_len; i++)
3645 {
3646 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3647 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3648 byte_set_end = byte_set + byte_set[0];
3649 byte_set++;
3650 while (byte_set <= byte_set_end)
3651 {
3652 if (update_table[*byte_set] > IN_UCHARS(i))
3653 update_table[*byte_set] = IN_UCHARS(i);
3654 byte_set++;
3655 }
3656 }
3657 }
3658
/* offsets[0]: the first position with at most two variable bits. */
3659 offsets[0] = -1;
3660 /* Scan forward. */
3661 for (i = 0; i < max; i++)
3662 if (ones[i] <= 2) {
3663 offsets[0] = i;
3664 break;
3665 }
3666
/* Neither a comparable position nor a skip table: nothing to emit. */
3667 if (offsets[0] < 0 && range_right < 0)
3668 return FALSE;
3669
3670 if (offsets[0] >= 0)
3671 {
/* offsets[1]: the last such position after offsets[0], other than the one
already covered by the skip table. */
3672 /* Scan backward. */
3673 offsets[1] = -1;
3674 for (i = max - 1; i > offsets[0]; i--)
3675 if (ones[i] <= 2 && i != range_right)
3676 {
3677 offsets[1] = i;
3678 break;
3679 }
3680
3681 /* This case is handled better by fast_forward_first_char. */
3682 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3683 return FALSE;
3684
3685 offsets[2] = -1;
3686 /* We only search for a middle character if there is no range check. */
3687 if (offsets[1] >= 0 && range_right == -1)
3688 {
3689 /* Scan from middle. */
3690 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3691 if (ones[i] <= 2)
3692 {
3693 offsets[2] = i;
3694 break;
3695 }
3696
/* Nothing found to the right of the midpoint: look to the left of it. */
3697 if (offsets[2] == -1)
3698 {
3699 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3700 if (ones[i] <= 2)
3701 {
3702 offsets[2] = i;
3703 break;
3704 }
3705 }
3706 }
3707
3708 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3709 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3710
/* Move the selected value/mask pairs to the front of chars[]: entries 0-1
describe offsets[0], entries 2-3 offsets[2], entries 4-5 offsets[1]. */
3711 chars[0] = chars[offsets[0] << 1];
3712 chars[1] = chars[(offsets[0] << 1) + 1];
3713 if (offsets[2] >= 0)
3714 {
3715 chars[2] = chars[offsets[2] << 1];
3716 chars[3] = chars[(offsets[2] << 1) + 1];
3717 }
3718 if (offsets[1] >= 0)
3719 {
3720 chars[4] = chars[offsets[1] << 1];
3721 chars[5] = chars[(offsets[1] << 1) + 1];
3722 }
3723 }
3724
/* From here on code is emitted. STR_END is lowered by IN_UCHARS(max), with max
reduced by one, for the duration of the loop. With firstline the original
STR_END is kept in TMP3 and, as the compare/jump pair reads, STR_END becomes
the smaller of the lowered value and the stored first_line_end. */
3725 max -= 1;
3726 if (firstline)
3727 {
3728 SLJIT_ASSERT(common->first_line_end != 0);
3729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3730 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3731 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3732 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3733 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3734 JUMPHERE(quit);
3735 }
3736 else
3737 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3738
/* Except on 32 bit x86, the skip table address is kept in RETURN_ADDR. */
3739 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3740 if (range_right >= 0)
3741 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3742 #endif
3743
/* Top of the search loop; it is left once STR_PTR reaches the adjusted
STR_END. */
3744 start = LABEL();
3745 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3746
3747 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3748
/* Skip table step: load one byte of the code unit at position range_right,
look up its skip distance, advance STR_PTR by it and restart the loop unless
the distance is zero. The alternative offset is used for code units wider than
8 bits when SLJIT_LITTLE_ENDIAN is not set; it addresses the last byte of the
code unit. */
3749 if (range_right >= 0)
3750 {
3751 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3752 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3753 #else
3754 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3755 #endif
3756
3757 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3758 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3759 #else
3760 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3761 #endif
3762 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3763 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3764 }
3765
/* Direct comparisons: load the code units at offsets[0] and offsets[1],
advance STR_PTR by one, then OR in each mask and compare with the stored
value, restarting the loop on a mismatch. */
3766 if (offsets[0] >= 0)
3767 {
3768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3769 if (offsets[1] >= 0)
3770 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3772
3773 if (chars[1] != 0)
3774 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3775 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been advanced, hence the "- 1" in the offset. */
3776 if (offsets[2] >= 0)
3777 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3778
3779 if (offsets[1] >= 0)
3780 {
3781 if (chars[5] != 0)
3782 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3783 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3784 }
3785
3786 if (offsets[2] >= 0)
3787 {
3788 if (chars[3] != 0)
3789 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3790 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3791 }
/* All comparisons passed: undo the advance so STR_PTR is back at the
candidate position. */
3792 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3793 }
3794
3795 JUMPHERE(quit);
3796
/* Restore STR_END. With firstline and a skip table, STR_PTR is additionally
limited to the stored first_line_end. */
3797 if (firstline)
3798 {
3799 if (range_right >= 0)
3800 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3801 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3802 if (range_right >= 0)
3803 {
3804 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3805 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3806 JUMPHERE(quit);
3807 }
3808 }
3809 else
3810 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3811 return TRUE;
3812 }
3813
/* The prefix scanner limits are not used past this point. */
3814 #undef MAX_N_CHARS
3815 #undef MAX_N_BYTES
3816
/* Emits a loop that advances STR_PTR one code unit at a time until the code
unit at STR_PTR equals first_char (or, when caseless is set, its other case),
or until STR_PTR reaches STR_END.

With firstline set, STR_END is replaced for the duration of the loop by the
value stored at common->first_line_end; the original STR_END is kept in TMP3
and restored afterwards. */
3817 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3818 {
3819 DEFINE_COMPILER;
3820 struct sljit_label *start;
3821 struct sljit_jump *quit;
3822 struct sljit_jump *found;
3823 pcre_uchar oc, bit;
3824
3825 if (firstline)
3826 {
3827 SLJIT_ASSERT(common->first_line_end != 0);
3828 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3829 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3830 }
3831
/* Loop head: leave at the end of the searched area, otherwise load the
current code unit into TMP1. */
3832 start = LABEL();
3833 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3835
/* oc is the other case of first_char, or first_char itself when the search is
case sensitive. Outside the 8 bit library, values above 127 take their other
case from UCD_OTHERCASE in UTF mode. */
3836 oc = first_char;
3837 if (caseless)
3838 {
3839 oc = TABLE_GET(first_char, common->fcc, first_char);
3840 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3841 if (first_char > 127 && common->utf)
3842 oc = UCD_OTHERCASE(first_char);
3843 #endif
3844 }
3845 if (first_char == oc)
3846 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3847 else
3848 {
3849 bit = first_char ^ oc;
/* When is_powerof2() accepts the difference, one OR followed by one compare
tests both cases at once. */
3850 if (is_powerof2(bit))
3851 {
3852 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3853 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3854 }
3855 else
3856 {
/* Otherwise compare against each case separately and OR the two results. */
3857 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3858 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3859 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3860 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3861 found = JUMP(SLJIT_NOT_ZERO);
3862 }
3863 }
3864
/* No match at this position: step to the next code unit and try again. */
3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3866 JUMPTO(SLJIT_JUMP, start);
3867 JUMPHERE(found);
3868 JUMPHERE(quit);
3869
3870 if (firstline)
3871 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3872 }
3873
/* Emits code that moves STR_PTR forward to the position following a newline.
Nothing is searched when STR_PTR is not beyond the "str" field of the
jit_arguments block.

Two variants are generated: a dedicated loop for a fixed two code unit newline
(common->newline > 255), and a generic loop built from read_char_range() and
check_newlinechar() for every other newline type.

With firstline set, STR_END is replaced for the duration of the search by the
value stored at common->first_line_end; the original STR_END is kept in TMP3
and restored afterwards. */
3874 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3875 {
3876 DEFINE_COMPILER;
3877 struct sljit_label *loop;
3878 struct sljit_jump *lastchar;
3879 struct sljit_jump *firstchar;
3880 struct sljit_jump *quit;
3881 struct sljit_jump *foundcr = NULL;
3882 struct sljit_jump *notfoundnl;
3883 jump_list *newline = NULL;
3884
3885 if (firstline)
3886 {
3887 SLJIT_ASSERT(common->first_line_end != 0);
3888 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3889 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3890 }
3891
/* Fixed newline made of two code units. */
3892 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3893 {
/* Nothing to do at the end of the searched area, or when STR_PTR is not
beyond the "str" field of the arguments. */
3894 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3895 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3898 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3899
/* Step STR_PTR back by one code unit when it is at least two code units past
the "begin" field, so that the first pass of the loop below examines the two
code units preceding the original STR_PTR. */
3900 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3901 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3902 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3905 #endif
3906 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3907
/* Advance until the two code units just before STR_PTR are the two newline
code units, or the end of the searched area is reached. */
3908 loop = LABEL();
3909 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3910 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3911 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3912 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3913 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3914 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3915
3916 JUMPHERE(quit);
3917 JUMPHERE(firstchar);
3918 JUMPHERE(lastchar);
3919
3920 if (firstline)
3921 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3922 return;
3923 }
3924
/* Generic variant. The search is skipped when STR_PTR is not beyond the "str"
field; otherwise skip_char_back() is applied first (presumably stepping back
one character - see its definition). */
3925 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3927 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3928 skip_char_back(common);
3929
/* The loop label is also published in common->ff_newline_shortcut. */
3930 loop = LABEL();
3931 common->ff_newline_shortcut = loop;
3932
/* Read one character and stop at the end of the searched area. For the ANY
and ANYCRLF newline types a CR is handled separately further down. The jumps
collected by check_newlinechar() are bound to the loop label.
NOTE(review): presumably these are the "not a newline" exits, since FALSE is
passed as the last argument - confirm against check_newlinechar(). */
3933 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3934 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3935 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3936 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3937 check_newlinechar(common, common->nltype, &newline, FALSE);
3938 set_jumps(newline, loop);
3939
3940 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3941 {
/* After a CR: when the next code unit is NL, STR_PTR is advanced by one more
code unit so that the CR NL pair is passed as a whole. */
3942 quit = JUMP(SLJIT_JUMP);
3943 JUMPHERE(foundcr);
3944 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3945 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3946 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3947 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3948 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3949 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3950 #endif
3951 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3952 JUMPHERE(notfoundnl);
3953 JUMPHERE(quit);
3954 }
3955 JUMPHERE(lastchar);
3956 JUMPHERE(firstchar);
3957
3958 if (firstline)
3959 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3960 }
3961
/* Forward declaration: the function is used by fast_forward_start_bits()
before its definition. */
3962 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3963
/* Emits a loop that advances STR_PTR character by character until the code
unit at STR_PTR is accepted by the start_bits bitmap, or until STR_PTR reaches
STR_END.

The acceptance test is generated by check_class_ranges() when it can handle
the bitmap; otherwise an explicit bitmap lookup is emitted.

With firstline set, STR_END is replaced for the duration of the loop by the
value stored at common->first_line_end; the original STR_END is kept in
RETURN_ADDR and restored afterwards. */
3964 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3965 {
3966 DEFINE_COMPILER;
3967 struct sljit_label *start;
3968 struct sljit_jump *quit;
3969 struct sljit_jump *found = NULL;
3970 jump_list *matches = NULL;
3971 #ifndef COMPILE_PCRE8
3972 struct sljit_jump *jump;
3973 #endif
3974
3975 if (firstline)
3976 {
3977 SLJIT_ASSERT(common->first_line_end != 0);
3978 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3979 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3980 }
3981
/* Loop head: leave at the end of the searched area, otherwise load the
current code unit into TMP1. */
3982 start = LABEL();
3983 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3984 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode the code unit is also kept in TMP3, because TMP1 is modified by
the test below and the value is needed again for the character length. */
3985 #ifdef SUPPORT_UTF
3986 if (common->utf)
3987 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3988 #endif
3989
/* check_class_ranges() is called with invert set and collects its jumps in
"matches"; they are bound to the exit of the loop at the bottom. When it
returns FALSE, the bitmap is tested explicitly instead. */
3990 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3991 {
/* Outside the 8 bit library, values of 255 and above are all looked up as
255. */
3992 #ifndef COMPILE_PCRE8
3993 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3995 JUMPHERE(jump);
3996 #endif
/* Test bit (value & 7) of the byte start_bits[value >> 3]. */
3997 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3998 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3999 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4000 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4001 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4002 found = JUMP(SLJIT_NOT_ZERO);
4003 }
4004
/* Not accepted: advance to the next character. In UTF mode the saved code unit
is restored first so that the length of the character can be derived from
it. */
4005 #ifdef SUPPORT_UTF
4006 if (common->utf)
4007 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4008 #endif
4009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4010 #ifdef SUPPORT_UTF
4011 #if defined COMPILE_PCRE8
4012 if (common->utf)
4013 {
/* Code units of 0xc0 and above: add the value found in utf8_table4 for that
code unit. */
4014 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4015 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4017 }
4018 #elif defined COMPILE_PCRE16
4019 if (common->utf)
4020 {
/* Code units whose top six bits equal those of 0xd800: add 2 more bytes, i.e.
one more 16 bit code unit. */
4021 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4023 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4024 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4025 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4027 }
4028 #endif /* COMPILE_PCRE[8|16] */
4029 #endif /* SUPPORT_UTF */
4030 JUMPTO(SLJIT_JUMP, start);
/* Exit of the loop: reached on acceptance and at the end of the searched
area. */
4031 if (found != NULL)
4032 JUMPHERE(found);
4033 if (matches != NULL)
4034 set_jumps(matches, LABEL());
4035 JUMPHERE(quit);
4036
4037 if (firstline)
4038 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4039 }
4040
/* Emits a forward search for req_char (or, when caseless is set, its other
case) in the subject, and returns the jump that is taken when the end of the
subject is reached without finding it. The caller has to bind that jump.

The search starts at STR_PTR, or one code unit after it when has_firstchar is
set. The position where the character was found is stored in the local slot
common->req_char_ptr. The search is skipped when STR_PTR + REQ_BYTE_MAX is
still below STR_END ("toolong"), or when STR_PTR is below the stored position
("alreadyfound"). */
4041 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4042 {
4043 DEFINE_COMPILER;
4044 struct sljit_label *loop;
4045 struct sljit_jump *toolong;
4046 struct sljit_jump *alreadyfound;
4047 struct sljit_jump *found;
4048 struct sljit_jump *foundoc = NULL;
4049 struct sljit_jump *notfound;
4050 pcre_uint32 oc, bit;
4051
4052 SLJIT_ASSERT(common->req_char_ptr != 0);
4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4054 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4055 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4056 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4057
/* TMP1 is the search cursor. */
4058 if (has_firstchar)
4059 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4060 else
4061 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4062
4063 loop = LABEL();
4064 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4065
4066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself when the search is
case sensitive. Outside the 8 bit library, values above 127 take their other
case from UCD_OTHERCASE in UTF mode. */
4067 oc = req_char;
4068 if (caseless)
4069 {
4070 oc = TABLE_GET(req_char, common->fcc, req_char);
4071 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4072 if (req_char > 127 && common->utf)
4073 oc = UCD_OTHERCASE(req_char);
4074 #endif
4075 }
4076 if (req_char == oc)
4077 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4078 else
4079 {
4080 bit = req_char ^ oc;
/* When is_powerof2() accepts the difference, one OR followed by one compare
tests both cases; otherwise each case gets its own compare. */
4081 if (is_powerof2(bit))
4082 {
4083 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4084 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4085 }
4086 else
4087 {
4088 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4089 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4090 }
4091 }
/* No match at the cursor: move to the next code unit. */
4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4093 JUMPTO(SLJIT_JUMP, loop);
4094
/* Found: remember the position so that later calls can skip the search. */
4095 JUMPHERE(found);
4096 if (foundoc)
4097 JUMPHERE(foundoc);
4098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4099 JUMPHERE(alreadyfound);
4100 JUMPHERE(toolong);
4101 return notfound;
4102 }
4103
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return) that walks the records found at STACK_TOP and writes
the saved values back into the local area whose base is loaded into TMP3.

Each record starts with a machine word that selects its form:
  > 0  offset of a local slot pair; the next two words are copied to it and
       the cursor advances by three words;
  < 0  negated offset of a single local slot; the next word is copied to it
       and the cursor advances by two words;
  = 0  end marker; the routine returns.

Only TMP1 (the cursor), TMP2 and TMP3 are written by the code visible here;
STACK_TOP itself is only read. */
4104 static void do_revertframes(compiler_common *common)
4105 {
4106 DEFINE_COMPILER;
4107 struct sljit_jump *jump;
4108 struct sljit_label *mainloop;
4109
4110 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4111 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4112 GET_LOCAL_BASE(TMP3, 0, 0);
4113
4114 /* Drop frames until we reach STACK_TOP. */
4115 mainloop = LABEL();
4116 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the selector word with zero (signed); the flags set here are also
used by the second jump below. */
4117 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4118 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4119
/* Positive selector: restore two consecutive local slots. */
4120 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4121 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4122 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4123 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4124 JUMPTO(SLJIT_JUMP, mainloop);
4125
/* Zero or negative selector: a negative one continues below, zero returns. */
4126 JUMPHERE(jump);
4127 jump = JUMP(SLJIT_SIG_LESS);
4128 /* End of dropping frames. */
4129 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4130
/* Negative selector: restore a single local slot. */
4131 JUMPHERE(jump);
4132 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4133 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4134 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4135 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4136 JUMPTO(SLJIT_JUMP, mainloop);
4137 }
4138
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return; the return address is kept in LOCALS0) that tests
whether STR_PTR is at a word boundary.

The routine classifies the character before STR_PTR and the character at
STR_PTR as word (1) or non-word (0) and finishes with an XOR of the two
values that sets the flags, which is how the result is passed back. The value
for the previous character is kept in LOCALS1 and stays 0 when STR_PTR is not
beyond the "begin" field of the arguments; the value for the next character
is built in TMP2 and stays 0 when check_str_end() reports the end of the
subject.

With use_ucp the classification is the underscore plus two ranges of the type
delivered by the getucd helper (ucp_Ll..ucp_Lu and ucp_Nd..ucp_No); otherwise
it is bit 4 (ctype_word) of the ctypes table entry, with characters above 255
counted as non-word wherever the guarded range check is emitted. */
4139 static void check_wordboundary(compiler_common *common)
4140 {
4141 DEFINE_COMPILER;
4142 struct sljit_jump *skipread;
4143 jump_list *skipread_list = NULL;
4144 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4145 struct sljit_jump *jump;
4146 #endif
4147
/* The shift by 4 used below relies on this value of ctype_word. */
4148 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4149
4150 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4151 /* Get type of the previous char, and put it to LOCALS1. */
4152 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4155 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and read the character again; the read presumably leaves STR_PTR
where it was on entry - see skip_char_back() and read_char(). */
4156 skip_char_back(common);
4157 check_start_used_ptr(common);
4158 read_char(common);
4159
4160 /* Testing char type. */
4161 #ifdef SUPPORT_UCP
4162 if (common->use_ucp)
4163 {
/* TMP2 is preset to 1 for the underscore; every other character has its type
fetched through the getucd helper and tested against the two ranges. */
4164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4165 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4166 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4167 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4168 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4170 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4171 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4172 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4173 JUMPHERE(jump);
4174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4175 }
4176 else
4177 #endif
4178 {
/* Table based test. Characters above 255 skip the lookup, leaving LOCALS1 at
0. In the 8 bit library this check is only emitted in UTF mode. */
4179 #ifndef COMPILE_PCRE8
4180 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4181 #elif defined SUPPORT_UTF
4182 /* Here LOCALS1 has already been zeroed. */
4183 jump = NULL;
4184 if (common->utf)
4185 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4186 #endif /* COMPILE_PCRE8 */
4187 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4188 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4189 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4191 #ifndef COMPILE_PCRE8
4192 JUMPHERE(jump);
4193 #elif defined SUPPORT_UTF
4194 if (jump != NULL)
4195 JUMPHERE(jump);
4196 #endif /* COMPILE_PCRE8 */
4197 }
4198 JUMPHERE(skipread);
4199
/* Second half: classify the character at STR_PTR into TMP2. TMP2 is preset to
0 for the case where check_str_end() jumps straight to the final XOR. */
4200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4201 check_str_end(common, &skipread_list);
4202 peek_char(common, READ_CHAR_MAX);
4203
4204 /* Testing char type. This is a code duplication. */
4205 #ifdef SUPPORT_UCP
4206 if (common->use_ucp)
4207 {
4208 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4209 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4210 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4211 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4212 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4213 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4214 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4216 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4217 JUMPHERE(jump);
4218 }
4219 else
4220 #endif
4221 {
4222 #ifndef COMPILE_PCRE8
4223 /* TMP2 may be destroyed by peek_char. */
4224 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4225 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4226 #elif defined SUPPORT_UTF
4227 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4228 jump = NULL;
4229 if (common->utf)
4230 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4231 #endif
4232 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4233 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4234 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4235 #ifndef COMPILE_PCRE8
4236 JUMPHERE(jump);
4237 #elif defined SUPPORT_UTF
4238 if (jump != NULL)
4239 JUMPHERE(jump);
4240 #endif /* COMPILE_PCRE8 */
4241 }
4242 set_jumps(skipread_list, LABEL());
4243
/* The XOR of the two classifications sets the flags for the code that invoked
this routine: a non-zero result means the two sides differ. */
4244 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4245 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4246 }
4247
/* Tries to emit a short compare sequence for an 8-bit character class bitmap.

The 256-bit map in "bits" is reduced to the list of positions at which the bit
value changes (ranges[]). Characters >= 256 are treated as set bits when
"nclass" is TRUE and as clear bits otherwise; a boundary at 256 is appended
when that differs from the last bit of the map.

Returns FALSE, without emitting any code, when more than 4 boundaries (or more
than MAX_RANGE_SIZE) would be needed. Otherwise code is emitted that tests the
character in TMP1 and adds a jump to "backtracks" for every character whose
bit is 0 (whose bit is 1 when "invert" is set), and TRUE is returned. The
emitted code may modify TMP1. */
4248 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4249 {
4250 DEFINE_COMPILER;
4251 int ranges[MAX_RANGE_SIZE];
4252 pcre_uint8 bit, cbit, all;
4253 int i, byte, length = 0;
4254
4255 bit = bits[0] & 0x1;
4256 /* All bits will be zero or one (since bit is zero or one). */
4257 all = -bit;
4258
/* Collect the positions where the bit value flips. Whole bytes equal to
"all" contain no flip and are skipped eight bits at a time. */
4259 for (i = 0; i < 256; )
4260 {
4261 byte = i >> 3;
4262 if ((i & 0x7) == 0 && bits[byte] == all)
4263 i += 8;
4264 else
4265 {
4266 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4267 if (cbit != bit)
4268 {
4269 if (length >= MAX_RANGE_SIZE)
4270 return FALSE;
4271 ranges[length] = i;
4272 length++;
4273 bit = cbit;
4274 all = -cbit;
4275 }
4276 i++;
4277 }
4278 }
4279
/* "bit" now holds the value of bit 255. Add a boundary at 256 when the
characters above 255 must have the opposite value. */
4280 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4281 {
4282 if (length >= MAX_RANGE_SIZE)
4283 return FALSE;
4284 ranges[length] = 256;
4285 length++;
4286 }
4287
/* Only 0 to 4 boundaries are handled by the switch below. */
4288 if (length < 0 || length > 4)
4289 return FALSE;
4290
/* From here on "bit" is the (possibly inverted) value of the first run,
i.e. of the characters below ranges[0]. */
4291 bit = bits[0] & 0x1;
4292 if (invert) bit ^= 0x1;
4293
4294 /* No character is accepted. */
4295 if (length == 0 && bit == 0)
4296 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4297
4298 switch(length)
4299 {
4300 case 0:
4301 /* When bit != 0, all characters are accepted. */
4302 return TRUE;
4303
4304 case 1:
/* One boundary: a single compare against ranges[0]. */
4305 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4306 return TRUE;
4307
4308 case 2:
/* Two boundaries: one interval [ranges[0], ranges[1]), tested with a
subtract and a compare, or with an equality test when it holds a
single character. */
4309 if (ranges[0] + 1 != ranges[1])
4310 {
4311 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4312 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4313 }
4314 else
4315 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4316 return TRUE;
4317
4318 case 3:
4319 if (bit != 0)
4320 {
4321 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4322 if (ranges[0] + 1 != ranges[1])
4323 {
4324 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4325 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4326 }
4327 else
4328 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4329 return TRUE;
4330 }
4331
4332 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4333 if (ranges[1] + 1 != ranges[2])
4334 {
4335 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4336 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4337 }
4338 else
4339 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4340 return TRUE;
4341
4342 case 4:
/* Two intervals of equal length whose start positions differ in exactly
one bit: that bit is forced to 1 in TMP1 so that a single interval
test covers both. */
4343 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4344 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4345 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4346 && is_powerof2(ranges[2] - ranges[0]))
4347 {
4348 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4349 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4350 if (ranges[2] + 1 != ranges[3])
4351 {
4352 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4353 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4354 }
4355 else
4356 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4357 return TRUE;
4358 }
4359
4360 if (bit != 0)
4361 {
/* i records how much has already been subtracted from TMP1 by the
first interval test, so the second test can compensate for it. */
4362 i = 0;
4363 if (ranges[0] + 1 != ranges[1])
4364 {
4365 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4366 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4367 i = ranges[0];
4368 }
4369 else
4370 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4371
4372 if (ranges[2] + 1 != ranges[3])
4373 {
4374 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4375 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4376 }
4377 else
4378 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4379 return TRUE;
4380 }
4381
4382 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4383 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4384 if (ranges[1] + 1 != ranges[2])
4385 {
4386 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4387 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4388 }
4389 else
4390 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4391 return TRUE;
4392
4393 default:
4394 SLJIT_ASSERT_STOP();
4395 return FALSE;
4396 }
4397 }
4398
/* Emits a fast-call helper that tests the character in TMP1 against the
newline characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when the wide
character part is compiled in (and, in the 8 bit library, common->utf is
set). */
4399 static void check_anynewline(compiler_common *common)
4400 {
4401 /* Check whether TMP1 contains a newline character. The result is left in
TMP2 (non-zero on a match) and the last operation also sets the flags from it.
TMP1 is modified as well: it is biased by 0x0a and may have its lowest bit set. */
4402 DEFINE_COMPILER;
4403
4404 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4405
/* 0x0a..0x0d: one range test on the biased value. */
4406 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4407 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4408 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4409 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4410 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4411 #ifdef COMPILE_PCRE8
4412 if (common->utf)
4413 {
4414 #endif
4415 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one equality test
covers both. */
4416 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4417 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4418 #ifdef COMPILE_PCRE8
4419 }
4420 #endif
4421 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above. */
4422 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4423 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4424 }
4425
/* Emits a fast-call helper that tests the character in TMP1 against the
horizontal whitespace characters 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 when the wide character part is
compiled in (and, in the 8 bit library, common->utf is set). */
4426 static void check_hspace(compiler_common *common)
4427 {
4428 /* Check whether TMP1 contains a horizontal whitespace character. The result
is left in TMP2 (non-zero on a match) and the last operation also sets the
flags from it. In the wide character path TMP1 is modified (biased by 0x2000). */
4429 DEFINE_COMPILER;
4430
4431 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4432
4433 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4434 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4436 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4437 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4438 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4439 #ifdef COMPILE_PCRE8
4440 if (common->utf)
4441 {
4442 #endif
4443 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4445 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4447 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* From here on TMP1 is biased by 0x2000; the remaining constants are
written relative to that. */
4448 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4449 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4452 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4454 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4455 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4456 #ifdef COMPILE_PCRE8
4457 }
4458 #endif
4459 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above. */
4460 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4461
4462 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4463 }
4464
/* Emits a fast-call helper that tests the character in TMP1 against the
vertical whitespace characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when
the wide character part is compiled in (and, in the 8 bit library,
common->utf is set). */
4465 static void check_vspace(compiler_common *common)
4466 {
4467 /* Check whether TMP1 contains a vertical whitespace character. The result is
left in TMP2 (non-zero on a match) and the last operation also sets the flags
from it. TMP1 is modified as well: it is biased by 0x0a and may have its
lowest bit set. */
4468 DEFINE_COMPILER;
4469
4470 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4471
/* 0x0a..0x0d: one range test on the biased value. */
4472 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4473 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4474 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4476 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4477 #ifdef COMPILE_PCRE8
4478 if (common->utf)
4479 {
4480 #endif
/* NOTE(review): the same step in check_anynewline does not request
SLJIT_SET_E; the flags set here are overwritten by the SUB two lines below,
so the extra flag request looks unnecessary - confirm before changing. */
4481 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one equality test
covers both. */
4482 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4483 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4484 #ifdef COMPILE_PCRE8
4485 }
4486 #endif
4487 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above. */
4488 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4489
4490 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4491 }
4492
/* The compare helpers below borrow the STR_END and STACK_TOP registers as
character temporaries. Each helper saves their values on entry and restores
them before returning. */
4493 #define CHAR1 STR_END
4494 #define CHAR2 STACK_TOP
4495
/* Emits a fast-call helper that compares two code unit sequences exactly.

On entry TMP1 addresses the first sequence, TMP2 holds the length in bytes
(it is decremented by IN_UCHARS(1) per unit) and STR_PTR is first moved back
by TMP2 to address the second sequence. The loop stops at the first unequal
unit, before TMP2 is decremented for it, or when TMP2 reaches zero. CHAR1 and
CHAR2 (aliases of other registers) are saved in TMP3 and LOCALS0 and restored
before returning. */
4496 static void do_casefulcmp(compiler_common *common)
4497 {
4498 DEFINE_COMPILER;
4499 struct sljit_jump *jump;
4500 struct sljit_label *label;
4501
4502 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4503 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4504 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are biased by one unit; presumably MOVU_UCHAR updates the
base register by the given offset on each load - confirm against sljit. */
4506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4508
4509 label = LABEL();
4510 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4511 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4512 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4513 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4514 JUMPTO(SLJIT_NOT_ZERO, label);
4515
4516 JUMPHERE(jump);
/* Undo the one unit bias of STR_PTR and restore the borrowed registers. */
4517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4518 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4519 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4520 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4521 }
4522
/* do_caselesscmp borrows the STACK_LIMIT register to hold the address of the
common->lcc lookup table; the register is saved and restored by the helper. */
4523 #define LCC_TABLE STACK_LIMIT
4524
/* Emits a fast-call helper that compares two code unit sequences after mapping
each unit through the common->lcc table.

Register usage on entry matches do_casefulcmp: TMP1 addresses the first
sequence, TMP2 holds the length in bytes and STR_PTR is first moved back by
TMP2. In the 16 and 32 bit libraries units above 255 are compared unmapped.
The loop stops at the first unequal pair, before TMP2 is decremented for it,
or when TMP2 reaches zero. LCC_TABLE, CHAR1 and CHAR2 (aliases of other
registers) are saved in TMP3, LOCALS0 and LOCALS1 and restored before
returning. */
4525 static void do_caselesscmp(compiler_common *common)
4526 {
4527 DEFINE_COMPILER;
4528 struct sljit_jump *jump;
4529 struct sljit_label *label;
4530
4531 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4533
4534 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4537 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased by one unit; presumably MOVU_UCHAR updates the
base register by the given offset on each load - confirm against sljit. */
4538 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4539 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4540
4541 label = LABEL();
4542 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4543 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for units above 255 when the unit type is
wider than 8 bits. */
4544 #ifndef COMPILE_PCRE8
4545 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4546 #endif
4547 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4548 #ifndef COMPILE_PCRE8
4549 JUMPHERE(jump);
4550 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4551 #endif
4552 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4553 #ifndef COMPILE_PCRE8
4554 JUMPHERE(jump);
4555 #endif
4556 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4557 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4558 JUMPTO(SLJIT_NOT_ZERO, label);
4559
4560 JUMPHERE(jump);
/* Undo the one unit bias of STR_PTR and restore the borrowed registers. */
4561 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4562 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4563 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4564 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4565 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4566 }
4567
4568 #undef LCC_TABLE
4569 #undef CHAR1
4570 #undef CHAR2
4571
4572 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4573
4574 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4575 {
4576 /* This function would be ineffective to do in JIT level. */
4577 pcre_uint32 c1, c2;
4578 const pcre_uchar *src2 = args->uchar_ptr;
4579 const pcre_uchar *end2 = args->end;
4580 const ucd_record *ur;
4581 const pcre_uint32 *pp;
4582
4583 while (src1 < end1)
4584 {
4585 if (src2 >= end2)
4586 return (pcre_uchar*)1;
4587 GETCHARINC(c1, src1);
4588 GETCHARINC(c2, src2);
4589 ur = GET_UCD(c2);
4590 if (c1 != c2 && c1 != c2 + ur->other_case)
4591 {
4592 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4593 for (;;)
4594 {
4595 if (c1 < *pp) return NULL;
4596 if (c1 == *pp++) break;
4597 }
4598 }
4599 }
4600 return src2;
4601 }
4602
4603 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4604
/* Emits the comparison of one pattern character (all of its code units in UTF
mode) starting at cc against the subject.

Subject data is read relative to STR_PTR at offset -context->length, and
context->length is reduced by one code unit for every unit handled. When
"caseless" is set and char_has_othercase() reports an other case for the
character, the code unit that holds the differing bit is compared with that
bit forced to 1 on both sides. Every mismatch adds a jump to "backtracks".
The "context" carries the state (remaining length, register in use, pending
units) across consecutive calls.

Returns cc advanced past the compared character. */
4605 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4606 compare_context *context, jump_list **backtracks)
4607 {
4608 DEFINE_COMPILER;
4609 unsigned int othercasebit = 0;
4610 pcre_uchar *othercasechar = NULL;
4611 #ifdef SUPPORT_UTF
4612 int utflength;
4613 #endif
4614
4615 if (caseless && char_has_othercase(common, cc))
4616 {
4617 othercasebit = char_get_othercase_bit(common, cc);
4618 SLJIT_ASSERT(othercasebit);
4619 /* Extracting bit difference info. */
/* The upper part of the value is the index of the code unit that differs,
the low 8 bits are the differing bit; in the 16/32 bit libraries bit 0x100
selects the high byte of that unit. */
4620 #if defined COMPILE_PCRE8
4621 othercasechar = cc + (othercasebit >> 8);
4622 othercasebit &= 0xff;
4623 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4624 /* Note that this code only handles characters in the BMP. If there
4625 ever are characters outside the BMP whose othercase differs in only one
4626 bit from itself (there currently are none), this code will need to be
4627 revised for COMPILE_PCRE32. */
4628 othercasechar = cc + (othercasebit >> 9);
4629 if ((othercasebit & 0x100) != 0)
4630 othercasebit = (othercasebit & 0xff) << 8;
4631 else
4632 othercasebit &= 0xff;
4633 #endif /* COMPILE_PCRE[8|16|32] */
4634 }
4635
/* First call for this context: load the first chunk into TMP1. sourcereg is
set to TMP2 because it names the register the NEXT load goes to; it is
swapped back to TMP1 before the first compare. */
4636 if (context->sourcereg == -1)
4637 {
4638 #if defined COMPILE_PCRE8
4639 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4640 if (context->length >= 4)
4641 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4642 else if (context->length >= 2)
4643 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4644 else
4645 #endif
4646 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4647 #elif defined COMPILE_PCRE16
4648 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4649 if (context->length >= 4)
4650 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4651 else
4652 #endif
4653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4654 #elif defined COMPILE_PCRE32
4655 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4656 #endif /* COMPILE_PCRE[8|16|32] */
4657 context->sourcereg = TMP2;
4658 }
4659
/* Without SUPPORT_UTF the loop below disappears and exactly one code unit is
processed. */
4660 #ifdef SUPPORT_UTF
4661 utflength = 1;
4662 if (common->utf && HAS_EXTRALEN(*cc))
4663 utflength += GET_EXTRALEN(*cc);
4664
4665 do
4666 {
4667 #endif
4668
4669 context->length -= IN_UCHARS(1);
4670 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4671
4672 /* Unaligned read is supported. */
/* Units are collected in context->c (expected value) and context->oc (bit
to force), and compared together once a chunk is complete. */
4673 if (othercasebit != 0 && othercasechar == cc)
4674 {
4675 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4676 context->oc.asuchars[context->ucharptr] = othercasebit;
4677 }
4678 else
4679 {
4680 context->c.asuchars[context->ucharptr] = *cc;
4681 context->oc.asuchars[context->ucharptr] = 0;
4682 }
4683 context->ucharptr++;
4684
4685 #if defined COMPILE_PCRE8
4686 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4687 #else
4688 if (context->ucharptr >= 2 || context->length == 0)
4689 #endif
4690 {
/* Issue the load of the next chunk into the other register before the
current chunk is compared. */
4691 if (context->length >= 4)
4692 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4693 else if (context->length >= 2)
4694 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4695 #if defined COMPILE_PCRE8
4696 else if (context->length >= 1)
4697 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4698 #endif /* COMPILE_PCRE8 */
4699 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4700
4701 switch(context->ucharptr)
4702 {
4703 case 4 / sizeof(pcre_uchar):
4704 if (context->oc.asint != 0)
4705 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4706 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4707 break;
4708
4709 case 2 / sizeof(pcre_uchar):
4710 if (context->oc.asushort != 0)
4711 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4712 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4713 break;
4714
4715 #ifdef COMPILE_PCRE8
4716 case 1:
4717 if (context->oc.asbyte != 0)
4718 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4719 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4720 break;
4721 #endif
4722
4723 default:
4724 SLJIT_ASSERT_STOP();
4725 break;
4726 }
4727 context->ucharptr = 0;
4728 }
4729
4730 #else
4731
4732 /* Unaligned read is unsupported or in 32 bit mode. */
/* One unit per step: load the next unit into the other register, then
compare the unit loaded by the previous step. */
4733 if (context->length >= 1)
4734 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4735
4736 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4737
4738 if (othercasebit != 0 && othercasechar == cc)
4739 {
4740 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4741 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4742 }
4743 else
4744 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4745
4746 #endif
4747
4748 cc++;
4749 #ifdef SUPPORT_UTF
4750 utflength--;
4751 }
4752 while (utflength > 0);
4753 #endif
4754
4755 return cc;
4756 }
4757
4758 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4759
/* Changes the bias of the character type register.

Emits an ADD or SUB on typereg by the difference between the current
typeoffset and "value" (nothing is emitted when they are equal) and then
records "value" as the new typeoffset. It relies on the locals typereg and
typeoffset of the calling function.

The body is wrapped in do { } while (0) so that the macro expands to a single
statement and stays correct inside an unbraced if / else. The invocation must
be followed by a semicolon. Note that "value" is evaluated more than once, so
it must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4769
/* Changes the bias of the character held in TMP1.

Emits an ADD or SUB on TMP1 by the difference between the current charoffset
and "value" (nothing is emitted when they are equal) and then records "value"
as the new charoffset. It relies on the local charoffset of the calling
function.

The body is wrapped in do { } while (0) so that the macro expands to a single
statement and stays correct inside an unbraced if / else. The invocation must
be followed by a semicolon. Note that "value" is evaluated more than once, so
it must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4779
4780 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4781 {
4782 DEFINE_COMPILER;
4783 jump_list *found = NULL;
4784 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4785 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4786 struct sljit_jump *jump = NULL;
4787 pcre_uchar *ccbegin;
4788 int compares, invertcmp, numberofcmps;
4789 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4790 BOOL utf = common->utf;
4791 #endif
4792
4793 #ifdef SUPPORT_UCP
4794 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4795 BOOL charsaved = FALSE;
4796 int typereg = TMP1, scriptreg = TMP1;
4797 const pcre_uint32 *other_cases;
4798 sljit_uw typeoffset;
4799 #endif
4800
4801 /* Scanning the necessary info. */
4802 cc++;
4803 ccbegin = cc;
4804 compares = 0;
4805 if (cc[-1] & XCL_MAP)
4806 {
4807 min = 0;
4808 cc += 32 / sizeof(pcre_uchar);
4809 }
4810
4811 while (*cc != XCL_END)
4812 {
4813 compares++;
4814 if (*cc == XCL_SINGLE)
4815 {
4816 cc ++;
4817 GETCHARINCTEST(c, cc);
4818 if (c > max) max = c;
4819 if (c < min) min = c;
4820 #ifdef SUPPORT_UCP
4821 needschar = TRUE;
4822 #endif
4823 }
4824 else if (*cc == XCL_RANGE)
4825 {
4826 cc ++;
4827 GETCHARINCTEST(c, cc);
4828 if (c < min) min = c;
4829 GETCHARINCTEST(c, cc);
4830 if (c > max) max = c;
4831 #ifdef SUPPORT_UCP
4832 needschar = TRUE;
4833 #endif
4834 }
4835 #ifdef SUPPORT_UCP
4836 else
4837 {
4838 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4839 cc++;
4840 if (*cc == PT_CLIST)
4841 {
4842 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4843 while (*other_cases != NOTACHAR)
4844 {
4845 if (*other_cases > max) max = *other_cases;
4846 if (*other_cases < min) min = *other_cases;
4847 other_cases++;
4848 }
4849 }
4850 else
4851 {
4852 max = READ_CHAR_MAX;
4853 min = 0;
4854 }
4855
4856 switch(*cc)
4857 {
4858 case PT_ANY:
4859 break;
4860
4861 case PT_LAMP:
4862 case PT_GC:
4863 case PT_PC:
4864 case PT_ALNUM:
4865 needstype = TRUE;
4866 break;
4867
4868 case PT_SC:
4869 needsscript = TRUE;
4870 break;
4871
4872 case PT_SPACE:
4873 case PT_PXSPACE:
4874 case PT_WORD:
4875 case PT_PXGRAPH:
4876 case PT_PXPRINT:
4877 case PT_PXPUNCT:
4878 needstype = TRUE;
4879 needschar = TRUE;
4880 break;
4881
4882 case PT_CLIST:
4883 case PT_UCNC:
4884 needschar = TRUE;
4885 break;
4886
4887 default:
4888 SLJIT_ASSERT_STOP();
4889 break;
4890 }
4891 cc += 2;
4892 }
4893 #endif
4894 }
4895
4896 /* We are not necessary in utf mode even in 8 bit mode. */
4897 cc = ccbegin;
4898 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4899
4900 if ((cc[-1] & XCL_HASPROP) == 0)
4901 {
4902 if ((cc[-1] & XCL_MAP) != 0)
4903 {
4904 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4905 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
4906 {
4907 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4908 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4909 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4910 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4912 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4913 }
4914
4915 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4916 JUMPHERE(jump);
4917
4918 cc += 32 / sizeof(pcre_uchar);
4919 }
4920 else
4921 {
4922 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4923 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4924 }
4925 }
4926 else if ((cc[-1] & XCL_MAP) != 0)
4927 {
4928 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4929 #ifdef SUPPORT_UCP
4930 charsaved = TRUE;
4931 #endif
4932 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4933 {
4934 #ifdef COMPILE_PCRE8
4935 jump = NULL;
4936 if (common->utf)
4937 #endif
4938 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4939
4940 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4941 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4942 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4943 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4944 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4945 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4946
4947 #ifdef COMPILE_PCRE8
4948 if (common->utf)
4949 #endif
4950 JUMPHERE(jump);
4951 }
4952
4953 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4954 cc += 32 / sizeof(pcre_uchar);
4955 }
4956
4957 #ifdef SUPPORT_UCP
4958 /* Simple register allocation. TMP1 is preferred if possible. */
4959 if (needstype || needsscript)
4960 {
4961 if (needschar && !charsaved)
4962 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4963 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4964 if (needschar)
4965 {
4966 if (needstype)
4967 {
4968 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4969 typereg = RETURN_ADDR;
4970 }
4971
4972 if (needsscript)
4973 scriptreg = TMP3;
4974 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4975 }
4976 else if (needstype && needsscript)
4977 scriptreg = TMP3;
4978 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4979
4980 if (needsscript)
4981 {
4982 if (scriptreg == TMP1)
4983 {
4984 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4985 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4986 }
4987 else
4988 {
4989 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4990 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4991 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4992 }
4993 }
4994 }
4995 #endif
4996
4997 /* Generating code. */
4998 charoffset = 0;
4999 numberofcmps = 0;
5000 #ifdef SUPPORT_UCP
5001 typeoffset = 0;
5002 #endif
5003
5004 while (*cc != XCL_END)
5005 {
5006 compares--;
5007 invertcmp = (compares == 0 && list != backtracks);
5008 jump = NULL;
5009
5010 if (*cc == XCL_SINGLE)
5011 {
5012 cc ++;
5013 GETCHARINCTEST(c, cc);
5014
5015 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5016 {
5017 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5018 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5019 numberofcmps++;
5020 }
5021 else if (numberofcmps > 0)
5022 {
5023 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5024 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5025 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5026 numberofcmps = 0;
5027 }
5028 else
5029 {
5030 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5031 numberofcmps = 0;
5032 }
5033 }
5034 else if (*cc == XCL_RANGE)
5035 {
5036 cc ++;
5037 GETCHARINCTEST(c, cc);
5038 SET_CHAR_OFFSET(c);
5039 GETCHARINCTEST(c, cc);
5040
5041 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5042 {
5043 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5044 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5045 numberofcmps++;
5046 }
5047 else if (numberofcmps > 0)
5048 {
5049 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5050 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5051 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5052 numberofcmps = 0;
5053 }
5054 else
5055 {
5056 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5057 numberofcmps = 0;
5058 }
5059 }
5060 #ifdef SUPPORT_UCP
5061 else
5062 {
5063 if (*cc == XCL_NOTPROP)
5064 invertcmp ^= 0x1;
5065 cc++;
5066 switch(*cc)
5067 {
5068 case PT_ANY:
5069 if (list != backtracks)
5070 {
5071 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5072 continue;
5073 }
5074 else if (cc[-1] == XCL_NOTPROP)
5075 continue;
5076 jump = JUMP(SLJIT_JUMP);
5077 break;
5078
5079 case PT_LAMP:
5080 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5081 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5082 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5083 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5084 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5085 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5086 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5087 break;
5088
5089 case PT_GC:
5090 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5091 SET_TYPE_OFFSET(c);
5092 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5093 break;
5094
5095 case PT_PC:
5096 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5097 break;
5098
5099 case PT_SC:
5100 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5101 break;
5102
5103 case PT_SPACE:
5104 case PT_PXSPACE:
5105 SET_CHAR_OFFSET(9);
5106 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5107 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5108
5109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5110 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5111
5112 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5113 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5114
5115 SET_TYPE_OFFSET(ucp_Zl);
5116 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5117 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5118 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5119 break;
5120
5121 case PT_WORD:
5122 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5123 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5124 /* Fall through. */
5125
5126 case PT_ALNUM:
5127 SET_TYPE_OFFSET(ucp_Ll);
5128 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5129 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5130 SET_TYPE_OFFSET(ucp_Nd);
5131 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5132 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5133 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5134 break;
5135
5136 case PT_CLIST:
5137 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5138
5139 /* At least three characters are required.
5140 Otherwise this case would be handled by the normal code path. */
5141 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5142 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5143
5144 /* Optimizing character pairs, if their difference is power of 2. */
5145 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5146 {
5147 if (charoffset == 0)
5148 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5149 else
5150 {
5151 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5152 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5153 }
5154 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5155 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5156 other_cases += 2;
5157 }
5158 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5159 {
5160 if (charoffset == 0)
5161 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5162 else
5163 {
5164 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5165 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5166 }
5167 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5168 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5169
5170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5171 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5172
5173 other_cases += 3;
5174 }
5175 else
5176 {
5177 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5178 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5179 }
5180
5181 while (*other_cases != NOTACHAR)
5182 {
5183 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5184 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5185 }
5186 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5187 break;
5188
5189 case PT_UCNC:
5190 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5191 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5192 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5193 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5194 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5195 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5196
5197 SET_CHAR_OFFSET(0xa0);
5198 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5199 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5200 SET_CHAR_OFFSET(0);
5201 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5202 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5203 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5204 break;
5205
5206 case PT_PXGRAPH:
5207 /* C and Z groups are the farthest two groups. */
5208 SET_TYPE_OFFSET(ucp_Ll);
5209 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5210 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5211
5212 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5213
5214 /* In case of ucp_Cf, we overwrite the result. */
5215 SET_CHAR_OFFSET(0x2066);
5216 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5217 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5218
5219 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5220 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5221
5222 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5223 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5224
5225 JUMPHERE(jump);
5226 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5227 break;
5228
5229 case PT_PXPRINT:
5230 /* C and Z groups are the farthest two groups. */
5231 SET_TYPE_OFFSET(ucp_Ll);
5232 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5233 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5234
5235 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5236 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5237
5238 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5239
5240 /* In case of ucp_Cf, we overwrite the result. */
5241 SET_CHAR_OFFSET(0x2066);
5242 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5243 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5244
5245 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5246 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5247
5248 JUMPHERE(jump);
5249 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5250 break;
5251
5252 case PT_PXPUNCT:
5253 SET_TYPE_OFFSET(ucp_Sc);
5254 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5255 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5256
5257 SET_CHAR_OFFSET(0);
5258 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5259 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5260
5261 SET_TYPE_OFFSET(ucp_Pc);
5262 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5263 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5264 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5265 break;
5266 }
5267 cc += 2;
5268 }
5269 #endif
5270
5271 if (jump != NULL)
5272 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5273 }
5274
5275 if (found != NULL)
5276 set_jumps(found, LABEL());
5277 }
5278
5279 #undef SET_TYPE_OFFSET
5280 #undef SET_CHAR_OFFSET
5281
5282 #endif
5283
/* Emits the matching path of a zero-width assertion (OP_SOD, OP_SOM, the word
boundary opcodes, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM) or of
OP_REVERSE. "type" selects the opcode; every jump taken when the assertion
fails is appended to "backtracks". Returns cc unchanged, except for OP_REVERSE
which reads its length operand at cc and returns cc + LINK_SIZE. */
5284 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5285 {
5286 DEFINE_COMPILER;
5287 int length;
5288 struct sljit_jump *jump[4];
5289 #ifdef SUPPORT_UTF
5290 struct sljit_label *label;
5291 #endif /* SUPPORT_UTF */
5292
5293 switch(type)
5294 {
5295 case OP_SOD:
/* Backtrack unless STR_PTR equals jit_arguments.begin. */
5296 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5297 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5298 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5299 return cc;
5300
5301 case OP_SOM:
/* Backtrack unless STR_PTR equals jit_arguments.str. */
5302 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5304 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5305 return cc;
5306
5307 case OP_NOT_WORD_BOUNDARY:
5308 case OP_WORD_BOUNDARY:
/* Fast-call the routine whose call sites are collected in common->wordboundary,
then backtrack on the flag state that contradicts the requested assertion. */
5309 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5310 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5311 return cc;
5312
5313 case OP_EODN:
5314 /* Requires rather complex checks. */
/* jump[0] skips all newline tests when STR_PTR has already reached STR_END. */
5315 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5316 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5317 {
/* Fixed newline made of two code units: (newline >> 8) & 0xff followed by
newline & 0xff. TMP2 = STR_PTR + 2 units, TMP1 = unit at STR_PTR. */
5318 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5319 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5320 if (common->mode == JIT_COMPILE)
5321 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5322 else
5323 {
/* Other modes: jump[1] is taken when exactly two units remain. Otherwise
backtrack if more than two units remain or the first unit differs; in the
remaining case (one unit left and it matches the first newline unit)
check_partial() is emitted before backtracking. */
5324 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5325 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5326 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5327 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5328 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5329 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5330 check_partial(common, TRUE);
5331 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5332 JUMPHERE(jump[1]);
5333 }
/* Both newline units must match. */
5334 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5335 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5336 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5337 }
5338 else if (common->nltype == NLTYPE_FIXED)
5339 {
/* Fixed single-unit newline: it must be the only unit left before STR_END. */
5340 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5341 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5342 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5343 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5344 }
5345 else
5346 {
/* Remaining newline types. CR is tested first (jump[1] is taken when the
unit at STR_PTR is not CR). With CR: jump[2] succeeds when STR_PTR + 2 units
is above STR_END, backtrack when it is below, and when it is equal the next
unit must be NL (jump[3]). */
5347 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5348 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5349 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5350 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5351 jump[2] = JUMP(SLJIT_GREATER);
5352 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5353 /* Equal. */
5354 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5355 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5356 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5357
5358 JUMPHERE(jump[1]);
5359 if (common->nltype == NLTYPE_ANYCRLF)
5360 {
/* Not CR: the unit must be NL and must be the last one. */
5361 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5362 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5363 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5364 }
5365 else
5366 {
/* STR_PTR is saved in LOCALS1 around the character read and restored once
the routine collected in common->anynewline has accepted the character. */
5367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5368 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5369 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5370 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5371 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5372 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5373 }
5374 JUMPHERE(jump[2]);
5375 JUMPHERE(jump[3]);
5376 }
5377 JUMPHERE(jump[0]);
5378 check_partial(common, FALSE);
5379 return cc;
5380
5381 case OP_EOD:
/* Backtrack while STR_PTR is below STR_END. */
5382 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5383 check_partial(common, FALSE);
5384 return cc;
5385
5386 case OP_DOLL:
/* Fails when jit_arguments.noteol is non-zero. Without common->endonly the
OP_EODN code above is emitted; otherwise only STR_PTR >= STR_END passes. */
5387 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5388 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5389 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5390
5391 if (!common->endonly)
5392 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
5393 else
5394 {
5395 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5396 check_partial(common, FALSE);
5397 }
5398 return cc;
5399
5400 case OP_DOLLM:
/* When STR_PTR is not below STR_END (jump[1] not taken) only noteol is
checked and jump[0] leaves the case. Otherwise a newline must start at
STR_PTR. */
5401 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5402 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5403 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5404 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5405 check_partial(common, FALSE);
5406 jump[0] = JUMP(SLJIT_JUMP);
5407 JUMPHERE(jump[1]);
5408
5409 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5410 {
5411 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5412 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5413 if (common->mode == JIT_COMPILE)
5414 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5415 else
5416 {
5417 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5418 /* STR_PTR = STR_END - IN_UCHARS(1) */
5419 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5420 check_partial(common, TRUE);
5421 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5422 JUMPHERE(jump[1]);
5423 }
5424
5425 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5426 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5427 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5428 }
5429 else
5430 {
5431 peek_char(common, common->nlmax);
5432 check_newlinechar(common, common->nltype, backtracks, FALSE);
5433 }
5434 JUMPHERE(jump[0]);
5435 return cc;
5436
5437 case OP_CIRC:
/* Backtrack if STR_PTR is above jit_arguments.begin or if
jit_arguments.notbol is non-zero. */
5438 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5440 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5441 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5442 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5443 return cc;
5444
5445 case OP_CIRCM:
/* When STR_PTR is not above begin (jump[1] not taken) only notbol is
checked and jump[0] leaves the case. Otherwise STR_PTR must be below
STR_END and the input just before STR_PTR must be a newline. */
5446 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5448 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5449 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5450 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5451 jump[0] = JUMP(SLJIT_JUMP);
5452 JUMPHERE(jump[1]);
5453
5454 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5455 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5456 {
/* Two-unit newline: both preceding units must lie at or after begin (TMP1)
and match the two newline units. */
5457 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5458 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5459 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5460 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5461 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5462 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5463 }
5464 else
5465 {
/* NOTE(review): presumably steps back one character and reads it again so
that STR_PTR is unchanged afterwards - confirm against skip_char_back(). */
5466 skip_char_back(common);
5467 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5468 check_newlinechar(common, common->nltype, backtracks, FALSE);
5469 }
5470 JUMPHERE(jump[0]);
5471 return cc;
5472
5473 case OP_REVERSE:
/* Moves STR_PTR back by the length stored at cc; a zero length emits
nothing. Backtracks if the move would pass jit_arguments.begin. */
5474 length = GET(cc, 0);
5475 if (length == 0)
5476 return cc + LINK_SIZE;
5477 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5478 #ifdef SUPPORT_UTF
5479 if (common->utf)
5480 {
/* UTF: TMP2 counts down from length, one skip_char_back() per iteration;
TMP3 holds begin. */
5481 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5482 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5483 label = LABEL();
5484 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5485 skip_char_back(common);
5486 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5487 JUMPTO(SLJIT_NOT_ZERO, label);
5488 }
5489 else
5490 #endif
5491 {
/* Fixed-width units: a single subtraction followed by a bounds check. */
5492 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5493 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5494 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5495 }
5496 check_start_used_ptr(common);
5497 return cc + LINK_SIZE;
5498 }
/* Every opcode handled above returns from inside the switch. */
5499 SLJIT_ASSERT_STOP();
5500 return cc;
5501 }
5502
/* Emits the matching path of the single item opcode "type" (character type
escapes, OP_ANY / OP_ALLANY / OP_ANYBYTE, properties, OP_ANYNL, horizontal and
vertical space, OP_EXTUNI, single characters, negated characters and
character classes). "cc" points to the operands of the opcode. When
"check_str_ptr" is TRUE, detect_partial_match() is emitted before the input
is read (the fixed-length OP_CHAR / OP_CHARI path below does its own length
check instead). Jumps taken on a mismatch are appended to "backtracks".
Returns the code position after the operands that were consumed. */
5503 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
5504 {
5505 DEFINE_COMPILER;
5506 int length;
5507 unsigned int c, oc, bit;
5508 compare_context context;
5509 struct sljit_jump *jump[3];
5510 jump_list *end_list;
5511 #ifdef SUPPORT_UTF
5512 struct sljit_label *label;
5513 #ifdef SUPPORT_UCP
5514 pcre_uchar propdata[5];
5515 #endif
5516 #endif /* SUPPORT_UTF */
5517
5518 switch(type)
5519 {
5520 case OP_NOT_DIGIT:
5521 case OP_DIGIT:
5522 /* Digits are usually 0-9, so it is worth optimizing them. */
5523 if (check_str_ptr)
5524 detect_partial_match(common, backtracks);
5525 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5526 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5527 read_char7_type(common, type == OP_NOT_DIGIT);
5528 else
5529 #endif
5530 read_char8_type(common, type == OP_NOT_DIGIT);
5531 /* Flip the starting bit in the negative case. */
5532 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5533 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5534 return cc;
5535
5536 case OP_NOT_WHITESPACE:
5537 case OP_WHITESPACE:
/* Same scheme as the digit case, testing the ctype_space bit. */
5538 if (check_str_ptr)
5539 detect_partial_match(common, backtracks);
5540 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5541 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5542 read_char7_type(common, type == OP_NOT_WHITESPACE);
5543 else
5544 #endif
5545 read_char8_type(common, type == OP_NOT_WHITESPACE);
5546 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5547 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5548 return cc;
5549
5550 case OP_NOT_WORDCHAR:
5551 case OP_WORDCHAR:
/* Same scheme as the digit case, testing the ctype_word bit. */
5552 if (check_str_ptr)
5553 detect_partial_match(common, backtracks);
5554 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5555 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5556 read_char7_type(common, type == OP_NOT_WORDCHAR);
5557 else
5558 #endif
5559 read_char8_type(common, type == OP_NOT_WORDCHAR);
5560 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5561 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5562 return cc;
5563
5564 case OP_ANY:
/* Rejects a newline at the current position. For a two-unit fixed newline
the first unit is compared in TMP1 (jump[0] is taken when it differs); the
second unit is only inspected when input remains (end_list otherwise). */
5565 if (check_str_ptr)
5566 detect_partial_match(common, backtracks);
5567 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5568 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5569 {
5570 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5571 end_list = NULL;
5572 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5573 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5574 else
5575 check_str_end(common, &end_list);
5576
5577 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5578 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5579 set_jumps(end_list, LABEL());
5580 JUMPHERE(jump[0]);
5581 }
5582 else
5583 check_newlinechar(common, common->nltype, backtracks, TRUE);
5584 return cc;
5585
5586 case OP_ALLANY:
/* Advances STR_PTR by one unit; in UTF mode the extra units belonging to
the same character are skipped as well. */
5587 if (check_str_ptr)
5588 detect_partial_match(common, backtracks);
5589 #ifdef SUPPORT_UTF
5590 if (common->utf)
5591 {
5592 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5593 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5594 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5595 #if defined COMPILE_PCRE8
/* Units >= 0xc0: add the value found in utf8_table4[unit - 0xc0]. */
5596 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5597 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5599 #elif defined COMPILE_PCRE16
/* Units whose top six bits equal 0xd800 (0xd800-0xdbff): add 2 to STR_PTR. */
5600 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5601 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5602 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5603 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5604 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5605 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5606 #endif
5607 JUMPHERE(jump[0]);
5608 #endif /* COMPILE_PCRE[8|16] */
5609 return cc;
5610 }
5611 #endif
5612 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5613 return cc;
5614
5615 case OP_ANYBYTE:
/* Always advances by exactly one unit. */
5616 if (check_str_ptr)
5617 detect_partial_match(common, backtracks);
5618 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5619 return cc;
5620
5621 #ifdef SUPPORT_UTF
5622 #ifdef SUPPORT_UCP
5623 case OP_NOTPROP:
5624 case OP_PROP:
/* Builds a five-unit XCLASS body holding a single (negated) property test
made of the two operands at cc, and hands it to the xclass compiler. */
5625 propdata[0] = XCL_HASPROP;
5626 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5627 propdata[2] = cc[0];
5628 propdata[3] = cc[1];
5629 propdata[4] = XCL_END;
5630 if (check_str_ptr)
5631 detect_partial_match(common, backtracks);
5632 compile_xclass_matchingpath(common, propdata, backtracks);
5633 return cc + 2;
5634 #endif
5635 #endif
5636
5637 case OP_ANYNL:
/* CR is handled inline: jump[0] is taken when the character is not CR; after
a CR a following NL is consumed too (jump[1] is taken when the next unit is
not NL, end_list when no input remains). Any other character is checked by
check_newlinechar() with common->bsr_nltype. */
5638 if (check_str_ptr)
5639 detect_partial_match(common, backtracks);
5640 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5641 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5642 /* We don't need to handle soft partial matching case. */
5643 end_list = NULL;
5644 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5645 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5646 else
5647 check_str_end(common, &end_list);
5648 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5649 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5650 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5651 jump[2] = JUMP(SLJIT_JUMP);
5652 JUMPHERE(jump[0]);
5653 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5654 set_jumps(end_list, LABEL());
5655 JUMPHERE(jump[1]);
5656 JUMPHERE(jump[2]);
5657 return cc;
5658
5659 case OP_NOT_HSPACE:
5660 case OP_HSPACE:
/* Reads a character (range hint 0x9-0x3000), fast-calls the routine collected
in common->hspace and backtracks on the flag state that contradicts the
requested opcode. */
5661 if (check_str_ptr)
5662 detect_partial_match(common, backtracks);
5663 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5664 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5665 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5666 return cc;
5667
5668 case OP_NOT_VSPACE:
5669 case OP_VSPACE:
/* Same scheme as the horizontal space case, with range hint 0xa-0x2029 and
the routine collected in common->vspace. */
5670 if (check_str_ptr)
5671 detect_partial_match(common, backtracks);
5672 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5673 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5674 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5675 return cc;
5676
5677 #ifdef SUPPORT_UCP
5678 case OP_EXTUNI:
/* Reads one character, then keeps reading while the ucp_gbtable entry
selected by the previous character's gbprop has the bit of the new
character's gbprop set. */
5679 if (check_str_ptr)
5680 detect_partial_match(common, backtracks);
5681 read_char(common);
5682 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5683 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5684 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and holds the previous gbprop until it is
restored after the loop. */
5685 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5686 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5687
5688 label = LABEL();
5689 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the character that is about to be read. */
5690 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5691 read_char(common);
5692 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5694 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5695
/* TMP1 = ucp_gbtable[previous gbprop] (index scaled by 4), tested against
the bit 1 << (new gbprop); the new gbprop becomes the previous one. */
5696 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5697 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5698 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5699 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5700 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5701 JUMPTO(SLJIT_NOT_ZERO, label);
5702
/* The last character read does not continue the sequence: step back to it. */
5703 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5704 JUMPHERE(jump[0]);
5705 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5706
5707 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5708 {
5709 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5710 /* Since we successfully read a char above, partial matching must occur. */
5711 check_partial(common, TRUE);
5712 JUMPHERE(jump[0]);
5713 }
5714 return cc;
5715 #endif
5716
5717 case OP_CHAR:
5718 case OP_CHARI:
5719 length = 1;
5720 #ifdef SUPPORT_UTF
5721 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5722 #endif
/* In JIT_COMPILE mode a caseful character, a character without other case,
or one for which char_get_othercase_bit() is non-zero is compared as a
fixed-length unit sequence after a single length check. */
5723 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5724 {
5725 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5726 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5727
5728 context.length = IN_UCHARS(length);
5729 context.sourcereg = -1;
5730 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5731 context.ucharptr = 0;
5732 #endif
5733 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5734 }
5735
/* Otherwise the character is decoded into TMP1 and compared by value. */
5736 if (check_str_ptr)
5737 detect_partial_match(common, backtracks);
5738 #ifdef SUPPORT_UTF
5739 if (common->utf)
5740 {
5741 GETCHAR(c, cc);
5742 }
5743 else
5744 #endif
5745 c = *cc;
5746
5747 if (type == OP_CHAR || !char_has_othercase(common, cc))
5748 {
5749 read_char_range(common, c, c, FALSE);
5750 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5751 return cc + length;
5752 }
5753 oc = char_othercase(common, c);
5754 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5755 bit = c ^ oc;
/* When both cases differ in exactly one bit, forcing that bit on allows a
single comparison. */
5756 if (is_powerof2(bit))
5757 {
5758 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5759 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5760 return cc + length;
5761 }
5762 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5763 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5764 JUMPHERE(jump[0]);
5765 return cc + length;
5766
5767 case OP_NOT:
5768 case OP_NOTI:
5769 if (check_str_ptr)
5770 detect_partial_match(common, backtracks);
5771 length = 1;
5772 #ifdef SUPPORT_UTF
5773 if (common->utf)
5774 {
5775 #ifdef COMPILE_PCRE8
5776 c = *cc;
/* A pattern character below 128 can be compared against the first input
unit without decoding the whole input character. */
5777 if (c < 128)
5778 {
5779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5780 if (type == OP_NOT || !char_has_othercase(common, cc))
5781 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5782 else
5783 {
5784 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5785 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5786 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5787 }
5788 /* Skip the variable-length character. */
5789 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5790 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5791 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5792 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5793 JUMPHERE(jump[0]);
5794 return cc + 1;
5795 }
5796 else
5797 #endif /* COMPILE_PCRE8 */
5798 {
5799 GETCHARLEN(c, cc, length);
5800 }
5801 }
5802 else
5803 #endif /* SUPPORT_UTF */
5804 c = *cc;
5805
/* General path: decode the input character and backtrack when it equals
the pattern character (or its other case for OP_NOTI). */
5806 if (type == OP_NOT || !char_has_othercase(common, cc))
5807 {
5808 read_char_range(common, c, c, TRUE);
5809 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5810 }
5811 else
5812 {
5813 oc = char_othercase(common, c);
5814 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5815 bit = c ^ oc;
5816 if (is_powerof2(bit))
5817 {
5818 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5819 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5820 }
5821 else
5822 {
5823 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5824 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5825 }
5826 }
5827 return cc + length;
5828
5829 case OP_CLASS:
5830 case OP_NCLASS:
5831 if (check_str_ptr)
5832 detect_partial_match(common, backtracks);
5833
5834 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5835 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
5836 read_char_range(common, 0, bit, type == OP_NCLASS);
5837 #else
5838 read_char_range(common, 0, 255, type == OP_NCLASS);
5839 #endif
5840
/* NOTE(review): a TRUE result presumably means check_class_ranges() has
already emitted the whole test - confirm against its definition. */
5841 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
5842 return cc + 32 / sizeof(pcre_uchar);
5843
/* Characters above the bitmap range fail for OP_CLASS; for OP_NCLASS
jump[0] skips the bitmap test below. */
5844 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5845 jump[0] = NULL;
5846 if (common->utf)
5847 {
5848 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5849 if (type == OP_CLASS)
5850 {
5851 add_jump(compiler, backtracks, jump[0]);
5852 jump[0] = NULL;
5853 }
5854 }
5855 #elif !defined COMPILE_PCRE8
5856 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5857 if (type == OP_CLASS)
5858 {
5859 add_jump(compiler, backtracks, jump[0]);
5860 jump[0] = NULL;
5861 }
5862 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5863
/* Bitmap test: bit (c & 7) of the byte at cc + (c >> 3); backtrack when
the bit is clear. */
5864 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5865 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5866 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5867 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5868 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5869 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5870
5871 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5872 if (jump[0] != NULL)
5873 JUMPHERE(jump[0]);
5874 #endif
5875 return cc + 32 / sizeof(pcre_uchar);
5876
5877 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5878 case OP_XCLASS:
/* The XCLASS body starts after the LINK_SIZE length field, which holds the
value used to compute the returned position. */
5879 if (check_str_ptr)
5880 detect_partial_match(common, backtracks);
5881 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5882 return cc + GET(cc, 0) - 1;
5883 #endif
5884 }
/* Every opcode handled above returns from inside the switch. */
5885 SLJIT_ASSERT_STOP();
5886 return cc;
5887 }
5888
5889 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5890 {
5891 /* This function consumes at least one input character. */
5892 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5893 DEFINE_COMPILER;
5894 pcre_uchar *ccbegin = cc;
5895 compare_context context;
5896 int size;
5897
5898 context.length = 0;
5899 do
5900 {
5901 if (cc >= ccend)
5902 break;
5903
5904 if (*cc == OP_CHAR)
5905 {
5906 size = 1;
5907 #ifdef SUPPORT_UTF
5908 if (common->utf && HAS_EXTRALEN(cc[1]))
5909 size += GET_EXTRALEN(cc[1]);
5910 #endif
5911 }
5912 else if (*cc == OP_CHARI)
5913 {
5914 size = 1;
5915 #ifdef SUPPORT_UTF
5916 if (common->utf)
5917 {
5918 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5919 size = 0;
5920 else if (HAS_EXTRALEN(cc[1]))
5921 size += GET_EXTRALEN(cc[1]);
5922 }
5923 else
5924 #endif
5925 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5926 size = 0;
5927 }
5928 else
5929 size = 0;
5930
5931 cc += 1 + size;
5932 context.length += IN_UCHARS(size);
5933 }
5934 while (size > 0 && context.length <= 128);
5935
5936 cc = ccbegin;
5937 if (context.length > 0)
5938 {
5939 /* We have a fixed-length byte sequence. */
5940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5941 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5942
5943 context.sourcereg = -1;
5944 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5945 context.ucharptr = 0;
5946 #endif
5947 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5948 return cc;
5949 }
5950
5951 /* A non-fixed length character will be checked if length == 0. */
5952 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
5953 }
5954
5955 /* Forward declarations. */
5956 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5957 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5958
/* Allocates a backtrack record of "size" bytes with sljit_alloc_memory(),
zero-fills it, stores "ccstart" in its cc field and links it as the new top
of "parent". The expansion site must provide the variables "compiler",
"backtrack" and "parent". If the compiler reports an error after the
allocation, the enclosing function returns "error". */
5959 #define PUSH_BACKTRACK(size, ccstart, error) \
5960 do \
5961 { \
5962 backtrack = sljit_alloc_memory(compiler, (size)); \
5963 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5964 return error; \
5965 memset(backtrack, 0, size); \
5966 backtrack->prev = parent->top; \
5967 backtrack->cc = (ccstart); \
5968 parent->top = backtrack; \
5969 } \
5970 while (0)
5971
/* Variant of the record-pushing macro for functions returning void: allocates
a zero-filled backtrack record of "size" bytes, stores "ccstart" in its cc
field and links it as the new top of "parent". The expansion site must
provide "compiler", "backtrack" and "parent". If the compiler reports an
error after the allocation, the enclosing function returns without a value. */
5972 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5973 do \
5974 { \
5975 backtrack = sljit_alloc_memory(compiler, (size)); \
5976 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5977 return; \
5978 memset(backtrack, 0, size); \
5979 backtrack->prev = parent->top; \
5980 backtrack->cc = (ccstart); \
5981 parent->top = backtrack; \
5982 } \
5983 while (0)
5984
/* Casts the in-scope "backtrack" pointer to a pointer to "type". */
5985 #define BACKTRACK_AS(type) ((type *)backtrack)
5986
5987 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5988 {
5989 /* The OVECTOR offset goes to TMP2. */
5990 DEFINE_COMPILER;
5991 int count = GET2(cc, 1 + IMM2_SIZE);
5992 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5993 unsigned int offset;
5994 jump_list *found = NULL;
5995
5996 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5997
5998 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5999
6000 count--;
6001 while (count-- > 0)
6002 {
6003 offset = GET2(slot, 0) << 1;
6004 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6005 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6006 slot += common->name_entry_size;
6007 }
6008
6009 offset = GET2(slot, 0) << 1;
6010 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6011 if (backtracks != NULL && !common->jscript_compat)
6012 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6013
6014 set_jumps(found, LABEL());
6015 }
6016
/* Emits the code that matches one occurrence of a back reference at STR_PTR.

   cc          points to OP_REF, OP_REFI, OP_DNREF or OP_DNREFI. For the two
               DNREF forms TMP2 must already refer to the selected OVECTOR
               entry (see compile_dnref_search); for the REF forms the group
               number is read from the byte code.
   backtracks  list that receives every jump taken when the match fails.
   withchecks  when TRUE, a numbered reference whose start equals OVECTOR(1)
               backtracks (unless jscript_compat is set), and a reference
               whose start and end are equal is detected as empty.
   emptyfail   only meaningful together with withchecks: TRUE sends the empty
               case to backtracks, FALSE lets it continue as a match.

   OP_REF and OP_DNREF are compared casefully, OP_REFI and OP_DNREFI
   caselessly. Earlier the decision tested OP_REF / OP_REFI only, so OP_DNREF
   was compared caselessly and OP_DNREFI never used the UTF aware comparison,
   although the UTF branch below already contains the code for the
   "reference through TMP2" case.

   When common->mode is not JIT_COMPILE, running out of subject while
   comparing goes through check_partial() before backtracking. */
6017 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6018 {
6019 DEFINE_COMPILER;
     /* TRUE for a numbered reference, FALSE for a duplicated name reference. */
6020 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6021 int offset = 0;
     /* Jump taken when the referenced string is empty (withchecks only). */
6022 struct sljit_jump *jump = NULL;
6023 struct sljit_jump *partial;
6024 struct sljit_jump *nopartial;
6025
     /* TMP1 = start of the referenced string. */
6026 if (ref)
6027 {
6028 offset = GET2(cc, 1) << 1;
6029 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6030 /* OVECTOR(1) contains the "string begin - 1" constant. */
6031 if (withchecks && !common->jscript_compat)
6032 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6033 }
6034 else
6035 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6036
6037 #if defined SUPPORT_UTF && defined SUPPORT_UCP
     /* Both caseless opcodes need the UTF aware comparison. */
6038 if (common->utf && (*cc == OP_REFI || *cc == OP_DNREFI))
6039 {
6040 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
     /* TMP2 = end of the referenced string. */
6041 if (ref)
6042 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6043 else
6044 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6045
6046 if (withchecks)
6047 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6048
6049 /* Needed to save important temporary registers. */
6050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6051 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6053 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6054 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
     /* Return values 0 and 1 are not subject positions: both backtrack, and
        outside JIT_COMPILE mode 1 runs check_partial() first. Any other
        value becomes the new STR_PTR. */
6055 if (common->mode == JIT_COMPILE)
6056 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6057 else
6058 {
6059 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6060 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6061 check_partial(common, FALSE);
6062 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6063 JUMPHERE(nopartial);
6064 }
6065 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6066 }
6067 else
6068 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6069 {
     /* TMP2 = length of the referenced string (end - start); the flags are
        set so that a zero length can be tested right away. */
6070 if (ref)
6071 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6072 else
6073 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6074
6075 if (withchecks)
6076 jump = JUMP(SLJIT_ZERO);
6077
     /* Advance STR_PTR by the length and check that the subject is long enough. */
6078 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6079 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6080 if (common->mode == JIT_COMPILE)
6081 add_jump(compiler, backtracks, partial);
6082
     /* Caseful helper for OP_REF and OP_DNREF, caseless helper otherwise. A
        non-zero TMP2 after the call means that the strings differ. */
6083 add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6084 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6085
6086 if (common->mode != JIT_COMPILE)
6087 {
6088 nopartial = JUMP(SLJIT_JUMP);
6089 JUMPHERE(partial);
6090 /* TMP2 -= STR_END - STR_PTR */
6091 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6092 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
     /* Nothing is left in the subject: skip the comparison. */
6093 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6094 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
     /* Compare the part that fits, using the same helper as above. */
6095 add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6096 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6097 JUMPHERE(partial);
6098 check_partial(common, FALSE);
6099 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6100 JUMPHERE(nopartial);
6101 }
6102 }
6103
     /* Empty referenced string: either a failure or an immediate match. */
6104 if (jump != NULL)
6105 {
6106 if (emptyfail)
6107 add_jump(compiler, backtracks, jump);
6108 else
6109 JUMPHERE(jump);
6110 }
6111 }
6112
/* Emits the matching path of a back reference (OP_REF, OP_REFI, OP_DNREF or
   OP_DNREFI) that is followed by a repeat operator (OP_CRSTAR ...
   OP_CRMINRANGE).

   cc      points to the reference opcode.
   parent  passed to PUSH_BACKTRACK, which is expected to set "backtrack".

   Returns the address of the byte code that follows the repeat operator.

   The repeat is decoded into min / max, where max == 0 stands for "no upper
   limit". Greedy repeats are generated by the first half of the function,
   minimizing repeats by the second half. For the DNREF forms the OVECTOR
   entry chosen by compile_dnref_search is kept in POSSESSIVE1 (greedy) or in
   STACK(2) (minimizing) and reloaded into TMP2 before every single match. */
6113 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6114 {
6115 DEFINE_COMPILER;
     /* TRUE for a numbered reference, FALSE for a duplicated name reference. */
6116 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6117 backtrack_common *backtrack;
6118 pcre_uchar type;
6119 int offset = 0;
6120 struct sljit_label *label;
     /* Jump taken when the referenced string is empty. */
6121 struct sljit_jump *zerolength;
6122 struct sljit_jump *jump = NULL;
     /* The reference opcode itself; cc is moved forward below. */
6123 pcre_uchar *ccbegin = cc;
6124 int min = 0, max = 0;
6125 BOOL minimize;
6126
6127 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6128
     /* A DNREF item carries one more IMM2 operand than a REF item, so cc is
        shifted to let the same offsets address the repeat operator. */
6129 if (ref)
6130 offset = GET2(cc, 1) << 1;
6131 else
6132 cc += IMM2_SIZE;
6133 type = cc[1 + IMM2_SIZE];
6134
     /* The minimizing variants are the odd opcodes. */
6135 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6136 minimize = (type & 0x1) != 0;
     /* Decode the repeat into min / max and step over the repeat operator. */
6137 switch(type)
6138 {
6139 case OP_CRSTAR:
6140 case OP_CRMINSTAR:
6141 min = 0;
6142 max = 0;
6143 cc += 1 + IMM2_SIZE + 1;
6144 break;
6145 case OP_CRPLUS:
6146 case OP_CRMINPLUS:
6147 min = 1;
6148 max = 0;
6149 cc += 1 + IMM2_SIZE + 1;
6150 break;
6151 case OP_CRQUERY:
6152 case OP_CRMINQUERY:
6153 min = 0;
6154 max = 1;
6155 cc += 1 + IMM2_SIZE + 1;
6156 break;
6157 case OP_CRRANGE:
6158 case OP_CRMINRANGE:
6159 min = GET2(cc, 1 + IMM2_SIZE + 1);
6160 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6161 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6162 break;
6163 default:
6164 SLJIT_ASSERT_STOP();
6165 break;
6166 }
6167
     /* Greedy repeat. */
6168 if (!minimize)
6169 {
6170 if (min == 0)
6171 {
     /* STACK(0) keeps the starting STR_PTR, STACK(1) is a zero item. */
6172 allocate_stack(common, 2);
6173 if (ref)
6174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6177 /* Temporary release of STR_PTR. */
6178 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6179 /* Handles both invalid and empty cases. Since the minimum repeat
6180 is zero, the invalid case is basically the same as an empty case. */
6181 if (ref)
6182 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6183 else
6184 {
     /* No backtrack list is passed: nothing fails here when min is zero. */
6185 compile_dnref_search(common, ccbegin, NULL);
6186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6188 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6189 }
6190 /* Restore if not zero length. */
6191 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6192 }
6193 else
6194 {
     /* At least one match is required: only the zero item is pushed. */
6195 allocate_stack(common, 1);
6196 if (ref)
6197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6199 if (ref)
6200 {
     /* A start equal to OVECTOR(1) backtracks; an empty string needs no loop. */
6201 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6202 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6203 }
6204 else
6205 {
6206 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6209 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6210 }
6211 }
6212
     /* POSSESSIVE0 counts the iterations when a limit has to be checked. */
6213 if (min > 1 || max > 1)
6214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6215
     /* Start of the matching loop. */
6216 label = LABEL();
6217 if (!ref)
6218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6219 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6220
6221 if (min > 1 || max > 1)
6222 {
6223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6224 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
     /* Below the minimum: repeat without saving a position. */
6226 if (min > 1)
6227 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6228 if (max > 1)
6229 {
     /* Below the maximum: push STR_PTR and try one more match. */
6230 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6231 allocate_stack(common, 1);
6232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6233 JUMPTO(SLJIT_JUMP, label);
6234 JUMPHERE(jump);
6235 }
6236 }
6237
6238 if (max == 0)
6239 {
6240 /* Includes min > 1 case as well. */
6241 allocate_stack(common, 1);
6242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6243 JUMPTO(SLJIT_JUMP, label);
6244 }
6245
6246 JUMPHERE(zerolength);
6247 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6248
6249 count_match(common);
6250 return cc;
6251 }
6252
     /* Minimizing repeat. STACK(0) holds the current STR_PTR (zero until the
        first position is stored), STACK(1) the number of matches so far, and
        STACK(2) the OVECTOR entry of a DNREF item. */
6253 allocate_stack(common, ref ? 2 : 3);
6254 if (ref)
6255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6257 if (type != OP_CRMINSTAR)
6258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6259
6260 if (min == 0)
6261 {
6262 /* Handles both invalid and empty cases. Since the minimum repeat
6263 is zero, the invalid case is basically the same as an empty case. */
6264 if (ref)
6265 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6266 else
6267 {
6268 compile_dnref_search(common, ccbegin, NULL);
6269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6271 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6272 }
6273 /* Length is non-zero, we can match real repeats. */
6274 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
     /* Zero repeats are tried first: skip the match emitted below. */
6275 jump = JUMP(SLJIT_JUMP);
6276 }
6277 else
6278 {
6279 if (ref)
6280 {
6281 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6282 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6283 }
6284 else
6285 {
6286 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6289 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6290 }
6291 }
6292
     /* Matches one more occurrence; recorded as the block's matchingpath label. */
6293 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6294 if (max > 0)
6295 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6296
6297 if (!ref)
6298 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6299 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6301
6302 if (min > 1)
6303 {
     /* Keep matching until the minimum count is reached. */
6304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6305 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6307 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6308 }
6309 else if (max > 0)
6310 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6311
6312 if (jump != NULL)
6313 JUMPHERE(jump);
6314 JUMPHERE(zerolength);
6315
6316 count_match(common);
6317 return cc;
6318 }
6319
/* Emits the matching path of a recursion item whose target is the group at
   common->start + GET(cc, 1).

   When get_framesize() reports no_stack for the target, the body of the
   target group is compiled inline. Otherwise a recurse_entry for the target
   is looked up in (or appended to) the common->entries list and a fast call
   to that entry is emitted; the emitted code backtracks when TMP1 is zero
   after the call.

   Returns cc + 1 + LINK_SIZE, or NULL when the compiler reports an error
   after the allocation of a new entry. */
6320 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6321 {
6322 DEFINE_COMPILER;
6323 backtrack_common *backtrack;
6324 recurse_entry *entry = common->entries;
6325 recurse_entry *prev = NULL;
     /* Offset of the called group, relative to common->start. */
6326 sljit_sw start = GET(cc, 1);
6327 pcre_uchar *start_cc;
6328 BOOL needs_control_head;
6329
6330 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6331
6332 /* Inlining simple patterns. */
6333 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6334 {
6335 start_cc = common->start + start;
6336 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6337 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6338 return cc + 1 + LINK_SIZE;
6339 }
6340
     /* Look for an existing entry of this target; prev ends up as the list tail. */
6341 while (entry != NULL)
6342 {
6343 if (entry->start == start)
6344 break;
6345 prev = entry;
6346 entry = entry->next;
6347 }
6348
     /* First call of this target: append a new, empty entry to the list. */
6349 if (entry == NULL)
6350 {
6351 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6352 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6353 return NULL;
6354 entry->next = NULL;
6355 entry->entry = NULL;
6356 entry->calls = NULL;
6357 entry->start = start;
6358
6359 if (prev != NULL)
6360 prev->next = entry;
6361 else
6362 common->entries = entry;
6363 }
6364
     /* Push OVECTOR(0) and / or the current mark value before the call; which
        of them is pushed depends on has_set_som and mark_ptr. */
6365 if (common->has_set_som && common->mark_ptr != 0)
6366 {
6367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6368 allocate_stack(common, 2);
6369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6372 }
6373 else if (common->has_set_som || common->mark_ptr != 0)
6374 {
6375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6376 allocate_stack(common, 1);
6377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6378 }
6379
     /* Call the entry directly when its label is already known, otherwise
        record the call in entry->calls. */
6380 if (entry->entry == NULL)
6381 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6382 else
6383 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6384 /* Leave if the match is failed. */
6385 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6386 return cc + 1 + LINK_SIZE;
6387 }
6388
/* Helper called by the code that compile_callout_matchingpath emits. It
   completes the callout block and invokes the user supplied callout function.

   arguments      the jit_arguments of the running match.
   callout_block  partially filled block. On entry its "subject" and
                  "offset_vector" fields hold pointers stored by the JIT code
                  (the OVECTOR(0) value and STR_PTR); they are converted to
                  offsets before the fields receive their final values, so
                  the order of the assignments below matters.
   jit_ovector    the JIT offset vector, whose entries are pointers into the
                  subject.

   Returns 0 when no callout function is set, otherwise the value returned by
   the callout function. */
6389 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6390 {
6391 const pcre_uchar *begin = arguments->begin;
6392 int *offset_vector = arguments->offsets;
6393 int offset_count = arguments->offset_count;
6394 int i;
6395
6396 if (PUBL(callout) == NULL)
6397 return 0;
6398
6399 callout_block->version = 2;
6400 callout_block->callout_data = arguments->callout_data;
6401
6402 /* Offsets in subject. */
6403 callout_block->subject_length = arguments->end - arguments->begin;
     /* The next two lines read the temporary pointers described above. */
6404 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6405 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
     /* Only now can "subject" receive its real value. */
6406 #if defined COMPILE_PCRE8
6407 callout_block->subject = (PCRE_SPTR)begin;
6408 #elif defined COMPILE_PCRE16
6409 callout_block->subject = (PCRE_SPTR16)begin;
6410 #elif defined COMPILE_PCRE32
6411 callout_block->subject = (PCRE_SPTR32)begin;
6412 #endif
6413
6414 /* Convert and copy the JIT offset vector to the offset_vector array. */
6415 callout_block->capture_top = 0;
6416 callout_block->offset_vector = offset_vector;
     /* Pair 0 is skipped here; its two entries are set to -1 below. */
6417 for (i = 2; i < offset_count; i += 2)
6418 {
6419 offset_vector[i] = jit_ovector[i] - begin;
6420 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
     /* Remember the highest pair whose start is not before the subject. */
6421 if (jit_ovector[i] >= begin)
6422 callout_block->capture_top = i;
6423 }
6424
     /* Convert the remembered index into a pair number, plus one. */
6425 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6426 if (offset_count > 0)
6427 offset_vector[0] = -1;
6428 if (offset_count > 1)
6429 offset_vector[1] = -1;
6430 return (*PUBL(callout))(callout_block);
6431 }
6432
6433 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
6434 #define CALLOUT_ARG_SIZE \
6435 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6436
/* Offset of the callout block field "arg", measured back from the end of the
   rounded-up block. The result is negative, for use with a base register
   that points just past the block. */
6437 #define CALLOUT_ARG_OFFSET(arg) \
6438 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6439
/* Emits the matching path of a callout item. A callout block is built in
   CALLOUT_ARG_SIZE bytes allocated on the stack, do_callout is called with
   it, and the stack space is released again. The return value decides how
   the match continues: a positive value backtracks, a negative value leaves
   through the forced quit path, and zero continues with the next item.

   Returns cc + 2 + 2 * LINK_SIZE. */
6440 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6441 {
6442 DEFINE_COMPILER;
6443 backtrack_common *backtrack;
6444
6445 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6446
6447 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6448
6449 SLJIT_ASSERT(common->capture_last_ptr != 0);
6450 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6451 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
     /* The callout number is the operand that follows the opcode. */
6452 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6453 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6454
6455 /* These pointer sized fields temporarily store internal variables. */
     /* do_callout turns them into start_match and current_position. */
6456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6459
     /* The mark field is taken from jit_arguments when marks are in use,
        otherwise it is set to zero. */
6460 if (common->mark_ptr != 0)
6461 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6462 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6463 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6465
6466 /* Needed to save important temporary registers. */
6467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
     /* Second argument: start of the callout block. Third argument: base of
        the JIT offset vector. The first argument is taken from the TMP1
        register loaded with ARGUMENTS above (TMP1 is SLJIT_R0 according to
        the assertion in compile_ref_matchingpath). */
6468 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6469 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6470 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
     /* NOTE(review): presumably widens the int return value to a full word
        before the signed comparison below - confirm. */
6471 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6472 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6473 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6474
6475 /* Check return value. */
6476 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6477 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
     /* Jump to the forced quit label when it exists, otherwise record the
        jump in the forced_quit list. */
6478 if (common->forced_quit_label == NULL)
6479 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6480 else
6481 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6482 return cc + 2 + 2 * LINK_SIZE;
6483 }
6484
/* The callout helper macros are not used past this point. */
6485 #undef CALLOUT_ARG_SIZE
6486 #undef CALLOUT_ARG_OFFSET
6487
6488 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6489 {
6490 DEFINE_COMPILER;
6491 int framesize;
6492 int extrasize;
6493 BOOL needs_control_head;
6494 int private_data_ptr;
6495 backtrack_common altbacktrack;
6496 pcre_uchar *ccbegin;
6497 pcre_uchar opcode;
6498 pcre_uchar bra = OP_BRA;
6499 jump_list *tmp = NULL;
6500 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6501 jump_list **found;
6502 /* Saving previous accept variables. */
6503 BOOL save_local_exit = common->local_exit;
6504 BOOL save_positive_assert = common->positive_assert;
6505 then_trap_backtrack *save_then_trap = common->then_trap;
6506 struct sljit_label *save_quit_label = common->quit_label;
6507 struct sljit_label *save_accept_label = common->accept_label;
6508 jump_list *save_quit = common->quit;
6509 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6510 jump_list *save_accept = common->accept;
6511 struct sljit_jump *jump;
6512 struct sljit_jump *brajump = NULL;
6513
6514 /* Assert captures then. */
6515 common->then_trap = NULL;
6516
6517 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6518 {
6519 SLJIT_ASSERT(!conditional);
6520 bra = *cc;
6521 cc++;
6522 }
6523 private_data_ptr = PRIVATE_DATA(cc);
6524 SLJIT_ASSERT(private_data_ptr != 0);
6525 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6526 backtrack->framesize = framesize;
6527 backtrack->private_data_ptr = private_data_ptr;
6528 opcode = *cc;
6529 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6530 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6531 ccbegin = cc;
6532 cc += GET(cc, 1);
6533
6534 if (bra == OP_BRAMINZERO)
6535 {
6536 /* This is a braminzero backtrack path. */
6537 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6538 free_stack(common, 1);
6539 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6540 }
6541
6542 if (framesize < 0)
6543 {
6544 extrasize = needs_control_head ? 2 : 1;
6545 if (framesize == no_frame)
6546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6547 allocate_stack(common, extrasize);
6548 if (needs_control_head)
6549 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6551 if (needs_control_head)
6552 {
6553 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6555 }
6556 }
6557 else
6558 {
6559 extrasize = needs_control_head ? 3 : 2;
6560 allocate_stack(common, framesize + extrasize);
6561 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6562 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6564 if (needs_control_head)
6565 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6567 if (needs_control_head)
6568 {
6569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6570 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6572 }
6573 else
6574 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6575 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6576 }
6577
6578 memset(&altbacktrack, 0, sizeof(backtrack_common));
6579 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6580 {
6581 /* Negative assert is stronger than positive assert. */
6582 common->local_exit = TRUE;
6583 common->quit_label = NULL;
6584 common->quit = NULL;
6585 common->positive_assert = FALSE;
6586 }
6587 else
6588 common->positive_assert = TRUE;
6589 common->positive_assert_quit = NULL;
6590
6591 while (1)
6592 {
6593 common->accept_label = NULL;
6594 common->accept = NULL;
6595 altbacktrack.top = NULL;
6596 altbacktrack.topbacktracks = NULL;
6597
6598 if (*ccbegin == OP_ALT)
6599 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6600
6601 altbacktrack.cc = ccbegin;
6602 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6603 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6604 {
6605 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6606 {
6607 common->local_exit = save_local_exit;
6608 common->quit_label = save_quit_label;
6609 common->quit = save_quit;
6610 }
6611 common->positive_assert = save_positive_assert;
6612 common->then_trap = save_then_trap;
6613 common->accept_label = save_accept_label;
6614 common->positive_assert_quit = save_positive_assert_quit;
6615 common->accept = save_accept;
6616 return NULL;
6617 }
6618 common->accept_label = LABEL();
6619 if (common->accept != NULL)
6620 set_jumps(common->accept, common->accept_label);
6621
6622 /* Reset stack. */
6623 if (framesize < 0)
6624 {
6625 if (framesize == no_frame)
6626 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6627 else
6628 free_stack(common, extrasize);
6629 if (needs_control_head)
6630 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6631 }
6632 else
6633 {
6634 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6635 {
6636 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6637 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6638 if (needs_control_head)
6639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6640 }
6641 else
6642 {
6643 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6644 if (needs_control_head)
6645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6646 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6647 }
6648 }
6649
6650 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6651 {
6652 /* We know that STR_PTR was stored on the top of the stack. */
6653 if (conditional)
6654 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6655 else if (bra == OP_BRAZERO)
6656 {
6657 if (framesize < 0)
6658 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6659 else
6660 {
6661 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6662 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6664 }
6665 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6667 }
6668 else if (framesize >= 0)
6669 {
6670 /* For OP_BRA and OP_BRAMINZERO. */
6671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6672 }
6673 }
6674 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6675
6676 compile_backtrackingpath(common, altbacktrack.top);
6677 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6678 {
6679 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6680 {
6681 common->local_exit = save_local_exit;
6682 common->quit_label = save_quit_label;
6683 common->quit = save_quit;
6684 }
6685 common->positive_assert = save_positive_assert;
6686 common->then_trap = save_then_trap;
6687 common->accept_label = save_accept_label;
6688 common->positive_assert_quit = save_positive_assert_quit;
6689 common->accept = save_accept;
6690 return NULL;
6691 }
6692 set_jumps(altbacktrack.topbacktracks, LABEL());
6693
6694 if (*cc != OP_ALT)
6695 break;
6696
6697 ccbegin = cc;
6698 cc += GET(cc, 1);
6699 }
6700
6701 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6702 {
6703 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6704 /* Makes the check less complicated below. */
6705 common->positive_assert_quit = common->quit;
6706 }
6707
6708 /* None of them matched. */
6709 if (common->positive_assert_quit != NULL)
6710 {
6711 jump = JUMP(SLJIT_JUMP);
6712 set_jumps(common->positive_assert_quit, LABEL());
6713 SLJIT_ASSERT(framesize != no_stack);
6714 if (framesize < 0)
6715 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6716 else
6717 {
6718 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6719 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6720 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6721 }
6722 JUMPHERE(jump);
6723 }
6724
6725 if (needs_control_head)
6726 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6727
6728 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6729 {
6730 /* Assert is failed. */
6731 if (conditional || bra == OP_BRAZERO)
6732 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6733
6734 if (framesize < 0)
6735 {
6736 /* The topmost item should be 0. */
6737 if (bra == OP_BRAZERO)
6738 {
6739 if (extrasize == 2)
6740 free_stack(common, 1);
6741 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6742 }
6743 else
6744 free_stack(common, extrasize);
6745 }
6746 else
6747 {
6748 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6749 /* The topmost item should be 0. */
6750 if (bra == OP_BRAZERO)
6751 {
6752 free_stack(common, framesize + extrasize - 1);
6753 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6754 }
6755 else
6756 free_stack(common, framesize + extrasize);
6757 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6758 }
6759 jump = JUMP(SLJIT_JUMP);
6760 if (bra != OP_BRAZERO)
6761 add_jump(compiler, target, jump);
6762
6763 /* Assert is successful. */
6764 set_jumps(tmp, LABEL());
6765 if (framesize < 0)
6766 {
6767 /* We know that STR_PTR was stored on the top of the stack. */
6768 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6769 /* Keep the STR_PTR on the top of the stack. */
6770 if (bra == OP_BRAZERO)
6771 {
6772 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6773 if (extrasize == 2)
6774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6775 }
6776 else if (bra == OP_BRAMINZERO)
6777 {
6778 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6779 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6780 }
6781 }
6782 else
6783