/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1435 - (show annotations)
Tue Jan 7 07:47:12 2014 UTC (5 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 328186 byte(s)
JIT: Optimize might be empty checks.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for a JIT-compiled matcher: the jit_function type declared
later in this file takes a pointer to this structure as its only parameter.
Pointer members are grouped at the front; the 32-bit and 8-bit members follow.
NOTE(review): str/begin/end look like the subject cursor and its bounds, and
offsets/offset_count like the caller's output vector - confirm against the
code that fills this structure in. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* One byte per flag. NOTE(review): the names match the PCRE_NOTBOL,
PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART options - confirm. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping for the generated code. Each of the three arrays has one slot
per compile mode (JIT_NUMBER_OF_COMPILE_MODES slots); judging by the member
names they hold the entry point, the read-only data block and the code size
for that mode. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
/* NOTE(review): callback/userdata look like a user supplied callback and the
opaque argument passed to it - confirm against the code that assigns them. */
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Singly linked list node holding one sljit jump; nodes are chained through
the next member. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Singly linked list node pairing a jump (start) with a label (quit).
NOTE(review): presumably describes an out-of-line stub that is entered through
start and resumes at quit - confirm against the code that emits the stubs. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Singly linked list node pairing a label with a pointer to an sljit_uw.
NOTE(review): presumably the final address of the label is stored through
addr once code generation has finished - confirm against the users. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably used in place of a frame
size (the *_backtrack structures in this file document their framesize member
as "less than 0 if a frame is not needed") - confirm against get_framesize. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Two kinds of control entry. NOTE(review): presumably these tag the entries
of the control verb chain (see control_head_ptr in compiler_common) - confirm
against the code that pushes them. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Function pointer type using the SLJIT_CALL calling convention: takes a
pointer to a jit_arguments block and returns an int. NOTE(review): presumably
the type the generated code is converted to before it is called - confirm. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
/* Every *_backtrack structure defined in this file embeds this type as its
first member, named common. */
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack record that extends backtrack_common (embedded as first member).
NOTE(review): judging by the name it is used for the assertion opcodes -
confirm against the code that allocates it. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack record that extends backtrack_common (embedded as first member).
NOTE(review): judging by the name it is used for bracket opcodes - confirm
against the code that allocates it. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
/* The three members of the union share storage, so only the one that matches
the bracket kind named in its comment is meaningful at a time. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack record that extends backtrack_common (embedded as first member).
NOTE(review): judging by the name it is used for the possessive bracket
opcodes (the *POS family) - confirm against the code that allocates it. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack record that extends backtrack_common with a single label.
NOTE(review): judging by the name it belongs to OP_BRAMINZERO - confirm. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack record that extends backtrack_common with the label of the next
iteration. NOTE(review): judging by the name it is used for the single
character iterator opcodes - confirm against the code that allocates it. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Singly linked list node (chained through next) describing one function
generated for a recursion target. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls made before the function has been created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack record that extends backtrack_common with one flag.
NOTE(review): inlined_pattern presumably tells whether the recursed pattern
was compiled in place instead of being called - confirm against its users. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* OP_THEN_TRAP is defined as OP_TABLE_LENGTH. NOTE(review): presumably a
value just past the real opcodes, so it can serve as an internal pseudo
opcode - confirm against pcre_internal.h. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack record that extends backtrack_common (embedded as first member);
compiler_common keeps a pointer to the current one in its then_trap member. */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by the code generator functions; they receive it through a
parameter named common. Several int members hold offsets rather than
addresses: check_opcode_types assigns recursive_head_ptr, capture_last_ptr and
mark_ptr from ovector_start, and the OVECTOR / OVECTOR_PRIV macros add
ovector_start / cbra_ptr to a byte offset. */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
359
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* TRUE when the match might be empty. NOTE(review): this comment used to
read "TRUE, when minlength is greater than 0", which contradicts the member
name; check_opcode_types also sets the flag when it meets OP_SET_SOM. Confirm
the intended condition against the code that initializes this member. */
366 BOOL might_be_empty;
367 /* \K is found in the pattern. */
368 BOOL has_set_som;
369 /* (*SKIP:arg) is found in the pattern. */
370 BOOL has_skip_arg;
371 /* (*THEN) is found in the pattern. */
372 BOOL has_then;
373 /* Needs to know the start position anytime. */
374 BOOL needs_start_ptr;
375 /* Currently in recurse or negative assert. */
376 BOOL local_exit;
377 /* Currently in a positive assert. */
378 BOOL positive_assert;
379 /* Newline control. */
380 int nltype;
381 pcre_uint32 nlmax;
382 pcre_uint32 nlmin;
383 int newline;
384 int bsr_nltype;
385 pcre_uint32 bsr_nlmax;
386 pcre_uint32 bsr_nlmin;
387 /* Dollar endonly. */
388 int endonly;
389 /* Tables. */
390 sljit_sw ctypes;
391 /* Named capturing brackets. */
/* check_opcode_types walks name_table in steps of name_entry_size and reads
a group number with GET2 at the start of each entry. */
392 pcre_uchar *name_table;
393 sljit_sw name_count;
394 sljit_sw name_entry_size;
395
396 /* Labels and jump lists. */
397 struct sljit_label *partialmatchlabel;
398 struct sljit_label *quit_label;
399 struct sljit_label *forced_quit_label;
400 struct sljit_label *accept_label;
401 stub_list *stubs;
402 label_addr_list *label_addrs;
403 recurse_entry *entries;
404 recurse_entry *currententry;
405 jump_list *partialmatch;
406 jump_list *quit;
407 jump_list *positive_assert_quit;
408 jump_list *forced_quit;
409 jump_list *accept;
410 jump_list *calllimit;
411 jump_list *stackalloc;
412 jump_list *revertframes;
413 jump_list *wordboundary;
414 jump_list *anynewline;
415 jump_list *hspace;
416 jump_list *vspace;
417 jump_list *casefulcmp;
418 jump_list *caselesscmp;
419 jump_list *reset_match;
420 BOOL jscript_compat;
/* The remaining members exist only in builds with the matching SUPPORT_UTF,
SUPPORT_UCP and COMPILE_PCRE8 options. */
421 #ifdef SUPPORT_UTF
422 BOOL utf;
423 #ifdef SUPPORT_UCP
424 BOOL use_ucp;
425 #endif
426 #ifdef COMPILE_PCRE8
427 jump_list *utfreadchar;
428 jump_list *utfreadchar16;
429 jump_list *utfreadtype8;
430 #endif
431 #endif /* SUPPORT_UTF */
432 #ifdef SUPPORT_UCP
433 jump_list *getucd;
434 #endif
435 } compiler_common;
436
437 /* For byte_sequence_compare. */
438
/* The members after sourcereg exist only when SLJIT_UNALIGNED is defined and
non-zero. The unions c and oc have the same layout: one value that can be
viewed as an int, as an unsigned short, or as an array of character units
whose element type and count depend on the character width being compiled
(four 8-bit, two 16-bit or one 32-bit unit).
NOTE(review): presumably c holds the characters to compare and oc their
other-case counterparts - confirm against byte_sequence_compare. */
439 typedef struct compare_context {
440 int length;
441 int sourcereg;
442 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
443 int ucharptr;
444 union {
445 sljit_si asint;
446 sljit_uh asushort;
447 #if defined COMPILE_PCRE8
448 sljit_ub asbyte;
449 sljit_ub asuchars[4];
450 #elif defined COMPILE_PCRE16
451 sljit_uh asuchars[2];
452 #elif defined COMPILE_PCRE32
453 sljit_ui asuchars[1];
454 #endif
455 } c;
456 union {
457 sljit_si asint;
458 sljit_uh asushort;
459 #if defined COMPILE_PCRE8
460 sljit_ub asbyte;
461 sljit_ub asuchars[4];
462 #elif defined COMPILE_PCRE16
463 sljit_uh asuchars[2];
464 #elif defined COMPILE_PCRE32
465 sljit_ui asuchars[1];
466 #endif
467 } oc;
468 #endif
469 } compare_context;
470
471 /* Undefine sljit macros. */
472 #undef CMP
473
474 /* Used for accessing the elements of the stack. */
/* STACK(i) is a negative byte offset: STACK(0) is -sizeof(sljit_sw),
STACK(1) is -2 * sizeof(sljit_sw), and so on. */
475 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
476
/* Fixed assignment of sljit registers to the roles used by the generated
code. */
477 #define TMP1 SLJIT_SCRATCH_REG1
478 #define TMP2 SLJIT_SCRATCH_REG3
479 #define TMP3 SLJIT_TEMPORARY_EREG2
480 #define STR_PTR SLJIT_SAVED_REG1
481 #define STR_END SLJIT_SAVED_REG2
482 #define STACK_TOP SLJIT_SCRATCH_REG2
483 #define STACK_LIMIT SLJIT_SAVED_REG3
484 #define ARGUMENTS SLJIT_SAVED_EREG1
485 #define COUNT_MATCH SLJIT_SAVED_EREG2
486 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
487
488 /* Local space layout. */
/* The five fixed locals below occupy the first five sljit_sw sized words
(byte offsets 0 to 4 * sizeof(sljit_sw)). */
489 /* These two locals can be used by the current opcode. */
490 #define LOCALS0 (0 * sizeof(sljit_sw))
491 #define LOCALS1 (1 * sizeof(sljit_sw))
492 /* Two local variables for possessive quantifiers (char1 cannot use them). */
493 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
494 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
495 /* Max limit of recursions. */
496 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
497 /* The output vector is stored on the stack, and contains pointers
498 to characters. The vector data is divided into two groups: the first
499 group contains the start / end character pointers, and the second is
500 the start pointers when the end of the capturing group has not yet been reached. */
/* These four macros expect a variable named common (a compiler_common
pointer) to be in scope. PRIVATE_DATA(cc) indexes private_data_ptrs by the
distance of cc from the first opcode. */
501 #define OVECTOR_START (common->ovector_start)
502 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
503 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
504 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
505
/* Select the sljit move opcodes that match the width of one character unit:
UB for the 8-bit library, UH for the 16-bit library and UI for the 32-bit
library. The build fails if none of the COMPILE_PCRE* macros is defined. */
506 #if defined COMPILE_PCRE8
507 #define MOV_UCHAR SLJIT_MOV_UB
508 #define MOVU_UCHAR SLJIT_MOVU_UB
509 #elif defined COMPILE_PCRE16
510 #define MOV_UCHAR SLJIT_MOV_UH
511 #define MOVU_UCHAR SLJIT_MOVU_UH
512 #elif defined COMPILE_PCRE32
513 #define MOV_UCHAR SLJIT_MOV_UI
514 #define MOVU_UCHAR SLJIT_MOVU_UI
515 #else
516 #error Unsupported compiling mode
517 #endif
518
519 /* Shortcuts. Every macro below except DEFINE_COMPILER expands to an sljit
call whose first argument is a local variable named compiler; DEFINE_COMPILER
declares that variable from common->compiler, so it has to appear in a
function before any of the other macros is used. JUMPTO and CMPTO emit the
jump / compare and bind it to an existing label, JUMPHERE binds an existing
jump to a label emitted at the current position. */
520 #define DEFINE_COMPILER \
521 struct sljit_compiler *compiler = common->compiler
522 #define OP1(op, dst, dstw, src, srcw) \
523 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
524 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
525 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
526 #define LABEL() \
527 sljit_emit_label(compiler)
528 #define JUMP(type) \
529 sljit_emit_jump(compiler, (type))
530 #define JUMPTO(type, label) \
531 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
532 #define JUMPHERE(jump) \
533 sljit_set_label((jump), sljit_emit_label(compiler))
534 #define SET_LABEL(jump, label) \
535 sljit_set_label((jump), (label))
536 #define CMP(type, src1, src1w, src2, src2w) \
537 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
538 #define CMPTO(type, src1, src1w, src2, src2w, label) \
539 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
540 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
541 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
542 #define GET_LOCAL_BASE(dst, dstw, offset) \
543 sljit_get_local_base(compiler, (dst), (dstw), (offset))
544
545 #define READ_CHAR_MAX 0x7fffffff
546
/* Returns the address of the first opcode after a bracketed group.

cc must point at the opening opcode of the group: one of the assertion
opcodes OP_ASSERT..OP_ASSERTBACK_NOT or one of OP_ONCE..OP_SCOND (checked by
the first assertion). The link stored at offset 1 is followed from the opening
opcode through every OP_ALT until an opcode other than OP_ALT is reached,
which the second assertion requires to be in the range OP_KET..OP_KETRPOS.
The result points 1 + LINK_SIZE units past that closing opcode. */
547 static pcre_uchar* bracketend(pcre_uchar* cc)
548 {
549 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
550 do cc += GET(cc, 1); while (*cc == OP_ALT);
551 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
552 cc += 1 + LINK_SIZE;
553 return cc;
554 }
555
/* Returns the number of alternatives of a bracketed group ("no" stands for
"number of").

cc must point at the opening opcode of the group (same range check as in
bracketend). The link at offset 1 is followed once from the opening opcode
and once more for every OP_ALT reached, so the result is one plus the number
of OP_ALT opcodes in the chain; it is always at least 1. The chain has to end
on an opcode in the range OP_KET..OP_KETRPOS. */
556 static int no_alternatives(pcre_uchar* cc)
557 {
558 int count = 0;
559 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
560 do
561 {
562 cc += GET(cc, 1);
563 count++;
564 }
565 while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 return count;
568 }
569
/* Lookup table: ones_in_half_byte[v] is the number of set bits in the 4-bit
value v (0 <= v <= 15). Declared const so that the table cannot be modified
by accident and may be placed in read-only storage. */
static const int ones_in_half_byte[16] = {
/* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
/* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
574
575 /* Functions which might need modification for each newly supported opcode:
576 next_opcode
577 check_opcode_types
578 set_private_data_ptrs
579 get_framesize
580 init_frame
581 get_private_data_copy_length
582 copy_private_data
583 compile_matchingpath
584 compile_backtrackingpath
585 */
586
/* Returns a pointer to the opcode that follows the one at cc, stepping over
the operands of the opcode but not into or over bracketed groups (an opening
bracket is advanced by its own table length only).

common is only consulted for the utf flag, and only when SUPPORT_UTF is
defined. The function returns NULL for OP_ANYBYTE in UTF mode and for any
opcode that is not listed in the switch; callers treat NULL as "cannot be
compiled" (see check_opcode_types). */
587 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
588 {
589 SLJIT_UNUSED_ARG(common);
590 switch(*cc)
591 {
/* Opcodes whose total length is the entry of the OP_lengths table. */
592 case OP_SOD:
593 case OP_SOM:
594 case OP_SET_SOM:
595 case OP_NOT_WORD_BOUNDARY:
596 case OP_WORD_BOUNDARY:
597 case OP_NOT_DIGIT:
598 case OP_DIGIT:
599 case OP_NOT_WHITESPACE:
600 case OP_WHITESPACE:
601 case OP_NOT_WORDCHAR:
602 case OP_WORDCHAR:
603 case OP_ANY:
604 case OP_ALLANY:
605 case OP_NOTPROP:
606 case OP_PROP:
607 case OP_ANYNL:
608 case OP_NOT_HSPACE:
609 case OP_HSPACE:
610 case OP_NOT_VSPACE:
611 case OP_VSPACE:
612 case OP_EXTUNI:
613 case OP_EODN:
614 case OP_EOD:
615 case OP_CIRC:
616 case OP_CIRCM:
617 case OP_DOLL:
618 case OP_DOLLM:
619 case OP_CRSTAR:
620 case OP_CRMINSTAR:
621 case OP_CRPLUS:
622 case OP_CRMINPLUS:
623 case OP_CRQUERY:
624 case OP_CRMINQUERY:
625 case OP_CRRANGE:
626 case OP_CRMINRANGE:
627 case OP_CRPOSSTAR:
628 case OP_CRPOSPLUS:
629 case OP_CRPOSQUERY:
630 case OP_CRPOSRANGE:
631 case OP_CLASS:
632 case OP_NCLASS:
633 case OP_REF:
634 case OP_REFI:
635 case OP_DNREF:
636 case OP_DNREFI:
637 case OP_RECURSE:
638 case OP_CALLOUT:
639 case OP_ALT:
640 case OP_KET:
641 case OP_KETRMAX:
642 case OP_KETRMIN:
643 case OP_KETRPOS:
644 case OP_REVERSE:
645 case OP_ASSERT:
646 case OP_ASSERT_NOT:
647 case OP_ASSERTBACK:
648 case OP_ASSERTBACK_NOT:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_CBRA:
654 case OP_CBRAPOS:
655 case OP_COND:
656 case OP_SBRA:
657 case OP_SBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 case OP_SCOND:
661 case OP_CREF:
662 case OP_DNCREF:
663 case OP_RREF:
664 case OP_DNRREF:
665 case OP_DEF:
666 case OP_BRAZERO:
667 case OP_BRAMINZERO:
668 case OP_BRAPOSZERO:
669 case OP_PRUNE:
670 case OP_SKIP:
671 case OP_THEN:
672 case OP_COMMIT:
673 case OP_FAIL:
674 case OP_ACCEPT:
675 case OP_ASSERT_ACCEPT:
676 case OP_CLOSE:
677 case OP_SKIPZERO:
678 return cc + PRIV(OP_lengths)[*cc];
679
/* Opcodes that end with a character operand. The table length is added
first; in UTF mode the last unit that was skipped (cc[-1]) is then tested
with HAS_EXTRALEN and, if it announces extra units, GET_EXTRALEN(cc[-1]) more
units are skipped. */
680 case OP_CHAR:
681 case OP_CHARI:
682 case OP_NOT:
683 case OP_NOTI:
684 case OP_STAR:
685 case OP_MINSTAR:
686 case OP_PLUS:
687 case OP_MINPLUS:
688 case OP_QUERY:
689 case OP_MINQUERY:
690 case OP_UPTO:
691 case OP_MINUPTO:
692 case OP_EXACT:
693 case OP_POSSTAR:
694 case OP_POSPLUS:
695 case OP_POSQUERY:
696 case OP_POSUPTO:
697 case OP_STARI:
698 case OP_MINSTARI:
699 case OP_PLUSI:
700 case OP_MINPLUSI:
701 case OP_QUERYI:
702 case OP_MINQUERYI:
703 case OP_UPTOI:
704 case OP_MINUPTOI:
705 case OP_EXACTI:
706 case OP_POSSTARI:
707 case OP_POSPLUSI:
708 case OP_POSQUERYI:
709 case OP_POSUPTOI:
710 case OP_NOTSTAR:
711 case OP_NOTMINSTAR:
712 case OP_NOTPLUS:
713 case OP_NOTMINPLUS:
714 case OP_NOTQUERY:
715 case OP_NOTMINQUERY:
716 case OP_NOTUPTO:
717 case OP_NOTMINUPTO:
718 case OP_NOTEXACT:
719 case OP_NOTPOSSTAR:
720 case OP_NOTPOSPLUS:
721 case OP_NOTPOSQUERY:
722 case OP_NOTPOSUPTO:
723 case OP_NOTSTARI:
724 case OP_NOTMINSTARI:
725 case OP_NOTPLUSI:
726 case OP_NOTMINPLUSI:
727 case OP_NOTQUERYI:
728 case OP_NOTMINQUERYI:
729 case OP_NOTUPTOI:
730 case OP_NOTMINUPTOI:
731 case OP_NOTEXACTI:
732 case OP_NOTPOSSTARI:
733 case OP_NOTPOSPLUSI:
734 case OP_NOTPOSQUERYI:
735 case OP_NOTPOSUPTOI:
736 cc += PRIV(OP_lengths)[*cc];
737 #ifdef SUPPORT_UTF
738 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
739 #endif
740 return cc;
741
742 /* Special cases. */
/* For the type iterators the result is one unit short of the table length.
NOTE(review): presumably so that the returned pointer addresses the type
opcode that ends the instruction, letting the caller process it as the next
opcode - confirm against the callers. */
743 case OP_TYPESTAR:
744 case OP_TYPEMINSTAR:
745 case OP_TYPEPLUS:
746 case OP_TYPEMINPLUS:
747 case OP_TYPEQUERY:
748 case OP_TYPEMINQUERY:
749 case OP_TYPEUPTO:
750 case OP_TYPEMINUPTO:
751 case OP_TYPEEXACT:
752 case OP_TYPEPOSSTAR:
753 case OP_TYPEPOSPLUS:
754 case OP_TYPEPOSQUERY:
755 case OP_TYPEPOSUPTO:
756 return cc + PRIV(OP_lengths)[*cc] - 1;
757
/* OP_ANYBYTE is rejected (NULL) in UTF mode, otherwise it is one unit long. */
758 case OP_ANYBYTE:
759 #ifdef SUPPORT_UTF
760 if (common->utf) return NULL;
761 #endif
762 return cc + 1;
763
/* OP_XCLASS stores its own total length at offset 1. */
764 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
765 case OP_XCLASS:
766 return cc + GET(cc, 1);
767 #endif
768
/* Verbs with an argument: cc[1] is added to a fixed part of 1 + 2 units.
NOTE(review): presumably cc[1] is the length of the name and the fixed part
covers the opcode, the length unit and a terminator - confirm. */
769 case OP_MARK:
770 case OP_PRUNE_ARG:
771 case OP_SKIP_ARG:
772 case OP_THEN_ARG:
773 return cc + 1 + 2 + cc[1];
774
775 default:
776 /* All opcodes are supported now! */
777 SLJIT_ASSERT_STOP();
778 return NULL;
779 }
780 }
781
/* First pass over the byte code in the range [cc, ccend).

While walking the opcodes it records in common which features the pattern
uses (has_set_som, has_then, has_skip_arg, needs_start_ptr, might_be_empty),
clears the optimized_cbracket flag of every capturing group that is referenced
or possessive, accumulates read_only_data_size, and reserves one sljit_sw
sized slot at the current ovector_start for recursive_head_ptr,
capture_last_ptr and mark_ptr the first time each of them is needed.

Returns TRUE when the whole range was walked. Returns FALSE when next_opcode
rejects an opcode (returns NULL) or when OP_COND / OP_SCOND is directly
followed by OP_CALLOUT. */
782 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
783 {
784 int count;
785 pcre_uchar *slot;
786
787 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
788 while (cc < ccend)
789 {
790 switch(*cc)
791 {
792 case OP_SET_SOM:
793 common->has_set_som = TRUE;
794 common->might_be_empty = TRUE;
795 cc += 1;
796 break;
797
/* A referenced capturing group loses its optimized flag; the group number
is read with GET2 at offset 1. */
798 case OP_REF:
799 case OP_REFI:
800 common->optimized_cbracket[GET2(cc, 1)] = 0;
801 cc += 1 + IMM2_SIZE;
802 break;
803
/* Brackets with more than four alternatives reserve one sljit_uw of
read-only data per alternative. Only the opening opcode is stepped over, so
the contents of the bracket are visited by the following iterations. */
804 case OP_BRA:
805 case OP_CBRA:
806 case OP_SBRA:
807 case OP_SCBRA:
808 count = no_alternatives(cc);
809 if (count > 4)
810 common->read_only_data_size += count * sizeof(sljit_uw);
811 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
812 break;
813
814 case OP_CBRAPOS:
815 case OP_SCBRAPOS:
816 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
817 cc += 1 + LINK_SIZE + IMM2_SIZE;
818 break;
819
820 case OP_COND:
821 case OP_SCOND:
822 /* Only AUTO_CALLOUT can insert this opcode. We do
823 not intend to support this case. */
824 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
825 return FALSE;
826 cc += 1 + LINK_SIZE;
827 break;
828
829 case OP_CREF:
830 common->optimized_cbracket[GET2(cc, 1)] = 0;
831 cc += 1 + IMM2_SIZE;
832 break;
833
/* References by name: the first operand selects the first entry of the name
table, the second operand is the number of consecutive entries. Every group
number found at the start of those entries loses its optimized flag. */
834 case OP_DNREF:
835 case OP_DNREFI:
836 case OP_DNCREF:
837 count = GET2(cc, 1 + IMM2_SIZE);
838 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
839 while (count-- > 0)
840 {
841 common->optimized_cbracket[GET2(slot, 0)] = 0;
842 slot += common->name_entry_size;
843 }
844 cc += 1 + 2 * IMM2_SIZE;
845 break;
846
847 case OP_RECURSE:
848 /* Set its value only once. */
849 if (common->recursive_head_ptr == 0)
850 {
851 common->recursive_head_ptr = common->ovector_start;
852 common->ovector_start += sizeof(sljit_sw);
853 }
854 cc += 1 + LINK_SIZE;
855 break;
856
/* Callouts need the capture_last slot; it is reserved once. */
857 case OP_CALLOUT:
858 if (common->capture_last_ptr == 0)
859 {
860 common->capture_last_ptr = common->ovector_start;
861 common->ovector_start += sizeof(sljit_sw);
862 }
863 cc += 2 + 2 * LINK_SIZE;
864 break;
865
/* NOTE(review): control_head_ptr is set to 1 here and in the OP_THEN and
OP_SKIP_ARG cases; this looks like a "needed" marker rather than a real
offset - confirm where the final value is assigned. */
866 case OP_THEN_ARG:
867 common->has_then = TRUE;
868 common->control_head_ptr = 1;
869 /* Fall through. */
870
871 case OP_PRUNE_ARG:
872 common->needs_start_ptr = TRUE;
873 /* Fall through. */
874
/* The mark slot is reserved once; the instruction length is computed the
same way as in next_opcode. */
875 case OP_MARK:
876 if (common->mark_ptr == 0)
877 {
878 common->mark_ptr = common->ovector_start;
879 common->ovector_start += sizeof(sljit_sw);
880 }
881 cc += 1 + 2 + cc[1];
882 break;
883
884 case OP_THEN:
885 common->has_then = TRUE;
886 common->control_head_ptr = 1;
887 /* Fall through. */
888
889 case OP_PRUNE:
890 case OP_SKIP:
891 common->needs_start_ptr = TRUE;
892 cc += 1;
893 break;
894
895 case OP_SKIP_ARG:
896 common->control_head_ptr = 1;
897 common->has_skip_arg = TRUE;
898 cc += 1 + 2 + cc[1];
899 break;
900
/* Everything else needs no bookkeeping and is only stepped over. */
901 default:
902 cc = next_opcode(common, cc);
903 if (cc == NULL)
904 return FALSE;
905 break;
906 }
907 }
908 return TRUE;
909 }
910
/* Maps the repeat opcode at cc (the OP_CR* opcodes that may follow a
character class) to a small size value:

  OP_CRSTAR, OP_CRPLUS                                     -> 2
  OP_CRMINSTAR, OP_CRMINPLUS, OP_CRQUERY, OP_CRMINQUERY    -> 1
  OP_CRRANGE, OP_CRMINRANGE with equal bounds              -> 0
  OP_CRRANGE, OP_CRMINRANGE with different bounds          -> 2
  any other opcode                                         -> 0

The two bounds of a range are read with GET2 at offsets 1 and 1 + IMM2_SIZE.
NOTE(review): presumably the result is the number of private data words the
iterator needs - confirm against the caller. */
911 static int get_class_iterator_size(pcre_uchar *cc)
912 {
913 switch(*cc)
914 {
915 case OP_CRSTAR:
916 case OP_CRPLUS:
917 return 2;
918
919 case OP_CRMINSTAR:
920 case OP_CRMINPLUS:
921 case OP_CRQUERY:
922 case OP_CRMINQUERY:
923 return 1;
924
925 case OP_CRRANGE:
926 case OP_CRMINRANGE:
927 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
928 return 0;
929 return 2;
930
931 default:
932 return 0;
933 }
934 }
935
/* Detects a bracketed group that is followed in the byte code by literal
copies of itself and records the repeat in common->private_data_ptrs.

begin points at the opening opcode of the first copy. Two shapes are
recognized:
  - plain consecutive copies, each byte-for-byte equal to the first one;
    they are counted in min (which starts at 1 for the first copy);
  - after those, copies that are each preceded by the same OP_BRAZERO or
    OP_BRAMINZERO and wrapped in an OP_BRA, nested inside each other and
    closed by max OP_KET opcodes in a row; they are counted in max.

A repeat is recorded in three consecutive private_data_ptrs slots, starting
at the index of the code unit that follows the closing OP_KET of a copy:
the distance to skip, the iterator opcode (OP_EXACT, OP_UPTO or OP_MINUPTO)
and the repeat count. set_private_data_ptrs tests the first of these slots
when it reaches that OP_KET.

Returns TRUE when the slot belonging to the bracket at begin is filled in on
return (now or by an earlier call), FALSE otherwise. Note that FALSE can be
returned after an OP_UPTO / OP_MINUPTO record was written for a later copy
(when fewer than three plain copies remain in front of it). */
936 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
937 {
938 pcre_uchar *end = bracketend(begin);
939 pcre_uchar *next;
940 pcre_uchar *next_end;
941 pcre_uchar *max_end;
942 pcre_uchar type;
943 sljit_sw length = end - begin;
944 int min, max, i;
945
946 /* Detect fixed iterations first. */
/* end[-(1 + LINK_SIZE)] is the closing opcode of the bracket; brackets that
are closed by anything other than a plain OP_KET are not considered. */
947 if (end[-(1 + LINK_SIZE)] != OP_KET)
948 return FALSE;
949
950 /* Already detected repeat. */
951 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
952 return TRUE;
953
/* Count the plain copies: same opening opcode, same length, same contents. */
954 next = end;
955 min = 1;
956 while (1)
957 {
958 if (*next != *begin)
959 break;
960 next_end = bracketend(next);
961 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
962 break;
963 next = next_end;
964 min++;
965 }
966
/* Exactly two plain copies are never converted, and the optional copies
that may follow them are not examined either. */
967 if (min == 2)
968 return FALSE;
969
970 max = 0;
971 max_end = next;
972 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
973 {
974 type = *next;
/* Each round matches "type, OP_BRA, copy" and continues right after the
copy, i.e. inside the OP_BRA that was just entered. */
975 while (1)
976 {
977 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
978 break;
979 next_end = bracketend(next + 2 + LINK_SIZE);
980 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
981 break;
982 next = next_end;
983 max++;
984 }
985
/* The innermost optional copy has no OP_BRA wrapper: "type, copy". */
986 if (next[0] == type && next[1] == *begin && max >= 1)
987 {
988 next_end = bracketend(next + 1);
989 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
990 {
/* The max wrappers opened above must be closed by max consecutive OP_KET
opcodes. */
991 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
992 if (*next_end != OP_KET)
993 break;
994
995 if (i == max)
996 {
997 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
998 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
999 /* +2 the original and the last. */
1000 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1001 if (min == 1)
1002 return TRUE;
/* The last plain copy now belongs to the recorded optional repeat, so it is
taken out of the fixed count and max_end is moved back over it.
NOTE(review): GET(max_end, -LINK_SIZE) reads the link stored in the OP_KET
that precedes max_end, presumably the length of that copy up to its OP_KET -
confirm. */
1003 min--;
1004 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1005 }
1006 }
1007 }
1008 }
1009
/* Three or more remaining plain copies become one OP_EXACT repeat that is
recorded at the closing OP_KET of the first copy. */
1010 if (min >= 3)
1011 {
1012 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1013 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1015 return TRUE;
1016 }
1017
1018 return FALSE;
1019 }
1020
/* Groups of case labels for the single character iterator opcodes, meant to
be placed in front of a shared switch body. The number in the name matches
the space value set_private_data_ptrs assigns for the group: the _1 groups
get space = 1, the _2A and _2B groups get space = 2. The _2B groups contain
the UPTO / MINUPTO opcodes, for which set_private_data_ptrs uses a size of
-(2 + IMM2_SIZE) instead of -2. The TYPE variants list the corresponding
OP_TYPE* opcodes. */
1021 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1022 case OP_MINSTAR: \
1023 case OP_MINPLUS: \
1024 case OP_QUERY: \
1025 case OP_MINQUERY: \
1026 case OP_MINSTARI: \
1027 case OP_MINPLUSI: \
1028 case OP_QUERYI: \
1029 case OP_MINQUERYI: \
1030 case OP_NOTMINSTAR: \
1031 case OP_NOTMINPLUS: \
1032 case OP_NOTQUERY: \
1033 case OP_NOTMINQUERY: \
1034 case OP_NOTMINSTARI: \
1035 case OP_NOTMINPLUSI: \
1036 case OP_NOTQUERYI: \
1037 case OP_NOTMINQUERYI:
1038
1039 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1040 case OP_STAR: \
1041 case OP_PLUS: \
1042 case OP_STARI: \
1043 case OP_PLUSI: \
1044 case OP_NOTSTAR: \
1045 case OP_NOTPLUS: \
1046 case OP_NOTSTARI: \
1047 case OP_NOTPLUSI:
1048
1049 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1050 case OP_UPTO: \
1051 case OP_MINUPTO: \
1052 case OP_UPTOI: \
1053 case OP_MINUPTOI: \
1054 case OP_NOTUPTO: \
1055 case OP_NOTMINUPTO: \
1056 case OP_NOTUPTOI: \
1057 case OP_NOTMINUPTOI:
1058
1059 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1060 case OP_TYPEMINSTAR: \
1061 case OP_TYPEMINPLUS: \
1062 case OP_TYPEQUERY: \
1063 case OP_TYPEMINQUERY:
1064
1065 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1066 case OP_TYPESTAR: \
1067 case OP_TYPEPLUS:
1068
1069 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1070 case OP_TYPEUPTO: \
1071 case OP_TYPEMINUPTO:
1072
1073 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1074 {
1075 pcre_uchar *cc = common->start;
1076 pcre_uchar *alternative;
1077 pcre_uchar *end = NULL;
1078 int private_data_ptr = *private_data_start;
1079 int space, size, bracketlen;
1080
1081 while (cc < ccend)
1082 {
1083 space = 0;
1084 size = 0;
1085 bracketlen = 0;
1086 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1087 return;
1088
1089 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1090 if (detect_repeat(common, cc))
1091 {
1092 /* These brackets are converted to repeats, so no global
1093 based single character repeat is allowed. */
1094 if (cc >= end)
1095 end = bracketend(cc);
1096 }
1097
1098 switch(*cc)
1099 {
1100 case OP_KET:
1101 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1102 {
1103 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1104 private_data_ptr += sizeof(sljit_sw);
1105 cc += common->private_data_ptrs[cc + 1 - common->start];
1106 }
1107 cc += 1 + LINK_SIZE;
1108 break;
1109
1110 case OP_ASSERT:
1111 case OP_ASSERT_NOT:
1112 case OP_ASSERTBACK:
1113 case OP_ASSERTBACK_NOT:
1114 case OP_ONCE:
1115 case OP_ONCE_NC:
1116 case OP_BRAPOS:
1117 case OP_SBRA:
1118 case OP_SBRAPOS:
1119 case OP_SCOND:
1120 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1121 private_data_ptr += sizeof(sljit_sw);
1122 bracketlen = 1 + LINK_SIZE;
1123 break;
1124
1125 case OP_CBRAPOS:
1126 case OP_SCBRAPOS:
1127 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1128 private_data_ptr += sizeof(sljit_sw);
1129 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1130 break;
1131
1132 case OP_COND:
1133 /* Might be a hidden SCOND. */
1134 alternative = cc + GET(cc, 1);
1135 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1136 {
1137 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1138 private_data_ptr += sizeof(sljit_sw);
1139 }
1140 bracketlen = 1 + LINK_SIZE;
1141 break;
1142
1143 case OP_BRA:
1144 bracketlen = 1 + LINK_SIZE;
1145 break;
1146
1147 case OP_CBRA:
1148 case OP_SCBRA:
1149 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1150 break;
1151
1152 CASE_ITERATOR_PRIVATE_DATA_1
1153 space = 1;
1154 size = -2;
1155 break;
1156
1157 CASE_ITERATOR_PRIVATE_DATA_2A
1158 space = 2;
1159 size = -2;
1160 break;
1161
1162 CASE_ITERATOR_PRIVATE_DATA_2B
1163 space = 2;
1164 size = -(2 + IMM2_SIZE);
1165 break;
1166
1167 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1168 space = 1;
1169 size = 1;
1170 break;
1171
1172 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1173 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1174 space = 2;
1175 size = 1;
1176 break;
1177
1178 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1179 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1180 space = 2;
1181 size = 1 + IMM2_SIZE;
1182 break;
1183
1184 case OP_CLASS:
1185 case OP_NCLASS:
1186 size += 1 + 32 / sizeof(pcre_uchar);
1187 space = get_class_iterator_size(cc + size);
1188 break;
1189
1190 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1191 case OP_XCLASS:
1192 size = GET(cc, 1);
1193 space = get_class_iterator_size(cc + size);
1194 break;
1195 #endif
1196
1197 default:
1198 cc = next_opcode(common, cc);
1199 SLJIT_ASSERT(cc != NULL);
1200 break;
1201 }
1202
1203 /* Character iterators, which are not inside a repeated bracket,
1204 gets a private slot instead of allocating it on the stack. */
1205 if (space > 0 && cc >= end)
1206 {
1207 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1208 private_data_ptr += sizeof(sljit_sw) * space;
1209 }
1210
1211 if (size != 0)
1212 {
1213 if (size < 0)
1214 {
1215 cc += -size;
1216 #ifdef SUPPORT_UTF
1217 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1218 #endif
1219 }
1220 else
1221 cc += size;
1222 }
1223
1224 if (bracketlen > 0)
1225 {
1226 if (cc >= end)
1227 {
1228 end = bracketend(cc);
1229 if (end[-1 - LINK_SIZE] == OP_KET)
1230 end = NULL;
1231 }
1232 cc += bracketlen;
1233 }
1234 }
1235 *private_data_start = private_data_ptr;
1236 }
1237
1238 /* Returns with a frame_types (always < 0) if no need for frame. */
1239 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1240 {
1241 int length = 0;
1242 int possessive = 0;
1243 BOOL stack_restore = FALSE;
1244 BOOL setsom_found = recursive;
1245 BOOL setmark_found = recursive;
1246 /* The last capture is a local variable even for recursions. */
1247 BOOL capture_last_found = FALSE;
1248
1249 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1250 SLJIT_ASSERT(common->control_head_ptr != 0);
1251 *needs_control_head = TRUE;
1252 #else
1253 *needs_control_head = FALSE;
1254 #endif
1255
1256 if (ccend == NULL)
1257 {
1258 ccend = bracketend(cc) - (1 + LINK_SIZE);
1259 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1260 {
1261 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1262 /* This is correct regardless of common->capture_last_ptr. */
1263 capture_last_found = TRUE;
1264 }
1265 cc = next_opcode(common, cc);
1266 }
1267
1268 SLJIT_ASSERT(cc != NULL);
1269 while (cc < ccend)
1270 switch(*cc)
1271 {
1272 case OP_SET_SOM:
1273 SLJIT_ASSERT(common->has_set_som);
1274 stack_restore = TRUE;
1275 if (!setsom_found)
1276 {
1277 length += 2;
1278 setsom_found = TRUE;
1279 }
1280 cc += 1;
1281 break;
1282
1283 case OP_MARK:
1284 case OP_PRUNE_ARG:
1285 case OP_THEN_ARG:
1286 SLJIT_ASSERT(common->mark_ptr != 0);
1287 stack_restore = TRUE;
1288 if (!setmark_found)
1289 {
1290 length += 2;
1291 setmark_found = TRUE;
1292 }
1293 if (common->control_head_ptr != 0)
1294 *needs_control_head = TRUE;
1295 cc += 1 + 2 + cc[1];
1296 break;
1297
1298 case OP_RECURSE:
1299 stack_restore = TRUE;
1300 if (common->has_set_som && !setsom_found)
1301 {
1302 length += 2;
1303 setsom_found = TRUE;
1304 }
1305 if (common->mark_ptr != 0 && !setmark_found)
1306 {
1307 length += 2;
1308 setmark_found = TRUE;
1309 }
1310 if (common->capture_last_ptr != 0 && !capture_last_found)
1311 {
1312 length += 2;
1313 capture_last_found = TRUE;
1314 }
1315 cc += 1 + LINK_SIZE;
1316 break;
1317
1318 case OP_CBRA:
1319 case OP_CBRAPOS:
1320 case OP_SCBRA:
1321 case OP_SCBRAPOS:
1322 stack_restore = TRUE;
1323 if (common->capture_last_ptr != 0 && !capture_last_found)
1324 {
1325 length += 2;
1326 capture_last_found = TRUE;
1327 }
1328 length += 3;
1329 cc += 1 + LINK_SIZE + IMM2_SIZE;
1330 break;
1331
1332 default:
1333 stack_restore = TRUE;
1334 /* Fall through. */
1335
1336 case OP_NOT_WORD_BOUNDARY:
1337 case OP_WORD_BOUNDARY:
1338 case OP_NOT_DIGIT:
1339 case OP_DIGIT:
1340 case OP_NOT_WHITESPACE:
1341 case OP_WHITESPACE:
1342 case OP_NOT_WORDCHAR:
1343 case OP_WORDCHAR:
1344 case OP_ANY:
1345 case OP_ALLANY:
1346 case OP_ANYBYTE:
1347 case OP_NOTPROP:
1348 case OP_PROP:
1349 case OP_ANYNL:
1350 case OP_NOT_HSPACE:
1351 case OP_HSPACE:
1352 case OP_NOT_VSPACE:
1353 case OP_VSPACE:
1354 case OP_EXTUNI:
1355 case OP_EODN:
1356 case OP_EOD:
1357 case OP_CIRC:
1358 case OP_CIRCM:
1359 case OP_DOLL:
1360 case OP_DOLLM:
1361 case OP_CHAR:
1362 case OP_CHARI:
1363 case OP_NOT:
1364 case OP_NOTI:
1365
1366 case OP_EXACT:
1367 case OP_POSSTAR:
1368 case OP_POSPLUS:
1369 case OP_POSQUERY:
1370 case OP_POSUPTO:
1371
1372 case OP_EXACTI:
1373 case OP_POSSTARI:
1374 case OP_POSPLUSI:
1375 case OP_POSQUERYI:
1376 case OP_POSUPTOI:
1377
1378 case OP_NOTEXACT:
1379 case OP_NOTPOSSTAR:
1380 case OP_NOTPOSPLUS:
1381 case OP_NOTPOSQUERY:
1382 case OP_NOTPOSUPTO:
1383
1384 case OP_NOTEXACTI:
1385 case OP_NOTPOSSTARI:
1386 case OP_NOTPOSPLUSI:
1387 case OP_NOTPOSQUERYI:
1388 case OP_NOTPOSUPTOI:
1389
1390 case OP_TYPEEXACT:
1391 case OP_TYPEPOSSTAR:
1392 case OP_TYPEPOSPLUS:
1393 case OP_TYPEPOSQUERY:
1394 case OP_TYPEPOSUPTO:
1395
1396 case OP_CLASS:
1397 case OP_NCLASS:
1398 case OP_XCLASS:
1399
1400 cc = next_opcode(common, cc);
1401 SLJIT_ASSERT(cc != NULL);
1402 break;
1403 }
1404
1405 /* Possessive quantifiers can use a special case. */
1406 if (SLJIT_UNLIKELY(possessive == length))
1407 return stack_restore ? no_frame : no_stack;
1408
1409 if (length > 0)
1410 return length + 1;
1411 return stack_restore ? no_frame : no_stack;
1412 }
1413
1414 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1415 {
1416 DEFINE_COMPILER;
1417 BOOL setsom_found = recursive;
1418 BOOL setmark_found = recursive;
1419 /* The last capture is a local variable even for recursions. */
1420 BOOL capture_last_found = FALSE;
1421 int offset;
1422
1423 /* >= 1 + shortest item size (2) */
1424 SLJIT_UNUSED_ARG(stacktop);
1425 SLJIT_ASSERT(stackpos >= stacktop + 2);
1426
1427 stackpos = STACK(stackpos);
1428 if (ccend == NULL)
1429 {
1430 ccend = bracketend(cc) - (1 + LINK_SIZE);
1431 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1432 cc = next_opcode(common, cc);
1433 }
1434
1435 SLJIT_ASSERT(cc != NULL);
1436 while (cc < ccend)
1437 switch(*cc)
1438 {
1439 case OP_SET_SOM:
1440 SLJIT_ASSERT(common->has_set_som);
1441 if (!setsom_found)
1442 {
1443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1445 stackpos += (int)sizeof(sljit_sw);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1447 stackpos += (int)sizeof(sljit_sw);
1448 setsom_found = TRUE;
1449 }
1450 cc += 1;
1451 break;
1452
1453 case OP_MARK:
1454 case OP_PRUNE_ARG:
1455 case OP_THEN_ARG:
1456 SLJIT_ASSERT(common->mark_ptr != 0);
1457 if (!setmark_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 setmark_found = TRUE;
1465 }
1466 cc += 1 + 2 + cc[1];
1467 break;
1468
1469 case OP_RECURSE:
1470 if (common->has_set_som && !setsom_found)
1471 {
1472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477 setsom_found = TRUE;
1478 }
1479 if (common->mark_ptr != 0 && !setmark_found)
1480 {
1481 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1483 stackpos += (int)sizeof(sljit_sw);
1484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1485 stackpos += (int)sizeof(sljit_sw);
1486 setmark_found = TRUE;
1487 }
1488 if (common->capture_last_ptr != 0 && !capture_last_found)
1489 {
1490 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1492 stackpos += (int)sizeof(sljit_sw);
1493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1494 stackpos += (int)sizeof(sljit_sw);
1495 capture_last_found = TRUE;
1496 }
1497 cc += 1 + LINK_SIZE;
1498 break;
1499
1500 case OP_CBRA:
1501 case OP_CBRAPOS:
1502 case OP_SCBRA:
1503 case OP_SCBRAPOS:
1504 if (common->capture_last_ptr != 0 && !capture_last_found)
1505 {
1506 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1508 stackpos += (int)sizeof(sljit_sw);
1509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1510 stackpos += (int)sizeof(sljit_sw);
1511 capture_last_found = TRUE;
1512 }
1513 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1514 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1515 stackpos += (int)sizeof(sljit_sw);
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1517 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1519 stackpos += (int)sizeof(sljit_sw);
1520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1521 stackpos += (int)sizeof(sljit_sw);
1522
1523 cc += 1 + LINK_SIZE + IMM2_SIZE;
1524 break;
1525
1526 default:
1527 cc = next_opcode(common, cc);
1528 SLJIT_ASSERT(cc != NULL);
1529 break;
1530 }
1531
1532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1533 SLJIT_ASSERT(stackpos == STACK(stacktop));
1534 }
1535
1536 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1537 {
1538 int private_data_length = needs_control_head ? 3 : 2;
1539 int size;
1540 pcre_uchar *alternative;
1541 /* Calculate the sum of the private machine words. */
1542 while (cc < ccend)
1543 {
1544 size = 0;
1545 switch(*cc)
1546 {
1547 case OP_KET:
1548 if (PRIVATE_DATA(cc) != 0)
1549 private_data_length++;
1550 cc += 1 + LINK_SIZE;
1551 break;
1552
1553 case OP_ASSERT:
1554 case OP_ASSERT_NOT:
1555 case OP_ASSERTBACK:
1556 case OP_ASSERTBACK_NOT:
1557 case OP_ONCE:
1558 case OP_ONCE_NC:
1559 case OP_BRAPOS:
1560 case OP_SBRA:
1561 case OP_SBRAPOS:
1562 case OP_SCOND:
1563 private_data_length++;
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_CBRA:
1568 case OP_SCBRA:
1569 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1570 private_data_length++;
1571 cc += 1 + LINK_SIZE + IMM2_SIZE;
1572 break;
1573
1574 case OP_CBRAPOS:
1575 case OP_SCBRAPOS:
1576 private_data_length += 2;
1577 cc += 1 + LINK_SIZE + IMM2_SIZE;
1578 break;
1579
1580 case OP_COND:
1581 /* Might be a hidden SCOND. */
1582 alternative = cc + GET(cc, 1);
1583 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1584 private_data_length++;
1585 cc += 1 + LINK_SIZE;
1586 break;
1587
1588 CASE_ITERATOR_PRIVATE_DATA_1
1589 if (PRIVATE_DATA(cc))
1590 private_data_length++;
1591 cc += 2;
1592 #ifdef SUPPORT_UTF
1593 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1594 #endif
1595 break;
1596
1597 CASE_ITERATOR_PRIVATE_DATA_2A
1598 if (PRIVATE_DATA(cc))
1599 private_data_length += 2;
1600 cc += 2;
1601 #ifdef SUPPORT_UTF
1602 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1603 #endif
1604 break;
1605
1606 CASE_ITERATOR_PRIVATE_DATA_2B
1607 if (PRIVATE_DATA(cc))
1608 private_data_length += 2;
1609 cc += 2 + IMM2_SIZE;
1610 #ifdef SUPPORT_UTF
1611 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1612 #endif
1613 break;
1614
1615 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1616 if (PRIVATE_DATA(cc))
1617 private_data_length++;
1618 cc += 1;
1619 break;
1620
1621 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 1;
1625 break;
1626
1627 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1628 if (PRIVATE_DATA(cc))
1629 private_data_length += 2;
1630 cc += 1 + IMM2_SIZE;
1631 break;
1632
1633 case OP_CLASS:
1634 case OP_NCLASS:
1635 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1636 case OP_XCLASS:
1637 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1638 #else
1639 size = 1 + 32 / (int)sizeof(pcre_uchar);
1640 #endif
1641 if (PRIVATE_DATA(cc))
1642 private_data_length += get_class_iterator_size(cc + size);
1643 cc += size;
1644 break;
1645
1646 default:
1647 cc = next_opcode(common, cc);
1648 SLJIT_ASSERT(cc != NULL);
1649 break;
1650 }
1651 }
1652 SLJIT_ASSERT(cc == ccend);
1653 return private_data_length;
1654 }
1655
1656 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1657 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1658 {
1659 DEFINE_COMPILER;
1660 int srcw[2];
1661 int count, size;
1662 BOOL tmp1next = TRUE;
1663 BOOL tmp1empty = TRUE;
1664 BOOL tmp2empty = TRUE;
1665 pcre_uchar *alternative;
1666 enum {
1667 start,
1668 loop,
1669 end
1670 } status;
1671
1672 status = save ? start : loop;
1673 stackptr = STACK(stackptr - 2);
1674 stacktop = STACK(stacktop - 1);
1675
1676 if (!save)
1677 {
1678 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1679 if (stackptr < stacktop)
1680 {
1681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1682 stackptr += sizeof(sljit_sw);
1683 tmp1empty = FALSE;
1684 }
1685 if (stackptr < stacktop)
1686 {
1687 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1688 stackptr += sizeof(sljit_sw);
1689 tmp2empty = FALSE;
1690 }
1691 /* The tmp1next must be TRUE in either way. */
1692 }
1693
1694 do
1695 {
1696 count = 0;
1697 switch(status)
1698 {
1699 case start:
1700 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1701 count = 1;
1702 srcw[0] = common->recursive_head_ptr;
1703 if (needs_control_head)
1704 {
1705 SLJIT_ASSERT(common->control_head_ptr != 0);
1706 count = 2;
1707 srcw[1] = common->control_head_ptr;
1708 }
1709 status = loop;
1710 break;
1711
1712 case loop:
1713 if (cc >= ccend)
1714 {
1715 status = end;
1716 break;
1717 }
1718
1719 switch(*cc)
1720 {
1721 case OP_KET:
1722 if (PRIVATE_DATA(cc) != 0)
1723 {
1724 count = 1;
1725 srcw[0] = PRIVATE_DATA(cc);
1726 }
1727 cc += 1 + LINK_SIZE;
1728 break;
1729
1730 case OP_ASSERT:
1731 case OP_ASSERT_NOT:
1732 case OP_ASSERTBACK:
1733 case OP_ASSERTBACK_NOT:
1734 case OP_ONCE:
1735 case OP_ONCE_NC:
1736 case OP_BRAPOS:
1737 case OP_SBRA:
1738 case OP_SBRAPOS:
1739 case OP_SCOND:
1740 count = 1;
1741 srcw[0] = PRIVATE_DATA(cc);
1742 SLJIT_ASSERT(srcw[0] != 0);
1743 cc += 1 + LINK_SIZE;
1744 break;
1745
1746 case OP_CBRA:
1747 case OP_SCBRA:
1748 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1749 {
1750 count = 1;
1751 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1752 }
1753 cc += 1 + LINK_SIZE + IMM2_SIZE;
1754 break;
1755
1756 case OP_CBRAPOS:
1757 case OP_SCBRAPOS:
1758 count = 2;
1759 srcw[0] = PRIVATE_DATA(cc);
1760 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1761 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1762 cc += 1 + LINK_SIZE + IMM2_SIZE;
1763 break;
1764
1765 case OP_COND:
1766 /* Might be a hidden SCOND. */
1767 alternative = cc + GET(cc, 1);
1768 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1769 {
1770 count = 1;
1771 srcw[0] = PRIVATE_DATA(cc);
1772 SLJIT_ASSERT(srcw[0] != 0);
1773 }
1774 cc += 1 + LINK_SIZE;
1775 break;
1776
1777 CASE_ITERATOR_PRIVATE_DATA_1
1778 if (PRIVATE_DATA(cc))
1779 {
1780 count = 1;
1781 srcw[0] = PRIVATE_DATA(cc);
1782 }
1783 cc += 2;
1784 #ifdef SUPPORT_UTF
1785 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1786 #endif
1787 break;
1788
1789 CASE_ITERATOR_PRIVATE_DATA_2A
1790 if (PRIVATE_DATA(cc))
1791 {
1792 count = 2;
1793 srcw[0] = PRIVATE_DATA(cc);
1794 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1795 }
1796 cc += 2;
1797 #ifdef SUPPORT_UTF
1798 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1799 #endif
1800 break;
1801
1802 CASE_ITERATOR_PRIVATE_DATA_2B
1803 if (PRIVATE_DATA(cc))
1804 {
1805 count = 2;
1806 srcw[0] = PRIVATE_DATA(cc);
1807 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1808 }
1809 cc += 2 + IMM2_SIZE;
1810 #ifdef SUPPORT_UTF
1811 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1812 #endif
1813 break;
1814
1815 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1816 if (PRIVATE_DATA(cc))
1817 {
1818 count = 1;
1819 srcw[0] = PRIVATE_DATA(cc);
1820 }
1821 cc += 1;
1822 break;
1823
1824 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1825 if (PRIVATE_DATA(cc))
1826 {
1827 count = 2;
1828 srcw[0] = PRIVATE_DATA(cc);
1829 srcw[1] = srcw[0] + sizeof(sljit_sw);
1830 }
1831 cc += 1;
1832 break;
1833
1834 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1835 if (PRIVATE_DATA(cc))
1836 {
1837 count = 2;
1838 srcw[0] = PRIVATE_DATA(cc);
1839 srcw[1] = srcw[0] + sizeof(sljit_sw);
1840 }
1841 cc += 1 + IMM2_SIZE;
1842 break;
1843
1844 case OP_CLASS:
1845 case OP_NCLASS:
1846 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1847 case OP_XCLASS:
1848 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1849 #else
1850 size = 1 + 32 / (int)sizeof(pcre_uchar);
1851 #endif
1852 if (PRIVATE_DATA(cc))
1853 switch(get_class_iterator_size(cc + size))
1854 {
1855 case 1:
1856 count = 1;
1857 srcw[0] = PRIVATE_DATA(cc);
1858 break;
1859
1860 case 2:
1861 count = 2;
1862 srcw[0] = PRIVATE_DATA(cc);
1863 srcw[1] = srcw[0] + sizeof(sljit_sw);
1864 break;
1865
1866 default:
1867 SLJIT_ASSERT_STOP();
1868 break;
1869 }
1870 cc += size;
1871 break;
1872
1873 default:
1874 cc = next_opcode(common, cc);
1875 SLJIT_ASSERT(cc != NULL);
1876 break;
1877 }
1878 break;
1879
1880 case end:
1881 SLJIT_ASSERT_STOP();
1882 break;
1883 }
1884
1885 while (count > 0)
1886 {
1887 count--;
1888 if (save)
1889 {
1890 if (tmp1next)
1891 {
1892 if (!tmp1empty)
1893 {
1894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1895 stackptr += sizeof(sljit_sw);
1896 }
1897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1898 tmp1empty = FALSE;
1899 tmp1next = FALSE;
1900 }
1901 else
1902 {
1903 if (!tmp2empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1909 tmp2empty = FALSE;
1910 tmp1next = TRUE;
1911 }
1912 }
1913 else
1914 {
1915 if (tmp1next)
1916 {
1917 SLJIT_ASSERT(!tmp1empty);
1918 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1919 tmp1empty = stackptr >= stacktop;
1920 if (!tmp1empty)
1921 {
1922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 tmp1next = FALSE;
1926 }
1927 else
1928 {
1929 SLJIT_ASSERT(!tmp2empty);
1930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1931 tmp2empty = stackptr >= stacktop;
1932 if (!tmp2empty)
1933 {
1934 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1935 stackptr += sizeof(sljit_sw);
1936 }
1937 tmp1next = TRUE;
1938 }
1939 }
1940 }
1941 }
1942 while (status != end);
1943
1944 if (save)
1945 {
1946 if (tmp1next)
1947 {
1948 if (!tmp1empty)
1949 {
1950 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1951 stackptr += sizeof(sljit_sw);
1952 }
1953 if (!tmp2empty)
1954 {
1955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1956 stackptr += sizeof(sljit_sw);
1957 }
1958 }
1959 else
1960 {
1961 if (!tmp2empty)
1962 {
1963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1964 stackptr += sizeof(sljit_sw);
1965 }
1966 if (!tmp1empty)
1967 {
1968 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1969 stackptr += sizeof(sljit_sw);
1970 }
1971 }
1972 }
1973 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1974 }
1975
1976 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1977 {
1978 pcre_uchar *end = bracketend(cc);
1979 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1980
1981 /* Assert captures then. */
1982 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1983 current_offset = NULL;
1984 /* Conditional block does not. */
1985 if (*cc == OP_COND || *cc == OP_SCOND)
1986 has_alternatives = FALSE;
1987
1988 cc = next_opcode(common, cc);
1989 if (has_alternatives)
1990 current_offset = common->then_offsets + (cc - common->start);
1991
1992 while (cc < end)
1993 {
1994 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1995 cc = set_then_offsets(common, cc, current_offset);
1996 else
1997 {
1998 if (*cc == OP_ALT && has_alternatives)
1999 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2000 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2001 *current_offset = 1;
2002 cc = next_opcode(common, cc);
2003 }
2004 }
2005
2006 return end;
2007 }
2008
/* The case label groups are not used beyond this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2015
2016 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2017 {
2018 return (value & (value - 1)) == 0;
2019 }
2020
2021 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2022 {
2023 while (list)
2024 {
2025 /* sljit_set_label is clever enough to do nothing
2026 if either the jump or the label is NULL. */
2027 SET_LABEL(list->jump, label);
2028 list = list->next;
2029 }
2030 }
2031
2032 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2033 {
2034 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2035 if (list_item)
2036 {
2037 list_item->next = *list;
2038 list_item->jump = jump;
2039 *list = list_item;
2040 }
2041 }
2042
2043 static void add_stub(compiler_common *common, struct sljit_jump *start)
2044 {
2045 DEFINE_COMPILER;
2046 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2047
2048 if (list_item)
2049 {
2050 list_item->start = start;
2051 list_item->quit = LABEL();
2052 list_item->next = common->stubs;
2053 common->stubs = list_item;
2054 }
2055 }
2056
2057 static void flush_stubs(compiler_common *common)
2058 {
2059 DEFINE_COMPILER;
2060 stub_list* list_item = common->stubs;
2061
2062 while (list_item)
2063 {
2064 JUMPHERE(list_item->start);
2065 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2066 JUMPTO(SLJIT_JUMP, list_item->quit);
2067 list_item = list_item->next;
2068 }
2069 common->stubs = NULL;
2070 }
2071
2072 static void add_label_addr(compiler_common *common)
2073 {
2074 DEFINE_COMPILER;
2075 label_addr_list *label_addr;
2076
2077 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2078 if (label_addr == NULL)
2079 return;
2080 label_addr->label = LABEL();
2081 label_addr->addr = common->read_only_data_ptr;
2082 label_addr->next = common->label_addrs;
2083 common->label_addrs = label_addr;
2084 common->read_only_data_ptr++;
2085 }
2086
2087 static SLJIT_INLINE void count_match(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090
2091 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093 }
2094
2095 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2096 {
2097 /* May destroy all locals and registers except TMP2. */
2098 DEFINE_COMPILER;
2099
2100 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2101 #ifdef DESTROY_REGISTERS
2102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2103 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2104 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2107 #endif
2108 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2109 }
2110
2111 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2112 {
2113 DEFINE_COMPILER;
2114 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2115 }
2116
2117 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2118 {
2119 DEFINE_COMPILER;
2120 struct sljit_label *loop;
2121 int i;
2122
2123 /* At this point we can freely use all temporary registers. */
2124 SLJIT_ASSERT(length > 1);
2125 /* TMP1 returns with begin - 1. */
2126 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2127 if (length < 8)
2128 {
2129 for (i = 1; i < length; i++)
2130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2131 }
2132 else
2133 {
2134 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2135 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2136 loop = LABEL();
2137 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2139 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2140 }
2141 }
2142
2143 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2144 {
2145 DEFINE_COMPILER;
2146 struct sljit_label *loop;
2147 int i;
2148
2149 SLJIT_ASSERT(length > 1);
2150 /* OVECTOR(1) contains the "string begin - 1" constant. */
2151 if (length > 2)
2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2153 if (length < 8)
2154 {
2155 for (i = 2; i < length; i++)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2157 }
2158 else
2159 {
2160 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2161 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2162 loop = LABEL();
2163 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2164 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2165 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2166 }
2167
2168 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2169 if (common->mark_ptr != 0)
2170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2171 if (common->control_head_ptr != 0)
2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2176 }
2177
2178 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2179 {
2180 while (current != NULL)
2181 {
2182 switch (current[-2])
2183 {
2184 case type_then_trap:
2185 break;
2186
2187 case type_mark:
2188 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2189 return current[-4];
2190 break;
2191
2192 default:
2193 SLJIT_ASSERT_STOP();
2194 break;
2195 }
2196 current = (sljit_sw*)current[-1];
2197 }
2198 return -1;
2199 }
2200
2201 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2202 {
2203 DEFINE_COMPILER;
2204 struct sljit_label *loop;
2205 struct sljit_jump *early_quit;
2206
2207 /* At this point we can freely use all registers. */
2208 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2210
2211 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2212 if (common->mark_ptr != 0)
2213 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2214 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2215 if (common->mark_ptr != 0)
2216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2217 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2218 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2219 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2220 /* Unlikely, but possible */
2221 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2222 loop = LABEL();
2223 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2224 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2225 /* Copy the integer value to the output buffer */
2226 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2227 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2228 #endif
2229 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2231 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2232 JUMPHERE(early_quit);
2233
2234 /* Calculate the return value, which is the maximum ovector value. */
2235 if (topbracket > 1)
2236 {
2237 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2238 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2239
2240 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2241 loop = LABEL();
2242 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2243 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2244 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2245 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2246 }
2247 else
2248 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2249 }
2250
2251 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2252 {
/* Emits the exit sequence used when a partial match is reported. The
generated code puts PCRE_ERROR_PARTIAL into the return register, writes as
many entries of the jit_arguments offsets array as real_offset_count allows
(none when it is below 2, the entries at byte offsets 0 and sizeof(int) when
it is 2, and additionally the one at 2 * sizeof(int) when it is 3 or more),
and finally jumps to the quit label.
  common  compiler state; mode, start_ptr, start_used_ptr and hit_start are read
  quit    label the generated code jumps to */
2253 DEFINE_COMPILER;
2254 struct sljit_jump *jump;
2255
/* STR_END is overwritten in place below (SAVED_REG2 is used as a temporary),
so the code relies on STR_END being SAVED_REG2. */
2256 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2257 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2258 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2259
/* SCRATCH_REG2 = arguments. Leave through quit without storing anything when
fewer than two offset entries are available. */
2260 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2262 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2263 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2264
2265 /* Store match begin and end. */
/* SAVED_REG1 = subject begin, SCRATCH_REG2 = offsets array. The order matters:
begin is loaded first because SCRATCH_REG2 is then overwritten. */
2266 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2267 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2268
/* Third entry, only when real_offset_count >= 3: the pointer saved in
start_ptr (hard mode) or in the slot following hit_start (soft mode), made
relative to begin and converted from bytes to character units. */
2269 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2270 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2271 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2272 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2273 #endif
2274 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2275 JUMPHERE(jump);
2276
/* SCRATCH_REG3 = pointer reported in the first entry (start_used_ptr in hard
mode, hit_start in soft mode). The second entry is STR_END - begin. */
2277 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2278 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2279 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2280 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2281 #endif
2282 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2283
/* First entry: the pointer loaded above, relative to begin, in character units. */
2284 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2285 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2286 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2287 #endif
2288 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2289
2290 JUMPTO(SLJIT_JUMP, quit);
2291 }
2292
2293 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2294 {
/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR is
below the stored value. Code is generated only in the two partial matching
modes; in any other mode this function emits nothing. */
2295 /* May destroy TMP1. */
2296 DEFINE_COMPILER;
2297 struct sljit_jump *jump;
2298
2299 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2300 {
2301 /* The value of -1 must be kept for start_used_ptr! */
/* Adding one maps the -1 marker to 0, so the single comparison below also
skips the store for the marker value. */
2302 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2303 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2304 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2305 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2307 JUMPHERE(jump);
2308 }
2309 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2310 {
/* No marker handling here: the stored value is compared with STR_PTR
directly and the store is skipped when start_used_ptr <= STR_PTR. */
2311 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2313 JUMPHERE(jump);
2314 }
2315 }
2316
2317 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2318 {
2319 /* Detects if the character has an othercase. */
2320 unsigned int c;
2321
2322 #ifdef SUPPORT_UTF
2323 if (common->utf)
2324 {
2325 GETCHAR(c, cc);
2326 if (c > 127)
2327 {
2328 #ifdef SUPPORT_UCP
2329 return c != UCD_OTHERCASE(c);
2330 #else
2331 return FALSE;
2332 #endif
2333 }
2334 #ifndef COMPILE_PCRE8
2335 return common->fcc[c] != c;
2336 #endif
2337 }
2338 else
2339 #endif
2340 c = *cc;
2341 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2342 }
2343
2344 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2345 {
2346 /* Returns with the othercase. */
2347 #ifdef SUPPORT_UTF
2348 if (common->utf && c > 127)
2349 {
2350 #ifdef SUPPORT_UCP
2351 return UCD_OTHERCASE(c);
2352 #else
2353 return c;
2354 #endif
2355 }
2356 #endif
2357 return TABLE_GET(c, common->fcc, c);
2358 }
2359
2360 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2361 {
2362 /* Detects if the character and its othercase has only 1 bit difference. */
/* Returns 0 when the character at cc and its other case differ in more than
one bit. Otherwise the result is (index << 8) | mask, where mask is the
differing bit reduced to a value below 256 and index is a small position
number computed below. NOTE(review): index presumably selects the byte of the
encoded character that holds the bit - confirm against the callers.
The character must have an other case (asserted). */
2363 unsigned int c, oc, bit;
2364 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2365 int n;
2366 #endif
2367
/* Fetch the character (c) and its other case (oc) the same way
char_othercase() does. */
2368 #ifdef SUPPORT_UTF
2369 if (common->utf)
2370 {
2371 GETCHAR(c, cc);
2372 if (c <= 127)
2373 oc = common->fcc[c];
2374 else
2375 {
2376 #ifdef SUPPORT_UCP
2377 oc = UCD_OTHERCASE(c);
2378 #else
2379 oc = c;
2380 #endif
2381 }
2382 }
2383 else
2384 {
2385 c = *cc;
2386 oc = TABLE_GET(c, common->fcc, c);
2387 }
2388 #else
2389 c = *cc;
2390 oc = TABLE_GET(c, common->fcc, c);
2391 #endif
2392
2393 SLJIT_ASSERT(c != oc);
2394
2395 bit = c ^ oc;
2396 /* Optimized for English alphabet. */
2397 if (c <= 127 && bit == 0x20)
2398 return (0 << 8) | 0x20;
2399
2400 /* Since c != oc, they must have at least 1 bit difference. */
2401 if (!is_powerof2(bit))
2402 return 0;
2403
2404 #if defined COMPILE_PCRE8
2405
2406 #ifdef SUPPORT_UTF
2407 if (common->utf && c > 127)
2408 {
/* Start from the number of extra bytes of the sequence and move towards the
front six bits at a time until the differing bit is inside the low six bits. */
2409 n = GET_EXTRALEN(*cc);
2410 while ((bit & 0x3f) == 0)
2411 {
2412 n--;
2413 bit >>= 6;
2414 }
2415 return (n << 8) | bit;
2416 }
2417 #endif /* SUPPORT_UTF */
2418 return (0 << 8) | bit;
2419
2420 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2421
2422 #ifdef SUPPORT_UTF
2423 if (common->utf && c > 65535)
2424 {
/* A bit at position 10 or above is shifted down by 10 and then handled by
the common return below (index 0 or 1); a bit in the low ten positions is
reported with index 2 or 3. */
2425 if (bit >= (1 << 10))
2426 bit >>= 10;
2427 else
2428 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2429 }
2430 #endif /* SUPPORT_UTF */
2431 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2432
2433 #endif /* COMPILE_PCRE[8|16|32] */
2434 }
2435
2436 static void check_partial(compiler_common *common, BOOL force)
2437 {
2438 /* Checks whether a partial match has occurred. Does not modify registers. */
/* Emits nothing in JIT_COMPILE mode. In the partial modes the generated code
performs the partial match action (soft mode: store 0 into hit_start; hard
mode: jump to the partial match exit) unless a guard skips it:
  force == FALSE  skipped when start_used_ptr >= STR_PTR
  force == TRUE   skipped only in soft mode, when start_used_ptr == -1
force may only be TRUE in a partial mode (asserted). */
2439 DEFINE_COMPILER;
2440 struct sljit_jump *jump = NULL;
2441
2442 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2443
2444 if (common->mode == JIT_COMPILE)
2445 return;
2446
2447 if (!force)
2448 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2449 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2451
2452 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2454 else
2455 {
/* Jump straight to the exit label when it already exists, otherwise queue
the jump on the partialmatch list. */
2456 if (common->partialmatchlabel != NULL)
2457 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458 else
2459 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460 }
2461
2462 if (jump != NULL)
2463 JUMPHERE(jump);
2464 }
2465
2466 static void check_str_end(compiler_common *common, jump_list **end_reached)
2467 {
2468 /* Does not affect registers. Usually used in a tight spot. */
/* Emits an end-of-subject test. Jumps taken when STR_PTR >= STR_END are
appended to the end_reached list. In the partial modes the generated code
additionally handles a partial match when the end is reached and
start_used_ptr < STR_PTR: soft mode stores 0 into hit_start and then goes to
end_reached, hard mode jumps to the partial match exit instead. */
2469 DEFINE_COMPILER;
2470 struct sljit_jump *jump;
2471
2472 if (common->mode == JIT_COMPILE)
2473 {
2474 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2475 return;
2476 }
2477
/* Fast path: skip everything below while the end has not been reached. */
2478 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2479 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2480 {
2481 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2483 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2484 }
2485 else
2486 {
2487 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2488 if (common->partialmatchlabel != NULL)
2489 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2490 else
2491 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2492 }
2493 JUMPHERE(jump);
2494 }
2495
2496 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2497 {
/* Emits an end-of-subject test whose failure path is the backtracks list.
In JIT_COMPILE mode the only code generated is a jump to backtracks when
STR_PTR >= STR_END. In the partial modes, reaching the end with
start_used_ptr >= STR_PTR also goes to backtracks; otherwise soft mode stores
0 into hit_start and then goes to backtracks, while hard mode jumps to the
partial match exit. */
2498 DEFINE_COMPILER;
2499 struct sljit_jump *jump;
2500
2501 if (common->mode == JIT_COMPILE)
2502 {
2503 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2504 return;
2505 }
2506
2507 /* Partial matching mode. */
/* The first jump skips the whole sequence while STR_PTR < STR_END. */
2508 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2509 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2510 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2511 {
2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2513 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2514 }
2515 else
2516 {
2517 if (common->partialmatchlabel != NULL)
2518 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2519 else
2520 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2521 }
2522 JUMPHERE(jump);
2523 }
2524
2525 static void peek_char(compiler_common *common, pcre_uint32 max)
2526 {
2527 /* Reads the character into TMP1, keeps STR_PTR.
2528 Does not check STR_END. TMP2 Destroyed. */
2529 DEFINE_COMPILER;
2530 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2531 struct sljit_jump *jump;
2532 #endif
2533
2534 SLJIT_UNUSED_ARG(max);
2535
2536 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2537 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2538 if (common->utf)
2539 {
2540 if (max < 128) return;
2541
2542 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2544 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2545 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546 JUMPHERE(jump);
2547 }
2548 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2549
2550 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2551 if (common->utf)
2552 {
2553 if (max < 0xd800) return;
2554
2555 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2556 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2557 /* TMP2 contains the high surrogate. */
2558 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2559 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2560 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2563 JUMPHERE(jump);
2564 }
2565 #endif
2566 }
2567
2568 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569
2570 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2571 {
2572 /* Tells whether the character codes below 128 are enough
2573 to determine a match. */
2574 const pcre_uint8 value = nclass ? 0xff : 0;
2575 const pcre_uint8* end = bitset + 32;
2576
2577 bitset += 16;
2578 do
2579 {
2580 if (*bitset++ != value)
2581 return FALSE;
2582 }
2583 while (bitset < end);
2584 return TRUE;
2585 }
2586
2587 static void read_char7_type(compiler_common *common, BOOL full_read)
2588 {
2589 /* Reads the precise character type of a character into TMP1, if the character
2590 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2591 full_read argument tells whether characters above max are accepted or not. */
/* Only valid in UTF mode (asserted). TMP2 is used as a temporary.
NOTE(review): the ctypes load below is unconditional, so the "zero" result for
bytes of 128 and above depends on the contents of the ctypes table - confirm. */
2592 DEFINE_COMPILER;
2593 struct sljit_jump *jump;
2594
2595 SLJIT_ASSERT(common->utf);
2596
/* TMP2 = first byte; STR_PTR is advanced past it. */
2597 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599
2600 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601
2602 if (full_read)
2603 {
/* For a lead byte (>= 0xc0) also skip the rest of the sequence: the number of
additional bytes comes from utf8_table4, indexed by lead byte - 0xc0. */
2604 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2605 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2607 JUMPHERE(jump);
2608 }
2609 }
2610
2611 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612
2613 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2614 {
2615 /* Reads the precise value of a character into TMP1, if the character is
2616 between min and max (c >= min && c <= max). Otherwise it returns with a value
2617 outside the range. Does not check STR_END. */
/* The [min, max] range only selects how much decoding code is generated.
STR_PTR is always advanced past the first code unit. When update_str_ptr is
TRUE the generated code also moves STR_PTR past the remaining units of the
character, even on the paths that do not decode it. TMP2 is used as a
temporary, and RETURN_ADDR as well in the UTF-8 paths. min <= max is
asserted. */
2618 DEFINE_COMPILER;
2619 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2620 struct sljit_jump *jump;
2621 #endif
2622 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623 struct sljit_jump *jump2;
2624 #endif
2625
2626 SLJIT_UNUSED_ARG(update_str_ptr);
2627 SLJIT_UNUSED_ARG(min);
2628 SLJIT_UNUSED_ARG(max);
2629 SLJIT_ASSERT(min <= max);
2630
2631 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633
2634 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2635 if (common->utf)
2636 {
/* A single byte is enough when only values below 128 matter and the caller
does not need STR_PTR moved past the whole character. */
2637 if (max < 128 && !update_str_ptr) return;
2638
/* Bytes below 0xc0 need no further processing; every branch below runs only
for a lead byte. */
2639 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2640 if (min >= 0x10000)
2641 {
/* Only four byte sequences can be in range: decode when the lead byte is
0xf0..0xf7, otherwise leave a non-decoded value in TMP1. With update_str_ptr
the number of trailing bytes is fetched from utf8_table4 into RETURN_ADDR
and added at the end; without it STR_PTR is advanced by 3 on the decoding
path only. */
2642 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2643 if (update_str_ptr)
2644 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2646 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2652 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2655 if (!update_str_ptr)
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2657 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660 JUMPHERE(jump2);
2661 if (update_str_ptr)
2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2663 }
2664 else if (min >= 0x800 && max <= 0xffff)
2665 {
/* Only three byte sequences can be in range: decode when the lead byte is
0xe0..0xef. STR_PTR handling mirrors the four byte case above. */
2666 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2667 if (update_str_ptr)
2668 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2670 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2675 if (!update_str_ptr)
2676 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 JUMPHERE(jump2);
2681 if (update_str_ptr)
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683 }
/* General case: call the shared decoder, the shorter utfreadchar16 variant
when nothing at or above 0x10000 is needed. */
2684 else if (max >= 0x800)
2685 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2686 else if (max < 128)
2687 {
/* Reached only with update_str_ptr set (see the early return above): the
value is not decoded, STR_PTR just skips the trailing bytes. */
2688 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2690 }
2691 else
2692 {
/* 128 <= max < 0x800: combine the first two bytes as if the sequence were two
bytes long. The lead byte of a longer sequence keeps bit 0x20 after the 0x3f
mask, which yields a value of 0x800 or more, i.e. above max. */
2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694 if (!update_str_ptr)
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696 else
2697 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2700 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2701 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2702 if (update_str_ptr)
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2704 }
2705 JUMPHERE(jump);
2706 }
2707 #endif
2708
2709 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2710 if (common->utf)
2711 {
2712 if (max >= 0x10000)
2713 {
/* Full decode: a unit in 0xd800..0xdbff is combined with the following unit
(STR_PTR already points to it) and STR_PTR is advanced past that unit. */
2714 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2715 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2716 /* TMP2 contains the high surrogate. */
2717 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2718 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2719 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2722 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723 JUMPHERE(jump);
2724 return;
2725 }
2726
2727 if (max < 0xd800 && !update_str_ptr) return;
2728
2729 /* Skip low surrogate if necessary. */
/* No decoding here: for a high surrogate STR_PTR is advanced by one more unit
when update_str_ptr is set, and TMP1 is replaced by 0x10000 (a value above
max) when max >= 0xd800. */
2730 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2732 if (update_str_ptr)
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2734 if (max >= 0xd800)
2735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2736 JUMPHERE(jump);
2737 }
2738 #endif
2739 }
2740
2741 static SLJIT_INLINE void read_char(compiler_common *common)
2742 {
/* Convenience wrapper: reads a character with the widest range
(0 .. READ_CHAR_MAX) and asks read_char_range() to update STR_PTR. */
2743 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744 }
2745
2746 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2747 {
2748 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the common->ctypes table, which only covers values
0..255; zero is produced on the paths below that detect a larger value.
TMP2 is used as a temporary. STR_PTR always moves past the first code unit;
the handling of the remaining units of a multi-unit character differs per
branch (see below). */
2749 DEFINE_COMPILER;
2750 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2751 struct sljit_jump *jump;
2752 #endif
2753 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2754 struct sljit_jump *jump2;
2755 #endif
2756
2757 SLJIT_UNUSED_ARG(update_str_ptr);
2758
2759 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761
2762 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2763 if (common->utf)
2764 {
2765 /* This can be an extra read in some situations, but hopefully
2766 it is needed in most cases. */
2767 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2768 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2769 if (!update_str_ptr)
2770 {
/* Inline path: combine the lead byte with the next byte as a two byte
sequence (STR_PTR moves one byte further), then use type 0 when the result
is above 255 and the ctypes entry otherwise. */
2771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2774 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2775 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2776 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2778 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2780 JUMPHERE(jump2);
2781 }
2782 else
/* Out-of-line path: the utfreadtype8 helper computes the type and moves
STR_PTR past the whole sequence. */
2783 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2784 JUMPHERE(jump);
2785 return;
2786 }
2787 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2788
2789 #if !defined COMPILE_PCRE8
2790 /* The ctypes array contains only 256 values. */
2791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2792 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2793 #endif
2794 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795 #if !defined COMPILE_PCRE8
2796 JUMPHERE(jump);
2797 #endif
2798
2799 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2800 if (common->utf && update_str_ptr)
2801 {
2802 /* Skip low surrogate if necessary. */
/* TMP2 still holds the first unit; when it is in 0xd800..0xdbff one more unit
is skipped. */
2803 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2804 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806 JUMPHERE(jump);
2807 }
2808 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2809 }
2810
2811 static void skip_char_back(compiler_common *common)
2812 {
2813 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Without UTF handling this is a single subtraction of one code unit. The
UTF-8 and UTF-16 variants additionally step over continuation bytes and a
low surrogate, respectively. */
2814 DEFINE_COMPILER;
2815 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2816 #if defined COMPILE_PCRE8
2817 struct sljit_label *label;
2818
2819 if (common->utf)
2820 {
/* Loop: step back one byte and repeat while the byte just stepped over has
the form 10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
2821 label = LABEL();
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2823 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2825 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2826 return;
2827 }
2828 #elif defined COMPILE_PCRE16
2829 if (common->utf)
2830 {
2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2832 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2833 /* Skip low surrogate if necessary. */
/* Branch-free form: TMP1 becomes 1 when (unit & 0xfc00) == 0xdc00 and 0
otherwise; shifted left by one it is the extra byte count to subtract. */
2834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2836 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2837 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2838 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2839 return;
2840 }
2841 #endif /* COMPILE_PCRE[8|16] */
2842 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2843 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 }
2845
2846 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2847 {
2848 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
/* Emits the newline test for the given newline type and appends the resulting
jumps to backtracks.
  nltype       NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED
  backtracks   list receiving the generated jumps
  jumpifmatch  TRUE: the jumps are taken when TMP1 is a newline;
               FALSE: the jumps are taken when it is not */
2849 DEFINE_COMPILER;
2850 struct sljit_jump *jump;
2851
2852 if (nltype == NLTYPE_ANY)
2853 {
/* The shared anynewline helper is called; its result is tested through the
zero flag. */
2854 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2855 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2856 }
2857 else if (nltype == NLTYPE_ANYCRLF)
2858 {
2859 if (jumpifmatch)
2860 {
2861 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2863 }
2864 else
2865 {
/* CR skips the second test locally; anything that is neither CR nor NL goes
to backtracks. */
2866 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2867 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2868 JUMPHERE(jump);
2869 }
2870 }
2871 else
2872 {
/* Fixed newline consisting of a single character (value below 256). */
2873 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2874 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2875 }
2876 }
2877
2878 #ifdef SUPPORT_UTF
2879
2880 #if defined COMPILE_PCRE8
2881 static void do_utfreadchar(compiler_common *common)
2882 {
2883 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2884 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emits the body of a fast-call helper (entered with sljit_emit_fast_enter,
left with sljit_emit_fast_return). On entry STR_PTR must point to the byte
after the lead byte; on return it points after the last byte of the
character. Sequences of two, three and four bytes are handled. */
2885 DEFINE_COMPILER;
2886 struct sljit_jump *jump;
2887
2888 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((lead & 0x3f) << 6) | (second byte & 0x3f). */
2889 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2891 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2892 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2893 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894
2895 /* Searching for the first zero. */
/* Bit 0x800 of the merged value is bit 0x20 of the lead byte: it is clear for
a two byte sequence. */
2896 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2897 jump = JUMP(SLJIT_C_NOT_ZERO);
2898 /* Two byte sequence. */
2899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2901 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2902
2903 JUMPHERE(jump);
/* Remove the length marker bit and merge the third byte. */
2904 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2905 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2906 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2907 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2908 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2909
/* Bit 0x10000 is now bit 0x10 of the lead byte: clear for a three byte
sequence. */
2910 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2911 jump = JUMP(SLJIT_C_NOT_ZERO);
2912 /* Three byte sequence. */
2913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2915 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2916
2917 /* Four byte sequence. */
2918 JUMPHERE(jump);
2919 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2921 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2922 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2923 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2924 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927 }
2928
2929 static void do_utfreadchar16(compiler_common *common)
2930 {
2931 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2932 of the character (>= 0xc0). Return value in TMP1. */
/* Reduced variant of do_utfreadchar(): only two and three byte sequences are
decoded exactly and no length is returned (TMP2 is just a temporary). On
entry STR_PTR points after the lead byte; on return it points after the
character. For a four byte sequence the marker bit 0x400 is left in place,
so the returned value is 0x10000 or more rather than the real code point. */
2933 DEFINE_COMPILER;
2934 struct sljit_jump *jump;
2935
2936 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2937 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942
2943 /* Searching for the first zero. */
2944 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945 jump = JUMP(SLJIT_C_NOT_ZERO);
2946 /* Two byte sequence. */
2947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949
2950 JUMPHERE(jump);
/* Bit 0x400 is bit 0x10 of the lead byte (set for a four byte sequence). The
flag is materialised as 0 or 1 in TMP2 and added to STR_PTR, so a four byte
sequence is skipped completely without an extra branch. */
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953 /* This code runs only in 8 bit mode. No need to shift the value. */
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960 /* Three byte sequence. */
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963 }
2964
2965 static void do_utfreadtype8(compiler_common *common)
2966 {
2967 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2968 of the character (>= 0xc0). Return value in TMP1. */
/* Emits the body of a fast-call helper. On entry STR_PTR points after the
lead byte; on return it points after the character. The result is the
ctypes entry for characters up to 255 and zero for everything else. */
2969 DEFINE_COMPILER;
2970 struct sljit_jump *jump;
2971 struct sljit_jump *compare;
2972
2973 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2974
/* Bit 0x20 of the lead byte is set for sequences longer than two bytes. */
2975 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2976 jump = JUMP(SLJIT_C_NOT_ZERO);
2977 /* Two byte sequence. */
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981 /* The upper 5 bits are known at this point. */
/* A payload above 3 in the lead byte means the character is above 255. */
2982 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2983 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2986 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988
2989 JUMPHERE(compare);
2990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2992
2993 /* We only have types for characters less than 256. */
/* Longer sequences: type 0, and STR_PTR skips the trailing bytes whose count
comes from utf8_table4 (indexed by lead byte - 0xc0). */
2994 JUMPHERE(jump);
2995 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999 }
3000
3001 #endif /* COMPILE_PCRE8 */
3002
3003 #endif /* SUPPORT_UTF */
3004
3005 #ifdef SUPPORT_UCP
3006
3007 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character value into a block number (value >>
UCD_BLOCK_SHIFT) and an index inside the block (value & UCD_BLOCK_MASK). */
3008 #define UCD_BLOCK_MASK 127
3009 #define UCD_BLOCK_SHIFT 7
3010
3011 static void do_getucd(compiler_common *common)
3012 {
3013 /* Search the UCD record for the character comes in TMP1.
3014 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emits the body of a fast-call helper performing the two stage table lookup:
  block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]
  record = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
  type   = ucd_records[record].chartype
The code depends on the block size and on the record size checked by the
assert below. */
3015 DEFINE_COMPILER;
3016
3017 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3018
3019 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3020 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3021 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3023 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3024 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* NOTE(review): the last argument of the two SLJIT_MEM2 loads is presumably
the shift applied to the index register (1 for the 16 bit stage2 entries, 3
for the 8 byte records) - confirm against the sljit documentation. */
3025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3026 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3028 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3030 }
3031 #endif
3032
3033 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3034 {
/* Emits the code that moves STR_PTR to the next starting position and returns
the label of that code (mainloop). The emitted sequence begins with a jump
(start) over the advance code, so falling into it does not move STR_PTR;
jumping to the returned label advances by one character.
  hascrorlf  together with firstline, disables the two unit newline skipping
  firstline  when TRUE, code locating the end of the first line is emitted
             first and its result is stored in the first_line_end local */
3035 DEFINE_COMPILER;
3036 struct sljit_label *mainloop;
3037 struct sljit_label *newlinelabel = NULL;
3038 struct sljit_jump *start;
3039 struct sljit_jump *end = NULL;
3040 struct sljit_jump *nl = NULL;
3041 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3042 struct sljit_jump *singlechar;
3043 #endif
3044 jump_list *newline = NULL;
3045 BOOL newlinecheck = FALSE;
3046 BOOL readuchar = FALSE;
3047
/* A newline that may consist of two code units is stepped over as a whole
when advancing, unless that is disabled by hascrorlf or firstline. */
3048 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3049 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3050 newlinecheck = TRUE;
3051
3052 if (firstline)
3053 {
3054 /* Search for the end of the first line. */
/* STR_PTR is saved in TMP3 for the duration of the scan and restored after it. */
3055 SLJIT_ASSERT(common->first_line_end != 0);
3056 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3057
3058 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3059 {
/* Two unit fixed newline: scan unit by unit, comparing the previous unit with
the high byte of common->newline and the current one with its low byte. The
stored end is the position of the first unit of the pair (or of the last
unit before STR_END when no newline is found). */
3060 mainloop = LABEL();
3061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3062 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3064 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3065 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3067 JUMPHERE(end);
3068 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3069 }
3070 else
3071 {
/* Other newline types: the position is stored before every character read,
so when a newline is found first_line_end holds the start of that newline. */
3072 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3073 mainloop = LABEL();
3074 /* Continual stores do not cause data dependency. */
3075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3076 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077 check_newlinechar(common, common->nltype, &newline, TRUE);
3078 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079 JUMPHERE(end);
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3081 set_jumps(newline, LABEL());
3082 }
3083
3084 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3085 }
3086
/* Entering the emitted code from the top jumps over the advance code. */
3087 start = JUMP(SLJIT_JUMP);
3088
3089 if (newlinecheck)
3090 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: advance one unit and, unless the end is reached, one more
when the next unit equals the low byte (the comparison result, 0 or 1, is
scaled to bytes and added to STR_PTR). */
3091 newlinelabel = LABEL();
3092 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3093 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3094 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3096 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3097 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3098 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3099 #endif
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3101 nl = JUMP(SLJIT_JUMP);
3102 }
3103
/* This is the label returned to the caller. */
3104 mainloop = LABEL();
3105
3106 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is loaded only when it is needed: for the UTF length
computation or for the newline check. */
3107 #ifdef SUPPORT_UTF
3108 if (common->utf) readuchar = TRUE;
3109 #endif
3110 if (newlinecheck) readuchar = TRUE;
3111
3112 if (readuchar)
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3114
3115 if (newlinecheck)
3116 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3117
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3120 #if defined COMPILE_PCRE8
3121 if (common->utf)
3122 {
/* UTF-8: for a lead byte, skip the trailing bytes (count from utf8_table4). */
3123 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3124 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3126 JUMPHERE(singlechar);
3127 }
3128 #elif defined COMPILE_PCRE16
3129 if (common->utf)
3130 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 (high surrogate) skip one more unit;
the 0/1 flag is shifted left by one to obtain the byte count. */
3131 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3132 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3134 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3135 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3136 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3137 JUMPHERE(singlechar);
3138 }
3139 #endif /* COMPILE_PCRE[8|16] */
3140 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* All paths meet here: the initial jump and both exits of the newline code. */
3141 JUMPHERE(start);
3142
3143 if (newlinecheck)
3144 {
3145 JUMPHERE(end);
3146 JUMPHERE(nl);
3147 }
3148
3149 return mainloop;
3150 }
3151
/* Scans the compiled pattern at cc and records the characters a match must
   start with. Every recorded position uses two entries of chars: chars[0] is
   the character value and chars[1] is a mask of the bits which may differ
   (fast_forward_first_n_chars ORs the mask into the subject character before
   comparing it with the value). A slot still holding NOTACHAR is initialized;
   any other slot is merged with the new character, which is how the
   alternatives scanned by the recursive calls share the same slots.

   max_chars is the number of slots still available. The return value is the
   number of positions counted by this call. Note that the slot written just
   before max_chars reaches zero is not included in the count. */
3152 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3153 {
3154 /* Recursive function, which scans prefix literals. */
3155 int len, repeat, len_save, consumed = 0;
3156 pcre_uint32 caseless, chr, mask;
3157 pcre_uchar *alternative, *cc_save;
3158 BOOL last, any;
3159
3160 repeat = 1;
3161 while (TRUE)
3162 {
/* last: stop after the current item has been recorded.
   any: the item is a character class or type, not a literal. */
3163 last = TRUE;
3164 any = FALSE;
3165 caseless = 0;
3166 switch (*cc)
3167 {
3168 case OP_CHARI:
3169 caseless = 1; /* Fall through. */
3170 case OP_CHAR:
/* A single literal; scanning continues with the next opcode. */
3171 last = FALSE;
3172 cc++;
3173 break;
3174
3175 case OP_SOD:
3176 case OP_SOM:
3177 case OP_SET_SOM:
3178 case OP_NOT_WORD_BOUNDARY:
3179 case OP_WORD_BOUNDARY:
3180 case OP_EODN:
3181 case OP_EOD:
3182 case OP_CIRC:
3183 case OP_CIRCM:
3184 case OP_DOLL:
3185 case OP_DOLLM:
3186 /* Zero width assertions. */
3187 cc++;
3188 continue;
3189
3190 case OP_PLUS:
3191 case OP_MINPLUS:
3192 case OP_POSPLUS:
/* last stays TRUE: the character is recorded once and the scan ends. */
3193 cc++;
3194 break;
3195
3196 case OP_EXACTI:
3197 caseless = 1; /* Fall through. */
3198 case OP_EXACT:
/* The literal is recorded repeat times; scanning continues afterwards. */
3199 repeat = GET2(cc, 1);
3200 last = FALSE;
3201 cc += 1 + IMM2_SIZE;
3202 break;
3203
3204 case OP_PLUSI:
3205 case OP_MINPLUSI:
3206 case OP_POSPLUSI:
3207 caseless = 1;
3208 cc++;
3209 break;
3210
3211 case OP_KET:
3212 cc += 1 + LINK_SIZE;
3213 continue;
3214
3215 case OP_ALT:
/* Jump over this alternative using its link field. */
3216 cc += GET(cc, 1);
3217 continue;
3218
3219 case OP_ONCE:
3220 case OP_ONCE_NC:
3221 case OP_BRA:
3222 case OP_BRAPOS:
3223 case OP_CBRA:
3224 case OP_CBRAPOS:
/* Merge every further alternative into the same slots. The value returned
   by the recursive call becomes the new slot limit. */
3225 alternative = cc + GET(cc, 1);
3226 while (*alternative == OP_ALT)
3227 {
3228 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3229 if (max_chars == 0)
3230 return consumed;
3231 alternative += GET(alternative, 1);
3232 }
3233
/* Continue with the first alternative: skip the bracket header, which has
   an extra IMM2_SIZE field for the capturing variants. */
3234 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3235 cc += IMM2_SIZE;
3236 cc += 1 + LINK_SIZE;
3237 continue;
3238
3239 case OP_CLASS:
3240 case OP_NCLASS:
/* Skip the opcode and the 32 byte long bitmap. */
3241 any = TRUE;
3242 cc += 1 + 32 / sizeof(pcre_uchar);
3243 break;
3244
3245 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3246 case OP_XCLASS:
3247 any = TRUE;
3248 cc += GET(cc, 1);
3249 break;
3250 #endif
3251
3252 case OP_NOT_DIGIT:
3253 case OP_DIGIT:
3254 case OP_NOT_WHITESPACE:
3255 case OP_WHITESPACE:
3256 case OP_NOT_WORDCHAR:
3257 case OP_WORDCHAR:
3258 case OP_ANY:
3259 case OP_ALLANY:
3260 any = TRUE;
3261 cc++;
3262 break;
3263
3264 #ifdef SUPPORT_UCP
3265 case OP_NOTPROP:
3266 case OP_PROP:
3267 any = TRUE;
3268 cc += 1 + 2;
3269 break;
3270 #endif
3271
3272 case OP_TYPEEXACT:
/* The repeat count is kept for the opcode which follows. */
3273 repeat = GET2(cc, 1);
3274 cc += 1 + IMM2_SIZE;
3275 continue;
3276
3277 default:
/* Any other opcode ends the prefix. */
3278 return consumed;
3279 }
3280
3281 if (any)
3282 {
/* The scan stops here in UTF mode (presumably because a character may
   occupy more than one code unit - confirm). */
3283 #ifdef SUPPORT_UTF
3284 if (common->utf) return consumed;
3285 #endif
/* Otherwise every bit of the code unit is marked as "may differ". */
3286 #if defined COMPILE_PCRE8
3287 mask = 0xff;
3288 #elif defined COMPILE_PCRE16
3289 mask = 0xffff;
3290 #elif defined COMPILE_PCRE32
3291 mask = 0xffffffff;
3292 #else
3293 SLJIT_ASSERT_STOP();
3294 #endif
3295
3296 do
3297 {
3298 chars[0] = mask;
3299 chars[1] = mask;
3300
3301 if (--max_chars == 0)
3302 return consumed;
3303 consumed++;
3304 chars += 2;
3305 }
3306 while (--repeat > 0);
3307
3308 repeat = 1;
3309 continue;
3310 }
3311
/* Literal character: len is its length in code units. */
3312 len = 1;
3313 #ifdef SUPPORT_UTF
3314 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3315 #endif
3316
3317 if (caseless != 0 && char_has_othercase(common, cc))
3318 {
3319 caseless = char_get_othercase_bit(common, cc);
3320 if (caseless == 0)
3321 return consumed;
/* Repack the helper result: the low byte becomes the value of the len
   countdown at which the mask applies, the bits above it hold the mask.
   (The input layout is defined by char_get_othercase_bit - confirm there.) */
3322 #ifdef COMPILE_PCRE8
3323 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3324 #else
3325 if ((caseless & 0x100) != 0)
3326 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3327 else
3328 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3329 #endif
3330 }
3331 else
3332 caseless = 0;
3333
/* The same code units are recorded again for every repetition. */
3334 len_save = len;
3335 cc_save = cc;
3336 while (TRUE)
3337 {
3338 do
3339 {
3340 chr = *cc;
3341 #ifdef COMPILE_PCRE32
3342 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3343 return consumed;
3344 #endif
3345 mask = 0;
/* This code unit carries the case bit. */
3346 if ((pcre_uint32)len == (caseless & 0xff))
3347 {
3348 mask = caseless >> 8;
3349 chr |= mask;
3350 }
3351
3352 if (chars[0] == NOTACHAR)
3353 {
/* First value stored in this slot. */
3354 chars[0] = chr;
3355 chars[1] = mask;
3356 }
3357 else
3358 {
/* Merge with the stored value: every bit which differs may differ. */
3359 mask |= chars[0] ^ chr;
3360 chr |= mask;
3361 chars[0] = chr;
3362 chars[1] |= mask;
3363 }
3364
3365 len--;
3366 if (--max_chars == 0)
3367 return consumed;
3368 consumed++;
3369 chars += 2;
3370 cc++;
3371 }
3372 while (len > 0);
3373
3374 if (--repeat == 0)
3375 break;
3376
3377 len = len_save;
3378 cc = cc_save;
3379 }
3380
3381 repeat = 1;
3382 if (last)
3383 return consumed;
3384 }
3385 }
3386
/* Maximum number of prefix positions collected by scan_prefix for
   fast_forward_first_n_chars. */
3387 #define MAX_N_CHARS 16
3388
/* Emits a loop which advances STR_PTR until up to three selected characters
   of the pattern prefix match the subject. Returns FALSE, without emitting
   any code, when fewer than two prefix positions are known or none of them
   is specific enough; returns TRUE otherwise. When firstline is set, the
   search is limited by the pointer stored in common->first_line_end. */
3389 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3390 {
3391 DEFINE_COMPILER;
3392 struct sljit_label *start;
3393 struct sljit_jump *quit;
3394 pcre_uint32 chars[MAX_N_CHARS * 2];
3395 pcre_uint8 ones[MAX_N_CHARS];
3396 pcre_uint32 mask;
3397 int i, max;
3398 int offsets[3];
3399
/* Every slot is a (value, mask) pair; NOTACHAR marks an unused slot. */
3400 for (i = 0; i < MAX_N_CHARS; i++)
3401 {
3402 chars[i << 1] = NOTACHAR;
3403 chars[(i << 1) + 1] = 0;
3404 }
3405
3406 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3407
3408 if (max <= 1)
3409 return FALSE;
3410
/* ones[i] is summed from ones_in_half_byte over each 4 bit group of the mask
   (presumably the number of set bits, i.e. of "may differ" bits - confirm
   against the table definition). */
3411 for (i = 0; i < max; i++)
3412 {
3413 mask = chars[(i << 1) + 1];
3414 ones[i] = ones_in_half_byte[mask & 0xf];
3415 mask >>= 4;
3416 while (mask != 0)
3417 {
3418 ones[i] += ones_in_half_byte[mask & 0xf];
3419 mask >>= 4;
3420 }
3421 }
3422
/* offsets[0]: first position with at most two such bits. */
3423 offsets[0] = -1;
3424 /* Scan forward. */
3425 for (i = 0; i < max; i++)
3426 if (ones[i] <= 2) {
3427 offsets[0] = i;
3428 break;
3429 }
3430
3431 if (offsets[0] == -1)
3432 return FALSE;
3433
/* offsets[1]: last such position after offsets[0], if any. */
3434 /* Scan backward. */
3435 offsets[1] = -1;
3436 for (i = max - 1; i > offsets[0]; i--)
3437 if (ones[i] <= 2) {
3438 offsets[1] = i;
3439 break;
3440 }
3441
/* offsets[2]: a position strictly between the other two, searched upwards
   from just above the midpoint first, then downwards from the midpoint. */
3442 offsets[2] = -1;
3443 if (offsets[1] >= 0)
3444 {
3445 /* Scan from middle. */
3446 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3447 if (ones[i] <= 2)
3448 {
3449 offsets[2] = i;
3450 break;
3451 }
3452
3453 if (offsets[2] == -1)
3454 {
3455 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3456 if (ones[i] <= 2)
3457 {
3458 offsets[2] = i;
3459 break;
3460 }
3461 }
3462 }
3463
3464 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3465 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3466
/* Move the selected pairs to the front of the array:
   chars[0..1] = first, chars[2..3] = middle, chars[4..5] = last. */
3467 chars[0] = chars[offsets[0] << 1];
3468 chars[1] = chars[(offsets[0] << 1) + 1];
3469 if (offsets[2] >= 0)
3470 {
3471 chars[2] = chars[offsets[2] << 1];
3472 chars[3] = chars[(offsets[2] << 1) + 1];
3473 }
3474 if (offsets[1] >= 0)
3475 {
3476 chars[4] = chars[offsets[1] << 1];
3477 chars[5] = chars[(offsets[1] << 1) + 1];
3478 }
3479
/* Lower STR_END by max characters, so the loads at STR_PTR + offset below
   stay under the original limit (every offset is <= max). */
3480 max -= 1;
3481 if (firstline)
3482 {
3483 SLJIT_ASSERT(common->first_line_end != 0);
/* The real STR_END is kept in TMP3 while the loop runs. */
3484 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3485 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3486 }
3487 else
3488 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3489
3490 start = LABEL();
3491 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3492
/* Load the first (and the last) selected character, then step STR_PTR. */
3493 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3494 if (offsets[1] >= 0)
3495 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3496 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3497
/* ORing the mask in makes the "may differ" bits irrelevant for the compare. */
3498 if (chars[1] != 0)
3499 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3500 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been advanced, hence the - 1. */
3501 if (offsets[2] >= 0)
3502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3503
3504 if (offsets[1] >= 0)
3505 {
3506 if (chars[5] != 0)
3507 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3508 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3509 }
3510
3511 if (offsets[2] >= 0)
3512 {
3513 if (chars[3] != 0)
3514 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3515 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3516 }
/* Every selected character matched: undo the step made above. */
3517 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3518
3519 JUMPHERE(quit);
3520
/* Restore the original STR_END. */
3521 if (firstline)
3522 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3523 else
3524 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3525 return TRUE;
3526 }
3527
3528 #undef MAX_N_CHARS
3529
/* Emits a loop which advances STR_PTR one code unit at a time until the code
   unit it points to equals first_char (or its other case when caseless is
   set), or until STR_END is reached. When firstline is set, STR_END is
   temporarily replaced by the pointer stored in common->first_line_end. */
3530 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3531 {
3532 DEFINE_COMPILER;
3533 struct sljit_label *start;
3534 struct sljit_jump *quit;
3535 struct sljit_jump *found;
3536 pcre_uchar oc, bit;
3537
3538 if (firstline)
3539 {
3540 SLJIT_ASSERT(common->first_line_end != 0);
/* The real STR_END is kept in TMP3 until the end of this function. */
3541 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3542 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3543 }
3544
3545 start = LABEL();
3546 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3547 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3548
/* oc is the other case of first_char, or first_char itself. */
3549 oc = first_char;
3550 if (caseless)
3551 {
3552 oc = TABLE_GET(first_char, common->fcc, first_char);
3553 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3554 if (first_char > 127 && common->utf)
3555 oc = UCD_OTHERCASE(first_char);
3556 #endif
3557 }
3558 if (first_char == oc)
3559 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3560 else
3561 {
3562 bit = first_char ^ oc;
3563 if (is_powerof2(bit))
3564 {
/* The two cases differ in a single bit: set it, then one compare is enough. */
3565 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3566 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3567 }
3568 else
3569 {
/* Compare with both cases; TMP2 is non-zero if either of them matched. */
3570 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3571 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3572 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3573 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3574 found = JUMP(SLJIT_C_NOT_ZERO);
3575 }
3576 }
3577
/* No match at this position: step forward and try again. */
3578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3579 JUMPTO(SLJIT_JUMP, start);
3580 JUMPHERE(found);
3581 JUMPHERE(quit);
3582
3583 if (firstline)
3584 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3585 }
3586
/* Emits code which advances STR_PTR to the position following the next
   newline, or to STR_END when no newline is found. Nothing is done when
   STR_PTR is not above the str member of the jit arguments. When firstline is
   set, STR_END is temporarily replaced by the pointer stored in
   common->first_line_end; the original value is kept in TMP3 and restored on
   every exit path. */
3587 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3588 {
3589 DEFINE_COMPILER;
3590 struct sljit_label *loop;
3591 struct sljit_jump *lastchar;
3592 struct sljit_jump *firstchar;
3593 struct sljit_jump *quit;
3594 struct sljit_jump *foundcr = NULL;
3595 struct sljit_jump *notfoundnl;
3596 jump_list *newline = NULL;
3597
3598 if (firstline)
3599 {
3600 SLJIT_ASSERT(common->first_line_end != 0);
/* The real STR_END is kept in TMP3 until the end of this function. */
3601 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3602 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3603 }
3604
3605 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3606 {
/* Fixed newline which consists of two code units. */
3607 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3608 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3609 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3611 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3612
/* Step back by one character if at least two characters precede STR_PTR,
   so the first iteration below checks the pair which ends at STR_PTR. */
3613 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3614 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3615 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3616 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3617 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3618 #endif
3619 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3620
/* Advance until the two code units before STR_PTR form the newline. */
3621 loop = LABEL();
3622 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3623 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3624 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3625 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3626 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3627 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3628
3629 JUMPHERE(quit);
3630 JUMPHERE(firstchar);
3631 JUMPHERE(lastchar);
3632
/* STR_END was saved in TMP3 above, so it must be restored from there,
   exactly as on the other exit path of this function. */
3633 if (firstline)
3634 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3635 return;
3636 }
3637
/* All other newline types. */
3638 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3639 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3640 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3641 skip_char_back(common);
3642
3643 loop = LABEL();
3644 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3645 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3646 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3647 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected by check_newlinechar are bound to the loop label, so
   the fall-through path is presumably the one where a newline was recognized
   (confirm against check_newlinechar). */
3648 check_newlinechar(common, common->nltype, &newline, FALSE);
3649 set_jumps(newline, loop);
3650
3651 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3652 {
3653 quit = JUMP(SLJIT_JUMP);
3654 JUMPHERE(foundcr);
/* A CR was read: if an NL follows, step over it as well. */
3655 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3656 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3657 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3658 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3659 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3660 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3661 #endif
3662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3663 JUMPHERE(notfoundnl);
3664 JUMPHERE(quit);
3665 }
3666 JUMPHERE(lastchar);
3667 JUMPHERE(firstchar);
3668
3669 if (firstline)
3670 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3671 }
3672
/* Forward declaration: fast_forward_start_bits uses this function before its
   definition. */
3673 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3674
/* Emits a loop which advances STR_PTR character by character until the
   current code unit passes the start_bits test, or until STR_END is reached.
   The test is emitted by check_class_ranges when it can handle the bitmap,
   otherwise the bit is looked up in start_bits directly. When firstline is
   set, STR_END is temporarily replaced by the pointer stored in
   common->first_line_end. */
3675 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3676 {
3677 DEFINE_COMPILER;
3678 struct sljit_label *start;
3679 struct sljit_jump *quit;
3680 struct sljit_jump *found = NULL;
3681 jump_list *matches = NULL;
3682 #ifndef COMPILE_PCRE8
3683 struct sljit_jump *jump;
3684 #endif
3685
3686 if (firstline)
3687 {
3688 SLJIT_ASSERT(common->first_line_end != 0);
/* The real STR_END is kept in RETURN_ADDR here, since TMP3 is used below
   to preserve the code unit in UTF mode. */
3689 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3690 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3691 }
3692
3693 start = LABEL();
3694 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3695 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3696 #ifdef SUPPORT_UTF
/* The tests below modify TMP1, so a copy is kept for the UTF skip code. */
3697 if (common->utf)
3698 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3699 #endif
3700
/* invert is TRUE, so the jumps collected in matches are bound to the exit
   of the loop (see set_jumps below). */
3701 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3702 {
3703 #ifndef COMPILE_PCRE8
/* Code units above 255 are tested as 255. */
3704 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3705 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3706 JUMPHERE(jump);
3707 #endif
/* Generic test: start_bits[c >> 3] & (1 << (c & 0x7)). */
3708 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3709 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3710 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3711 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3712 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3713 found = JUMP(SLJIT_C_NOT_ZERO);
3714 }
3715
3716 #ifdef SUPPORT_UTF
3717 if (common->utf)
3718 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3719 #endif
/* No match: step over the current character. */
3720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3721 #ifdef SUPPORT_UTF
3722 #if defined COMPILE_PCRE8
3723 if (common->utf)
3724 {
/* For code units of 0xc0 and above an extra length, read from utf8_table4,
   is added to STR_PTR. */
3725 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3726 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3728 }
3729 #elif defined COMPILE_PCRE16
3730 if (common->utf)
3731 {
/* When (c & 0xfc00) == 0xd800, one more code unit (two bytes) is skipped. */
3732 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3733 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3734 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3735 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3736 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3737 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3738 }
3739 #endif /* COMPILE_PCRE[8|16] */
3740 #endif /* SUPPORT_UTF */
3741 JUMPTO(SLJIT_JUMP, start);
/* Exit of the loop. */
3742 if (found != NULL)
3743 JUMPHERE(found);
3744 if (matches != NULL)
3745 set_jumps(matches, LABEL());
3746 JUMPHERE(quit);
3747
3748 if (firstline)
3749 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3750 }
3751
/* Emits code which searches the subject for req_char (or its other case when
   caseless is set), starting at STR_PTR, or one character after it when
   has_firstchar is set. The position found is stored in the local slot
   common->req_char_ptr. The search is skipped when STR_PTR + REQ_BYTE_MAX is
   below STR_END, or when STR_PTR is below the stored position.

   Returns the jump which is taken when STR_END is reached without finding
   the character; the caller has to bind it. */
3752 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3753 {
3754 DEFINE_COMPILER;
3755 struct sljit_label *loop;
3756 struct sljit_jump *toolong;
3757 struct sljit_jump *alreadyfound;
3758 struct sljit_jump *found;
3759 struct sljit_jump *foundoc = NULL;
3760 struct sljit_jump *notfound;
3761 pcre_uint32 oc, bit;
3762
3763 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 is the position stored by a previous search. */
3764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3765 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3766 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3767 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3768
/* TMP1 is the search pointer. */
3769 if (has_firstchar)
3770 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3771 else
3772 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3773
3774 loop = LABEL();
3775 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3776
3777 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself. */
3778 oc = req_char;
3779 if (caseless)
3780 {
3781 oc = TABLE_GET(req_char, common->fcc, req_char);
3782 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3783 if (req_char > 127 && common->utf)
3784 oc = UCD_OTHERCASE(req_char);
3785 #endif
3786 }
3787 if (req_char == oc)
3788 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3789 else
3790 {
3791 bit = req_char ^ oc;
3792 if (is_powerof2(bit))
3793 {
/* The two cases differ in a single bit: set it, then one compare is enough. */
3794 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3795 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3796 }
3797 else
3798 {
3799 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3800 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3801 }
3802 }
3803 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3804 JUMPTO(SLJIT_JUMP, loop);
3805
3806 JUMPHERE(found);
3807 if (foundoc)
3808 JUMPHERE(foundoc);
/* Remember where the character was found. */
3809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3810 JUMPHERE(alreadyfound);
3811 JUMPHERE(toolong);
3812 return notfound;
3813 }
3814
/* Emits a helper routine (fast enter / fast return, return address in
   RETURN_ADDR) which processes the records found at STACK_TOP and writes the
   saved values back to the local area. TMP1 walks the records and TMP3 holds
   the base address of the locals. The first word of a record selects its
   type:
     > 0: offset of a local; the next two words are copied to that local and
          the one after it, then three words are skipped
     = 0: end of the records, the routine returns
     < 0: negated offset of a local; the next word is copied to it, then two
          words are skipped */
3815 static void do_revertframes(compiler_common *common)
3816 {
3817 DEFINE_COMPILER;
3818 struct sljit_jump *jump;
3819 struct sljit_label *mainloop;
3820
3821 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3822 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3823 GET_LOCAL_BASE(TMP3, 0, 0);
3824
3825 /* Drop frames until we reach STACK_TOP. */
3826 mainloop = LABEL();
3827 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3828 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3829 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3830
/* Positive offset: restore two words. */
3831 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3832 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3833 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3834 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3835 JUMPTO(SLJIT_JUMP, mainloop);
3836
/* Zero or negative; the flags of the comparison above are still in use. */
3837 JUMPHERE(jump);
3838 jump = JUMP(SLJIT_C_SIG_LESS);
3839 /* End of dropping frames. */
3840 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3841
/* Negative offset: restore a single word. */
3842 JUMPHERE(jump);
3843 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3844 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3845 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3846 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3847 JUMPTO(SLJIT_JUMP, mainloop);
3848 }
3849
/* Emits a helper routine (fast enter / fast return, return address stored in
   LOCALS0) which tests whether STR_PTR is at a word boundary. The "is a word
   character" value of the previous character is stored in LOCALS1 (0 when
   STR_PTR is at the begin member of the jit arguments), the value of the next
   character is computed in TMP2 (0 at the end of the subject). The two values
   are XORed at the end with the E flag set, so the result is non-zero at a
   word boundary. */
3850 static void check_wordboundary(compiler_common *common)
3851 {
3852 DEFINE_COMPILER;
3853 struct sljit_jump *skipread;
3854 jump_list *skipread_list = NULL;
3855 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3856 struct sljit_jump *jump;
3857 #endif
3858
/* The shift by 4 below relies on this value. */
3859 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3860
3861 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3862 /* Get type of the previous char, and put it to LOCALS1. */
3863 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3864 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3865 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3866 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back, then read the previous character again, which restores
   STR_PTR. */
3867 skip_char_back(common);
3868 check_start_used_ptr(common);
3869 read_char(common);
3870
3871 /* Testing char type. */
3872 #ifdef SUPPORT_UCP
3873 if (common->use_ucp)
3874 {
/* Underscore is a word character (TMP2 stays 1). Otherwise getucd is
   called (presumably leaving the character type in TMP1 - confirm) and the
   type is tested against the ucp_Ll..ucp_Lu and ucp_Nd..ucp_No ranges. */
3875 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3876 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3877 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3878 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3879 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3880 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3881 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3882 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3883 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3884 JUMPHERE(jump);
3885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3886 }
3887 else
3888 #endif
3889 {
/* Characters above 255 skip the table lookup, so LOCALS1 remains zero. */
3890 #ifndef COMPILE_PCRE8
3891 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3892 #elif defined SUPPORT_UTF
3893 /* Here LOCALS1 has already been zeroed. */
3894 jump = NULL;
3895 if (common->utf)
3896 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3897 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit of the ctypes table entry. */
3898 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3899 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3900 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3901 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3902 #ifndef COMPILE_PCRE8
3903 JUMPHERE(jump);
3904 #elif defined SUPPORT_UTF
3905 if (jump != NULL)
3906 JUMPHERE(jump);
3907 #endif /* COMPILE_PCRE8 */
3908 }
3909 JUMPHERE(skipread);
3910
/* Get the type of the next char into TMP2; it stays zero when the jumps
   added by check_str_end are taken. */
3911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3912 check_str_end(common, &skipread_list);
3913 peek_char(common, READ_CHAR_MAX);
3914
3915 /* Testing char type. This is a code duplication. */
3916 #ifdef SUPPORT_UCP
3917 if (common->use_ucp)
3918 {
3919 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3920 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3921 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3922 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3923 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3924 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3925 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3926 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3927 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3928 JUMPHERE(jump);
3929 }
3930 else
3931 #endif
3932 {
3933 #ifndef COMPILE_PCRE8
3934 /* TMP2 may be destroyed by peek_char. */
3935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3936 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3937 #elif defined SUPPORT_UTF
3938 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3939 jump = NULL;
3940 if (common->utf)
3941 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3942 #endif
3943 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3944 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3945 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3946 #ifndef COMPILE_PCRE8
3947 JUMPHERE(jump);
3948 #elif defined SUPPORT_UTF
3949 if (jump != NULL)
3950 JUMPHERE(jump);
3951 #endif /* COMPILE_PCRE8 */
3952 }
3953 set_jumps(skipread_list, LABEL());
3954
/* Non-zero result: exactly one of the two characters is a word character. */
3955 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3956 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3957 }
3958
/* Tries to replace the lookup in the 256 bit long bitmap (bits) with a few
   compare instructions on TMP1. The bitmap is converted to a list of
   transition points: the character values where the bit value changes.
   Bitmaps with at most four transition points are handled; FALSE is returned
   for the others and the caller has to emit a generic test. When TRUE is
   returned, the emitted jumps were added to backtracks and TMP1 may have been
   modified. When invert is set, every bit is treated as its opposite.
   nclass decides whether a final transition point at 256 is added. */
3959 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3960 {
3961 DEFINE_COMPILER;
3962 int ranges[MAX_RANGE_SIZE];
3963 pcre_uint8 bit, cbit, all;
3964 int i, byte, length = 0;
3965
3966 bit = bits[0] & 0x1;
3967 /* All bits will be zero or one (since bit is zero or one). */
3968 all = -bit;
3969
3970 for (i = 0; i < 256; )
3971 {
3972 byte = i >> 3;
/* A byte whose bits are all equal to the current bit is skipped at once. */
3973 if ((i & 0x7) == 0 && bits[byte] == all)
3974 i += 8;
3975 else
3976 {
3977 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3978 if (cbit != bit)
3979 {
/* The bit value changes at character i. */
3980 if (length >= MAX_RANGE_SIZE)
3981 return FALSE;
3982 ranges[length] = i;
3983 length++;
3984 bit = cbit;
3985 all = -cbit;
3986 }
3987 i++;
3988 }
3989 }
3990
/* bit is the value of the last bit of the bitmap here. */
3991 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3992 {
3993 if (length >= MAX_RANGE_SIZE)
3994 return FALSE;
3995 ranges[length] = 256;
3996 length++;
3997 }
3998
3999 if (length < 0 || length > 4)
4000 return FALSE;
4001
/* From here bit is the (possibly inverted) value of the first bit, which is
   the value below ranges[0]. The jumps added to backtracks are taken where
   this value is zero. */
4002 bit = bits[0] & 0x1;
4003 if (invert) bit ^= 0x1;
4004
4005 /* No character is accepted. */
4006 if (length == 0 && bit == 0)
4007 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4008
4009 switch(length)
4010 {
4011 case 0:
4012 /* When bit != 0, all characters are accepted. */
4013 return TRUE;
4014
4015 case 1:
/* One transition: a single compare with ranges[0]. */
4016 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4017 return TRUE;
4018
4019 case 2:
/* One range, [ranges[0], ranges[1]): a subtraction and a compare, or an
   equality test when the range contains a single character. */
4020 if (ranges[0] + 1 != ranges[1])
4021 {
4022 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4023 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4024 }
4025 else
4026 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4027 return TRUE;
4028
4029 case 3:
4030 if (bit != 0)
4031 {
/* Zero in [ranges[0], ranges[1]) and from ranges[2]. */
4032 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4033 if (ranges[0] + 1 != ranges[1])
4034 {
4035 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4036 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4037 }
4038 else
4039 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4040 return TRUE;
4041 }
4042
/* Zero below ranges[0] and in [ranges[1], ranges[2]). */
4043 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4044 if (ranges[1] + 1 != ranges[2])
4045 {
4046 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4047 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4048 }
4049 else
4050 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4051 return TRUE;
4052
4053 case 4:
/* Two ranges of the same length whose start values differ in a single bit
   (set in ranges[2], clear in ranges[0]): setting that bit in TMP1 maps the
   first range onto the second, so one range test is enough. */
4054 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4055 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4056 && is_powerof2(ranges[2] - ranges[0]))
4057 {
4058 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4059 if (ranges[2] + 1 != ranges[3])
4060 {
4061 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4062 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4063 }
4064 else
4065 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4066 return TRUE;
4067 }
4068
4069 if (bit != 0)
4070 {
/* Zero in [ranges[0], ranges[1]) and in [ranges[2], ranges[3]).
   i is the amount already subtracted from TMP1. */
4071 i = 0;
4072 if (ranges[0] + 1 != ranges[1])
4073 {
4074 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4075 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4076 i = ranges[0];
4077 }
4078 else
4079 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4080
4081 if (ranges[2] + 1 != ranges[3])
4082 {
4083 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4084 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4085 }
4086 else
4087 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4088 return TRUE;
4089 }
4090
/* Zero outside of [ranges[0], ranges[3]) and in [ranges[1], ranges[2]). */
4091 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4092 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4093 if (ranges[1] + 1 != ranges[2])
4094 {
4095 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4096 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4097 }
4098 else
4099 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4100 return TRUE;
4101
4102 default:
4103 SLJIT_ASSERT_STOP();
4104 return FALSE;
4105 }
4106 }
4107
/* Emits a helper routine (fast enter / fast return, return address in
   RETURN_ADDR) for the "any newline" test. The result is left in TMP2 and in
   the flags set by the last operation: non-zero means newline. */
4108 static void check_anynewline(compiler_common *common)
4109 {
4110 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
   TMP1 is modified as well. */
4111 DEFINE_COMPILER;
4112
4113 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4114
/* 0x0a - 0x0d, tested with a single unsigned compare after subtracting 0x0a. */
4115 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4116 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4117 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* 0x85; its result is merged into TMP2 by the next OP_FLAGS which is emitted. */
4118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4119 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4120 #ifdef COMPILE_PCRE8
4121 if (common->utf)
4122 {
4123 #endif
4124 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* 0x2028 and 0x2029: the lowest bit is set before the compare with 0x2029. */
4125 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4127 #ifdef COMPILE_PCRE8
4128 }
4129 #endif
4130 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4131 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4132 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4133 }
4134
/* Emits a helper routine (fast enter / fast return, return address in
   RETURN_ADDR) for the horizontal whitespace test. The result is left in TMP2
   and in the flags set by the last operation: non-zero means match. */
4135 static void check_hspace(compiler_common *common)
4136 {
4137 /* Check whether TMP1 contains a horizontal whitespace character. TMP2
   destroyed. TMP1 is modified when the characters above 0xff are tested. */
4138 DEFINE_COMPILER;
4139
4140 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4141
/* 0x09, 0x20 and 0xa0. The result of every compare is merged into TMP2 by
   the OP_FLAGS which follows it. */
4142 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4143 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4144 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4145 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4147 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4148 #ifdef COMPILE_PCRE8
4149 if (common->utf)
4150 {
4151 #endif
4152 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* 0x1680 and 0x180e. */
4153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4154 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4156 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* 0x2000 - 0x200A with a single unsigned compare; the remaining values are
   compared relative to 0x2000 as well. */
4157 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4158 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4159 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4161 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4162 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4163 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4164 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4165 #ifdef COMPILE_PCRE8
4166 }
4167 #endif
4168 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4169 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4170
4171 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4172 }
4173
4174 static void check_vspace(compiler_common *common)
4175 {
4176 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4177 DEFINE_COMPILER;
4178
4179 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4180
4181 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4182 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4183 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4185 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4186 #ifdef COMPILE_PCRE8
4187 if (common->utf)
4188 {
4189 #endif
4190 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4191 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4192 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4193 #ifdef COMPILE_PCRE8
4194 }
4195 #endif
4196 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4197 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4198
4199 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4200 }
4201
4202 #define CHAR1 STR_END
4203 #define CHAR2 STACK_TOP
4204
4205 static void do_casefulcmp(compiler_common *common)
4206 {
4207 DEFINE_COMPILER;
4208 struct sljit_jump *jump;
4209 struct sljit_label *label;
4210
4211 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4212 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4213 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4215 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4216 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4217
4218 label = LABEL();
4219 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4220 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4221 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4222 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4223 JUMPTO(SLJIT_C_NOT_ZERO, label);
4224
4225 JUMPHERE(jump);
4226 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4227 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4228 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4229 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4230 }
4231
4232 #define LCC_TABLE STACK_LIMIT
4233
4234 static void do_caselesscmp(compiler_common *common)
4235 {
4236 DEFINE_COMPILER;
4237 struct sljit_jump *jump;
4238 struct sljit_label *label;
4239
4240 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4241 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4242
4243 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4244 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4245 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4246 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4247 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4248 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4249
4250 label = LABEL();
4251 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4252 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4253 #ifndef COMPILE_PCRE8
4254 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4255 #endif
4256 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4257 #ifndef COMPILE_PCRE8
4258 JUMPHERE(jump);
4259 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4260 #endif
4261 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4262 #ifndef COMPILE_PCRE8
4263 JUMPHERE(jump);
4264 #endif
4265 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4266 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4267 JUMPTO(SLJIT_C_NOT_ZERO, label);
4268
4269 JUMPHERE(jump);
4270 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4271 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4272 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4273 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4274 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4275 }
4276
4277 #undef LCC_TABLE
4278 #undef CHAR1
4279 #undef CHAR2
4280
4281 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4282
4283 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4284 {
4285 /* This function would be ineffective to do in JIT level. */
4286 pcre_uint32 c1, c2;
4287 const pcre_uchar *src2 = args->uchar_ptr;
4288 const pcre_uchar *end2 = args->end;
4289 const ucd_record *ur;
4290 const pcre_uint32 *pp;
4291
4292 while (src1 < end1)
4293 {
4294 if (src2 >= end2)
4295 return (pcre_uchar*)1;
4296 GETCHARINC(c1, src1);
4297 GETCHARINC(c2, src2);
4298 ur = GET_UCD(c2);
4299 if (c1 != c2 && c1 != c2 + ur->other_case)
4300 {
4301 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4302 for (;;)
4303 {
4304 if (c1 < *pp) return NULL;
4305 if (c1 == *pp++) break;
4306 }
4307 }
4308 }
4309 return src2;
4310 }
4311
4312 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4313
4314 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4315 compare_context* context, jump_list **backtracks)
4316 {
4317 DEFINE_COMPILER;
4318 unsigned int othercasebit = 0;
4319 pcre_uchar *othercasechar = NULL;
4320 #ifdef SUPPORT_UTF
4321 int utflength;
4322 #endif
4323
4324 if (caseless && char_has_othercase(common, cc))
4325 {
4326 othercasebit = char_get_othercase_bit(common, cc);
4327 SLJIT_ASSERT(othercasebit);
4328 /* Extracting bit difference info. */
4329 #if defined COMPILE_PCRE8
4330 othercasechar = cc + (othercasebit >> 8);
4331 othercasebit &= 0xff;
4332 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4333 /* Note that this code only handles characters in the BMP. If there
4334 ever are characters outside the BMP whose othercase differs in only one
4335 bit from itself (there currently are none), this code will need to be
4336 revised for COMPILE_PCRE32. */
4337 othercasechar = cc + (othercasebit >> 9);
4338 if ((othercasebit & 0x100) != 0)
4339 othercasebit = (othercasebit & 0xff) << 8;
4340 else
4341 othercasebit &= 0xff;
4342 #endif /* COMPILE_PCRE[8|16|32] */
4343 }
4344
4345 if (context->sourcereg == -1)
4346 {
4347 #if defined COMPILE_PCRE8
4348 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4349 if (context->length >= 4)
4350 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4351 else if (context->length >= 2)
4352 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4353 else
4354 #endif
4355 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4356 #elif defined COMPILE_PCRE16
4357 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4358 if (context->length >= 4)
4359 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4360 else
4361 #endif
4362 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4363 #elif defined COMPILE_PCRE32
4364 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4365 #endif /* COMPILE_PCRE[8|16|32] */
4366 context->sourcereg = TMP2;
4367 }
4368
4369 #ifdef SUPPORT_UTF
4370 utflength = 1;
4371 if (common->utf && HAS_EXTRALEN(*cc))
4372 utflength += GET_EXTRALEN(*cc);
4373
4374 do
4375 {
4376 #endif
4377
4378 context->length -= IN_UCHARS(1);
4379 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4380
4381 /* Unaligned read is supported. */
4382 if (othercasebit != 0 && othercasechar == cc)
4383 {
4384 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4385 context->oc.asuchars[context->ucharptr] = othercasebit;
4386 }
4387 else
4388 {
4389 context->c.asuchars[context->ucharptr] = *cc;
4390 context->oc.asuchars[context->ucharptr] = 0;
4391 }
4392 context->ucharptr++;
4393
4394 #if defined COMPILE_PCRE8
4395 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4396 #else
4397 if (context->ucharptr >= 2 || context->length == 0)
4398 #endif
4399 {
4400 if (context->length >= 4)
4401 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4402 else if (context->length >= 2)
4403 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4404 #if defined COMPILE_PCRE8
4405 else if (context->length >= 1)
4406 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4407 #endif /* COMPILE_PCRE8 */
4408 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4409
4410 switch(context->ucharptr)
4411 {
4412 case 4 / sizeof(pcre_uchar):
4413 if (context->oc.asint != 0)
4414 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4415 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4416 break;
4417
4418 case 2 / sizeof(pcre_uchar):
4419 if (context->oc.asushort != 0)
4420 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4421 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4422 break;
4423
4424 #ifdef COMPILE_PCRE8
4425 case 1:
4426 if (context->oc.asbyte != 0)
4427 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4428 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4429 break;
4430 #endif
4431
4432 default:
4433 SLJIT_ASSERT_STOP();
4434 break;
4435 }
4436 context->ucharptr = 0;
4437 }
4438
4439 #else
4440
4441 /* Unaligned read is unsupported or in 32 bit mode. */
4442 if (context->length >= 1)
4443 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4444
4445 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4446
4447 if (othercasebit != 0 && othercasechar == cc)
4448 {
4449 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4451 }
4452 else
4453 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4454
4455 #endif
4456
4457 cc++;
4458 #ifdef SUPPORT_UTF
4459 utflength--;
4460 }
4461 while (utflength > 0);
4462 #endif
4463
4464 return cc;
4465 }
4466
4467 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4468
4469 #define SET_TYPE_OFFSET(value) \
4470 if ((value) != typeoffset) \
4471 { \
4472 if ((value) < typeoffset) \
4473 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4474 else \
4475 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4476 } \
4477 typeoffset = (value);
4478
4479 #define SET_CHAR_OFFSET(value) \
4480 if ((value) != charoffset) \
4481 { \
4482 if ((value) < charoffset) \
4483 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4484 else \
4485 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4486 } \
4487 charoffset = (value);
4488
4489 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4490 {
4491 DEFINE_COMPILER;
4492 jump_list *found = NULL;
4493 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4494 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4495 struct sljit_jump *jump = NULL;
4496 pcre_uchar *ccbegin;
4497 int compares, invertcmp, numberofcmps;
4498 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4499 BOOL utf = common->utf;
4500 #endif
4501
4502 #ifdef SUPPORT_UCP
4503 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4504 BOOL charsaved = FALSE;
4505 int typereg = TMP1, scriptreg = TMP1;
4506 const pcre_uint32 *other_cases;
4507 sljit_uw typeoffset;
4508 #endif
4509
4510 /* Scanning the necessary info. */
4511 cc++;
4512 ccbegin = cc;
4513 compares = 0;
4514 if (cc[-1] & XCL_MAP)
4515 {
4516 min = 0;
4517 cc += 32 / sizeof(pcre_uchar);
4518 }
4519
4520 while (*cc != XCL_END)
4521 {
4522 compares++;
4523 if (*cc == XCL_SINGLE)
4524 {
4525 cc ++;
4526 GETCHARINCTEST(c, cc);
4527 if (c > max) max = c;
4528 if (c < min) min = c;
4529 #ifdef SUPPORT_UCP
4530 needschar = TRUE;
4531 #endif
4532 }
4533 else if (*cc == XCL_RANGE)
4534 {
4535 cc ++;
4536 GETCHARINCTEST(c, cc);
4537 if (c < min) min = c;
4538 GETCHARINCTEST(c, cc);
4539 if (c > max) max = c;
4540 #ifdef SUPPORT_UCP
4541 needschar = TRUE;
4542 #endif
4543 }
4544 #ifdef SUPPORT_UCP
4545 else
4546 {
4547 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4548 cc++;
4549 if (*cc == PT_CLIST)
4550 {
4551 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4552 while (*other_cases != NOTACHAR)
4553 {
4554 if (*other_cases > max) max = *other_cases;
4555 if (*other_cases < min) min = *other_cases;
4556 other_cases++;
4557 }
4558 }
4559 else
4560 {
4561 max = READ_CHAR_MAX;
4562 min = 0;
4563 }
4564
4565 switch(*cc)
4566 {
4567 case PT_ANY:
4568 break;
4569
4570 case PT_LAMP:
4571 case PT_GC:
4572 case PT_PC:
4573 case PT_ALNUM:
4574 needstype = TRUE;
4575 break;
4576
4577 case PT_SC:
4578 needsscript = TRUE;
4579 break;
4580
4581 case PT_SPACE:
4582 case PT_PXSPACE:
4583 case PT_WORD:
4584 case PT_PXGRAPH:
4585 case PT_PXPRINT:
4586 case PT_PXPUNCT:
4587 needstype = TRUE;
4588 needschar = TRUE;
4589 break;
4590
4591 case PT_CLIST:
4592 case PT_UCNC:
4593 needschar = TRUE;
4594 break;
4595
4596 default:
4597 SLJIT_ASSERT_STOP();
4598 break;
4599 }
4600 cc += 2;
4601 }
4602 #endif
4603 }
4604
4605 /* We are not necessary in utf mode even in 8 bit mode. */
4606 cc = ccbegin;
4607 detect_partial_match(common, backtracks);
4608 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4609
4610 if ((cc[-1] & XCL_HASPROP) == 0)
4611 {
4612 if ((cc[-1] & XCL_MAP) != 0)
4613 {
4614 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4615 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4616 {
4617 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4618 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4619 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4620 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4621 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4622 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4623 }
4624
4625 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4626 JUMPHERE(jump);
4627
4628 cc += 32 / sizeof(pcre_uchar);
4629 }
4630 else
4631 {
4632 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4633 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4634 }
4635 }
4636 else if ((cc[-1] & XCL_MAP) != 0)
4637 {
4638 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4639 #ifdef SUPPORT_UCP
4640 charsaved = TRUE;
4641 #endif
4642 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4643 {
4644 #ifdef COMPILE_PCRE8
4645 SLJIT_ASSERT(common->utf);
4646 #endif
4647 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4648
4649 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4650 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4652 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4653 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4654 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4655
4656 JUMPHERE(jump);
4657 }
4658
4659 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4660 cc += 32 / sizeof(pcre_uchar);
4661 }
4662
4663 #ifdef SUPPORT_UCP
4664 /* Simple register allocation. TMP1 is preferred if possible. */
4665 if (needstype || needsscript)
4666 {
4667 if (needschar && !charsaved)
4668 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4669 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4670 if (needschar)
4671 {
4672 if (needstype)
4673 {
4674 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4675 typereg = RETURN_ADDR;
4676 }
4677
4678 if (needsscript)
4679 scriptreg = TMP3;
4680 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4681 }
4682 else if (needstype && needsscript)
4683 scriptreg = TMP3;
4684 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4685
4686 if (needsscript)
4687 {
4688 if (scriptreg == TMP1)
4689 {
4690 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4691 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4692 }
4693 else
4694 {
4695 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4696 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4697 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4698 }
4699 }
4700 }
4701 #endif
4702
4703 /* Generating code. */
4704 charoffset = 0;
4705 numberofcmps = 0;
4706 #ifdef SUPPORT_UCP
4707 typeoffset = 0;
4708 #endif
4709
4710 while (*cc != XCL_END)
4711 {
4712 compares--;
4713 invertcmp = (compares == 0 && list != backtracks);
4714 jump = NULL;
4715
4716 if (*cc == XCL_SINGLE)
4717 {
4718 cc ++;
4719 GETCHARINCTEST(c, cc);
4720
4721 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4722 {
4723 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4724 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4725 numberofcmps++;
4726 }
4727 else if (numberofcmps > 0)
4728 {
4729 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4730 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4731 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4732 numberofcmps = 0;
4733 }
4734 else
4735 {
4736 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4737 numberofcmps = 0;
4738 }
4739 }
4740 else if (*cc == XCL_RANGE)
4741 {
4742 cc ++;
4743 GETCHARINCTEST(c, cc);
4744 SET_CHAR_OFFSET(c);
4745 GETCHARINCTEST(c, cc);
4746
4747 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4748 {
4749 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4750 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4751 numberofcmps++;
4752 }
4753 else if (numberofcmps > 0)
4754 {
4755 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4756 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4757 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4758 numberofcmps = 0;
4759 }
4760 else
4761 {
4762 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4763 numberofcmps = 0;
4764 }
4765 }
4766 #ifdef SUPPORT_UCP
4767 else
4768 {
4769 if (*cc == XCL_NOTPROP)
4770 invertcmp ^= 0x1;
4771 cc++;
4772 switch(*cc)
4773 {
4774 case PT_ANY:
4775 if (list != backtracks)
4776 {
4777 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4778 continue;
4779 }
4780 else if (cc[-1] == XCL_NOTPROP)
4781 continue;
4782 jump = JUMP(SLJIT_JUMP);
4783 break;
4784
4785 case PT_LAMP:
4786 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4787 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4788 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4789 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4790 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4791 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4792 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4793 break;
4794
4795 case PT_GC:
4796 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4797 SET_TYPE_OFFSET(c);
4798 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4799 break;
4800
4801 case PT_PC:
4802 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4803 break;
4804
4805 case PT_SC:
4806 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4807 break;
4808
4809 case PT_SPACE:
4810 case PT_PXSPACE:
4811 SET_CHAR_OFFSET(9);
4812 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4813 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4814
4815 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4816 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4817
4818 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4819 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4820
4821 SET_TYPE_OFFSET(ucp_Zl);
4822 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4823 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4824 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4825 break;
4826
4827 case PT_WORD:
4828 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4829 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4830 /* Fall through. */
4831
4832 case PT_ALNUM:
4833 SET_TYPE_OFFSET(ucp_Ll);
4834 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4835 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4836 SET_TYPE_OFFSET(ucp_Nd);
4837 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4838 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4839 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4840 break;
4841
4842 case PT_CLIST:
4843 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4844
4845 /* At least three characters are required.
4846 Otherwise this case would be handled by the normal code path. */
4847 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4848 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4849
4850 /* Optimizing character pairs, if their difference is power of 2. */
4851 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4852 {
4853 if (charoffset == 0)
4854 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4855 else
4856 {
4857 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4858 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4859 }
4860 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4861 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4862 other_cases += 2;
4863 }
4864 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4865 {
4866 if (charoffset == 0)
4867 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4868 else
4869 {
4870 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4871 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4872 }
4873 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4874 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4875
4876 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4877 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4878
4879 other_cases += 3;
4880 }
4881 else
4882 {
4883 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4884 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4885 }
4886
4887 while (*other_cases != NOTACHAR)
4888 {
4889 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4890 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4891 }
4892 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4893 break;
4894
4895 case PT_UCNC:
4896 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4897 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4898 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4899 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4900 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4901 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4902
4903 SET_CHAR_OFFSET(0xa0);
4904 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4905 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4906 SET_CHAR_OFFSET(0);
4907 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4908 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4909 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4910 break;
4911
4912 case PT_PXGRAPH:
4913 /* C and Z groups are the farthest two groups. */
4914 SET_TYPE_OFFSET(ucp_Ll);
4915 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4916 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4917
4918 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4919
4920 /* In case of ucp_Cf, we overwrite the result. */
4921 SET_CHAR_OFFSET(0x2066);
4922 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4923 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4924
4925 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4926 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4927
4928 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4929 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4930
4931 JUMPHERE(jump);
4932 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4933 break;
4934
4935 case PT_PXPRINT:
4936 /* C and Z groups are the farthest two groups. */
4937 SET_TYPE_OFFSET(ucp_Ll);
4938 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4939 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4940
4941 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4942 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4943
4944 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4945
4946 /* In case of ucp_Cf, we overwrite the result. */
4947 SET_CHAR_OFFSET(0x2066);
4948 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4949 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4950
4951 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4952 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4953
4954 JUMPHERE(jump);
4955 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4956 break;
4957
4958 case PT_PXPUNCT:
4959 SET_TYPE_OFFSET(ucp_Sc);
4960 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4961 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4962
4963 SET_CHAR_OFFSET(0);
4964 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4965 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4966
4967 SET_TYPE_OFFSET(ucp_Pc);
4968 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4969 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4970 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4971 break;
4972 }
4973 cc += 2;
4974 }
4975 #endif
4976
4977 if (jump != NULL)
4978 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4979 }
4980
4981 if (found != NULL)
4982 set_jumps(found, LABEL());
4983 }
4984
4985 #undef SET_TYPE_OFFSET
4986 #undef SET_CHAR_OFFSET
4987
4988 #endif
4989
4990 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4991 {
4992 DEFINE_COMPILER;
4993 int length;
4994 unsigned int c, oc, bit;
4995 compare_context context;
4996 struct sljit_jump *jump[4];
4997 jump_list *end_list;
4998 #ifdef SUPPORT_UTF
4999 struct sljit_label *label;
5000 #ifdef SUPPORT_UCP
5001 pcre_uchar propdata[5];
5002 #endif
5003 #endif /* SUPPORT_UTF */
5004
5005 switch(type)
5006 {
5007 case OP_SOD:
5008 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5009 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5010 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5011 return cc;
5012
5013 case OP_SOM:
5014 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5015 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5016 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5017 return cc;
5018
5019 case OP_NOT_WORD_BOUNDARY:
5020 case OP_WORD_BOUNDARY:
5021 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5022 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5023 return cc;
5024
5025 case OP_NOT_DIGIT:
5026 case OP_DIGIT:
5027 /* Digits are usually 0-9, so it is worth to optimize them. */
5028 detect_partial_match(common, backtracks);
5029 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5030 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5031 read_char7_type(common, type == OP_NOT_DIGIT);
5032 else
5033 #endif
5034 read_char8_type(common, type == OP_NOT_DIGIT);
5035 /* Flip the starting bit in the negative case. */
5036 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5037 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5038 return cc;
5039
5040 case OP_NOT_WHITESPACE:
5041 case OP_WHITESPACE:
5042 detect_partial_match(common, backtracks);
5043 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5044 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5045 read_char7_type(common, type == OP_NOT_WHITESPACE);
5046 else
5047 #endif
5048 read_char8_type(common, type == OP_NOT_WHITESPACE);
5049 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5050 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5051 return cc;
5052
5053 case OP_NOT_WORDCHAR:
5054 case OP_WORDCHAR:
5055 detect_partial_match(common, backtracks);
5056 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5057 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5058 read_char7_type(common, type == OP_NOT_WORDCHAR);
5059 else
5060 #endif
5061 read_char8_type(common, type == OP_NOT_WORDCHAR);
5062 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5063 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5064 return cc;
5065
5066 case OP_ANY:
5067 detect_partial_match(common, backtracks);
5068 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5069 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5070 {
5071 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5072 end_list = NULL;
5073 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5074 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5075 else
5076 check_str_end(common, &end_list);
5077
5078 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5079 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5080 set_jumps(end_list, LABEL());
5081 JUMPHERE(jump[0]);
5082 }
5083 else
5084 check_newlinechar(common, common->nltype, backtracks, TRUE);
5085 return cc;
5086
5087 case OP_ALLANY:
5088 detect_partial_match(common, backtracks);
5089 #ifdef SUPPORT_UTF
5090 if (common->utf)
5091 {
5092 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5093 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5094 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5095 #if defined COMPILE_PCRE8
5096 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5097 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5098 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5099 #elif defined COMPILE_PCRE16
5100 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5101 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5102 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5103 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5104 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5105 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5106 #endif
5107 JUMPHERE(jump[0]);
5108 #endif /* COMPILE_PCRE[8|16] */
5109 return cc;
5110 }
5111 #endif
5112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5113 return cc;
5114
5115 case OP_ANYBYTE:
5116 detect_partial_match(common, backtracks);
5117 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5118 return cc;
5119
5120 #ifdef SUPPORT_UTF
5121 #ifdef SUPPORT_UCP
5122 case OP_NOTPROP:
5123 case OP_PROP:
5124 propdata[0] = XCL_HASPROP;
5125 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5126 propdata[2] = cc[0];
5127 propdata[3] = cc[1];
5128 propdata[4] = XCL_END;
5129 compile_xclass_matchingpath(common, propdata, backtracks);
5130 return cc + 2;
5131 #endif
5132 #endif
5133
5134 case OP_ANYNL:
5135 detect_partial_match(common, backtracks);
5136 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5137 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5138 /* We don't need to handle soft partial matching case. */
5139 end_list = NULL;
5140 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5141 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5142 else
5143 check_str_end(common, &end_list);
5144 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5145 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5146 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5147 jump[2] = JUMP(SLJIT_JUMP);
5148 JUMPHERE(jump[0]);
5149 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5150 set_jumps(end_list, LABEL());
5151 JUMPHERE(jump[1]);
5152 JUMPHERE(jump[2]);
5153 return cc;
5154
5155 case OP_NOT_HSPACE:
5156 case OP_HSPACE:
5157 detect_partial_match(common, backtracks);
5158 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5159 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5160 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5161 return cc;
5162
5163 case OP_NOT_VSPACE:
5164 case OP_VSPACE:
5165 detect_partial_match(common, backtracks);
5166 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5167 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5168 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5169 return cc;
5170
5171 #ifdef SUPPORT_UCP
5172 case OP_EXTUNI:
5173 detect_partial_match(common, backtracks);
5174 read_char(common);
5175 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5176 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5177 /* Optimize register allocation: use a real register. */
5178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5179 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5180
5181 label = LABEL();
5182 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5183 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5184 read_char(common);
5185 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5187 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5188
5189 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5190 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5191 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5192 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5193 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5194 JUMPTO(SLJIT_C_NOT_ZERO, label);
5195
5196 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5197 JUMPHERE(jump[0]);
5198 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5199
5200 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5201 {
5202 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5203 /* Since we successfully read a char above, partial matching must occure. */
5204 check_partial(common, TRUE);
5205 JUMPHERE(jump[0]);
5206 }
5207 return cc;
5208 #endif
5209
5210 case OP_EODN:
5211 /* Requires rather complex checks. */
5212 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5213 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5214 {
5215 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5216 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5217 if (common->mode == JIT_COMPILE)
5218 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5219 else
5220 {
5221 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5222 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5223 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5224 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5225 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5226 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5227 check_partial(common, TRUE);
5228 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5229 JUMPHERE(jump[1]);
5230 }
5231 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5232 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5233 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5234 }
5235 else if (common->nltype == NLTYPE_FIXED)
5236 {
5237 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5238 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5239 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5240 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5241 }
5242 else
5243 {
5244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5245 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5246 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5247 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5248 jump[2] = JUMP(SLJIT_C_GREATER);
5249 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5250 /* Equal. */
5251 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5252 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5253 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5254
5255 JUMPHERE(jump[1]);
5256 if (common->nltype == NLTYPE_ANYCRLF)
5257 {
5258 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5259 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5260 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5261 }
5262 else
5263 {
5264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5265 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5266 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5267 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5268 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5269 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5270 }
5271 JUMPHERE(jump[2]);
5272 JUMPHERE(jump[3]);
5273 }
5274 JUMPHERE(jump[0]);
5275 check_partial(common, FALSE);
5276 return cc;
5277
5278 case OP_EOD:
5279 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5280 check_partial(common, FALSE);
5281 return cc;
5282
5283 case OP_CIRC:
5284 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5285 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5286 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5287 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5288 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5289 return cc;
5290
5291 case OP_CIRCM:
5292 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5293 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5294 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5295 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5296 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5297 jump[0] = JUMP(SLJIT_JUMP);
5298 JUMPHERE(jump[1]);
5299
5300 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5301 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5302 {
5303 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5304 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5305 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5306 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5307 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5308 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5309 }
5310 else
5311 {
5312 skip_char_back(common);
5313 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5314 check_newlinechar(common, common->nltype, backtracks, FALSE);
5315 }
5316 JUMPHERE(jump[0]);
5317 return cc;
5318
5319 case OP_DOLL:
5320 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5321 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5322 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5323
5324 if (!common->endonly)
5325 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5326 else
5327 {
5328 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5329 check_partial(common, FALSE);
5330 }
5331 return cc;
5332
5333 case OP_DOLLM:
5334 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5335 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5336 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5337 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5338 check_partial(common, FALSE);
5339 jump[0] = JUMP(SLJIT_JUMP);
5340 JUMPHERE(jump[1]);
5341
5342 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5343 {
5344 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5345 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5346 if (common->mode == JIT_COMPILE)
5347 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5348 else
5349 {
5350 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5351 /* STR_PTR = STR_END - IN_UCHARS(1) */
5352 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5353 check_partial(common, TRUE);
5354 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5355 JUMPHERE(jump[1]);
5356 }
5357
5358 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5359 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5360 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5361 }
5362 else
5363 {
5364 peek_char(common, common->nlmax);
5365 check_newlinechar(common, common->nltype, backtracks, FALSE);
5366 }
5367 JUMPHERE(jump[0]);
5368 return cc;
5369
5370 case OP_CHAR:
5371 case OP_CHARI:
5372 length = 1;
5373 #ifdef SUPPORT_UTF
5374 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5375 #endif
5376 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5377 {
5378 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5379 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5380
5381 context.length = IN_UCHARS(length);
5382 context.sourcereg = -1;
5383 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5384 context.ucharptr = 0;
5385 #endif
5386 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5387 }
5388
5389 detect_partial_match(common, backtracks);
5390 #ifdef SUPPORT_UTF
5391 if (common->utf)
5392 {
5393 GETCHAR(c, cc);
5394 }
5395 else
5396 #endif
5397 c = *cc;
5398
5399 if (type == OP_CHAR || !char_has_othercase(common, cc))
5400 {
5401 read_char_range(common, c, c, FALSE);
5402 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5403 return cc + length;
5404 }
5405 oc = char_othercase(common, c);
5406 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5407 bit = c ^ oc;
5408 if (is_powerof2(bit))
5409 {
5410 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5411 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5412 return cc + length;
5413 }
5414 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5415 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5416 JUMPHERE(jump[0]);
5417 return cc + length;
5418
5419 case OP_NOT:
5420 case OP_NOTI:
5421 detect_partial_match(common, backtracks);
5422 length = 1;
5423 #ifdef SUPPORT_UTF
5424 if (common->utf)
5425 {
5426 #ifdef COMPILE_PCRE8
5427 c = *cc;
5428 if (c < 128)
5429 {
5430 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5431 if (type == OP_NOT || !char_has_othercase(common, cc))
5432 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5433 else
5434 {
5435 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5436 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5437 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5438 }
5439 /* Skip the variable-length character. */
5440 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5441 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5442 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5444 JUMPHERE(jump[0]);
5445 return cc + 1;
5446 }
5447 else
5448 #endif /* COMPILE_PCRE8 */
5449 {
5450 GETCHARLEN(c, cc, length);
5451 }
5452 }
5453 else
5454 #endif /* SUPPORT_UTF */
5455 c = *cc;
5456
5457 if (type == OP_NOT || !char_has_othercase(common, cc))
5458 {
5459 read_char_range(common, c, c, TRUE);
5460 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5461 }
5462 else
5463 {
5464 oc = char_othercase(common, c);
5465 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5466 bit = c ^ oc;
5467 if (is_powerof2(bit))
5468 {
5469 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5470 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5471 }
5472 else
5473 {
5474 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5475 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5476 }
5477 }
5478 return cc + length;
5479
5480 case OP_CLASS:
5481 case OP_NCLASS:
5482 detect_partial_match(common, backtracks);
5483
5484 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5485 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5486 read_char_range(common, 0, bit, type == OP_NCLASS);
5487 #else
5488 read_char_range(common, 0, 255, type == OP_NCLASS);
5489 #endif
5490
5491 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5492 return cc + 32 / sizeof(pcre_uchar);
5493
5494 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5495 jump[0] = NULL;
5496 if (common->utf)
5497 {
5498 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5499 if (type == OP_CLASS)
5500 {
5501 add_jump(compiler, backtracks, jump[0]);
5502 jump[0] = NULL;
5503 }
5504 }
5505 #elif !defined COMPILE_PCRE8
5506 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5507 if (type == OP_CLASS)
5508 {
5509 add_jump(compiler, backtracks, jump[0]);
5510 jump[0] = NULL;
5511 }
5512 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5513
5514 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5515 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5516 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5517 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5518 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5519 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5520
5521 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5522 if (jump[0] != NULL)
5523 JUMPHERE(jump[0]);
5524 #endif
5525
5526 return cc + 32 / sizeof(pcre_uchar);
5527
5528 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5529 case OP_XCLASS:
5530 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5531 return cc + GET(cc, 0) - 1;
5532 #endif
5533
5534 case OP_REVERSE:
5535 length = GET(cc, 0);
5536 if (length == 0)
5537 return cc + LINK_SIZE;
5538 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5539 #ifdef SUPPORT_UTF
5540 if (common->utf)
5541 {
5542 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5543 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5544 label = LABEL();
5545 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5546 skip_char_back(common);
5547 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5548 JUMPTO(SLJIT_C_NOT_ZERO, label);
5549 }
5550 else
5551 #endif
5552 {
5553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5554 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5555 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5556 }
5557 check_start_used_ptr(common);
5558 return cc + LINK_SIZE;
5559 }
5560 SLJIT_ASSERT_STOP();
5561 return cc;
5562 }
5563
5564 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5565 {
5566 /* This function consumes at least one input character. */
5567 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5568 DEFINE_COMPILER;
5569 pcre_uchar *ccbegin = cc;
5570 compare_context context;
5571 int size;
5572
5573 context.length = 0;
5574 do
5575 {
5576 if (cc >= ccend)
5577 break;
5578
5579 if (*cc == OP_CHAR)
5580 {
5581 size = 1;
5582 #ifdef SUPPORT_UTF
5583 if (common->utf && HAS_EXTRALEN(cc[1]))
5584 size += GET_EXTRALEN(cc[1]);
5585 #endif
5586 }
5587 else if (*cc == OP_CHARI)
5588 {
5589 size = 1;
5590 #ifdef SUPPORT_UTF
5591 if (common->utf)
5592 {
5593 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5594 size = 0;
5595 else if (HAS_EXTRALEN(cc[1]))
5596 size += GET_EXTRALEN(cc[1]);
5597 }
5598 else
5599 #endif
5600 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5601 size = 0;
5602 }
5603 else
5604 size = 0;
5605
5606 cc += 1 + size;
5607 context.length += IN_UCHARS(size);
5608 }
5609 while (size > 0 && context.length <= 128);
5610
5611 cc = ccbegin;
5612 if (context.length > 0)
5613 {
5614 /* We have a fixed-length byte sequence. */
5615 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5616 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5617
5618 context.sourcereg = -1;
5619 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5620 context.ucharptr = 0;
5621 #endif
5622 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5623 return cc;
5624 }
5625
5626 /* A non-fixed length character will be checked if length == 0. */
5627 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5628 }
5629
5630 /* Forward definitions. */
5631 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5632 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5633
/* PUSH_BACKTRACK allocates a backtrack record of "size" bytes with
sljit_alloc_memory, zero-fills it and links it in front of parent->top.

The macro is not self-contained: it reads and writes the local variables
"compiler", "parent" and "backtrack" of the function that expands it.

  size     record size in bytes (expanded twice, so it must have no side effects)
  ccstart  value stored in backtrack->cc
  error    value returned from the enclosing function when the compiler
           reports an error after the allocation */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void: on a
compiler error the function returns without a value. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5661
5662 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5663 {
5664 /* The OVECTOR offset goes to TMP2. */
5665 DEFINE_COMPILER;
5666 int count = GET2(cc, 1 + IMM2_SIZE);
5667 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5668 unsigned int offset;
5669 jump_list *found = NULL;
5670
5671 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5672
5673 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5674
5675 count--;
5676 while (count-- > 0)
5677 {
5678 offset = GET2(slot, 0) << 1;
5679 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5680 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5681 slot += common->name_entry_size;
5682 }
5683
5684 offset = GET2(slot, 0) << 1;
5685 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5686 if (backtracks != NULL && !common->jscript_compat)
5687 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5688
5689 set_jumps(found, LABEL());
5690 }
5691
5692 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5693 {
5694 DEFINE_COMPILER;
5695 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5696 int offset = 0;
5697 struct sljit_jump *jump = NULL;
5698 struct sljit_jump *partial;
5699 struct sljit_jump *nopartial;
5700
5701 if (ref)
5702 {
5703 offset = GET2(cc, 1) << 1;
5704 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5705 /* OVECTOR(1) contains the "string begin - 1" constant. */
5706 if (withchecks && !common->jscript_compat)
5707 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5708 }
5709 else
5710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5711
5712 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5713 if (common->utf && *cc == OP_REFI)
5714 {
5715 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5716 if (ref)
5717 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5718 else
5719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5720
5721 if (withchecks)
5722 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5723
5724 /* Needed to save important temporary registers. */
5725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5726 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5728 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5729 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5730 if (common->mode == JIT_COMPILE)
5731 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5732 else
5733 {
5734 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5735 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5736 check_partial(common, FALSE);
5737 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5738 JUMPHERE(nopartial);
5739 }
5740 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5741 }
5742 else
5743 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5744 {
5745 if (ref)
5746 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5747 else
5748 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5749
5750 if (withchecks)
5751 jump = JUMP(SLJIT_C_ZERO);
5752
5753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5754 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5755 if (common->mode == JIT_COMPILE)
5756 add_jump(compiler, backtracks, partial);
5757
5758 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5759 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5760
5761 if (common->mode != JIT_COMPILE)
5762 {
5763 nopartial = JUMP(SLJIT_JUMP);
5764 JUMPHERE(partial);
5765 /* TMP2 -= STR_END - STR_PTR */
5766 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5767 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5768 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5769 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5770 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5771 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5772 JUMPHERE(partial);
5773 check_partial(common, FALSE);
5774 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5775 JUMPHERE(nopartial);
5776 }
5777 }
5778
5779 if (jump != NULL)
5780 {
5781 if (emptyfail)
5782 add_jump(compiler, backtracks, jump);
5783 else
5784 JUMPHERE(jump);
5785 }
5786 }
5787
5788 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5789 {
5790 DEFINE_COMPILER;
5791 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5792 backtrack_common *backtrack;
5793 pcre_uchar type;
5794 int offset = 0;
5795 struct sljit_label *label;
5796 struct sljit_jump *zerolength;
5797 struct sljit_jump *jump = NULL;
5798 pcre_uchar *ccbegin = cc;
5799 int min = 0, max = 0;
5800 BOOL minimize;
5801
5802 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5803
5804 if (ref)
5805 offset = GET2(cc, 1) << 1;
5806 else
5807 cc += IMM2_SIZE;
5808 type = cc[1 + IMM2_SIZE];
5809
5810 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5811 minimize = (type & 0x1) != 0;
5812 switch(type)
5813 {
5814 case OP_CRSTAR:
5815 case OP_CRMINSTAR:
5816 min = 0;
5817 max = 0;
5818 cc += 1 + IMM2_SIZE + 1;
5819 break;
5820 case OP_CRPLUS:
5821 case OP_CRMINPLUS:
5822 min = 1;
5823 max = 0;
5824 cc += 1 + IMM2_SIZE + 1;
5825 break;
5826 case OP_CRQUERY:
5827 case OP_CRMINQUERY:
5828 min = 0;
5829 max = 1;
5830 cc += 1 + IMM2_SIZE + 1;
5831 break;
5832 case OP_CRRANGE:
5833 case OP_CRMINRANGE:
5834 min = GET2(cc, 1 + IMM2_SIZE + 1);
5835 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5836 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5837 break;
5838 default:
5839 SLJIT_ASSERT_STOP();
5840 break;
5841 }
5842
5843 if (!minimize)
5844 {
5845 if (min == 0)
5846 {
5847 allocate_stack(common, 2);
5848 if (ref)
5849 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5850 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5852 /* Temporary release of STR_PTR. */
5853 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5854 /* Handles both invalid and empty cases. Since the minimum repeat,
5855 is zero the invalid case is basically the same as an empty case. */
5856 if (ref)
5857 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5858 else
5859 {
5860 compile_dnref_search(common, ccbegin, NULL);
5861 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5862 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5863 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5864 }
5865 /* Restore if not zero length. */
5866 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5867 }
5868 else
5869 {
5870 allocate_stack(common, 1);
5871 if (ref)
5872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5873 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5874 if (ref)
5875 {
5876 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5877 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5878 }
5879 else
5880 {
5881 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5884 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5885 }
5886 }
5887
5888 if (min > 1 || max > 1)
5889 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5890
5891 label = LABEL();
5892 if (!ref)
5893 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5894 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5895
5896 if (min > 1 || max > 1)
5897 {
5898 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5899 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5900 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5901 if (min > 1)
5902 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5903 if (max > 1)
5904 {
5905 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5906 allocate_stack(common, 1);
5907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5908 JUMPTO(SLJIT_JUMP, label);
5909 JUMPHERE(jump);
5910 }
5911 }
5912
5913 if (max == 0)
5914 {
5915 /* Includes min > 1 case as well. */
5916 allocate_stack(common, 1);
5917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5918 JUMPTO(SLJIT_JUMP, label);
5919 }
5920
5921 JUMPHERE(zerolength);
5922 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5923
5924 count_match(common);
5925 return cc;
5926 }
5927
5928 allocate_stack(common, ref ? 2 : 3);
5929 if (ref)
5930 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5932 if (type != OP_CRMINSTAR)
5933 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5934
5935 if (min == 0)
5936 {
5937 /* Handles both invalid and empty cases. Since the minimum repeat,
5938 is zero the invalid case is basically the same as an empty case. */
5939 if (ref)
5940 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5941 else
5942 {
5943 compile_dnref_search(common, ccbegin, NULL);
5944 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5945 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5946 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5947 }
5948 /* Length is non-zero, we can match real repeats. */
5949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5950 jump = JUMP(SLJIT_JUMP);
5951 }
5952 else
5953 {
5954 if (ref)
5955 {
5956 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5957 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5958 }
5959 else
5960 {
5961 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5962 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5964 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5965 }
5966 }
5967
5968 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5969 if (max > 0)
5970 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5971
5972 if (!ref)
5973 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5974 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5976
5977 if (min > 1)
5978 {
5979 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5980 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5982 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5983 }
5984 else if (max > 0)
5985 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5986
5987 if (jump != NULL)
5988 JUMPHERE(jump);
5989 JUMPHERE(zerolength);
5990
5991 count_match(common);
5992 return cc;
5993 }
5994
5995 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5996 {
5997 DEFINE_COMPILER;
5998 backtrack_common *backtrack;
5999 recurse_entry *entry = common->entries;
6000 recurse_entry *prev = NULL;
6001 sljit_sw start = GET(cc, 1);
6002 pcre_uchar *start_cc;
6003 BOOL needs_control_head;
6004
6005 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6006
6007 /* Inlining simple patterns. */
6008 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6009 {
6010 start_cc = common->start + start;
6011 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6012 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6013 return cc + 1 + LINK_SIZE;
6014 }
6015
6016 while (entry != NULL)
6017 {
6018 if (entry->start == start)
6019 break;
6020 prev = entry;
6021 entry = entry->next;
6022 }
6023
6024 if (entry == NULL)
6025 {
6026 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6027 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6028 return NULL;
6029 entry->next = NULL;
6030 entry->entry = NULL;
6031 entry->calls = NULL;
6032 entry->start = start;
6033
6034 if (prev != NULL)
6035 prev->next = entry;
6036 else
6037 common->entries = entry;
6038 }
6039
6040 if (common->has_set_som && common->mark_ptr != 0)
6041 {
6042 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6043 allocate_stack(common, 2);
6044 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6046 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6047 }
6048 else if (common->has_set_som || common->mark_ptr != 0)
6049 {
6050 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6051 allocate_stack(common, 1);
6052 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6053 }
6054
6055 if (entry->entry == NULL)
6056 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6057 else
6058 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6059 /* Leave if the match is failed. */
6060 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6061 return cc + 1 + LINK_SIZE;
6062 }
6063
6064 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6065 {
6066 const pcre_uchar *begin = arguments->begin;
6067 int *offset_vector = arguments->offsets;
6068 int offset_count = arguments->offset_count;
6069 int i;
6070
6071 if (PUBL(callout) == NULL)
6072 return 0;
6073
6074 callout_block->version = 2;
6075 callout_block->callout_data = arguments->callout_data;
6076
6077 /* Offsets in subject. */
6078 callout_block->subject_length = arguments->end - arguments->begin;
6079 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6080 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6081 #if defined COMPILE_PCRE8
6082 callout_block->subject = (PCRE_SPTR)begin;
6083 #elif defined COMPILE_PCRE16
6084 callout_block->subject = (PCRE_SPTR16)begin;
6085 #elif defined COMPILE_PCRE32
6086 callout_block->subject = (PCRE_SPTR32)begin;
6087 #endif
6088
6089 /* Convert and copy the JIT offset vector to the offset_vector array. */
6090 callout_block->capture_top = 0;
6091 callout_block->offset_vector = offset_vector;
6092 for (i = 2; i < offset_count; i += 2)
6093 {
6094 offset_vector[i] = jit_ovector[i] - begin;
6095 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6096 if (jit_ovector[i] >= begin)
6097 callout_block->capture_top = i;
6098 }
6099
6100 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6101 if (offset_count > 0)
6102 offset_vector[0] = -1;
6103 if (offset_count > 1)
6104 offset_vector[1] = -1;
6105 return (*PUBL(callout))(callout_block);
6106 }
6107
6108 /* Aligning to 8 byte. */
6109 #define CALLOUT_ARG_SIZE \
6110 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6111
6112 #define CALLOUT_ARG_OFFSET(arg) \
6113 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6114
6115 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6116 {
6117 DEFINE_COMPILER;
6118 backtrack_common *backtrack;
6119
6120 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6121
6122 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6123
6124 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6125 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6126 SLJIT_ASSERT(common->capture_last_ptr != 0);
6127 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6128 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6129
6130 /* These pointer sized fields temporarly stores internal variables. */
6131 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6132 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6133 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6134
6135 if (common->mark_ptr != 0)
6136 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6137 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6138 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6140
6141 /* Needed to save important temporary registers. */
6142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6143 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6144 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6145 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6146 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6147 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6148 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6149
6150 /* Check return value. */
6151 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6152 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6153 if (common->forced_quit_label == NULL)
6154 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6155 else
6156 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6157 return cc + 2 + 2 * LINK_SIZE;
6158 }
6159
6160 #undef CALLOUT_ARG_SIZE
6161 #undef CALLOUT_ARG_OFFSET
6162
/* Compiles the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO or
OP_BRAMINZERO.

Arguments:
  common       compiler state; the accept, quit, then_trap and positive_assert
               related members are saved on entry and restored before every
               return
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    assert backtrack; framesize and private_data_ptr are stored
               into it, matchingpath is set for OP_BRAZERO and used as a jump
               target for OP_BRAMINZERO
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns: the address after the end of the last alternative
(cc + 1 + LINK_SIZE), or NULL if a compiler error is reported while an
alternative is compiled. */
6163 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6164 {
6165 DEFINE_COMPILER;
6166 int framesize;
6167 int extrasize;
6168 BOOL needs_control_head;
6169 int private_data_ptr;
6170 backtrack_common altbacktrack;
6171 pcre_uchar *ccbegin;
6172 pcre_uchar opcode;
6173 pcre_uchar bra = OP_BRA;
6174 jump_list *tmp = NULL;
6175 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6176 jump_list **found;
6177 /* Saving previous accept variables. */
6178 BOOL save_local_exit = common->local_exit;
6179 BOOL save_positive_assert = common->positive_assert;
6180 then_trap_backtrack *save_then_trap = common->then_trap;
6181 struct sljit_label *save_quit_label = common->quit_label;
6182 struct sljit_label *save_accept_label = common->accept_label;
6183 jump_list *save_quit = common->quit;
6184 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6185 jump_list *save_accept = common->accept;
6186 struct sljit_jump *jump;
6187 struct sljit_jump *brajump = NULL;
6188
6189 /* Assert captures then. */
6190 common->then_trap = NULL;
6191
/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in 'bra' and skipped. */
6192 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6193 {
6194 SLJIT_ASSERT(!conditional);
6195 bra = *cc;
6196 cc++;
6197 }
6198 private_data_ptr = PRIVATE_DATA(cc);
6199 SLJIT_ASSERT(private_data_ptr != 0);
6200 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6201 backtrack->framesize = framesize;
6202 backtrack->private_data_ptr = private_data_ptr;
6203 opcode = *cc;
6204 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive asserts a matching alternative jumps to the local 'tmp' list
(resolved at "Assert is successful" below). For negative asserts a matching
alternative jumps straight to the failure target. */
6205 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative, cc is its end. */
6206 ccbegin = cc;
6207 cc += GET(cc, 1);
6208
6209 if (bra == OP_BRAMINZERO)
6210 {
6211 /* This is a braminzero backtrack path. */
6212 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6213 free_stack(common, 1);
6214 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6215 }
6216
/* Without a frame only extrasize words are pushed: STR_PTR at STACK(0) and,
when needed, the previous control head at STACK(1). For no_frame the current
STACK_TOP is stored in the private data slot first. */
6217 if (framesize < 0)
6218 {
6219 extrasize = needs_control_head ? 2 : 1;
6220 if (framesize == no_frame)
6221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6222 allocate_stack(common, extrasize);
6223 if (needs_control_head)
6224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6226 if (needs_control_head)
6227 {
6228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6230 }
6231 }
/* With a frame: STR_PTR at STACK(0), followed by the previous control head
(only if needed) and the previous private data value. The private data slot
is updated to STACK_TOP - (framesize + extrasize) words, then init_frame()
is called for the frame part. */
6232 else
6233 {
6234 extrasize = needs_control_head ? 3 : 2;
6235 allocate_stack(common, framesize + extrasize);
6236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6237 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6239 if (needs_control_head)
6240 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6242 if (needs_control_head)
6243 {
6244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6247 }
6248 else
6249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6250 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6251 }
6252
/* altbacktrack is a local backtrack root, reused for every alternative. */
6253 memset(&altbacktrack, 0, sizeof(backtrack_common));
6254 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6255 {
6256 /* Negative assert is stronger than positive assert. */
6257 common->local_exit = TRUE;
6258 common->quit_label = NULL;
6259 common->quit = NULL;
6260 common->positive_assert = FALSE;
6261 }
6262 else
6263 common->positive_assert = TRUE;
6264 common->positive_assert_quit = NULL;
6265
/* One iteration per alternative; the loop is left at the first opcode after
an alternative which is not OP_ALT. */
6266 while (1)
6267 {
6268 common->accept_label = NULL;
6269 common->accept = NULL;
6270 altbacktrack.top = NULL;
6271 altbacktrack.topbacktracks = NULL;
6272
/* Every alternative after the first restarts from the STR_PTR saved at
STACK(0). */
6273 if (*ccbegin == OP_ALT)
6274 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6275
6276 altbacktrack.cc = ccbegin;
6277 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6278 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6279 {
/* Put back the state saved on entry before reporting the error. */
6280 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6281 {
6282 common->local_exit = save_local_exit;
6283 common->quit_label = save_quit_label;
6284 common->quit = save_quit;
6285 }
6286 common->positive_assert = save_positive_assert;
6287 common->then_trap = save_then_trap;
6288 common->accept_label = save_accept_label;
6289 common->positive_assert_quit = save_positive_assert_quit;
6290 common->accept = save_accept;
6291 return NULL;
6292 }
/* Accept jumps collected while compiling this alternative land here. */
6293 common->accept_label = LABEL();
6294 if (common->accept != NULL)
6295 set_jumps(common->accept, common->accept_label);
6296
6297 /* Reset stack. */
6298 if (framesize < 0)
6299 {
6300 if (framesize == no_frame)
6301 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6302 else
6303 free_stack(common, extrasize);
6304 if (needs_control_head)
6305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6306 }
6307 else
6308 {
6309 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6310 {
6311 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6312 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6313 if (needs_control_head)
6314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6315 }
6316 else
6317 {
6318 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6319 if (needs_control_head)
6320 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6321 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6322 }
6323 }
6324
6325 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6326 {
6327 /* We know that STR_PTR was stored on the top of the stack. */
6328 if (conditional)
6329 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6330 else if (bra == OP_BRAZERO)
6331 {
6332 if (framesize < 0)
6333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6334 else
6335 {
6336 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6337 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6339 }
6340 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6342 }
6343 else if (framesize >= 0)
6344 {
6345 /* For OP_BRA and OP_BRAMINZERO. */
6346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6347 }
6348 }
/* The alternative matched: leave through the 'found' list. */
6349 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6350
6351 compile_backtrackingpath(common, altbacktrack.top);
6352 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6353 {
/* Same restore sequence as above, before reporting the error. */
6354 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6355 {
6356 common->local_exit = save_local_exit;
6357 common->quit_label = save_quit_label;
6358 common->quit = save_quit;
6359 }
6360 common->positive_assert = save_positive_assert;
6361 common->then_trap = save_then_trap;
6362 common->accept_label = save_accept_label;
6363 common->positive_assert_quit = save_positive_assert_quit;
6364 common->accept = save_accept;
6365 return NULL;
6366 }
/* Failures of this alternative continue here: at the next alternative, or at
the code emitted after the loop. */
6367 set_jumps(altbacktrack.topbacktracks, LABEL());
6368
6369 if (*cc != OP_ALT)
6370 break;
6371
6372 ccbegin = cc;
6373 cc += GET(cc, 1);
6374 }
6375
6376 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6377 {
6378 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6379 /* Makes the check less complicated below. */
6380 common->positive_assert_quit = common->quit;
6381 }
6382
6383 /* None of them matched. */
6384 if (common->positive_assert_quit != NULL)
6385 {
/* The quit jumps get their own stack reset code; the normal fall-through
path jumps over it. */
6386 jump = JUMP(SLJIT_JUMP);
6387 set_jumps(common->positive_assert_quit, LABEL());
6388 SLJIT_ASSERT(framesize != no_stack);
6389 if (framesize < 0)
6390 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6391 else
6392 {
6393 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6394 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6395 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6396 }
6397 JUMPHERE(jump);
6398 }
6399
/* Reload the control head from STACK(1), where it was saved above. */
6400 if (needs_control_head)
6401 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6402
6403 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6404 {
6405 /* Assert is failed. */
6406 if (conditional || bra == OP_BRAZERO)
6407 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6408
6409 if (framesize < 0)
6410 {
6411 /* The topmost item should be 0. */
6412 if (bra == OP_BRAZERO)
6413 {
6414 if (extrasize == 2)
6415 free_stack(common, 1);
6416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6417 }
6418 else
6419 free_stack(common, extrasize);
6420 }
6421 else
6422 {
6423 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6424 /* The topmost item should be 0. */
6425 if (bra == OP_BRAZERO)
6426 {
6427 free_stack(common, framesize + extrasize - 1);
6428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6429 }
6430 else
6431 free_stack(common, framesize + extrasize);
6432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6433 }
/* For OP_BRAZERO this jump is bound to backtrack->matchingpath below; in all
other cases it is added to the failure target. */
6434 jump = JUMP(SLJIT_JUMP);
6435 if (bra != OP_BRAZERO)
6436 add_jump(compiler, target, jump);
6437
6438 /* Assert is successful. */
6439 set_jumps(tmp, LABEL());
6440 if (framesize < 0)
6441 {
6442 /* We know that STR_PTR was stored on the top of the stack. */
6443 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6444 /* Keep the STR_PTR on the top of the stack. */
6445 if (bra == OP_BRAZERO)
6446 {
6447 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6448 if (extrasize == 2)
6449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6450 }
6451 else if (bra == OP_BRAMINZERO)
6452 {
6453 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6455 }
6456 }
6457 else
6458 {
6459 if (bra == OP_BRA)
6460 {
6461 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6462 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6463 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6464 }
6465 else
6466 {
6467 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6468 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6469 if (extrasize == 2)
6470 {
6471 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6472 if (bra == OP_BRAMINZERO)
6473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6474 }
6475 else
6476 {
6477 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6479 }
6480 }
6481 }
6482
6483 if (bra == OP_BRAZERO)
6484 {
6485 backtrack->matchingpath = LABEL();
6486 SET_LABEL(jump, backtrack->matchingpath);
6487 }
6488 else if (bra == OP_BRAMINZERO)
6489 {
6490 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6491 JUMPHERE(brajump);
6492 if (framesize >= 0)
6493 {
6494 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6495 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6497 }
6498 set_jumps(backtrack->common.topbacktracks, LABEL());
6499 }
6500 }
6501 else
6502 {
6503 /* AssertNot is successful. */
6504 if (framesize < 0)
6505 {
6506 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6507 if (bra != OP_BRA)
6508 {
6509 if (extrasize == 2)
6510 free_stack(common, 1);
6511 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6512 }
6513 else
6514 free_stack(common, extrasize);
6515 }
6516 else
6517 {
6518 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6520 /* The topmost item should be 0. */
6521 if (bra != OP_BRA)
6522 {
6523 free_stack(common, framesize + extrasize - 1);
6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6525 }
6526 else
6527 free_stack(common, framesize + extrasize);
6528 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6529 }
6530
6531 if (bra == OP_BRAZERO)
6532 backtrack->matchingpath = LABEL();
6533 else if (bra == OP_BRAMINZERO)
6534 {
6535 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6536 JUMPHERE(brajump);
6537 }
6538
/* For the zero-repeat forms the jumps of matched alternatives are resolved
here and the list is emptied. */
6539 if (bra != OP_BRA)
6540 {
6541 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6542 set_jumps(backtrack->common.topbacktracks, LABEL());
6543 backtrack->common.topbacktracks = NULL;
6544 }
6545 }
6546
/* Restore the state saved on entry. */
6547 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6548 {
6549 common->local_exit = save_local_exit;
6550 common->quit_label = save_quit_label;
6551 common->quit = save_quit;
6552 }
6553 common->positive_assert = save_positive_assert;
6554 common->then_trap = save_then_trap;
6555 common->accept_label = save_accept_label;
6556 common->positive_assert_quit = save_positive_assert_quit;
6557 common->accept = save_accept;
6558 return cc + 1 + LINK_SIZE;
6559 }
6560
6561 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6562 {
6563 DEFINE_COMPILER;
6564 int stacksize;
6565
6566 if (framesize < 0)
6567 {
6568 if (framesize == no_frame)
6569 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6570 else
6571 {
6572 stacksize = needs_control_head ? 1 : 0;
6573 if (ket != OP_KET || has_alternatives)
6574 stacksize++;
6575 free_stack(common, stacksize);
6576 }
6577
6578 if (needs_control_head)
6579 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6580
6581 /* TMP2 which is set here used by OP_KETRMAX below. */
6582 if (ket == OP_KETRMAX)
6583 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6584 else if (ket == OP_KETRMIN)
6585 {
6586 /* Move the STR_PTR to the private_data_ptr. */
6587 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6588 }
6589 }
6590 else
6591 {
6592 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6593 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6594 if (needs_control_head)
6595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6596
6597 if (ket == OP_KETRMAX)
6598 {
6599 /* TMP2 which is set here used by OP_KETRMAX below. */
6600 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6601 }
6602 }
6603 if (needs_control_head)
6604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6605 }
6606
6607 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6608 {
6609 DEFINE_COMPILER;
6610
6611 if (common->capture_last_ptr != 0)
6612 {
6613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6614 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6615 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6616 stacksize++;
6617 }
6618 if (common->optimized_cbracket[offset >> 1] == 0)
6619 {
6620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6623 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6624 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6625 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6627 stacksize += 2;
6628 }
6629 return stacksize;
6630 }
6631
6632 /*
6633 Handling bracketed expressions is probably the most complex part.
6634
6635 Stack layout naming characters:
6636 S - Push the current STR_PTR
6637 0 - Push a 0 (NULL)
6638 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6639 before the next alternative. Not pushed if there are no alternatives.
6640 M - Any values pushed by the current alternative. Can be empty, or anything.
6641 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6642 L - Push the previous local (pointed by localptr) to the stack
5643 () - optional values stored on the stack
5644 ()* - optional, can be stored multiple times
6645
6646 The following list shows the regular expression templates, their PCRE byte codes
6647 and stack layout supported by pcre-sljit.
6648
6649 (?:) OP_BRA | OP_KET A M
6650 () OP_CBRA | OP_KET C M
6651 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6652 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6653 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6654 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6655 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6656 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6657 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6658 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6659 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6660 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6661 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6662 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6663 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6664 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6665 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6666 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6667 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6668 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6669 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6670 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6671
6672
6673 Stack layout naming characters:
6674 A - Push the alternative index (starting from 0) on the stack.
5675 Not pushed if there are no alternatives.
6676 M - Any values pushed by the current alternative. Can be empty, or anything.
6677
6678 The next list shows the possible content of a bracket:
6679 (|) OP_*BRA | OP_ALT ... M A
6680 (?()|) OP_*COND | OP_ALT M A
6681 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6682 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6683 Or nothing, if trace is unnecessary
6684 */
6685
6686 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6687 {
6688 DEFINE_COMPILER;
6689 backtrack_common *backtrack;
6690 pcre_uchar opcode;
6691 int private_data_ptr = 0;
6692 int offset = 0;
6693 int i, stacksize;
6694 int repeat_ptr = 0, repeat_length = 0;
6695 int repeat_type = 0, repeat_count = 0;
6696 pcre_uchar *ccbegin;
6697 pcre_uchar *matchingpath;
6698 pcre_uchar *slot;
6699 pcre_uchar bra = OP_BRA;
6700 pcre_uchar ket;
6701 assert_backtrack *assert;
6702 BOOL has_alternatives;
6703 BOOL needs_control_head = FALSE;
6704 struct sljit_jump *jump;
6705 struct sljit_jump *skip;
6706 struct sljit_label *rmax_label = NULL;
6707 struct sljit_jump *braminzero = NULL;
6708
6709 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6710
6711 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6712 {
6713 bra = *cc;
6714 cc++;
6715 opcode = *cc;
6716 }
6717
6718 opcode = *cc;
6719 ccbegin = cc;
6720 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6721 ket = *matchingpath;
6722 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6723 {
6724 repeat_ptr = PRIVATE_DATA(matchingpath);
6725 repeat_length = PRIVATE_DATA(matchingpath + 1);
6726 repeat_type = PRIVATE_DATA(matchingpath + 2);
6727 repeat_count = PRIVATE_DATA(matchingpath + 3);
6728 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6729 if (repeat_type == OP_UPTO)
6730 ket = OP_KETRMAX;
6731 if (repeat_type == OP_MINUPTO)
6732 ket = OP_KETRMIN;
6733 }
6734
6735 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6736 {
6737 /* Drop this bracket_backtrack. */
6738 parent->top = backtrack->prev;
6739 return matchingpath + 1 + LINK_SIZE + repeat_length;
6740 }
6741
6742 matchingpath = ccbegin + 1 + LINK_SIZE;
6743 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6744