/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1434 - (show annotations)
Mon Jan 6 20:04:50 2014 UTC (5 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 327860 byte(s)
Error occurred while calculating annotation data.
JIT: Optimize brackets with more than four alternatives.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called as the matching path (expected path), and the other is called as
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block passed by pointer to a jit_function (see the jit_function
typedef in this file). Pointer members are grouped ahead of the narrower
integer members.
NOTE(review): the meaning of the individual members (subject pointers, output
vector, match options) is inferred from their names only - confirm against the
code that fills this structure in. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* Single byte members are kept together at the end of the structure. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping for generated code. The three arrays have one slot for each of
the JIT_NUMBER_OF_COMPILE_MODES compile modes.
NOTE(review): presumably slot N of each array describes the same compiled
variant (code address, its read-only data, its size) - confirm at the place
where the arrays are filled. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
/* User supplied callback and the opaque pointer stored next to it. */
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps (chained through next). */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list pairing a jump (start) with a label (quit).
NOTE(review): presumably start enters an out-of-line stub and quit is where
the stub returns to - confirm at the code that emits the stubs. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list pairing a label with a pointer to an sljit_uw.
NOTE(review): presumably the final address of label is written to *addr once
code generation is finished - confirm at the consumer of this list. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values.
NOTE(review): presumably used in place of a real frame size (the backtrack
structures in this file describe framesize as "less than 0" when no frame is
needed) - confirm at the use sites. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Tags with the values 0 and 1.
NOTE(review): presumably they identify the kind of an entry in the control
verb chain (mark versus then-trap) - confirm at the use sites. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer to a function that uses the SLJIT_CALL calling convention, takes a
jit_arguments block and returns an int.
NOTE(review): presumably the entry point of the generated matcher - confirm. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
/* Common header of every *_backtrack structure declared below; each of them
embeds it as its first member (named common). */
223 typedef struct backtrack_common {
224 /* Concatenation stack: link to the previous record and the jump list that
belongs to that direction. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators): link to the topmost nested
record and its jump list. */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack record that extends backtrack_common with assertion data (it is
also referenced from bracket_backtrack through its u.assert member). */
234 typedef struct assert_backtrack {
235 backtrack_common common;
/* Jump list. NOTE(review): presumably collects the jumps taken when the
assertion is used as a condition and fails - confirm. */
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack record for brackets. The union u holds exactly one of three
things; which one is valid depends on the bracket opcode, as the member
comments below describe. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack record that extends backtrack_common with three integers.
NOTE(review): judging by the name it serves the possessive (*POS) bracket
opcodes - confirm at the code that allocates it. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack record that adds a single label to backtrack_common.
NOTE(review): judging by the name it serves OP_BRAMINZERO - confirm. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
/* Label on the matching path. */
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack record for iterators: backtrack_common plus the label to jump to
for the next iteration. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
/* Node of a singly linked list (chained through next) that describes one
recursion target. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
/* Backtrack record that adds a single flag to backtrack_common.
NOTE(review): presumably inlined_pattern tells whether the recursed pattern
was emitted in place instead of being called - confirm at the use sites. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
/* Opcode value private to this file: it equals OP_TABLE_LENGTH.
NOTE(review): presumably chosen because it is one past the last real opcode,
so it cannot collide with one - confirm against the opcode table. */
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack record that extends backtrack_common with (*THEN) bookkeeping. */
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
/* State shared by the code generator functions in this file; they receive it
through a parameter named common, which several macros of this file (for
example OVECTOR_START, PRIVATE_DATA and DEFINE_COMPILER) also rely on. */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
359
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* \K is found in the pattern. */
366 BOOL has_set_som;
367 /* (*SKIP:arg) is found in the pattern. */
368 BOOL has_skip_arg;
369 /* (*THEN) is found in the pattern. */
370 BOOL has_then;
371 /* Needs to know the start position anytime. */
372 BOOL needs_start_ptr;
373 /* Currently in recurse or negative assert. */
374 BOOL local_exit;
375 /* Currently in a positive assert. */
376 BOOL positive_assert;
377 /* Newline control. */
378 int nltype;
379 pcre_uint32 nlmax;
380 pcre_uint32 nlmin;
381 int newline;
382 int bsr_nltype;
383 pcre_uint32 bsr_nlmax;
384 pcre_uint32 bsr_nlmin;
385 /* Dollar endonly. */
386 int endonly;
387 /* Tables. */
388 sljit_sw ctypes;
389 /* Named capturing brackets. */
390 pcre_uchar *name_table;
391 sljit_sw name_count;
392 sljit_sw name_entry_size;
393
394 /* Labels and jump lists. */
395 struct sljit_label *partialmatchlabel;
396 struct sljit_label *quit_label;
397 struct sljit_label *forced_quit_label;
398 struct sljit_label *accept_label;
399 stub_list *stubs;
400 label_addr_list *label_addrs;
401 recurse_entry *entries;
402 recurse_entry *currententry;
403 jump_list *partialmatch;
404 jump_list *quit;
405 jump_list *positive_assert_quit;
406 jump_list *forced_quit;
407 jump_list *accept;
408 jump_list *calllimit;
409 jump_list *stackalloc;
410 jump_list *revertframes;
411 jump_list *wordboundary;
412 jump_list *anynewline;
413 jump_list *hspace;
414 jump_list *vspace;
415 jump_list *casefulcmp;
416 jump_list *caselesscmp;
417 jump_list *reset_match;
418 BOOL jscript_compat;
/* The members below exist only in builds with UTF / UCP support. */
419 #ifdef SUPPORT_UTF
420 BOOL utf;
421 #ifdef SUPPORT_UCP
422 BOOL use_ucp;
423 #endif
424 #ifdef COMPILE_PCRE8
425 jump_list *utfreadchar;
426 jump_list *utfreadchar16;
427 jump_list *utfreadtype8;
428 #endif
429 #endif /* SUPPORT_UTF */
430 #ifdef SUPPORT_UCP
431 jump_list *getucd;
432 #endif
433 } compiler_common;
434
435 /* For byte_sequence_compare. */
436
/* Context structure; length and sourcereg are always present, the remaining
members exist only when SLJIT_UNALIGNED is defined and non-zero. The unions
c and oc have the same layout: the same storage viewed as an int, a 16 bit
value, or an array of code units whose element type and count depend on the
compiled library width (4 x 8 bit, 2 x 16 bit or 1 x 32 bit).
NOTE(review): presumably c holds the characters to compare and oc their
other-case variants - confirm in byte_sequence_compare. */
437 typedef struct compare_context {
438 int length;
439 int sourcereg;
440 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
441 int ucharptr;
442 union {
443 sljit_si asint;
444 sljit_uh asushort;
445 #if defined COMPILE_PCRE8
446 sljit_ub asbyte;
447 sljit_ub asuchars[4];
448 #elif defined COMPILE_PCRE16
449 sljit_uh asuchars[2];
450 #elif defined COMPILE_PCRE32
451 sljit_ui asuchars[1];
452 #endif
453 } c;
454 union {
455 sljit_si asint;
456 sljit_uh asushort;
457 #if defined COMPILE_PCRE8
458 sljit_ub asbyte;
459 sljit_ub asuchars[4];
460 #elif defined COMPILE_PCRE16
461 sljit_uh asuchars[2];
462 #elif defined COMPILE_PCRE32
463 sljit_ui asuchars[1];
464 #endif
465 } oc;
466 #endif
467 } compare_context;
468
469 /* Undefine sljit macros. CMP is defined again further down in this file as a
shortcut around sljit_emit_cmp. */
470 #undef CMP
471
472 /* Used for accessing the elements of the stack. Yields a byte offset:
element 0 is one sljit_sw below the base, and larger indexes lie further
down, so the result is negative for every i >= 0. */
473 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
474
/* Symbolic names for the sljit registers used by the generated code. Note
that the numbering is not sequential: TMP2 is SCRATCH_REG3, because
SCRATCH_REG2 is taken by STACK_TOP. */
475 #define TMP1 SLJIT_SCRATCH_REG1
476 #define TMP2 SLJIT_SCRATCH_REG3
477 #define TMP3 SLJIT_TEMPORARY_EREG2
478 #define STR_PTR SLJIT_SAVED_REG1
479 #define STR_END SLJIT_SAVED_REG2
480 #define STACK_TOP SLJIT_SCRATCH_REG2
481 #define STACK_LIMIT SLJIT_SAVED_REG3
482 #define ARGUMENTS SLJIT_SAVED_EREG1
483 #define COUNT_MATCH SLJIT_SAVED_EREG2
484 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
485
486 /* Local space layout. Every value below is a byte offset, expressed as a
word index multiplied by sizeof(sljit_sw). */
487 /* These two locals can be used by the current opcode. */
488 #define LOCALS0 (0 * sizeof(sljit_sw))
489 #define LOCALS1 (1 * sizeof(sljit_sw))
490 /* Two local variables for possessive quantifiers (char1 cannot use them). */
491 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
492 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
493 /* Max limit of recursions. */
494 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
495 /* The output vector is stored on the stack, and contains pointers
496 to characters. The vector data is divided into two groups: the first
497 group contains the start / end character pointers, and the second is
498 the start pointers when the end of the capturing group has not yet been
reached. The macros below expand to expressions that use a variable named
common, so they only work where a compiler_common pointer of that name is
in scope. */
499 #define OVECTOR_START (common->ovector_start)
500 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
501 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Looks up the private data offset recorded for the opcode at cc; the table
is indexed by the distance of cc from common->start. */
502 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
503
/* Selects the sljit move opcodes that match the code unit width of the
library being built: the _UB variants for the 8 bit library, _UH for the
16 bit one and _UI for the 32 bit one. Building without any of the three
COMPILE_PCRE* macros is a compile time error. */
504 #if defined COMPILE_PCRE8
505 #define MOV_UCHAR SLJIT_MOV_UB
506 #define MOVU_UCHAR SLJIT_MOVU_UB
507 #elif defined COMPILE_PCRE16
508 #define MOV_UCHAR SLJIT_MOV_UH
509 #define MOVU_UCHAR SLJIT_MOVU_UH
510 #elif defined COMPILE_PCRE32
511 #define MOV_UCHAR SLJIT_MOV_UI
512 #define MOVU_UCHAR SLJIT_MOVU_UI
513 #else
514 #error Unsupported compiling mode
515 #endif
516
517 /* Shortcuts. Every emitter macro below passes a variable named compiler to
the sljit function it wraps; DEFINE_COMPILER declares that variable from
common->compiler, so it has to appear (with a compiler_common pointer named
common in scope) before any of the other macros is used. */
518 #define DEFINE_COMPILER \
519 struct sljit_compiler *compiler = common->compiler
520 #define OP1(op, dst, dstw, src, srcw) \
521 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
522 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
523 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
524 #define LABEL() \
525 sljit_emit_label(compiler)
526 #define JUMP(type) \
527 sljit_emit_jump(compiler, (type))
/* Emits a jump and immediately binds it to an existing label. */
528 #define JUMPTO(type, label) \
529 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds an earlier jump to it. */
530 #define JUMPHERE(jump) \
531 sljit_set_label((jump), sljit_emit_label(compiler))
532 #define SET_LABEL(jump, label) \
533 sljit_set_label((jump), (label))
534 #define CMP(type, src1, src1w, src2, src2w) \
535 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Same as CMP, but the emitted compare-and-jump is bound to label right away. */
536 #define CMPTO(type, src1, src1w, src2, src2w, label) \
537 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
538 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
539 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
540 #define GET_LOCAL_BASE(dst, dstw, offset) \
541 sljit_get_local_base(compiler, (dst), (dstw), (offset))
542
543 #define READ_CHAR_MAX 0x7fffffff
544
545 static pcre_uchar* bracketend(pcre_uchar* cc)
546 {
547 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
548 do cc += GET(cc, 1); while (*cc == OP_ALT);
549 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
550 cc += 1 + LINK_SIZE;
551 return cc;
552 }
553
554 static int no_alternatives(pcre_uchar* cc)
555 {
556 int count = 0;
557 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
558 do
559 {
560 cc += GET(cc, 1);
561 count++;
562 }
563 while (*cc == OP_ALT);
564 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
565 return count;
566 }
567
/* Lookup table: entry i holds the number of bits set in the 4 bit value i
(0..15). The table is never meant to be written, so it is const-qualified;
this documents the intent and lets the compiler place it in read-only
storage. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
572
573 /* Functions which might need modification for all new supported opcodes:
574 next_opcode
575 check_opcode_types
576 set_private_data_ptrs
577 get_framesize
578 init_frame
579 get_private_data_copy_length
580 copy_private_data
581 compile_matchingpath
582 compile_backtrackingpath
583 */
584
585 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
586 {
587 SLJIT_UNUSED_ARG(common);
588 switch(*cc)
589 {
590 case OP_SOD:
591 case OP_SOM:
592 case OP_SET_SOM:
593 case OP_NOT_WORD_BOUNDARY:
594 case OP_WORD_BOUNDARY:
595 case OP_NOT_DIGIT:
596 case OP_DIGIT:
597 case OP_NOT_WHITESPACE:
598 case OP_WHITESPACE:
599 case OP_NOT_WORDCHAR:
600 case OP_WORDCHAR:
601 case OP_ANY:
602 case OP_ALLANY:
603 case OP_NOTPROP:
604 case OP_PROP:
605 case OP_ANYNL:
606 case OP_NOT_HSPACE:
607 case OP_HSPACE:
608 case OP_NOT_VSPACE:
609 case OP_VSPACE:
610 case OP_EXTUNI:
611 case OP_EODN:
612 case OP_EOD:
613 case OP_CIRC:
614 case OP_CIRCM:
615 case OP_DOLL:
616 case OP_DOLLM:
617 case OP_CRSTAR:
618 case OP_CRMINSTAR:
619 case OP_CRPLUS:
620 case OP_CRMINPLUS:
621 case OP_CRQUERY:
622 case OP_CRMINQUERY:
623 case OP_CRRANGE:
624 case OP_CRMINRANGE:
625 case OP_CRPOSSTAR:
626 case OP_CRPOSPLUS:
627 case OP_CRPOSQUERY:
628 case OP_CRPOSRANGE:
629 case OP_CLASS:
630 case OP_NCLASS:
631 case OP_REF:
632 case OP_REFI:
633 case OP_DNREF:
634 case OP_DNREFI:
635 case OP_RECURSE:
636 case OP_CALLOUT:
637 case OP_ALT:
638 case OP_KET:
639 case OP_KETRMAX:
640 case OP_KETRMIN:
641 case OP_KETRPOS:
642 case OP_REVERSE:
643 case OP_ASSERT:
644 case OP_ASSERT_NOT:
645 case OP_ASSERTBACK:
646 case OP_ASSERTBACK_NOT:
647 case OP_ONCE:
648 case OP_ONCE_NC:
649 case OP_BRA:
650 case OP_BRAPOS:
651 case OP_CBRA:
652 case OP_CBRAPOS:
653 case OP_COND:
654 case OP_SBRA:
655 case OP_SBRAPOS:
656 case OP_SCBRA:
657 case OP_SCBRAPOS:
658 case OP_SCOND:
659 case OP_CREF:
660 case OP_DNCREF:
661 case OP_RREF:
662 case OP_DNRREF:
663 case OP_DEF:
664 case OP_BRAZERO:
665 case OP_BRAMINZERO:
666 case OP_BRAPOSZERO:
667 case OP_PRUNE:
668 case OP_SKIP:
669 case OP_THEN:
670 case OP_COMMIT:
671 case OP_FAIL:
672 case OP_ACCEPT:
673 case OP_ASSERT_ACCEPT:
674 case OP_CLOSE:
675 case OP_SKIPZERO:
676 return cc + PRIV(OP_lengths)[*cc];
677
678 case OP_CHAR:
679 case OP_CHARI:
680 case OP_NOT:
681 case OP_NOTI:
682 case OP_STAR:
683 case OP_MINSTAR:
684 case OP_PLUS:
685 case OP_MINPLUS:
686 case OP_QUERY:
687 case OP_MINQUERY:
688 case OP_UPTO:
689 case OP_MINUPTO:
690 case OP_EXACT:
691 case OP_POSSTAR:
692 case OP_POSPLUS:
693 case OP_POSQUERY:
694 case OP_POSUPTO:
695 case OP_STARI:
696 case OP_MINSTARI:
697 case OP_PLUSI:
698 case OP_MINPLUSI:
699 case OP_QUERYI:
700 case OP_MINQUERYI:
701 case OP_UPTOI:
702 case OP_MINUPTOI:
703 case OP_EXACTI:
704 case OP_POSSTARI:
705 case OP_POSPLUSI:
706 case OP_POSQUERYI:
707 case OP_POSUPTOI:
708 case OP_NOTSTAR:
709 case OP_NOTMINSTAR:
710 case OP_NOTPLUS:
711 case OP_NOTMINPLUS:
712 case OP_NOTQUERY:
713 case OP_NOTMINQUERY:
714 case OP_NOTUPTO:
715 case OP_NOTMINUPTO:
716 case OP_NOTEXACT:
717 case OP_NOTPOSSTAR:
718 case OP_NOTPOSPLUS:
719 case OP_NOTPOSQUERY:
720 case OP_NOTPOSUPTO:
721 case OP_NOTSTARI:
722 case OP_NOTMINSTARI:
723 case OP_NOTPLUSI:
724 case OP_NOTMINPLUSI:
725 case OP_NOTQUERYI:
726 case OP_NOTMINQUERYI:
727 case OP_NOTUPTOI:
728 case OP_NOTMINUPTOI:
729 case OP_NOTEXACTI:
730 case OP_NOTPOSSTARI:
731 case OP_NOTPOSPLUSI:
732 case OP_NOTPOSQUERYI:
733 case OP_NOTPOSUPTOI:
734 cc += PRIV(OP_lengths)[*cc];
735 #ifdef SUPPORT_UTF
736 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
737 #endif
738 return cc;
739
740 /* Special cases. */
741 case OP_TYPESTAR:
742 case OP_TYPEMINSTAR:
743 case OP_TYPEPLUS:
744 case OP_TYPEMINPLUS:
745 case OP_TYPEQUERY:
746 case OP_TYPEMINQUERY:
747 case OP_TYPEUPTO:
748 case OP_TYPEMINUPTO:
749 case OP_TYPEEXACT:
750 case OP_TYPEPOSSTAR:
751 case OP_TYPEPOSPLUS:
752 case OP_TYPEPOSQUERY:
753 case OP_TYPEPOSUPTO:
754 return cc + PRIV(OP_lengths)[*cc] - 1;
755
756 case OP_ANYBYTE:
757 #ifdef SUPPORT_UTF
758 if (common->utf) return NULL;
759 #endif
760 return cc + 1;
761
762 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
763 case OP_XCLASS:
764 return cc + GET(cc, 1);
765 #endif
766
767 case OP_MARK:
768 case OP_PRUNE_ARG:
769 case OP_SKIP_ARG:
770 case OP_THEN_ARG:
771 return cc + 1 + 2 + cc[1];
772
773 default:
774 /* All opcodes are supported now! */
775 SLJIT_ASSERT_STOP();
776 return NULL;
777 }
778 }
779
780 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
781 {
782 int count;
783 pcre_uchar *slot;
784
785 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
786 while (cc < ccend)
787 {
788 switch(*cc)
789 {
790 case OP_SET_SOM:
791 common->has_set_som = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_BRA:
802 case OP_CBRA:
803 case OP_SBRA:
804 case OP_SCBRA:
805 count = no_alternatives(cc);
806 if (count > 4)
807 common->read_only_data_size += count * sizeof(sljit_uw);
808 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
809 break;
810
811 case OP_CBRAPOS:
812 case OP_SCBRAPOS:
813 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
814 cc += 1 + LINK_SIZE + IMM2_SIZE;
815 break;
816
817 case OP_COND:
818 case OP_SCOND:
819 /* Only AUTO_CALLOUT can insert this opcode. We do
820 not intend to support this case. */
821 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
822 return FALSE;
823 cc += 1 + LINK_SIZE;
824 break;
825
826 case OP_CREF:
827 common->optimized_cbracket[GET2(cc, 1)] = 0;
828 cc += 1 + IMM2_SIZE;
829 break;
830
831 case OP_DNREF:
832 case OP_DNREFI:
833 case OP_DNCREF:
834 count = GET2(cc, 1 + IMM2_SIZE);
835 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
836 while (count-- > 0)
837 {
838 common->optimized_cbracket[GET2(slot, 0)] = 0;
839 slot += common->name_entry_size;
840 }
841 cc += 1 + 2 * IMM2_SIZE;
842 break;
843
844 case OP_RECURSE:
845 /* Set its value only once. */
846 if (common->recursive_head_ptr == 0)
847 {
848 common->recursive_head_ptr = common->ovector_start;
849 common->ovector_start += sizeof(sljit_sw);
850 }
851 cc += 1 + LINK_SIZE;
852 break;
853
854 case OP_CALLOUT:
855 if (common->capture_last_ptr == 0)
856 {
857 common->capture_last_ptr = common->ovector_start;
858 common->ovector_start += sizeof(sljit_sw);
859 }
860 cc += 2 + 2 * LINK_SIZE;
861 break;
862
863 case OP_THEN_ARG:
864 common->has_then = TRUE;
865 common->control_head_ptr = 1;
866 /* Fall through. */
867
868 case OP_PRUNE_ARG:
869 common->needs_start_ptr = TRUE;
870 /* Fall through. */
871
872 case OP_MARK:
873 if (common->mark_ptr == 0)
874 {
875 common->mark_ptr = common->ovector_start;
876 common->ovector_start += sizeof(sljit_sw);
877 }
878 cc += 1 + 2 + cc[1];
879 break;
880
881 case OP_THEN:
882 common->has_then = TRUE;
883 common->control_head_ptr = 1;
884 /* Fall through. */
885
886 case OP_PRUNE:
887 case OP_SKIP:
888 common->needs_start_ptr = TRUE;
889 cc += 1;
890 break;
891
892 case OP_SKIP_ARG:
893 common->control_head_ptr = 1;
894 common->has_skip_arg = TRUE;
895 cc += 1 + 2 + cc[1];
896 break;
897
898 default:
899 cc = next_opcode(common, cc);
900 if (cc == NULL)
901 return FALSE;
902 break;
903 }
904 }
905 return TRUE;
906 }
907
908 static int get_class_iterator_size(pcre_uchar *cc)
909 {
910 switch(*cc)
911 {
912 case OP_CRSTAR:
913 case OP_CRPLUS:
914 return 2;
915
916 case OP_CRMINSTAR:
917 case OP_CRMINPLUS:
918 case OP_CRQUERY:
919 case OP_CRMINQUERY:
920 return 1;
921
922 case OP_CRRANGE:
923 case OP_CRMINRANGE:
924 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
925 return 0;
926 return 2;
927
928 default:
929 return 0;
930 }
931 }
932
933 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
934 {
935 pcre_uchar *end = bracketend(begin);
936 pcre_uchar *next;
937 pcre_uchar *next_end;
938 pcre_uchar *max_end;
939 pcre_uchar type;
940 sljit_sw length = end - begin;
941 int min, max, i;
942
943 /* Detect fixed iterations first. */
944 if (end[-(1 + LINK_SIZE)] != OP_KET)
945 return FALSE;
946
947 /* Already detected repeat. */
948 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
949 return TRUE;
950
951 next = end;
952 min = 1;
953 while (1)
954 {
955 if (*next != *begin)
956 break;
957 next_end = bracketend(next);
958 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
959 break;
960 next = next_end;
961 min++;
962 }
963
964 if (min == 2)
965 return FALSE;
966
967 max = 0;
968 max_end = next;
969 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
970 {
971 type = *next;
972 while (1)
973 {
974 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
975 break;
976 next_end = bracketend(next + 2 + LINK_SIZE);
977 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
978 break;
979 next = next_end;
980 max++;
981 }
982
983 if (next[0] == type && next[1] == *begin && max >= 1)
984 {
985 next_end = bracketend(next + 1);
986 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
987 {
988 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
989 if (*next_end != OP_KET)
990 break;
991
992 if (i == max)
993 {
994 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
995 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
996 /* +2 the original and the last. */
997 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
998 if (min == 1)
999 return TRUE;
1000 min--;
1001 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1002 }
1003 }
1004 }
1005 }
1006
1007 if (min >= 3)
1008 {
1009 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1010 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1011 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1012 return TRUE;
1013 }
1014
1015 return FALSE;
1016 }
1017
/* Groups of case labels, meant to be pasted into a switch over an opcode.
Each macro expands to a run of "case X:" labels, so the statements written
after the macro become the body shared by the whole group. In
set_private_data_ptrs the groups named ..._1 get space = 1 and the groups
named ..._2A and ..._2B get space = 2. */

/* Single character repeats: lazy star/plus and both query forms. */
1018 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1019 case OP_MINSTAR: \
1020 case OP_MINPLUS: \
1021 case OP_QUERY: \
1022 case OP_MINQUERY: \
1023 case OP_MINSTARI: \
1024 case OP_MINPLUSI: \
1025 case OP_QUERYI: \
1026 case OP_MINQUERYI: \
1027 case OP_NOTMINSTAR: \
1028 case OP_NOTMINPLUS: \
1029 case OP_NOTQUERY: \
1030 case OP_NOTMINQUERY: \
1031 case OP_NOTMINSTARI: \
1032 case OP_NOTMINPLUSI: \
1033 case OP_NOTQUERYI: \
1034 case OP_NOTMINQUERYI:
1035
/* Single character repeats: greedy star and plus. */
1036 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1037 case OP_STAR: \
1038 case OP_PLUS: \
1039 case OP_STARI: \
1040 case OP_PLUSI: \
1041 case OP_NOTSTAR: \
1042 case OP_NOTPLUS: \
1043 case OP_NOTSTARI: \
1044 case OP_NOTPLUSI:
1045
/* Single character repeats: the upto forms. */
1046 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1047 case OP_UPTO: \
1048 case OP_MINUPTO: \
1049 case OP_UPTOI: \
1050 case OP_MINUPTOI: \
1051 case OP_NOTUPTO: \
1052 case OP_NOTMINUPTO: \
1053 case OP_NOTUPTOI: \
1054 case OP_NOTMINUPTOI:
1055
/* Character type repeats, grouped the same way as the three macros above. */
1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1057 case OP_TYPEMINSTAR: \
1058 case OP_TYPEMINPLUS: \
1059 case OP_TYPEQUERY: \
1060 case OP_TYPEMINQUERY:
1061
1062 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1063 case OP_TYPESTAR: \
1064 case OP_TYPEPLUS:
1065
1066 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1067 case OP_TYPEUPTO: \
1068 case OP_TYPEMINUPTO:
1069
1070 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1071 {
1072 pcre_uchar *cc = common->start;
1073 pcre_uchar *alternative;
1074 pcre_uchar *end = NULL;
1075 int private_data_ptr = *private_data_start;
1076 int space, size, bracketlen;
1077
1078 while (cc < ccend)
1079 {
1080 space = 0;
1081 size = 0;
1082 bracketlen = 0;
1083 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1084 return;
1085
1086 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1087 if (detect_repeat(common, cc))
1088 {
1089 /* These brackets are converted to repeats, so no global
1090 based single character repeat is allowed. */
1091 if (cc >= end)
1092 end = bracketend(cc);
1093 }
1094
1095 switch(*cc)
1096 {
1097 case OP_KET:
1098 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1099 {
1100 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1101 private_data_ptr += sizeof(sljit_sw);
1102 cc += common->private_data_ptrs[cc + 1 - common->start];
1103 }
1104 cc += 1 + LINK_SIZE;
1105 break;
1106
1107 case OP_ASSERT:
1108 case OP_ASSERT_NOT:
1109 case OP_ASSERTBACK:
1110 case OP_ASSERTBACK_NOT:
1111 case OP_ONCE:
1112 case OP_ONCE_NC:
1113 case OP_BRAPOS:
1114 case OP_SBRA:
1115 case OP_SBRAPOS:
1116 case OP_SCOND:
1117 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1118 private_data_ptr += sizeof(sljit_sw);
1119 bracketlen = 1 + LINK_SIZE;
1120 break;
1121
1122 case OP_CBRAPOS:
1123 case OP_SCBRAPOS:
1124 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1125 private_data_ptr += sizeof(sljit_sw);
1126 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1127 break;
1128
1129 case OP_COND:
1130 /* Might be a hidden SCOND. */
1131 alternative = cc + GET(cc, 1);
1132 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1133 {
1134 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1135 private_data_ptr += sizeof(sljit_sw);
1136 }
1137 bracketlen = 1 + LINK_SIZE;
1138 break;
1139
1140 case OP_BRA:
1141 bracketlen = 1 + LINK_SIZE;
1142 break;
1143
1144 case OP_CBRA:
1145 case OP_SCBRA:
1146 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1147 break;
1148
1149 CASE_ITERATOR_PRIVATE_DATA_1
1150 space = 1;
1151 size = -2;
1152 break;
1153
1154 CASE_ITERATOR_PRIVATE_DATA_2A
1155 space = 2;
1156 size = -2;
1157 break;
1158
1159 CASE_ITERATOR_PRIVATE_DATA_2B
1160 space = 2;
1161 size = -(2 + IMM2_SIZE);
1162 break;
1163
1164 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1165 space = 1;
1166 size = 1;
1167 break;
1168
1169 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1170 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1171 space = 2;
1172 size = 1;
1173 break;
1174
1175 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1176 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1177 space = 2;
1178 size = 1 + IMM2_SIZE;
1179 break;
1180
1181 case OP_CLASS:
1182 case OP_NCLASS:
1183 size += 1 + 32 / sizeof(pcre_uchar);
1184 space = get_class_iterator_size(cc + size);
1185 break;
1186
1187 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1188 case OP_XCLASS:
1189 size = GET(cc, 1);
1190 space = get_class_iterator_size(cc + size);
1191 break;
1192 #endif
1193
1194 default:
1195 cc = next_opcode(common, cc);
1196 SLJIT_ASSERT(cc != NULL);
1197 break;
1198 }
1199
1200 /* Character iterators, which are not inside a repeated bracket,
1201 gets a private slot instead of allocating it on the stack. */
1202 if (space > 0 && cc >= end)
1203 {
1204 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1205 private_data_ptr += sizeof(sljit_sw) * space;
1206 }
1207
1208 if (size != 0)
1209 {
1210 if (size < 0)
1211 {
1212 cc += -size;
1213 #ifdef SUPPORT_UTF
1214 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1215 #endif
1216 }
1217 else
1218 cc += size;
1219 }
1220
1221 if (bracketlen > 0)
1222 {
1223 if (cc >= end)
1224 {
1225 end = bracketend(cc);
1226 if (end[-1 - LINK_SIZE] == OP_KET)
1227 end = NULL;
1228 }
1229 cc += bracketlen;
1230 }
1231 }
1232 *private_data_start = private_data_ptr;
1233 }
1234
1235 /* Returns with a frame_types (always < 0) if no need for frame. */
1236 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1237 {
1238 int length = 0;
1239 int possessive = 0;
1240 BOOL stack_restore = FALSE;
1241 BOOL setsom_found = recursive;
1242 BOOL setmark_found = recursive;
1243 /* The last capture is a local variable even for recursions. */
1244 BOOL capture_last_found = FALSE;
1245
1246 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1247 SLJIT_ASSERT(common->control_head_ptr != 0);
1248 *needs_control_head = TRUE;
1249 #else
1250 *needs_control_head = FALSE;
1251 #endif
1252
1253 if (ccend == NULL)
1254 {
1255 ccend = bracketend(cc) - (1 + LINK_SIZE);
1256 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1257 {
1258 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1259 /* This is correct regardless of common->capture_last_ptr. */
1260 capture_last_found = TRUE;
1261 }
1262 cc = next_opcode(common, cc);
1263 }
1264
1265 SLJIT_ASSERT(cc != NULL);
1266 while (cc < ccend)
1267 switch(*cc)
1268 {
1269 case OP_SET_SOM:
1270 SLJIT_ASSERT(common->has_set_som);
1271 stack_restore = TRUE;
1272 if (!setsom_found)
1273 {
1274 length += 2;
1275 setsom_found = TRUE;
1276 }
1277 cc += 1;
1278 break;
1279
1280 case OP_MARK:
1281 case OP_PRUNE_ARG:
1282 case OP_THEN_ARG:
1283 SLJIT_ASSERT(common->mark_ptr != 0);
1284 stack_restore = TRUE;
1285 if (!setmark_found)
1286 {
1287 length += 2;
1288 setmark_found = TRUE;
1289 }
1290 if (common->control_head_ptr != 0)
1291 *needs_control_head = TRUE;
1292 cc += 1 + 2 + cc[1];
1293 break;
1294
1295 case OP_RECURSE:
1296 stack_restore = TRUE;
1297 if (common->has_set_som && !setsom_found)
1298 {
1299 length += 2;
1300 setsom_found = TRUE;
1301 }
1302 if (common->mark_ptr != 0 && !setmark_found)
1303 {
1304 length += 2;
1305 setmark_found = TRUE;
1306 }
1307 if (common->capture_last_ptr != 0 && !capture_last_found)
1308 {
1309 length += 2;
1310 capture_last_found = TRUE;
1311 }
1312 cc += 1 + LINK_SIZE;
1313 break;
1314
1315 case OP_CBRA:
1316 case OP_CBRAPOS:
1317 case OP_SCBRA:
1318 case OP_SCBRAPOS:
1319 stack_restore = TRUE;
1320 if (common->capture_last_ptr != 0 && !capture_last_found)
1321 {
1322 length += 2;
1323 capture_last_found = TRUE;
1324 }
1325 length += 3;
1326 cc += 1 + LINK_SIZE + IMM2_SIZE;
1327 break;
1328
1329 default:
1330 stack_restore = TRUE;
1331 /* Fall through. */
1332
1333 case OP_NOT_WORD_BOUNDARY:
1334 case OP_WORD_BOUNDARY:
1335 case OP_NOT_DIGIT:
1336 case OP_DIGIT:
1337 case OP_NOT_WHITESPACE:
1338 case OP_WHITESPACE:
1339 case OP_NOT_WORDCHAR:
1340 case OP_WORDCHAR:
1341 case OP_ANY:
1342 case OP_ALLANY:
1343 case OP_ANYBYTE:
1344 case OP_NOTPROP:
1345 case OP_PROP:
1346 case OP_ANYNL:
1347 case OP_NOT_HSPACE:
1348 case OP_HSPACE:
1349 case OP_NOT_VSPACE:
1350 case OP_VSPACE:
1351 case OP_EXTUNI:
1352 case OP_EODN:
1353 case OP_EOD:
1354 case OP_CIRC:
1355 case OP_CIRCM:
1356 case OP_DOLL:
1357 case OP_DOLLM:
1358 case OP_CHAR:
1359 case OP_CHARI:
1360 case OP_NOT:
1361 case OP_NOTI:
1362
1363 case OP_EXACT:
1364 case OP_POSSTAR:
1365 case OP_POSPLUS:
1366 case OP_POSQUERY:
1367 case OP_POSUPTO:
1368
1369 case OP_EXACTI:
1370 case OP_POSSTARI:
1371 case OP_POSPLUSI:
1372 case OP_POSQUERYI:
1373 case OP_POSUPTOI:
1374
1375 case OP_NOTEXACT:
1376 case OP_NOTPOSSTAR:
1377 case OP_NOTPOSPLUS:
1378 case OP_NOTPOSQUERY:
1379 case OP_NOTPOSUPTO:
1380
1381 case OP_NOTEXACTI:
1382 case OP_NOTPOSSTARI:
1383 case OP_NOTPOSPLUSI:
1384 case OP_NOTPOSQUERYI:
1385 case OP_NOTPOSUPTOI:
1386
1387 case OP_TYPEEXACT:
1388 case OP_TYPEPOSSTAR:
1389 case OP_TYPEPOSPLUS:
1390 case OP_TYPEPOSQUERY:
1391 case OP_TYPEPOSUPTO:
1392
1393 case OP_CLASS:
1394 case OP_NCLASS:
1395 case OP_XCLASS:
1396
1397 cc = next_opcode(common, cc);
1398 SLJIT_ASSERT(cc != NULL);
1399 break;
1400 }
1401
1402 /* Possessive quantifiers can use a special case. */
1403 if (SLJIT_UNLIKELY(possessive == length))
1404 return stack_restore ? no_frame : no_stack;
1405
1406 if (length > 0)
1407 return length + 1;
1408 return stack_restore ? no_frame : no_stack;
1409 }
1410
1411 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1412 {
1413 DEFINE_COMPILER;
1414 BOOL setsom_found = recursive;
1415 BOOL setmark_found = recursive;
1416 /* The last capture is a local variable even for recursions. */
1417 BOOL capture_last_found = FALSE;
1418 int offset;
1419
1420 /* >= 1 + shortest item size (2) */
1421 SLJIT_UNUSED_ARG(stacktop);
1422 SLJIT_ASSERT(stackpos >= stacktop + 2);
1423
1424 stackpos = STACK(stackpos);
1425 if (ccend == NULL)
1426 {
1427 ccend = bracketend(cc) - (1 + LINK_SIZE);
1428 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1429 cc = next_opcode(common, cc);
1430 }
1431
1432 SLJIT_ASSERT(cc != NULL);
1433 while (cc < ccend)
1434 switch(*cc)
1435 {
1436 case OP_SET_SOM:
1437 SLJIT_ASSERT(common->has_set_som);
1438 if (!setsom_found)
1439 {
1440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1442 stackpos += (int)sizeof(sljit_sw);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444 stackpos += (int)sizeof(sljit_sw);
1445 setsom_found = TRUE;
1446 }
1447 cc += 1;
1448 break;
1449
1450 case OP_MARK:
1451 case OP_PRUNE_ARG:
1452 case OP_THEN_ARG:
1453 SLJIT_ASSERT(common->mark_ptr != 0);
1454 if (!setmark_found)
1455 {
1456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1458 stackpos += (int)sizeof(sljit_sw);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1460 stackpos += (int)sizeof(sljit_sw);
1461 setmark_found = TRUE;
1462 }
1463 cc += 1 + 2 + cc[1];
1464 break;
1465
1466 case OP_RECURSE:
1467 if (common->has_set_som && !setsom_found)
1468 {
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 setsom_found = TRUE;
1475 }
1476 if (common->mark_ptr != 0 && !setmark_found)
1477 {
1478 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1479 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1480 stackpos += (int)sizeof(sljit_sw);
1481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1482 stackpos += (int)sizeof(sljit_sw);
1483 setmark_found = TRUE;
1484 }
1485 if (common->capture_last_ptr != 0 && !capture_last_found)
1486 {
1487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1489 stackpos += (int)sizeof(sljit_sw);
1490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1491 stackpos += (int)sizeof(sljit_sw);
1492 capture_last_found = TRUE;
1493 }
1494 cc += 1 + LINK_SIZE;
1495 break;
1496
1497 case OP_CBRA:
1498 case OP_CBRAPOS:
1499 case OP_SCBRA:
1500 case OP_SCBRAPOS:
1501 if (common->capture_last_ptr != 0 && !capture_last_found)
1502 {
1503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1505 stackpos += (int)sizeof(sljit_sw);
1506 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1507 stackpos += (int)sizeof(sljit_sw);
1508 capture_last_found = TRUE;
1509 }
1510 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1511 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1512 stackpos += (int)sizeof(sljit_sw);
1513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1514 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1516 stackpos += (int)sizeof(sljit_sw);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1518 stackpos += (int)sizeof(sljit_sw);
1519
1520 cc += 1 + LINK_SIZE + IMM2_SIZE;
1521 break;
1522
1523 default:
1524 cc = next_opcode(common, cc);
1525 SLJIT_ASSERT(cc != NULL);
1526 break;
1527 }
1528
1529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1530 SLJIT_ASSERT(stackpos == STACK(stacktop));
1531 }
1532
1533 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1534 {
1535 int private_data_length = needs_control_head ? 3 : 2;
1536 int size;
1537 pcre_uchar *alternative;
1538 /* Calculate the sum of the private machine words. */
1539 while (cc < ccend)
1540 {
1541 size = 0;
1542 switch(*cc)
1543 {
1544 case OP_KET:
1545 if (PRIVATE_DATA(cc) != 0)
1546 private_data_length++;
1547 cc += 1 + LINK_SIZE;
1548 break;
1549
1550 case OP_ASSERT:
1551 case OP_ASSERT_NOT:
1552 case OP_ASSERTBACK:
1553 case OP_ASSERTBACK_NOT:
1554 case OP_ONCE:
1555 case OP_ONCE_NC:
1556 case OP_BRAPOS:
1557 case OP_SBRA:
1558 case OP_SBRAPOS:
1559 case OP_SCOND:
1560 private_data_length++;
1561 cc += 1 + LINK_SIZE;
1562 break;
1563
1564 case OP_CBRA:
1565 case OP_SCBRA:
1566 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1567 private_data_length++;
1568 cc += 1 + LINK_SIZE + IMM2_SIZE;
1569 break;
1570
1571 case OP_CBRAPOS:
1572 case OP_SCBRAPOS:
1573 private_data_length += 2;
1574 cc += 1 + LINK_SIZE + IMM2_SIZE;
1575 break;
1576
1577 case OP_COND:
1578 /* Might be a hidden SCOND. */
1579 alternative = cc + GET(cc, 1);
1580 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1581 private_data_length++;
1582 cc += 1 + LINK_SIZE;
1583 break;
1584
1585 CASE_ITERATOR_PRIVATE_DATA_1
1586 if (PRIVATE_DATA(cc))
1587 private_data_length++;
1588 cc += 2;
1589 #ifdef SUPPORT_UTF
1590 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1591 #endif
1592 break;
1593
1594 CASE_ITERATOR_PRIVATE_DATA_2A
1595 if (PRIVATE_DATA(cc))
1596 private_data_length += 2;
1597 cc += 2;
1598 #ifdef SUPPORT_UTF
1599 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1600 #endif
1601 break;
1602
1603 CASE_ITERATOR_PRIVATE_DATA_2B
1604 if (PRIVATE_DATA(cc))
1605 private_data_length += 2;
1606 cc += 2 + IMM2_SIZE;
1607 #ifdef SUPPORT_UTF
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1609 #endif
1610 break;
1611
1612 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1613 if (PRIVATE_DATA(cc))
1614 private_data_length++;
1615 cc += 1;
1616 break;
1617
1618 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1619 if (PRIVATE_DATA(cc))
1620 private_data_length += 2;
1621 cc += 1;
1622 break;
1623
1624 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1625 if (PRIVATE_DATA(cc))
1626 private_data_length += 2;
1627 cc += 1 + IMM2_SIZE;
1628 break;
1629
1630 case OP_CLASS:
1631 case OP_NCLASS:
1632 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1633 case OP_XCLASS:
1634 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1635 #else
1636 size = 1 + 32 / (int)sizeof(pcre_uchar);
1637 #endif
1638 if (PRIVATE_DATA(cc))
1639 private_data_length += get_class_iterator_size(cc + size);
1640 cc += size;
1641 break;
1642
1643 default:
1644 cc = next_opcode(common, cc);
1645 SLJIT_ASSERT(cc != NULL);
1646 break;
1647 }
1648 }
1649 SLJIT_ASSERT(cc == ccend);
1650 return private_data_length;
1651 }
1652
1653 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1654 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1655 {
1656 DEFINE_COMPILER;
1657 int srcw[2];
1658 int count, size;
1659 BOOL tmp1next = TRUE;
1660 BOOL tmp1empty = TRUE;
1661 BOOL tmp2empty = TRUE;
1662 pcre_uchar *alternative;
1663 enum {
1664 start,
1665 loop,
1666 end
1667 } status;
1668
1669 status = save ? start : loop;
1670 stackptr = STACK(stackptr - 2);
1671 stacktop = STACK(stacktop - 1);
1672
1673 if (!save)
1674 {
1675 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1676 if (stackptr < stacktop)
1677 {
1678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1679 stackptr += sizeof(sljit_sw);
1680 tmp1empty = FALSE;
1681 }
1682 if (stackptr < stacktop)
1683 {
1684 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1685 stackptr += sizeof(sljit_sw);
1686 tmp2empty = FALSE;
1687 }
1688 /* The tmp1next must be TRUE in either way. */
1689 }
1690
1691 do
1692 {
1693 count = 0;
1694 switch(status)
1695 {
1696 case start:
1697 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1698 count = 1;
1699 srcw[0] = common->recursive_head_ptr;
1700 if (needs_control_head)
1701 {
1702 SLJIT_ASSERT(common->control_head_ptr != 0);
1703 count = 2;
1704 srcw[1] = common->control_head_ptr;
1705 }
1706 status = loop;
1707 break;
1708
1709 case loop:
1710 if (cc >= ccend)
1711 {
1712 status = end;
1713 break;
1714 }
1715
1716 switch(*cc)
1717 {
1718 case OP_KET:
1719 if (PRIVATE_DATA(cc) != 0)
1720 {
1721 count = 1;
1722 srcw[0] = PRIVATE_DATA(cc);
1723 }
1724 cc += 1 + LINK_SIZE;
1725 break;
1726
1727 case OP_ASSERT:
1728 case OP_ASSERT_NOT:
1729 case OP_ASSERTBACK:
1730 case OP_ASSERTBACK_NOT:
1731 case OP_ONCE:
1732 case OP_ONCE_NC:
1733 case OP_BRAPOS:
1734 case OP_SBRA:
1735 case OP_SBRAPOS:
1736 case OP_SCOND:
1737 count = 1;
1738 srcw[0] = PRIVATE_DATA(cc);
1739 SLJIT_ASSERT(srcw[0] != 0);
1740 cc += 1 + LINK_SIZE;
1741 break;
1742
1743 case OP_CBRA:
1744 case OP_SCBRA:
1745 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1746 {
1747 count = 1;
1748 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1749 }
1750 cc += 1 + LINK_SIZE + IMM2_SIZE;
1751 break;
1752
1753 case OP_CBRAPOS:
1754 case OP_SCBRAPOS:
1755 count = 2;
1756 srcw[0] = PRIVATE_DATA(cc);
1757 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1758 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1759 cc += 1 + LINK_SIZE + IMM2_SIZE;
1760 break;
1761
1762 case OP_COND:
1763 /* Might be a hidden SCOND. */
1764 alternative = cc + GET(cc, 1);
1765 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1766 {
1767 count = 1;
1768 srcw[0] = PRIVATE_DATA(cc);
1769 SLJIT_ASSERT(srcw[0] != 0);
1770 }
1771 cc += 1 + LINK_SIZE;
1772 break;
1773
1774 CASE_ITERATOR_PRIVATE_DATA_1
1775 if (PRIVATE_DATA(cc))
1776 {
1777 count = 1;
1778 srcw[0] = PRIVATE_DATA(cc);
1779 }
1780 cc += 2;
1781 #ifdef SUPPORT_UTF
1782 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1783 #endif
1784 break;
1785
1786 CASE_ITERATOR_PRIVATE_DATA_2A
1787 if (PRIVATE_DATA(cc))
1788 {
1789 count = 2;
1790 srcw[0] = PRIVATE_DATA(cc);
1791 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1792 }
1793 cc += 2;
1794 #ifdef SUPPORT_UTF
1795 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1796 #endif
1797 break;
1798
1799 CASE_ITERATOR_PRIVATE_DATA_2B
1800 if (PRIVATE_DATA(cc))
1801 {
1802 count = 2;
1803 srcw[0] = PRIVATE_DATA(cc);
1804 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1805 }
1806 cc += 2 + IMM2_SIZE;
1807 #ifdef SUPPORT_UTF
1808 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1809 #endif
1810 break;
1811
1812 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1813 if (PRIVATE_DATA(cc))
1814 {
1815 count = 1;
1816 srcw[0] = PRIVATE_DATA(cc);
1817 }
1818 cc += 1;
1819 break;
1820
1821 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1822 if (PRIVATE_DATA(cc))
1823 {
1824 count = 2;
1825 srcw[0] = PRIVATE_DATA(cc);
1826 srcw[1] = srcw[0] + sizeof(sljit_sw);
1827 }
1828 cc += 1;
1829 break;
1830
1831 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1832 if (PRIVATE_DATA(cc))
1833 {
1834 count = 2;
1835 srcw[0] = PRIVATE_DATA(cc);
1836 srcw[1] = srcw[0] + sizeof(sljit_sw);
1837 }
1838 cc += 1 + IMM2_SIZE;
1839 break;
1840
1841 case OP_CLASS:
1842 case OP_NCLASS:
1843 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1844 case OP_XCLASS:
1845 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1846 #else
1847 size = 1 + 32 / (int)sizeof(pcre_uchar);
1848 #endif
1849 if (PRIVATE_DATA(cc))
1850 switch(get_class_iterator_size(cc + size))
1851 {
1852 case 1:
1853 count = 1;
1854 srcw[0] = PRIVATE_DATA(cc);
1855 break;
1856
1857 case 2:
1858 count = 2;
1859 srcw[0] = PRIVATE_DATA(cc);
1860 srcw[1] = srcw[0] + sizeof(sljit_sw);
1861 break;
1862
1863 default:
1864 SLJIT_ASSERT_STOP();
1865 break;
1866 }
1867 cc += size;
1868 break;
1869
1870 default:
1871 cc = next_opcode(common, cc);
1872 SLJIT_ASSERT(cc != NULL);
1873 break;
1874 }
1875 break;
1876
1877 case end:
1878 SLJIT_ASSERT_STOP();
1879 break;
1880 }
1881
1882 while (count > 0)
1883 {
1884 count--;
1885 if (save)
1886 {
1887 if (tmp1next)
1888 {
1889 if (!tmp1empty)
1890 {
1891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1892 stackptr += sizeof(sljit_sw);
1893 }
1894 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1895 tmp1empty = FALSE;
1896 tmp1next = FALSE;
1897 }
1898 else
1899 {
1900 if (!tmp2empty)
1901 {
1902 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1903 stackptr += sizeof(sljit_sw);
1904 }
1905 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1906 tmp2empty = FALSE;
1907 tmp1next = TRUE;
1908 }
1909 }
1910 else
1911 {
1912 if (tmp1next)
1913 {
1914 SLJIT_ASSERT(!tmp1empty);
1915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1916 tmp1empty = stackptr >= stacktop;
1917 if (!tmp1empty)
1918 {
1919 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1920 stackptr += sizeof(sljit_sw);
1921 }
1922 tmp1next = FALSE;
1923 }
1924 else
1925 {
1926 SLJIT_ASSERT(!tmp2empty);
1927 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1928 tmp2empty = stackptr >= stacktop;
1929 if (!tmp2empty)
1930 {
1931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1932 stackptr += sizeof(sljit_sw);
1933 }
1934 tmp1next = TRUE;
1935 }
1936 }
1937 }
1938 }
1939 while (status != end);
1940
1941 if (save)
1942 {
1943 if (tmp1next)
1944 {
1945 if (!tmp1empty)
1946 {
1947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1948 stackptr += sizeof(sljit_sw);
1949 }
1950 if (!tmp2empty)
1951 {
1952 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1953 stackptr += sizeof(sljit_sw);
1954 }
1955 }
1956 else
1957 {
1958 if (!tmp2empty)
1959 {
1960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1961 stackptr += sizeof(sljit_sw);
1962 }
1963 if (!tmp1empty)
1964 {
1965 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1966 stackptr += sizeof(sljit_sw);
1967 }
1968 }
1969 }
1970 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1971 }
1972
1973 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1974 {
1975 pcre_uchar *end = bracketend(cc);
1976 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1977
1978 /* Assert captures then. */
1979 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1980 current_offset = NULL;
1981 /* Conditional block does not. */
1982 if (*cc == OP_COND || *cc == OP_SCOND)
1983 has_alternatives = FALSE;
1984
1985 cc = next_opcode(common, cc);
1986 if (has_alternatives)
1987 current_offset = common->then_offsets + (cc - common->start);
1988
1989 while (cc < end)
1990 {
1991 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1992 cc = set_then_offsets(common, cc, current_offset);
1993 else
1994 {
1995 if (*cc == OP_ALT && has_alternatives)
1996 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1997 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1998 *current_offset = 1;
1999 cc = next_opcode(common, cc);
2000 }
2001 }
2002
2003 return end;
2004 }
2005
/* The case label groups are not used after this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2012
2013 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2014 {
2015 return (value & (value - 1)) == 0;
2016 }
2017
2018 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2019 {
2020 while (list)
2021 {
2022 /* sljit_set_label is clever enough to do nothing
2023 if either the jump or the label is NULL. */
2024 SET_LABEL(list->jump, label);
2025 list = list->next;
2026 }
2027 }
2028
2029 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2030 {
2031 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2032 if (list_item)
2033 {
2034 list_item->next = *list;
2035 list_item->jump = jump;
2036 *list = list_item;
2037 }
2038 }
2039
2040 static void add_stub(compiler_common *common, struct sljit_jump *start)
2041 {
2042 DEFINE_COMPILER;
2043 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2044
2045 if (list_item)
2046 {
2047 list_item->start = start;
2048 list_item->quit = LABEL();
2049 list_item->next = common->stubs;
2050 common->stubs = list_item;
2051 }
2052 }
2053
2054 static void flush_stubs(compiler_common *common)
2055 {
2056 DEFINE_COMPILER;
2057 stub_list* list_item = common->stubs;
2058
2059 while (list_item)
2060 {
2061 JUMPHERE(list_item->start);
2062 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2063 JUMPTO(SLJIT_JUMP, list_item->quit);
2064 list_item = list_item->next;
2065 }
2066 common->stubs = NULL;
2067 }
2068
2069 static void add_label_addr(compiler_common *common)
2070 {
2071 DEFINE_COMPILER;
2072 label_addr_list *label_addr;
2073
2074 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2075 if (label_addr == NULL)
2076 return;
2077 label_addr->label = LABEL();
2078 label_addr->addr = common->read_only_data_ptr;
2079 label_addr->next = common->label_addrs;
2080 common->label_addrs = label_addr;
2081 common->read_only_data_ptr++;
2082 }
2083
2084 static SLJIT_INLINE void count_match(compiler_common *common)
2085 {
2086 DEFINE_COMPILER;
2087
2088 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2089 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2090 }
2091
2092 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2093 {
2094 /* May destroy all locals and registers except TMP2. */
2095 DEFINE_COMPILER;
2096
2097 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2098 #ifdef DESTROY_REGISTERS
2099 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2100 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2101 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2103 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2104 #endif
2105 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2106 }
2107
2108 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2109 {
2110 DEFINE_COMPILER;
2111 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2112 }
2113
2114 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2115 {
2116 DEFINE_COMPILER;
2117 struct sljit_label *loop;
2118 int i;
2119
2120 /* At this point we can freely use all temporary registers. */
2121 SLJIT_ASSERT(length > 1);
2122 /* TMP1 returns with begin - 1. */
2123 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2124 if (length < 8)
2125 {
2126 for (i = 1; i < length; i++)
2127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2128 }
2129 else
2130 {
2131 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2132 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2133 loop = LABEL();
2134 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2135 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2136 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2137 }
2138 }
2139
2140 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2141 {
2142 DEFINE_COMPILER;
2143 struct sljit_label *loop;
2144 int i;
2145
2146 SLJIT_ASSERT(length > 1);
2147 /* OVECTOR(1) contains the "string begin - 1" constant. */
2148 if (length > 2)
2149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2150 if (length < 8)
2151 {
2152 for (i = 2; i < length; i++)
2153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2154 }
2155 else
2156 {
2157 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2158 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2159 loop = LABEL();
2160 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2161 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2162 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2163 }
2164
2165 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2166 if (common->mark_ptr != 0)
2167 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2168 if (common->control_head_ptr != 0)
2169 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2170 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2171 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2172 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2173 }
2174
2175 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2176 {
2177 while (current != NULL)
2178 {
2179 switch (current[-2])
2180 {
2181 case type_then_trap:
2182 break;
2183
2184 case type_mark:
2185 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2186 return current[-4];
2187 break;
2188
2189 default:
2190 SLJIT_ASSERT_STOP();
2191 break;
2192 }
2193 current = (sljit_sw*)current[-1];
2194 }
2195 return -1;
2196 }
2197
2198 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2199 {
2200 DEFINE_COMPILER;
2201 struct sljit_label *loop;
2202 struct sljit_jump *early_quit;
2203
2204 /* At this point we can freely use all registers. */
2205 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2207
2208 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2209 if (common->mark_ptr != 0)
2210 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2211 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2212 if (common->mark_ptr != 0)
2213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2214 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2216 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2217 /* Unlikely, but possible */
2218 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2219 loop = LABEL();
2220 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2221 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2222 /* Copy the integer value to the output buffer */
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2225 #endif
2226 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2227 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2228 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2229 JUMPHERE(early_quit);
2230
2231 /* Calculate the return value, which is the maximum ovector value. */
2232 if (topbracket > 1)
2233 {
2234 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2235 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2236
2237 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2238 loop = LABEL();
2239 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2240 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2241 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2242 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2243 }
2244 else
2245 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2246 }
2247
2248 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2249 {
2250 DEFINE_COMPILER;
2251 struct sljit_jump *jump;
2252
2253 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2254 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2255 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2256
2257 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2258 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2259 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2260 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2261
2262 /* Store match begin and end. */
2263 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2264 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2265
2266 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2267 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2268 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2269 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2270 #endif
2271 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2272 JUMPHERE(jump);
2273
2274 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2275 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2276 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2277 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2278 #endif
2279 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2280
2281 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2282 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2283 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2284 #endif
2285 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2286
2287 JUMPTO(SLJIT_JUMP, quit);
2288 }
2289
2290 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2291 {
2292 /* May destroy TMP1. */
2293 DEFINE_COMPILER;
2294 struct sljit_jump *jump;
2295
2296 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2297 {
2298 /* The value of -1 must be kept for start_used_ptr! */
2299 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2300 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2301 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2302 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2304 JUMPHERE(jump);
2305 }
2306 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2307 {
2308 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2309 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2310 JUMPHERE(jump);
2311 }
2312 }
2313
2314 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2315 {
2316 /* Detects if the character has an othercase. */
2317 unsigned int c;
2318
2319 #ifdef SUPPORT_UTF
2320 if (common->utf)
2321 {
2322 GETCHAR(c, cc);
2323 if (c > 127)
2324 {
2325 #ifdef SUPPORT_UCP
2326 return c != UCD_OTHERCASE(c);
2327 #else
2328 return FALSE;
2329 #endif
2330 }
2331 #ifndef COMPILE_PCRE8
2332 return common->fcc[c] != c;
2333 #endif
2334 }
2335 else
2336 #endif
2337 c = *cc;
2338 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2339 }
2340
2341 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2342 {
2343 /* Returns with the othercase. */
2344 #ifdef SUPPORT_UTF
2345 if (common->utf && c > 127)
2346 {
2347 #ifdef SUPPORT_UCP
2348 return UCD_OTHERCASE(c);
2349 #else
2350 return c;
2351 #endif
2352 }
2353 #endif
2354 return TABLE_GET(c, common->fcc, c);
2355 }
2356
2357 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2358 {
2359 /* Detects if the character and its othercase has only 1 bit difference. */
2360 unsigned int c, oc, bit;
2361 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2362 int n;
2363 #endif
2364
2365 #ifdef SUPPORT_UTF
2366 if (common->utf)
2367 {
2368 GETCHAR(c, cc);
2369 if (c <= 127)
2370 oc = common->fcc[c];
2371 else
2372 {
2373 #ifdef SUPPORT_UCP
2374 oc = UCD_OTHERCASE(c);
2375 #else
2376 oc = c;
2377 #endif
2378 }
2379 }
2380 else
2381 {
2382 c = *cc;
2383 oc = TABLE_GET(c, common->fcc, c);
2384 }
2385 #else
2386 c = *cc;
2387 oc = TABLE_GET(c, common->fcc, c);
2388 #endif
2389
2390 SLJIT_ASSERT(c != oc);
2391
2392 bit = c ^ oc;
2393 /* Optimized for English alphabet. */
2394 if (c <= 127 && bit == 0x20)
2395 return (0 << 8) | 0x20;
2396
2397 /* Since c != oc, they must have at least 1 bit difference. */
2398 if (!is_powerof2(bit))
2399 return 0;
2400
2401 #if defined COMPILE_PCRE8
2402
2403 #ifdef SUPPORT_UTF
2404 if (common->utf && c > 127)
2405 {
2406 n = GET_EXTRALEN(*cc);
2407 while ((bit & 0x3f) == 0)
2408 {
2409 n--;
2410 bit >>= 6;
2411 }
2412 return (n << 8) | bit;
2413 }
2414 #endif /* SUPPORT_UTF */
2415 return (0 << 8) | bit;
2416
2417 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2418
2419 #ifdef SUPPORT_UTF
2420 if (common->utf && c > 65535)
2421 {
2422 if (bit >= (1 << 10))
2423 bit >>= 10;
2424 else
2425 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2426 }
2427 #endif /* SUPPORT_UTF */
2428 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2429
2430 #endif /* COMPILE_PCRE[8|16|32] */
2431 }
2432
2433 static void check_partial(compiler_common *common, BOOL force)
2434 {
2435 /* Checks whether a partial matching is occurred. Does not modify registers. */
2436 DEFINE_COMPILER;
2437 struct sljit_jump *jump = NULL;
2438
2439 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2440
2441 if (common->mode == JIT_COMPILE)
2442 return;
2443
2444 if (!force)
2445 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2446 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2447 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2448
2449 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2451 else
2452 {
2453 if (common->partialmatchlabel != NULL)
2454 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2455 else
2456 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2457 }
2458
2459 if (jump != NULL)
2460 JUMPHERE(jump);
2461 }
2462
2463 static void check_str_end(compiler_common *common, jump_list **end_reached)
2464 {
2465 /* Does not affect registers. Usually used in a tight spot. */
2466 DEFINE_COMPILER;
2467 struct sljit_jump *jump;
2468
2469 if (common->mode == JIT_COMPILE)
2470 {
2471 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2472 return;
2473 }
2474
2475 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2476 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2477 {
2478 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2479 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2480 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2481 }
2482 else
2483 {
2484 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2485 if (common->partialmatchlabel != NULL)
2486 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2487 else
2488 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2489 }
2490 JUMPHERE(jump);
2491 }
2492
2493 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2494 {
2495 DEFINE_COMPILER;
2496 struct sljit_jump *jump;
2497
2498 if (common->mode == JIT_COMPILE)
2499 {
2500 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2501 return;
2502 }
2503
2504 /* Partial matching mode. */
2505 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2506 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2507 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2508 {
2509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2510 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2511 }
2512 else
2513 {
2514 if (common->partialmatchlabel != NULL)
2515 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2516 else
2517 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2518 }
2519 JUMPHERE(jump);
2520 }
2521
2522 static void peek_char(compiler_common *common, pcre_uint32 max)
2523 {
2524 /* Reads the character into TMP1, keeps STR_PTR.
2525 Does not check STR_END. TMP2 Destroyed. */
2526 DEFINE_COMPILER;
2527 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2528 struct sljit_jump *jump;
2529 #endif
2530
2531 SLJIT_UNUSED_ARG(max);
2532
2533 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2534 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2535 if (common->utf)
2536 {
2537 if (max < 128) return;
2538
2539 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2540 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2541 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2542 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2543 JUMPHERE(jump);
2544 }
2545 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2546
2547 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2548 if (common->utf)
2549 {
2550 if (max < 0xd800) return;
2551
2552 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2553 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2554 /* TMP2 contains the high surrogate. */
2555 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2556 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2557 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2558 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2559 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2560 JUMPHERE(jump);
2561 }
2562 #endif
2563 }
2564
2565 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2566
2567 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2568 {
2569 /* Tells whether the character codes below 128 are enough
2570 to determine a match. */
2571 const pcre_uint8 value = nclass ? 0xff : 0;
2572 const pcre_uint8* end = bitset + 32;
2573
2574 bitset += 16;
2575 do
2576 {
2577 if (*bitset++ != value)
2578 return FALSE;
2579 }
2580 while (bitset < end);
2581 return TRUE;
2582 }
2583
2584 static void read_char7_type(compiler_common *common, BOOL full_read)
2585 {
2586 /* Reads the precise character type of a character into TMP1, if the character
2587 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2588 full_read argument tells whether characters above max are accepted or not. */
2589 DEFINE_COMPILER;
2590 struct sljit_jump *jump;
2591
2592 SLJIT_ASSERT(common->utf);
2593
2594 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2595 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2596
2597 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2598
2599 if (full_read)
2600 {
2601 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2602 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2603 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2604 JUMPHERE(jump);
2605 }
2606 }
2607
2608 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2609
2610 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2611 {
2612 /* Reads the precise value of a character into TMP1, if the character is
2613 between min and max (c >= min && c <= max). Otherwise it returns with a value
2614 outside the range. Does not check STR_END. */
2615 DEFINE_COMPILER;
2616 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2617 struct sljit_jump *jump;
2618 #endif
2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2620 struct sljit_jump *jump2;
2621 #endif
2622
2623 SLJIT_UNUSED_ARG(update_str_ptr);
2624 SLJIT_UNUSED_ARG(min);
2625 SLJIT_UNUSED_ARG(max);
2626 SLJIT_ASSERT(min <= max);
2627
2628 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2629 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2630
2631 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2632 if (common->utf)
2633 {
2634 if (max < 128 && !update_str_ptr) return;
2635
2636 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2637 if (min >= 0x10000)
2638 {
2639 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2640 if (update_str_ptr)
2641 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2642 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2643 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2644 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2645 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2646 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2647 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2648 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2649 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2650 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2652 if (!update_str_ptr)
2653 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2654 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2655 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2656 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2657 JUMPHERE(jump2);
2658 if (update_str_ptr)
2659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2660 }
2661 else if (min >= 0x800 && max <= 0xffff)
2662 {
2663 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2664 if (update_str_ptr)
2665 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2666 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2667 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2668 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2669 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2670 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2671 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2672 if (!update_str_ptr)
2673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2674 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2675 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2676 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2677 JUMPHERE(jump2);
2678 if (update_str_ptr)
2679 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2680 }
2681 else if (max >= 0x800)
2682 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2683 else if (max < 128)
2684 {
2685 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2687 }
2688 else
2689 {
2690 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2691 if (!update_str_ptr)
2692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2693 else
2694 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2695 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2696 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2697 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2698 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2699 if (update_str_ptr)
2700 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2701 }
2702 JUMPHERE(jump);
2703 }
2704 #endif
2705
2706 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2707 if (common->utf)
2708 {
2709 if (max >= 0x10000)
2710 {
2711 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2712 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2713 /* TMP2 contains the high surrogate. */
2714 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2715 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2716 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2718 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2719 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2720 JUMPHERE(jump);
2721 return;
2722 }
2723
2724 if (max < 0xd800 && !update_str_ptr) return;
2725
2726 /* Skip low surrogate if necessary. */
2727 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2728 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2729 if (update_str_ptr)
2730 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2731 if (max >= 0xd800)
2732 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2733 JUMPHERE(jump);
2734 }
2735 #endif
2736 }
2737
2738 static SLJIT_INLINE void read_char(compiler_common *common)
2739 {
2740 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2741 }
2742
2743 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2744 {
2745 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2746 DEFINE_COMPILER;
2747 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2748 struct sljit_jump *jump;
2749 #endif
2750 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2751 struct sljit_jump *jump2;
2752 #endif
2753
2754 SLJIT_UNUSED_ARG(update_str_ptr);
2755
2756 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2757 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2758
2759 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2760 if (common->utf)
2761 {
2762 /* This can be an extra read in some situations, but hopefully
2763 it is needed in most cases. */
2764 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2765 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2766 if (!update_str_ptr)
2767 {
2768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2769 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2770 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2771 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2773 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2775 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2776 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2777 JUMPHERE(jump2);
2778 }
2779 else
2780 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2781 JUMPHERE(jump);
2782 return;
2783 }
2784 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2785
2786 #if !defined COMPILE_PCRE8
2787 /* The ctypes array contains only 256 values. */
2788 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2789 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2790 #endif
2791 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2792 #if !defined COMPILE_PCRE8
2793 JUMPHERE(jump);
2794 #endif
2795
2796 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2797 if (common->utf && update_str_ptr)
2798 {
2799 /* Skip low surrogate if necessary. */
2800 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2801 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2803 JUMPHERE(jump);
2804 }
2805 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2806 }
2807
2808 static void skip_char_back(compiler_common *common)
2809 {
2810 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2811 DEFINE_COMPILER;
2812 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2813 #if defined COMPILE_PCRE8
2814 struct sljit_label *label;
2815
2816 if (common->utf)
2817 {
2818 label = LABEL();
2819 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2820 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2821 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2822 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2823 return;
2824 }
2825 #elif defined COMPILE_PCRE16
2826 if (common->utf)
2827 {
2828 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2829 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2830 /* Skip low surrogate if necessary. */
2831 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2832 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2833 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2834 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2835 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2836 return;
2837 }
2838 #endif /* COMPILE_PCRE[8|16] */
2839 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2840 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2841 }
2842
2843 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2844 {
2845 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2846 DEFINE_COMPILER;
2847 struct sljit_jump *jump;
2848
2849 if (nltype == NLTYPE_ANY)
2850 {
2851 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2852 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2853 }
2854 else if (nltype == NLTYPE_ANYCRLF)
2855 {
2856 if (jumpifmatch)
2857 {
2858 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2859 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2860 }
2861 else
2862 {
2863 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2864 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2865 JUMPHERE(jump);
2866 }
2867 }
2868 else
2869 {
2870 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2871 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2872 }
2873 }
2874
2875 #ifdef SUPPORT_UTF
2876
2877 #if defined COMPILE_PCRE8
2878 static void do_utfreadchar(compiler_common *common)
2879 {
2880 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2881 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2882 DEFINE_COMPILER;
2883 struct sljit_jump *jump;
2884
2885 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2886 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2887 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2888 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2889 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2890 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2891
2892 /* Searching for the first zero. */
2893 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2894 jump = JUMP(SLJIT_C_NOT_ZERO);
2895 /* Two byte sequence. */
2896 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2897 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2898 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2899
2900 JUMPHERE(jump);
2901 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2902 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2903 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2904 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2905 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2906
2907 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2908 jump = JUMP(SLJIT_C_NOT_ZERO);
2909 /* Three byte sequence. */
2910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2912 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2913
2914 /* Four byte sequence. */
2915 JUMPHERE(jump);
2916 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2917 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2918 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2920 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2921 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2922 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2923 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2924 }
2925
2926 static void do_utfreadchar16(compiler_common *common)
2927 {
2928 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2929 of the character (>= 0xc0). Return value in TMP1. */
2930 DEFINE_COMPILER;
2931 struct sljit_jump *jump;
2932
2933 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2934 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2935 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2936 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2937 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2938 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2939
2940 /* Searching for the first zero. */
2941 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2942 jump = JUMP(SLJIT_C_NOT_ZERO);
2943 /* Two byte sequence. */
2944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2945 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2946
2947 JUMPHERE(jump);
2948 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2949 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2950 /* This code runs only in 8 bit mode. No need to shift the value. */
2951 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2952 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2953 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2954 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2955 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2956 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2957 /* Three byte sequence. */
2958 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2959 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2960 }
2961
2962 static void do_utfreadtype8(compiler_common *common)
2963 {
2964 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2965 of the character (>= 0xc0). Return value in TMP1. */
2966 DEFINE_COMPILER;
2967 struct sljit_jump *jump;
2968 struct sljit_jump *compare;
2969
2970 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2971
2972 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2973 jump = JUMP(SLJIT_C_NOT_ZERO);
2974 /* Two byte sequence. */
2975 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2976 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2977 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2978 /* The upper 5 bits are known at this point. */
2979 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2980 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2981 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2982 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2983 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2984 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2985
2986 JUMPHERE(compare);
2987 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2988 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2989
2990 /* We only have types for characters less than 256. */
2991 JUMPHERE(jump);
2992 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2993 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2994 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2995 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2996 }
2997
2998 #endif /* COMPILE_PCRE8 */
2999
3000 #endif /* SUPPORT_UTF */
3001
3002 #ifdef SUPPORT_UCP
3003
3004 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3005 #define UCD_BLOCK_MASK 127
3006 #define UCD_BLOCK_SHIFT 7
3007
3008 static void do_getucd(compiler_common *common)
3009 {
3010 /* Search the UCD record for the character comes in TMP1.
3011 Returns chartype in TMP1 and UCD offset in TMP2. */
3012 DEFINE_COMPILER;
3013
3014 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3015
3016 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3017 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3018 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3019 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3020 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3021 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3023 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3024 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3025 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3026 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3027 }
3028 #endif
3029
3030 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3031 {
3032 DEFINE_COMPILER;
3033 struct sljit_label *mainloop;
3034 struct sljit_label *newlinelabel = NULL;
3035 struct sljit_jump *start;
3036 struct sljit_jump *end = NULL;
3037 struct sljit_jump *nl = NULL;
3038 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3039 struct sljit_jump *singlechar;
3040 #endif
3041 jump_list *newline = NULL;
3042 BOOL newlinecheck = FALSE;
3043 BOOL readuchar = FALSE;
3044
3045 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3046 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3047 newlinecheck = TRUE;
3048
3049 if (firstline)
3050 {
3051 /* Search for the end of the first line. */
3052 SLJIT_ASSERT(common->first_line_end != 0);
3053 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3054
3055 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3056 {
3057 mainloop = LABEL();
3058 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3059 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3060 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3061 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3062 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3063 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3064 JUMPHERE(end);
3065 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3066 }
3067 else
3068 {
3069 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3070 mainloop = LABEL();
3071 /* Continual stores does not cause data dependency. */
3072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3073 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3074 check_newlinechar(common, common->nltype, &newline, TRUE);
3075 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3076 JUMPHERE(end);
3077 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3078 set_jumps(newline, LABEL());
3079 }
3080
3081 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3082 }
3083
3084 start = JUMP(SLJIT_JUMP);
3085
3086 if (newlinecheck)
3087 {
3088 newlinelabel = LABEL();
3089 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3090 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3091 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3092 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3093 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3094 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3095 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3096 #endif
3097 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3098 nl = JUMP(SLJIT_JUMP);
3099 }
3100
3101 mainloop = LABEL();
3102
3103 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3104 #ifdef SUPPORT_UTF
3105 if (common->utf) readuchar = TRUE;
3106 #endif
3107 if (newlinecheck) readuchar = TRUE;
3108
3109 if (readuchar)
3110 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3111
3112 if (newlinecheck)
3113 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3114
3115 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3116 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3117 #if defined COMPILE_PCRE8
3118 if (common->utf)
3119 {
3120 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3121 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3122 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3123 JUMPHERE(singlechar);
3124 }
3125 #elif defined COMPILE_PCRE16
3126 if (common->utf)
3127 {
3128 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3129 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3130 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3131 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3132 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3133 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3134 JUMPHERE(singlechar);
3135 }
3136 #endif /* COMPILE_PCRE[8|16] */
3137 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3138 JUMPHERE(start);
3139
3140 if (newlinecheck)
3141 {
3142 JUMPHERE(end);
3143 JUMPHERE(nl);
3144 }
3145
3146 return mainloop;
3147 }
3148
/* Scans the compiled pattern at cc and records what is known about the
   leading characters in chars[], two entries per character position:
     chars[2k]     - the character value with every uncertain bit set
     chars[2k + 1] - the mask of uncertain bits
   A value entry still holding NOTACHAR is filled directly; otherwise the new
   character is merged into the existing pair and the mask is widened with the
   bits that differ. Positions matched by a class or a character type get an
   all-ones value and mask for the code unit width.

   max_chars is the number of positions that may still be written. The
   function returns the number of positions it counted; the position whose
   write drops max_chars to zero is stored but not counted. */
3149 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3150 {
3151 /* Recursive function, which scans prefix literals. */
3152 int len, repeat, len_save, consumed = 0;
3153 pcre_uint32 caseless, chr, mask;
3154 pcre_uchar *alternative, *cc_save;
3155 BOOL last, any;
3156
/* Repeat count of the next item. Set by OP_EXACT(I) / OP_TYPEEXACT and reset
   to 1 after every recorded item. */
3157 repeat = 1;
3158 while (TRUE)
3159 {
/* Per-item state: 'last' stops the scan after this item, 'any' marks an item
   whose character is unknown, 'caseless' requests other-case handling. */
3160 last = TRUE;
3161 any = FALSE;
3162 caseless = 0;
3163 switch (*cc)
3164 {
3165 case OP_CHARI:
3166 caseless = 1;
/* Fall through. */
3167 case OP_CHAR:
/* A plain literal: scanning may go on after it. */
3168 last = FALSE;
3169 cc++;
3170 break;
3171
3172 case OP_SOD:
3173 case OP_SOM:
3174 case OP_SET_SOM:
3175 case OP_NOT_WORD_BOUNDARY:
3176 case OP_WORD_BOUNDARY:
3177 case OP_EODN:
3178 case OP_EOD:
3179 case OP_CIRC:
3180 case OP_CIRCM:
3181 case OP_DOLL:
3182 case OP_DOLLM:
3183 /* Zero width assertions. */
3184 cc++;
3185 continue;
3186
/* One-or-more repeats: the character is recorded once and, because 'last'
   is still TRUE, the scan ends after it. */
3187 case OP_PLUS:
3188 case OP_MINPLUS:
3189 case OP_POSPLUS:
3190 cc++;
3191 break;
3192
3193 case OP_EXACTI:
3194 caseless = 1;
/* Fall through. */
3195 case OP_EXACT:
/* The character is recorded 'repeat' times and scanning goes on. */
3196 repeat = GET2(cc, 1);
3197 last = FALSE;
3198 cc += 1 + IMM2_SIZE;
3199 break;
3200
3201 case OP_PLUSI:
3202 case OP_MINPLUSI:
3203 case OP_POSPLUSI:
3204 caseless = 1;
3205 cc++;
3206 break;
3207
3208 case OP_KET:
/* Step over the closing bracket and keep scanning what follows it. */
3209 cc += 1 + LINK_SIZE;
3210 continue;
3211
3212 case OP_ALT:
/* Follow the link stored after the opcode to leave this alternative. */
3213 cc += GET(cc, 1);
3214 continue;
3215
3216 case OP_ONCE:
3217 case OP_ONCE_NC:
3218 case OP_BRA:
3219 case OP_BRAPOS:
3220 case OP_CBRA:
3221 case OP_CBRAPOS:
/* Every further alternative is scanned recursively into the same chars[]
   slots, so its characters are merged with whatever is already stored. The
   first alternative is then scanned inline by the enclosing loop.
   NOTE(review): the value returned by the recursive call is a count of
   consumed positions, yet it is assigned to max_chars (the remaining limit)
   - confirm this is intended. */
3222 alternative = cc + GET(cc, 1);
3223 while (*alternative == OP_ALT)
3224 {
3225 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3226 if (max_chars == 0)
3227 return consumed;
3228 alternative += GET(alternative, 1);
3229 }
3230
/* Capturing brackets carry an extra IMM2_SIZE field after the link. */
3231 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3232 cc += IMM2_SIZE;
3233 cc += 1 + LINK_SIZE;
3234 continue;
3235
3236 case OP_CLASS:
3237 case OP_NCLASS:
/* The opcode is followed by a 32 byte bitmap. */
3238 any = TRUE;
3239 cc += 1 + 32 / sizeof(pcre_uchar);
3240 break;
3241
3242 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3243 case OP_XCLASS:
3244 any = TRUE;
3245 cc += GET(cc, 1);
3246 break;
3247 #endif
3248
3249 case OP_NOT_DIGIT:
3250 case OP_DIGIT:
3251 case OP_NOT_WHITESPACE:
3252 case OP_WHITESPACE:
3253 case OP_NOT_WORDCHAR:
3254 case OP_WORDCHAR:
3255 case OP_ANY:
3256 case OP_ALLANY:
3257 any = TRUE;
3258 cc++;
3259 break;
3260
3261 #ifdef SUPPORT_UCP
3262 case OP_NOTPROP:
3263 case OP_PROP:
3264 any = TRUE;
3265 cc += 1 + 2;
3266 break;
3267 #endif
3268
3269 case OP_TYPEEXACT:
/* Only the count is taken here; the repeated type opcode that follows is
   handled by the next iteration. */
3270 repeat = GET2(cc, 1);
3271 cc += 1 + IMM2_SIZE;
3272 continue;
3273
3274 default:
/* Anything else ends the prefix. */
3275 return consumed;
3276 }
3277
3278 if (any)
3279 {
/* Unknown character: in UTF mode the scan stops, otherwise 'repeat'
   positions are marked with every bit uncertain. */
3280 #ifdef SUPPORT_UTF
3281 if (common->utf) return consumed;
3282 #endif
3283 #if defined COMPILE_PCRE8
3284 mask = 0xff;
3285 #elif defined COMPILE_PCRE16
3286 mask = 0xffff;
3287 #elif defined COMPILE_PCRE32
3288 mask = 0xffffffff;
3289 #else
3290 SLJIT_ASSERT_STOP();
3291 #endif
3292
3293 do
3294 {
3295 chars[0] = mask;
3296 chars[1] = mask;
3297
3298 if (--max_chars == 0)
3299 return consumed;
3300 consumed++;
3301 chars += 2;
3302 }
3303 while (--repeat > 0);
3304
3305 repeat = 1;
3306 continue;
3307 }
3308
/* A literal character: cc points at its first code unit, len is the number
   of code units it occupies. */
3309 len = 1;
3310 #ifdef SUPPORT_UTF
3311 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3312 #endif
3313
3314 if (caseless != 0 && char_has_othercase(common, cc))
3315 {
/* Repack the value returned by char_get_othercase_bit(): the low byte of
   'caseless' becomes the remaining-length value at which the case bit must
   be applied (compared against 'len' below) and the bits above it become
   the mask. A zero result from the helper ends the scan. */
3316 caseless = char_get_othercase_bit(common, cc);
3317 if (caseless == 0)
3318 return consumed;
3319 #ifdef COMPILE_PCRE8
3320 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3321 #else
3322 if ((caseless & 0x100) != 0)
3323 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3324 else
3325 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3326 #endif
3327 }
3328 else
3329 caseless = 0;
3330
/* Saved so the same character can be recorded again for each repeat. */
3331 len_save = len;
3332 cc_save = cc;
3333 while (TRUE)
3334 {
3335 do
3336 {
3337 chr = *cc;
3338 #ifdef COMPILE_PCRE32
3339 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3340 return consumed;
3341 #endif
3342 mask = 0;
/* Apply the case bit only on the code unit selected above. */
3343 if ((pcre_uint32)len == (caseless & 0xff))
3344 {
3345 mask = caseless >> 8;
3346 chr |= mask;
3347 }
3348
3349 if (chars[0] == NOTACHAR)
3350 {
/* First value seen for this position. */
3351 chars[0] = chr;
3352 chars[1] = mask;
3353 }
3354 else
3355 {
/* Merge with the stored value: every differing bit becomes uncertain. */
3356 mask |= chars[0] ^ chr;
3357 chr |= mask;
3358 chars[0] = chr;
3359 chars[1] |= mask;
3360 }
3361
3362 len--;
3363 if (--max_chars == 0)
3364 return consumed;
3365 consumed++;
3366 chars += 2;
3367 cc++;
3368 }
3369 while (len > 0);
3370
3371 if (--repeat == 0)
3372 break;
3373
/* Rewind to the start of the character for the next repetition. */
3374 len = len_save;
3375 cc = cc_save;
3376 }
3377
3378 repeat = 1;
3379 if (last)
3380 return consumed;
3381 }
3382 }
3383
/* Maximum number of leading character positions examined by
   fast_forward_first_n_chars(). */
3384 #define MAX_N_CHARS 16
3385
/* Emits a start-of-match search loop based on up to three known leading
   characters of the pattern.

   scan_prefix() fills chars[] with (value, mask) pairs. Up to three positions
   whose ones[] count is at most 2 are picked: the first such position, the
   last one, and one near the middle. The emitted loop loads the subject
   characters at those offsets, ORs in the mask and compares with the value,
   advancing STR_PTR by one character until all selected positions agree or
   the (temporarily lowered) STR_END is reached.

   Returns FALSE without emitting code when scan_prefix() reports at most one
   position or no position qualifies; returns TRUE otherwise. TMP1 and TMP2
   are overwritten; TMP3 holds the original STR_END when firstline is set. */
3386 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3387 {
3388 DEFINE_COMPILER;
3389 struct sljit_label *start;
3390 struct sljit_jump *quit;
3391 pcre_uint32 chars[MAX_N_CHARS * 2];
3392 pcre_uint8 ones[MAX_N_CHARS];
3393 pcre_uint32 mask;
3394 int i, max;
3395 int offsets[3];
3396
/* Mark every position as unset: NOTACHAR value, empty mask. */
3397 for (i = 0; i < MAX_N_CHARS; i++)
3398 {
3399 chars[i << 1] = NOTACHAR;
3400 chars[(i << 1) + 1] = 0;
3401 }
3402
3403 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3404
3405 if (max <= 1)
3406 return FALSE;
3407
/* ones[i] sums ones_in_half_byte[] over every nibble of the mask of position
   i (presumably the number of uncertain bits - the table is defined
   elsewhere). */
3408 for (i = 0; i < max; i++)
3409 {
3410 mask = chars[(i << 1) + 1];
3411 ones[i] = ones_in_half_byte[mask & 0xf];
3412 mask >>= 4;
3413 while (mask != 0)
3414 {
3415 ones[i] += ones_in_half_byte[mask & 0xf];
3416 mask >>= 4;
3417 }
3418 }
3419
/* offsets[0]: first qualifying position. */
3420 offsets[0] = -1;
3421 /* Scan forward. */
3422 for (i = 0; i < max; i++)
3423 if (ones[i] <= 2) {
3424 offsets[0] = i;
3425 break;
3426 }
3427
3428 if (offsets[0] == -1)
3429 return FALSE;
3430
/* offsets[1]: last qualifying position after offsets[0], or -1. */
3431 /* Scan backward. */
3432 offsets[1] = -1;
3433 for (i = max - 1; i > offsets[0]; i--)
3434 if (ones[i] <= 2) {
3435 offsets[1] = i;
3436 break;
3437 }
3438
/* offsets[2]: a qualifying position strictly between the other two, searched
   upwards from just above the midpoint and then downwards from it; -1 if
   there is none. */
3439 offsets[2] = -1;
3440 if (offsets[1] >= 0)
3441 {
3442 /* Scan from middle. */
3443 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3444 if (ones[i] <= 2)
3445 {
3446 offsets[2] = i;
3447 break;
3448 }
3449
3450 if (offsets[2] == -1)
3451 {
3452 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3453 if (ones[i] <= 2)
3454 {
3455 offsets[2] = i;
3456 break;
3457 }
3458 }
3459 }
3460
3461 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3462 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3463
/* Compact the selected pairs to the front of chars[]:
   chars[0..1] = first, chars[2..3] = middle, chars[4..5] = last. */
3464 chars[0] = chars[offsets[0] << 1];
3465 chars[1] = chars[(offsets[0] << 1) + 1];
3466 if (offsets[2] >= 0)
3467 {
3468 chars[2] = chars[offsets[2] << 1];
3469 chars[3] = chars[(offsets[2] << 1) + 1];
3470 }
3471 if (offsets[1] >= 0)
3472 {
3473 chars[4] = chars[offsets[1] << 1];
3474 chars[5] = chars[(offsets[1] << 1) + 1];
3475 }
3476
/* Lower the scan limit by max - 1 characters for the duration of the loop
   (presumably so that the loads at positive offsets stay below the real
   limit). With firstline the limit is taken from first_line_end and the real
   STR_END is parked in TMP3. */
3477 max -= 1;
3478 if (firstline)
3479 {
3480 SLJIT_ASSERT(common->first_line_end != 0);
3481 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3482 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3483 }
3484 else
3485 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3486
3487 start = LABEL();
3488 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3489
/* Load the first and last selected characters, then advance STR_PTR. Every
   failed comparison below jumps back to 'start' with STR_PTR already moved. */
3490 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3491 if (offsets[1] >= 0)
3492 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3493 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3494
3495 if (chars[1] != 0)
3496 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3497 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* The middle character is loaded with offset - 1 because STR_PTR has already
   been advanced by one character. */
3498 if (offsets[2] >= 0)
3499 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3500
3501 if (offsets[1] >= 0)
3502 {
3503 if (chars[5] != 0)
3504 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3505 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3506 }
3507
3508 if (offsets[2] >= 0)
3509 {
3510 if (chars[3] != 0)
3511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3512 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3513 }
/* All selected positions matched: undo the one character advance. */
3514 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3515
3516 JUMPHERE(quit);
3517
/* Restore the real STR_END. */
3518 if (firstline)
3519 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3520 else
3521 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3522 return TRUE;
3523 }
3524
3525 #undef MAX_N_CHARS
3526
3527 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3528 {
3529 DEFINE_COMPILER;
3530 struct sljit_label *start;
3531 struct sljit_jump *quit;
3532 struct sljit_jump *found;
3533 pcre_uchar oc, bit;
3534
3535 if (firstline)
3536 {
3537 SLJIT_ASSERT(common->first_line_end != 0);
3538 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3539 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3540 }
3541
3542 start = LABEL();
3543 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3544 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3545
3546 oc = first_char;
3547 if (caseless)
3548 {
3549 oc = TABLE_GET(first_char, common->fcc, first_char);
3550 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3551 if (first_char > 127 && common->utf)
3552 oc = UCD_OTHERCASE(first_char);
3553 #endif
3554 }
3555 if (first_char == oc)
3556 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3557 else
3558 {
3559 bit = first_char ^ oc;
3560 if (is_powerof2(bit))
3561 {
3562 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3563 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3564 }
3565 else
3566 {
3567 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3568 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3569 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3570 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3571 found = JUMP(SLJIT_C_NOT_ZERO);
3572 }
3573 }
3574
3575 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3576 JUMPTO(SLJIT_JUMP, start);
3577 JUMPHERE(found);
3578 JUMPHERE(quit);
3579
3580 if (firstline)
3581 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3582 }
3583
3584 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3585 {
3586 DEFINE_COMPILER;
3587 struct sljit_label *loop;
3588 struct sljit_jump *lastchar;
3589 struct sljit_jump *firstchar;
3590 struct sljit_jump *quit;
3591 struct sljit_jump *foundcr = NULL;
3592 struct sljit_jump *notfoundnl;
3593 jump_list *newline = NULL;
3594
3595 if (firstline)
3596 {
3597 SLJIT_ASSERT(common->first_line_end != 0);
3598 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3599 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3600 }
3601
3602 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3603 {
3604 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3605 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3606 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3607 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3608 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3609
3610 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3611 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3612 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3613 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3614 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3615 #endif
3616 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3617
3618 loop = LABEL();
3619 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3620 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3621 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3622 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3623 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3624 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3625
3626 JUMPHERE(quit);
3627 JUMPHERE(firstchar);
3628 JUMPHERE(lastchar);
3629
3630 if (firstline)
3631 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3632 return;
3633 }
3634
3635 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3636 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3637 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3638 skip_char_back(common);
3639
3640 loop = LABEL();
3641 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3642 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3643 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3644 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3645 check_newlinechar(common, common->nltype, &newline, FALSE);
3646 set_jumps(newline, loop);
3647
3648 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3649 {
3650 quit = JUMP(SLJIT_JUMP);
3651 JUMPHERE(foundcr);
3652 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3654 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3655 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3656 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3657 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3658 #endif
3659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3660 JUMPHERE(notfoundnl);
3661 JUMPHERE(quit);
3662 }
3663 JUMPHERE(lastchar);
3664 JUMPHERE(firstchar);
3665
3666 if (firstline)
3667 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3668 }
3669
3670 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3671
3672 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3673 {
3674 DEFINE_COMPILER;
3675 struct sljit_label *start;
3676 struct sljit_jump *quit;
3677 struct sljit_jump *found = NULL;
3678 jump_list *matches = NULL;
3679 #ifndef COMPILE_PCRE8
3680 struct sljit_jump *jump;
3681 #endif
3682
3683 if (firstline)
3684 {
3685 SLJIT_ASSERT(common->first_line_end != 0);
3686 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3687 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3688 }
3689
3690 start = LABEL();
3691 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3692 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3693 #ifdef SUPPORT_UTF
3694 if (common->utf)
3695 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3696 #endif
3697
3698 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3699 {
3700 #ifndef COMPILE_PCRE8
3701 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3703 JUMPHERE(jump);
3704 #endif
3705 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3706 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3707 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3708 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3709 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3710 found = JUMP(SLJIT_C_NOT_ZERO);
3711 }
3712
3713 #ifdef SUPPORT_UTF
3714 if (common->utf)
3715 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3716 #endif
3717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3718 #ifdef SUPPORT_UTF
3719 #if defined COMPILE_PCRE8
3720 if (common->utf)
3721 {
3722 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3723 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3724 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3725 }
3726 #elif defined COMPILE_PCRE16
3727 if (common->utf)
3728 {
3729 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3730 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3731 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3732 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3733 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3734 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3735 }
3736 #endif /* COMPILE_PCRE[8|16] */
3737 #endif /* SUPPORT_UTF */
3738 JUMPTO(SLJIT_JUMP, start);
3739 if (found != NULL)
3740 JUMPHERE(found);
3741 if (matches != NULL)
3742 set_jumps(matches, LABEL());
3743 JUMPHERE(quit);
3744
3745 if (firstline)
3746 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3747 }
3748
3749 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3750 {
3751 DEFINE_COMPILER;
3752 struct sljit_label *loop;
3753 struct sljit_jump *toolong;
3754 struct sljit_jump *alreadyfound;
3755 struct sljit_jump *found;
3756 struct sljit_jump *foundoc = NULL;
3757 struct sljit_jump *notfound;
3758 pcre_uint32 oc, bit;
3759
3760 SLJIT_ASSERT(common->req_char_ptr != 0);
3761 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3762 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3763 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3764 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3765
3766 if (has_firstchar)
3767 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3768 else
3769 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3770
3771 loop = LABEL();
3772 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3773
3774 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3775 oc = req_char;
3776 if (caseless)
3777 {
3778 oc = TABLE_GET(req_char, common->fcc, req_char);
3779 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3780 if (req_char > 127 && common->utf)
3781 oc = UCD_OTHERCASE(req_char);
3782 #endif
3783 }
3784 if (req_char == oc)
3785 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3786 else
3787 {
3788 bit = req_char ^ oc;
3789 if (is_powerof2(bit))
3790 {
3791 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3792 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3793 }
3794 else
3795 {
3796 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3797 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3798 }
3799 }
3800 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3801 JUMPTO(SLJIT_JUMP, loop);
3802
3803 JUMPHERE(found);
3804 if (foundoc)
3805 JUMPHERE(foundoc);
3806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3807 JUMPHERE(alreadyfound);
3808 JUMPHERE(toolong);
3809 return notfound;
3810 }
3811
3812 static void do_revertframes(compiler_common *common)
3813 {
3814 DEFINE_COMPILER;
3815 struct sljit_jump *jump;
3816 struct sljit_label *mainloop;
3817
3818 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3819 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3820 GET_LOCAL_BASE(TMP3, 0, 0);
3821
3822 /* Drop frames until we reach STACK_TOP. */
3823 mainloop = LABEL();
3824 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3825 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3826 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3827
3828 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3829 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3830 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3831 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3832 JUMPTO(SLJIT_JUMP, mainloop);
3833
3834 JUMPHERE(jump);
3835 jump = JUMP(SLJIT_C_SIG_LESS);
3836 /* End of dropping frames. */
3837 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3838
3839 JUMPHERE(jump);
3840 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3841 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3842 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3843 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3844 JUMPTO(SLJIT_JUMP, mainloop);
3845 }
3846
/* Emits the fast-call helper that tests for a word boundary at STR_PTR.

   The return address is kept in LOCALS0. LOCALS1 receives 1 when the
   character before STR_PTR is a word character and 0 otherwise (also 0 when
   STR_PTR is at or before the 'begin' position of the arguments). TMP2
   receives the same information for the character at STR_PTR (0 when the
   end-of-subject check diverts). The helper finishes by XOR-ing TMP2 with
   LOCALS1 with SLJIT_SET_E, so the caller reads the answer from the flags:
   a non-zero result means the two values differ. TMP1 and TMP2 are
   overwritten. */
3847 static void check_wordboundary(compiler_common *common)
3848 {
3849 DEFINE_COMPILER;
3850 struct sljit_jump *skipread;
3851 jump_list *skipread_list = NULL;
3852 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3853 struct sljit_jump *jump;
3854 #endif
3855
/* The shift by 4 used below relies on this value of ctype_word. */
3856 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3857
3858 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3859 /* Get type of the previous char, and put it to LOCALS1. */
3860 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3861 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3862 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character: keep the 0 just stored in LOCALS1. */
3863 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; read_char() leaves STR_PTR
   where it was on entry (presumably - helpers are defined elsewhere). */
3864 skip_char_back(common);
3865 check_start_used_ptr(common);
3866 read_char(common);
3867
3868 /* Testing char type. */
3869 #ifdef SUPPORT_UCP
3870 if (common->use_ucp)
3871 {
/* TMP2 = 1 for an underscore; otherwise the getucd helper is called and TMP2
   becomes 1 when the value it leaves in TMP1 lies in ucp_Ll..ucp_Lu or in
   ucp_Nd..ucp_No. */
3872 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3873 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3874 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3875 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3876 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3877 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3878 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3879 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3880 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3881 JUMPHERE(jump);
3882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3883 }
3884 else
3885 #endif
3886 {
/* Table lookup: characters above 255 skip the lookup and keep LOCALS1 == 0;
   for the rest, bit 4 (ctype_word) of the ctypes entry is extracted. */
3887 #ifndef COMPILE_PCRE8
3888 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3889 #elif defined SUPPORT_UTF
3890 /* Here LOCALS1 has already been zeroed. */
3891 jump = NULL;
3892 if (common->utf)
3893 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3894 #endif /* COMPILE_PCRE8 */
3895 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3896 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3897 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3899 #ifndef COMPILE_PCRE8
3900 JUMPHERE(jump);
3901 #elif defined SUPPORT_UTF
3902 if (jump != NULL)
3903 JUMPHERE(jump);
3904 #endif /* COMPILE_PCRE8 */
3905 }
3906 JUMPHERE(skipread);
3907
/* Now the character at STR_PTR. TMP2 defaults to 0 for the case where
   check_str_end() diverts to skipread_list. */
3908 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3909 check_str_end(common, &skipread_list);
3910 peek_char(common, READ_CHAR_MAX);
3911
3912 /* Testing char type. This is a code duplication. */
3913 #ifdef SUPPORT_UCP
3914 if (common->use_ucp)
3915 {
3916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3917 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3918 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3919 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3920 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3921 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3922 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3923 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3924 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3925 JUMPHERE(jump);
3926 }
3927 else
3928 #endif
3929 {
3930 #ifndef COMPILE_PCRE8
3931 /* TMP2 may be destroyed by peek_char. */
3932 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3933 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3934 #elif defined SUPPORT_UTF
3935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3936 jump = NULL;
3937 if (common->utf)
3938 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3939 #endif
3940 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3941 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3942 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3943 #ifndef COMPILE_PCRE8
3944 JUMPHERE(jump);
3945 #elif defined SUPPORT_UTF
3946 if (jump != NULL)
3947 JUMPHERE(jump);
3948 #endif /* COMPILE_PCRE8 */
3949 }
3950 set_jumps(skipread_list, LABEL());
3951
/* Flags reflect (current is word) XOR (previous is word). */
3952 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3953 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3954 }
3955
/* Tries to replace a 256 bit class bitmap test by a short sequence of
   compare instructions on the character held in TMP1.

   bits       - the bitmap; bit (c & 7) of bits[c >> 3] belongs to character c
   nclass     - together with the value of the last bit it decides whether
                256 is recorded as a closing boundary
   invert     - when set, the roles of set and clear bits are swapped
   backtracks - receives the jumps taken for characters whose (possibly
                inverted) bit is 0

   The bitmap is first turned into the list of positions where the bit value
   changes. Returns FALSE, without emitting anything, when more than four such
   boundaries are needed; returns TRUE after emitting the checks otherwise.
   The emitted code may modify TMP1. */
3956 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3957 {
3958 DEFINE_COMPILER;
3959 int ranges[MAX_RANGE_SIZE];
3960 pcre_uint8 bit, cbit, all;
3961 int i, byte, length = 0;
3962
3963 bit = bits[0] & 0x1;
3964 /* All bits will be zero or one (since bit is zero or one). */
3965 all = -bit;
3966
/* Record every position where the bit value flips. Whole bytes equal to
   'all' (0x00 or 0xff, matching the current bit) are skipped in one step. */
3967 for (i = 0; i < 256; )
3968 {
3969 byte = i >> 3;
3970 if ((i & 0x7) == 0 && bits[byte] == all)
3971 i += 8;
3972 else
3973 {
3974 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3975 if (cbit != bit)
3976 {
3977 if (length >= MAX_RANGE_SIZE)
3978 return FALSE;
3979 ranges[length] = i;
3980 length++;
3981 bit = cbit;
3982 all = -cbit;
3983 }
3984 i++;
3985 }
3986 }
3987
/* 'bit' is now the value of the last bit of the map. Add 256 as a final
   boundary when that value and nclass disagree about characters >= 256. */
3988 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3989 {
3990 if (length >= MAX_RANGE_SIZE)
3991 return FALSE;
3992 ranges[length] = 256;
3993 length++;
3994 }
3995
3996 if (length < 0 || length > 4)
3997 return FALSE;
3998
/* From here on 'bit' is the (possibly inverted) value that applies to the
   characters below ranges[0]. */
3999 bit = bits[0] & 0x1;
4000 if (invert) bit ^= 0x1;
4001
4002 /* No character is accepted. */
4003 if (length == 0 && bit == 0)
4004 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4005
4006 switch(length)
4007 {
4008 case 0:
4009 /* When bit != 0, all characters are accepted. */
4010 return TRUE;
4011
4012 case 1:
/* One boundary: a single compare against ranges[0]. */
4013 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4014 return TRUE;
4015
4016 case 2:
/* One range [ranges[0], ranges[1]): tested by subtract and compare, or by a
   plain equality test when the range holds a single character. */
4017 if (ranges[0] + 1 != ranges[1])
4018 {
4019 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4020 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4021 }
4022 else
4023 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4024 return TRUE;
4025
4026 case 3:
/* Three boundaries: one open-ended compare plus one range test. */
4027 if (bit != 0)
4028 {
4029 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4030 if (ranges[0] + 1 != ranges[1])
4031 {
4032 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4033 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4034 }
4035 else
4036 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4037 return TRUE;
4038 }
4039
4040 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4041 if (ranges[1] + 1 != ranges[2])
4042 {
4043 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4044 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4045 }
4046 else
4047 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4048 return TRUE;
4049
4050 case 4:
/* Two ranges of equal length whose start points differ in exactly one bit
   (that bit being clear in ranges[0]): OR that bit into the character and
   test the upper range only. */
4051 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4052 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4053 && is_powerof2(ranges[2] - ranges[0]))
4054 {
4055 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4056 if (ranges[2] + 1 != ranges[3])
4057 {
4058 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4059 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4060 }
4061 else
4062 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4063 return TRUE;
4064 }
4065
4066 if (bit != 0)
4067 {
/* Two separate range tests. 'i' remembers how much has already been
   subtracted from TMP1 by the first test. */
4068 i = 0;
4069 if (ranges[0] + 1 != ranges[1])
4070 {
4071 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4072 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4073 i = ranges[0];
4074 }
4075 else
4076 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4077
4078 if (ranges[2] + 1 != ranges[3])
4079 {
4080 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4081 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4082 }
4083 else
4084 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4085 return TRUE;
4086 }
4087
/* bit == 0: test the outer span [ranges[0], ranges[3]) first, then the gap
   [ranges[1], ranges[2]) inside it; TMP1 is already relative to ranges[0]. */
4088 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4089 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4090 if (ranges[1] + 1 != ranges[2])
4091 {
4092 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4093 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4094 }
4095 else
4096 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4097 return TRUE;
4098
4099 default:
/* Not reachable: length was limited to 0..4 above. */
4100 SLJIT_ASSERT_STOP();
4101 return FALSE;
4102 }
4103 }
4104
/* Emits the fast-call helper that tests the character in TMP1 against the
   newline characters 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the 16
   and 32 bit libraries or in UTF mode. The result is accumulated in TMP2 and
   the final OR is emitted with SLJIT_SET_E, so the caller reads it from the
   flags. TMP1 is modified as well (0x0a is subtracted from it). */
4105 static void check_anynewline(compiler_common *common)
4106 {
4107 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4108 DEFINE_COMPILER;
4109
4110 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4111
/* Rebase to 0x0a: one unsigned compare then covers 0x0a..0x0d. */
4112 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4114 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4115 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4116 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4117 #ifdef COMPILE_PCRE8
4118 if (common->utf)
4119 {
4120 #endif
/* Fold in the 0x85 result, then set the lowest bit so that 0x2028 and 0x2029
   are both caught by the single compare against 0x2029. */
4121 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4122 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4124 #ifdef COMPILE_PCRE8
4125 }
4126 #endif
4127 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the combined result in the flags. */
4128 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4129 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4130 }
4131
/* Emits the fast-call helper that tests the character in TMP1 against the
   horizontal space characters 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
   0x2000..0x200a, 0x202f, 0x205f and 0x3000 in the 16 and 32 bit libraries
   or in UTF mode. The result is accumulated in TMP2 and the final OR is
   emitted with SLJIT_SET_E, so the caller reads it from the flags. In the
   wide / UTF part TMP1 is modified as well (0x2000 is subtracted from it). */
4132 static void check_hspace(compiler_common *common)
4133 {
4134 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
4135 DEFINE_COMPILER;
4136
4137 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4138
4139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4140 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4141 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4142 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4143 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4144 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4145 #ifdef COMPILE_PCRE8
4146 if (common->utf)
4147 {
4148 #endif
4149 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4150 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4151 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4152 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4153 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: one unsigned compare then covers 0x2000..0x200a, and the
   remaining constants are written relative to 0x2000. */
4154 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4155 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4156 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4157 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4158 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4159 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4160 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4162 #ifdef COMPILE_PCRE8
4163 }
4164 #endif
4165 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the combined result in the flags. */
4166 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4167
4168 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4169 }
4170
/* Emits the out-of-line helper that tests the character held in TMP1 against
the vertical space set. The helper is bracketed by sljit_emit_fast_enter and
sljit_emit_fast_return and keeps its return address in RETURN_ADDR. The
characters tested are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when the wide
character part below is emitted. */
4171 static void check_vspace(compiler_common *common)
4172 {
4173 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
TMP1 is modified as well: it is rebased by 0x0a and may get its lowest bit set.
The verdict is accumulated in TMP2; the last OP_FLAGS also requests
SLJIT_SET_E, presumably so the caller can branch on the zero flag (confirm at
call sites). */
4174 DEFINE_COMPILER;
4175
4176 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4177
/* After rebasing, 0x0a-0x0d becomes 0-3, so one unsigned compare covers it. */
4178 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4179 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4180 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4181 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4182 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4183 #ifdef COMPILE_PCRE8
4184 if (common->utf)
4185 {
4186 #endif
/* NOTE(review): check_anynewline emits the same step without SLJIT_SET_E; the
flags requested here are overwritten by the SUB two operations later, so the
extra flag request looks unnecessary - confirm before changing. */
4187 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 and 0x2029 (both rebased by 0x0a) to the
same value, so a single equality test handles the two characters. */
4188 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4190 #ifdef COMPILE_PCRE8
4191 }
4192 #endif
4193 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4194 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4195
4196 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4197 }
4198
/* Scratch registers for the string comparison helpers below. They alias
STR_END and STACK_TOP, so each helper saves the original register contents on
entry and restores them before returning. */
4199 #define CHAR1 STR_END
4200 #define CHAR2 STACK_TOP
4201
/* Emits the out-of-line case sensitive comparison loop (fast call helper,
return address in RETURN_ADDR). As emitted here: TMP1 and STR_PTR are the two
pointers being compared, and TMP2 is the remaining length in bytes, which is
decreased by IN_UCHARS(1) per character. STR_PTR is first moved back by TMP2.
The loop is left either on the first differing character or when TMP2 reaches
zero. NOTE(review): callers presumably inspect TMP2 afterwards (zero meaning
that every character matched) - confirm against the call sites. */
4202 static void do_casefulcmp(compiler_common *common)
4203 {
4204 DEFINE_COMPILER;
4205 struct sljit_jump *jump;
4206 struct sljit_label *label;
4207
4208 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* CHAR1 and CHAR2 alias live registers: save them in TMP3 and LOCALS0. */
4210 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because the loads in the loop
use an IN_UCHARS(1) offset with MOVU_UCHAR (presumably the load-with-update
form, which advances the base register - confirm in the macro definition). */
4212 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4213 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4214
4215 label = LABEL();
4216 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4217 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop on the first mismatch, otherwise count down and repeat. */
4218 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4219 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4220 JUMPTO(SLJIT_C_NOT_ZERO, label);
4221
4222 JUMPHERE(jump);
/* Undo the initial one character bias of STR_PTR and restore the registers. */
4223 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4224 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4225 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4226 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4227 }
4228
/* Register that holds the address of the lower casing table (common->lcc)
inside do_caselesscmp. It aliases STACK_LIMIT, whose value is saved in TMP3 and
restored before the helper returns. */
4229 #define LCC_TABLE STACK_LIMIT
4230
/* Emits the out-of-line caseless comparison loop (fast call helper, return
address in RETURN_ADDR). The structure matches do_casefulcmp: TMP1 and STR_PTR
are the two pointers, and TMP2 is the remaining length in bytes. The difference
is that each loaded character is first translated through the common->lcc
table. Outside the 8 bit library, characters above 255 skip the table lookup
and are compared unchanged. NOTE(review): callers presumably inspect TMP2
afterwards (zero meaning that every character matched) - confirm against the
call sites. */
4231 static void do_caselesscmp(compiler_common *common)
4232 {
4233 DEFINE_COMPILER;
4234 struct sljit_jump *jump;
4235 struct sljit_label *label;
4236
4237 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4238 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4239
/* Save the three aliased registers, then load the table address. */
4240 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4243 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one character back because the loads in the loop
use an IN_UCHARS(1) offset with MOVU_UCHAR (presumably the load-with-update
form, which advances the base register - confirm in the macro definition). */
4244 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4245 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4246
4247 label = LABEL();
4248 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4249 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table is only indexed with values up to 255, so wider characters jump
over the lookup. */
4250 #ifndef COMPILE_PCRE8
4251 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4252 #endif
4253 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4254 #ifndef COMPILE_PCRE8
4255 JUMPHERE(jump);
4256 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4257 #endif
4258 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4259 #ifndef COMPILE_PCRE8
4260 JUMPHERE(jump);
4261 #endif
/* Leave the loop on the first mismatch, otherwise count down and repeat. */
4262 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4263 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4264 JUMPTO(SLJIT_C_NOT_ZERO, label);
4265
4266 JUMPHERE(jump);
/* Undo the initial one character bias of STR_PTR and restore the registers. */
4267 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4268 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4269 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4270 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4271 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4272 }
4273
/* The register aliases are private to the two comparison helpers above. */
4274 #undef LCC_TABLE
4275 #undef CHAR1
4276 #undef CHAR2
4277
4278 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4279
4280 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4281 {
4282 /* This function would be ineffective to do in JIT level. */
4283 pcre_uint32 c1, c2;
4284 const pcre_uchar *src2 = args->uchar_ptr;
4285 const pcre_uchar *end2 = args->end;
4286 const ucd_record *ur;
4287 const pcre_uint32 *pp;
4288
4289 while (src1 < end1)
4290 {
4291 if (src2 >= end2)
4292 return (pcre_uchar*)1;
4293 GETCHARINC(c1, src1);
4294 GETCHARINC(c2, src2);
4295 ur = GET_UCD(c2);
4296 if (c1 != c2 && c1 != c2 + ur->other_case)
4297 {
4298 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4299 for (;;)
4300 {
4301 if (c1 < *pp) return NULL;
4302 if (c1 == *pp++) break;
4303 }
4304 }
4305 }
4306 return src2;
4307 }
4308
4309 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4310
/* Emits the comparison of one pattern character (one code unit, or all code
units of a multi-unit character in UTF mode) against the subject.

Arguments:
  common      compiler state
  caseless    TRUE if the character should be compared caselessly
  cc          first code unit of the pattern character
  context     running state shared between consecutive calls: 'length' is the
              number of bytes still to be compared (the subject is read at
              STR_PTR - length), 'sourcereg' is the register receiving the next
              load (-1 before the first load), 'ucharptr', 'c' and 'oc' collect
              code units for the multi-unit compares of the unaligned path
  backtracks  receives the jumps taken when the comparison fails

Returns:      pointer to the pattern code unit after the compared character
*/
4311 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4312 compare_context* context, jump_list **backtracks)
4313 {
4314 DEFINE_COMPILER;
4315 unsigned int othercasebit = 0;
4316 pcre_uchar *othercasechar = NULL;
4317 #ifdef SUPPORT_UTF
4318 int utflength;
4319 #endif
4320
4321 if (caseless && char_has_othercase(common, cc))
4322 {
/* The value returned by char_get_othercase_bit is unpacked below into the
address of the affected code unit (othercasechar) and the bit to be ORed into
that unit (othercasebit). */
4323 othercasebit = char_get_othercase_bit(common, cc);
4324 SLJIT_ASSERT(othercasebit);
4325 /* Extracting bit difference info. */
4326 #if defined COMPILE_PCRE8
4327 othercasechar = cc + (othercasebit >> 8);
4328 othercasebit &= 0xff;
4329 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4330 /* Note that this code only handles characters in the BMP. If there
4331 ever are characters outside the BMP whose othercase differs in only one
4332 bit from itself (there currently are none), this code will need to be
4333 revised for COMPILE_PCRE32. */
4334 othercasechar = cc + (othercasebit >> 9);
4335 if ((othercasebit & 0x100) != 0)
4336 othercasebit = (othercasebit & 0xff) << 8;
4337 else
4338 othercasebit &= 0xff;
4339 #endif /* COMPILE_PCRE[8|16|32] */
4340 }
4341
/* First call for this context: emit the initial load into TMP1. The widest
load allowed by the remaining length is used when unaligned reads are
supported. The next load will target TMP2. */
4342 if (context->sourcereg == -1)
4343 {
4344 #if defined COMPILE_PCRE8
4345 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4346 if (context->length >= 4)
4347 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4348 else if (context->length >= 2)
4349 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4350 else
4351 #endif
4352 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4353 #elif defined COMPILE_PCRE16
4354 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4355 if (context->length >= 4)
4356 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4357 else
4358 #endif
4359 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4360 #elif defined COMPILE_PCRE32
4361 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4362 #endif /* COMPILE_PCRE[8|16|32] */
4363 context->sourcereg = TMP2;
4364 }
4365
/* In UTF mode the loop below runs once for every code unit of the character;
without SUPPORT_UTF the body is executed exactly once. */
4366 #ifdef SUPPORT_UTF
4367 utflength = 1;
4368 if (common->utf && HAS_EXTRALEN(*cc))
4369 utflength += GET_EXTRALEN(*cc);
4370
4371 do
4372 {
4373 #endif
4374
4375 context->length -= IN_UCHARS(1);
4376 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4377
4378 /* Unaligned read is supported. */
/* Collect the expected code unit in context->c and its case bit (or zero) in
context->oc; the compare itself is only emitted when a whole group has been
collected. */
4379 if (othercasebit != 0 && othercasechar == cc)
4380 {
4381 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4382 context->oc.asuchars[context->ucharptr] = othercasebit;
4383 }
4384 else
4385 {
4386 context->c.asuchars[context->ucharptr] = *cc;
4387 context->oc.asuchars[context->ucharptr] = 0;
4388 }
4389 context->ucharptr++;
4390
4391 #if defined COMPILE_PCRE8
4392 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4393 #else
4394 if (context->ucharptr >= 2 || context->length == 0)
4395 #endif
4396 {
/* Emit the load for the next group into the idle register before comparing
the group that was loaded previously. */
4397 if (context->length >= 4)
4398 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4399 else if (context->length >= 2)
4400 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4401 #if defined COMPILE_PCRE8
4402 else if (context->length >= 1)
4403 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4404 #endif /* COMPILE_PCRE8 */
4405 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4406
/* The number of collected code units selects the width of the compare. Case
bits are ORed into the loaded value first, so both cases compare equal to
c | oc. */
4407 switch(context->ucharptr)
4408 {
4409 case 4 / sizeof(pcre_uchar):
4410 if (context->oc.asint != 0)
4411 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4412 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4413 break;
4414
4415 case 2 / sizeof(pcre_uchar):
4416 if (context->oc.asushort != 0)
4417 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4418 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4419 break;
4420
4421 #ifdef COMPILE_PCRE8
4422 case 1:
4423 if (context->oc.asbyte != 0)
4424 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4425 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4426 break;
4427 #endif
4428
4429 default:
4430 SLJIT_ASSERT_STOP();
4431 break;
4432 }
4433 context->ucharptr = 0;
4434 }
4435
4436 #else
4437
4438 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per step: load the next unit into the idle register, then
compare the unit that was loaded by the previous step. */
4439 if (context->length >= 1)
4440 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4441
4442 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4443
4444 if (othercasebit != 0 && othercasechar == cc)
4445 {
4446 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4448 }
4449 else
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4451
4452 #endif
4453
4454 cc++;
4455 #ifdef SUPPORT_UTF
4456 utflength--;
4457 }
4458 while (utflength > 0);
4459 #endif
4460
4461 return cc;
4462 }
4463
4464 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4465
/* Helper macros of compile_xclass_matchingpath. The register that holds the
character type (typereg) or the character itself (TMP1) is kept biased by a
compile time offset (typeoffset / charoffset), which allows range checks to be
done with a single unsigned comparison. These macros emit the ADD or SUB that
moves the register to a new bias (nothing is emitted when the bias is
unchanged) and record the new bias.

Both macros expect OP2, the offset variable and, for SET_TYPE_OFFSET, typereg
to be in scope at the point of use. They are wrapped in do { } while (0), so
they behave as a single statement, also under an unbraced if / else. Note that
'value' is evaluated more than once and must be free of side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4485
4486 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4487 {
4488 DEFINE_COMPILER;
4489 jump_list *found = NULL;
4490 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4491 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4492 struct sljit_jump *jump = NULL;
4493 pcre_uchar *ccbegin;
4494 int compares, invertcmp, numberofcmps;
4495 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4496 BOOL utf = common->utf;
4497 #endif
4498
4499 #ifdef SUPPORT_UCP
4500 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4501 BOOL charsaved = FALSE;
4502 int typereg = TMP1, scriptreg = TMP1;
4503 const pcre_uint32 *other_cases;
4504 sljit_uw typeoffset;
4505 #endif
4506
4507 /* Scanning the necessary info. */
4508 cc++;
4509 ccbegin = cc;
4510 compares = 0;
4511 if (cc[-1] & XCL_MAP)
4512 {
4513 min = 0;
4514 cc += 32 / sizeof(pcre_uchar);
4515 }
4516
4517 while (*cc != XCL_END)
4518 {
4519 compares++;
4520 if (*cc == XCL_SINGLE)
4521 {
4522 cc ++;
4523 GETCHARINCTEST(c, cc);
4524 if (c > max) max = c;
4525 if (c < min) min = c;
4526 #ifdef SUPPORT_UCP
4527 needschar = TRUE;
4528 #endif
4529 }
4530 else if (*cc == XCL_RANGE)
4531 {
4532 cc ++;
4533 GETCHARINCTEST(c, cc);
4534 if (c < min) min = c;
4535 GETCHARINCTEST(c, cc);
4536 if (c > max) max = c;
4537 #ifdef SUPPORT_UCP
4538 needschar = TRUE;
4539 #endif
4540 }
4541 #ifdef SUPPORT_UCP
4542 else
4543 {
4544 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4545 cc++;
4546 if (*cc == PT_CLIST)
4547 {
4548 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4549 while (*other_cases != NOTACHAR)
4550 {
4551 if (*other_cases > max) max = *other_cases;
4552 if (*other_cases < min) min = *other_cases;
4553 other_cases++;
4554 }
4555 }
4556 else
4557 {
4558 max = READ_CHAR_MAX;
4559 min = 0;
4560 }
4561
4562 switch(*cc)
4563 {
4564 case PT_ANY:
4565 break;
4566
4567 case PT_LAMP:
4568 case PT_GC:
4569 case PT_PC:
4570 case PT_ALNUM:
4571 needstype = TRUE;
4572 break;
4573
4574 case PT_SC:
4575 needsscript = TRUE;
4576 break;
4577
4578 case PT_SPACE:
4579 case PT_PXSPACE:
4580 case PT_WORD:
4581 case PT_PXGRAPH:
4582 case PT_PXPRINT:
4583 case PT_PXPUNCT:
4584 needstype = TRUE;
4585 needschar = TRUE;
4586 break;
4587
4588 case PT_CLIST:
4589 case PT_UCNC:
4590 needschar = TRUE;
4591 break;
4592
4593 default:
4594 SLJIT_ASSERT_STOP();
4595 break;
4596 }
4597 cc += 2;
4598 }
4599 #endif
4600 }
4601
4602 /* We are not necessary in utf mode even in 8 bit mode. */
4603 cc = ccbegin;
4604 detect_partial_match(common, backtracks);
4605 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4606
4607 if ((cc[-1] & XCL_HASPROP) == 0)
4608 {
4609 if ((cc[-1] & XCL_MAP) != 0)
4610 {
4611 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4612 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4613 {
4614 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4615 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4616 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4617 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4618 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4619 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4620 }
4621
4622 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4623 JUMPHERE(jump);
4624
4625 cc += 32 / sizeof(pcre_uchar);
4626 }
4627 else
4628 {
4629 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4630 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4631 }
4632 }
4633 else if ((cc[-1] & XCL_MAP) != 0)
4634 {
4635 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4636 #ifdef SUPPORT_UCP
4637 charsaved = TRUE;
4638 #endif
4639 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4640 {
4641 #ifdef COMPILE_PCRE8
4642 SLJIT_ASSERT(common->utf);
4643 #endif
4644 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4645
4646 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4647 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4648 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4649 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4650 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4651 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4652
4653 JUMPHERE(jump);
4654 }
4655
4656 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4657 cc += 32 / sizeof(pcre_uchar);
4658 }
4659
4660 #ifdef SUPPORT_UCP
4661 /* Simple register allocation. TMP1 is preferred if possible. */
4662 if (needstype || needsscript)
4663 {
4664 if (needschar && !charsaved)
4665 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4666 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4667 if (needschar)
4668 {
4669 if (needstype)
4670 {
4671 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4672 typereg = RETURN_ADDR;
4673 }
4674
4675 if (needsscript)
4676 scriptreg = TMP3;
4677 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4678 }
4679 else if (needstype && needsscript)
4680 scriptreg = TMP3;
4681 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4682
4683 if (needsscript)
4684 {
4685 if (scriptreg == TMP1)
4686 {
4687 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4688 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4689 }
4690 else
4691 {
4692 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4693 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4694 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4695 }
4696 }
4697 }
4698 #endif
4699
4700 /* Generating code. */
4701 charoffset = 0;
4702 numberofcmps = 0;
4703 #ifdef SUPPORT_UCP
4704 typeoffset = 0;
4705 #endif
4706
4707 while (*cc != XCL_END)
4708 {
4709 compares--;
4710 invertcmp = (compares == 0 && list != backtracks);
4711 jump = NULL;
4712
4713 if (*cc == XCL_SINGLE)
4714 {
4715 cc ++;
4716 GETCHARINCTEST(c, cc);
4717
4718 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4719 {
4720 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4721 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4722 numberofcmps++;
4723 }
4724 else if (numberofcmps > 0)
4725 {
4726 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4727 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4728 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4729 numberofcmps = 0;
4730 }
4731 else
4732 {
4733 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4734 numberofcmps = 0;
4735 }
4736 }
4737 else if (*cc == XCL_RANGE)
4738 {
4739 cc ++;
4740 GETCHARINCTEST(c, cc);
4741 SET_CHAR_OFFSET(c);
4742 GETCHARINCTEST(c, cc);
4743
4744 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4745 {
4746 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4747 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4748 numberofcmps++;
4749 }
4750 else if (numberofcmps > 0)
4751 {
4752 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4753 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4754 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4755 numberofcmps = 0;
4756 }
4757 else
4758 {
4759 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4760 numberofcmps = 0;
4761 }
4762 }
4763 #ifdef SUPPORT_UCP
4764 else
4765 {
4766 if (*cc == XCL_NOTPROP)
4767 invertcmp ^= 0x1;
4768 cc++;
4769 switch(*cc)
4770 {
4771 case PT_ANY:
4772 if (list != backtracks)
4773 {
4774 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4775 continue;
4776 }
4777 else if (cc[-1] == XCL_NOTPROP)
4778 continue;
4779 jump = JUMP(SLJIT_JUMP);
4780 break;
4781
4782 case PT_LAMP:
4783 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4784 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4785 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4786 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4787 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4788 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4789 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4790 break;
4791
4792 case PT_GC:
4793 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4794 SET_TYPE_OFFSET(c);
4795 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4796 break;
4797
4798 case PT_PC:
4799 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4800 break;
4801
4802 case PT_SC:
4803 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4804 break;
4805
4806 case PT_SPACE:
4807 case PT_PXSPACE:
4808 SET_CHAR_OFFSET(9);
4809 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4810 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4811
4812 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4813 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4814
4815 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4816 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4817
4818 SET_TYPE_OFFSET(ucp_Zl);
4819 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4820 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4821 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4822 break;
4823
4824 case PT_WORD:
4825 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4826 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4827 /* Fall through. */
4828
4829 case PT_ALNUM:
4830 SET_TYPE_OFFSET(ucp_Ll);
4831 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4832 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4833 SET_TYPE_OFFSET(ucp_Nd);
4834 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4835 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4836 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4837 break;
4838
4839 case PT_CLIST:
4840 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4841
4842 /* At least three characters are required.
4843 Otherwise this case would be handled by the normal code path. */
4844 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4845 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4846
4847 /* Optimizing character pairs, if their difference is power of 2. */
4848 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4849 {
4850 if (charoffset == 0)
4851 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4852 else
4853 {
4854 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4855 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4856 }
4857 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4858 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4859 other_cases += 2;
4860 }
4861 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4862 {
4863 if (charoffset == 0)
4864 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4865 else
4866 {
4867 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4868 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4869 }
4870 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4871 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4872
4873 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4874 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4875
4876 other_cases += 3;
4877 }
4878 else
4879 {
4880 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4881 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4882 }
4883
4884 while (*other_cases != NOTACHAR)
4885 {
4886 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4887 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4888 }
4889 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4890 break;
4891
4892 case PT_UCNC:
4893 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4894 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4895 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4896 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4897 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4898 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4899
4900 SET_CHAR_OFFSET(0xa0);
4901 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4902 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4903 SET_CHAR_OFFSET(0);
4904 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4905 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4906 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4907 break;
4908
4909 case PT_PXGRAPH:
4910 /* C and Z groups are the farthest two groups. */
4911 SET_TYPE_OFFSET(ucp_Ll);
4912 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4913 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4914
4915 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4916
4917 /* In case of ucp_Cf, we overwrite the result. */
4918 SET_CHAR_OFFSET(0x2066);
4919 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4920 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4921
4922 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4923 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4924
4925 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4926 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4927
4928 JUMPHERE(jump);
4929 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4930 break;
4931
4932 case PT_PXPRINT:
4933 /* C and Z groups are the farthest two groups. */
4934 SET_TYPE_OFFSET(ucp_Ll);
4935 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4936 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4937
4938 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4939 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4940
4941 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4942
4943 /* In case of ucp_Cf, we overwrite the result. */
4944 SET_CHAR_OFFSET(0x2066);
4945 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4946 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4947
4948 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4949 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4950
4951 JUMPHERE(jump);
4952 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4953 break;
4954
4955 case PT_PXPUNCT:
4956 SET_TYPE_OFFSET(ucp_Sc);
4957 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4958 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4959
4960 SET_CHAR_OFFSET(0);
4961 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4962 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4963
4964 SET_TYPE_OFFSET(ucp_Pc);
4965 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4966 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4967 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4968 break;
4969 }
4970 cc += 2;
4971 }
4972 #endif
4973
4974 if (jump != NULL)
4975 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4976 }
4977
4978 if (found != NULL)
4979 set_jumps(found, LABEL());
4980 }
4981
4982 #undef SET_TYPE_OFFSET
4983 #undef SET_CHAR_OFFSET
4984
4985 #endif
4986
4987 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4988 {
4989 DEFINE_COMPILER;
4990 int length;
4991 unsigned int c, oc, bit;
4992 compare_context context;
4993 struct sljit_jump *jump[4];
4994 jump_list *end_list;
4995 #ifdef SUPPORT_UTF
4996 struct sljit_label *label;
4997 #ifdef SUPPORT_UCP
4998 pcre_uchar propdata[5];
4999 #endif
5000 #endif /* SUPPORT_UTF */
5001
5002 switch(type)
5003 {
5004 case OP_SOD:
5005 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5006 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5007 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5008 return cc;
5009
5010 case OP_SOM:
5011 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5012 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5013 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5014 return cc;
5015
5016 case OP_NOT_WORD_BOUNDARY:
5017 case OP_WORD_BOUNDARY:
5018 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5019 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5020 return cc;
5021
5022 case OP_NOT_DIGIT:
5023 case OP_DIGIT:
5024 /* Digits are usually 0-9, so it is worth to optimize them. */
5025 detect_partial_match(common, backtracks);
5026 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5027 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5028 read_char7_type(common, type == OP_NOT_DIGIT);
5029 else
5030 #endif
5031 read_char8_type(common, type == OP_NOT_DIGIT);
5032 /* Flip the starting bit in the negative case. */
5033 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5034 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5035 return cc;
5036
5037 case OP_NOT_WHITESPACE:
5038 case OP_WHITESPACE:
5039 detect_partial_match(common, backtracks);
5040 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5041 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5042 read_char7_type(common, type == OP_NOT_WHITESPACE);
5043 else
5044 #endif
5045 read_char8_type(common, type == OP_NOT_WHITESPACE);
5046 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5047 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5048 return cc;
5049
5050 case OP_NOT_WORDCHAR:
5051 case OP_WORDCHAR:
5052 detect_partial_match(common, backtracks);
5053 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5054 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5055 read_char7_type(common, type == OP_NOT_WORDCHAR);
5056 else
5057 #endif
5058 read_char8_type(common, type == OP_NOT_WORDCHAR);
5059 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5060 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5061 return cc;
5062
5063 case OP_ANY:
5064 detect_partial_match(common, backtracks);
5065 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5066 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5067 {
5068 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5069 end_list = NULL;
5070 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5071 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5072 else
5073 check_str_end(common, &end_list);
5074
5075 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5076 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5077 set_jumps(end_list, LABEL());
5078 JUMPHERE(jump[0]);
5079 }
5080 else
5081 check_newlinechar(common, common->nltype, backtracks, TRUE);
5082 return cc;
5083
5084 case OP_ALLANY:
5085 detect_partial_match(common, backtracks);
5086 #ifdef SUPPORT_UTF
5087 if (common->utf)
5088 {
5089 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5090 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5091 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5092 #if defined COMPILE_PCRE8
5093 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5094 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5095 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5096 #elif defined COMPILE_PCRE16
5097 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5098 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5099 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5100 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5101 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5102 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5103 #endif
5104 JUMPHERE(jump[0]);
5105 #endif /* COMPILE_PCRE[8|16] */
5106 return cc;
5107 }
5108 #endif
5109 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5110 return cc;
5111
5112 case OP_ANYBYTE:
5113 detect_partial_match(common, backtracks);
5114 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5115 return cc;
5116
5117 #ifdef SUPPORT_UTF
5118 #ifdef SUPPORT_UCP
5119 case OP_NOTPROP:
5120 case OP_PROP:
5121 propdata[0] = XCL_HASPROP;
5122 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5123 propdata[2] = cc[0];
5124 propdata[3] = cc[1];
5125 propdata[4] = XCL_END;
5126 compile_xclass_matchingpath(common, propdata, backtracks);
5127 return cc + 2;
5128 #endif
5129 #endif
5130
5131 case OP_ANYNL:
5132 detect_partial_match(common, backtracks);
5133 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5134 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5135 /* We don't need to handle soft partial matching case. */
5136 end_list = NULL;
5137 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5138 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5139 else
5140 check_str_end(common, &end_list);
5141 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5142 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5144 jump[2] = JUMP(SLJIT_JUMP);
5145 JUMPHERE(jump[0]);
5146 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5147 set_jumps(end_list, LABEL());
5148 JUMPHERE(jump[1]);
5149 JUMPHERE(jump[2]);
5150 return cc;
5151
5152 case OP_NOT_HSPACE:
5153 case OP_HSPACE:
5154 detect_partial_match(common, backtracks);
5155 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5156 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5157 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5158 return cc;
5159
5160 case OP_NOT_VSPACE:
5161 case OP_VSPACE:
5162 detect_partial_match(common, backtracks);
5163 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5164 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5165 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5166 return cc;
5167
5168 #ifdef SUPPORT_UCP
5169 case OP_EXTUNI:
5170 detect_partial_match(common, backtracks);
5171 read_char(common);
5172 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5174 /* Optimize register allocation: use a real register. */
5175 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5176 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5177
5178 label = LABEL();
5179 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5180 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5181 read_char(common);
5182 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5183 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5184 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5185
5186 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5187 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5188 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5189 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5190 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5191 JUMPTO(SLJIT_C_NOT_ZERO, label);
5192
5193 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5194 JUMPHERE(jump[0]);
5195 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5196
5197 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5198 {
5199 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5200 /* Since we successfully read a char above, partial matching must occur. */
5201 check_partial(common, TRUE);
5202 JUMPHERE(jump[0]);
5203 }
5204 return cc;
5205 #endif
5206
5207 case OP_EODN:
5208 /* Requires rather complex checks. */
5209 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5210 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5211 {
5212 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5213 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5214 if (common->mode == JIT_COMPILE)
5215 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5216 else
5217 {
5218 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5219 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5220 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5221 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5222 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5223 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5224 check_partial(common, TRUE);
5225 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5226 JUMPHERE(jump[1]);
5227 }
5228 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5229 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5230 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5231 }
5232 else if (common->nltype == NLTYPE_FIXED)
5233 {
5234 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5235 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5236 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5237 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5238 }
5239 else
5240 {
5241 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5242 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5243 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5244 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5245 jump[2] = JUMP(SLJIT_C_GREATER);
5246 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5247 /* Equal. */
5248 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5249 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5250 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5251
5252 JUMPHERE(jump[1]);
5253 if (common->nltype == NLTYPE_ANYCRLF)
5254 {
5255 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5256 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5257 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5258 }
5259 else
5260 {
5261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5262 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5263 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5264 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5265 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5266 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5267 }
5268 JUMPHERE(jump[2]);
5269 JUMPHERE(jump[3]);
5270 }
5271 JUMPHERE(jump[0]);
5272 check_partial(common, FALSE);
5273 return cc;
5274
5275 case OP_EOD:
5276 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5277 check_partial(common, FALSE);
5278 return cc;
5279
5280 case OP_CIRC:
5281 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5282 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5283 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5284 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5285 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5286 return cc;
5287
5288 case OP_CIRCM:
5289 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5290 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5291 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5292 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5293 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5294 jump[0] = JUMP(SLJIT_JUMP);
5295 JUMPHERE(jump[1]);
5296
5297 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5298 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5299 {
5300 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5301 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5302 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5303 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5304 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5305 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5306 }
5307 else
5308 {
5309 skip_char_back(common);
5310 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5311 check_newlinechar(common, common->nltype, backtracks, FALSE);
5312 }
5313 JUMPHERE(jump[0]);
5314 return cc;
5315
5316 case OP_DOLL:
5317 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5318 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5319 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5320
5321 if (!common->endonly)
5322 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5323 else
5324 {
5325 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5326 check_partial(common, FALSE);
5327 }
5328 return cc;
5329
5330 case OP_DOLLM:
5331 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5332 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5333 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5334 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5335 check_partial(common, FALSE);
5336 jump[0] = JUMP(SLJIT_JUMP);
5337 JUMPHERE(jump[1]);
5338
5339 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5340 {
5341 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5342 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5343 if (common->mode == JIT_COMPILE)
5344 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5345 else
5346 {
5347 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5348 /* STR_PTR = STR_END - IN_UCHARS(1) */
5349 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5350 check_partial(common, TRUE);
5351 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5352 JUMPHERE(jump[1]);
5353 }
5354
5355 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5356 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5357 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5358 }
5359 else
5360 {
5361 peek_char(common, common->nlmax);
5362 check_newlinechar(common, common->nltype, backtracks, FALSE);
5363 }
5364 JUMPHERE(jump[0]);
5365 return cc;
5366
5367 case OP_CHAR:
5368 case OP_CHARI:
5369 length = 1;
5370 #ifdef SUPPORT_UTF
5371 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5372 #endif
5373 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5374 {
5375 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5376 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5377
5378 context.length = IN_UCHARS(length);
5379 context.sourcereg = -1;
5380 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5381 context.ucharptr = 0;
5382 #endif
5383 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5384 }
5385
5386 detect_partial_match(common, backtracks);
5387 #ifdef SUPPORT_UTF
5388 if (common->utf)
5389 {
5390 GETCHAR(c, cc);
5391 }
5392 else
5393 #endif
5394 c = *cc;
5395
5396 if (type == OP_CHAR || !char_has_othercase(common, cc))
5397 {
5398 read_char_range(common, c, c, FALSE);
5399 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5400 return cc + length;
5401 }
5402 oc = char_othercase(common, c);
5403 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5404 bit = c ^ oc;
5405 if (is_powerof2(bit))
5406 {
5407 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5408 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5409 return cc + length;
5410 }
5411 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5412 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5413 JUMPHERE(jump[0]);
5414 return cc + length;
5415
5416 case OP_NOT:
5417 case OP_NOTI:
5418 detect_partial_match(common, backtracks);
5419 length = 1;
5420 #ifdef SUPPORT_UTF
5421 if (common->utf)
5422 {
5423 #ifdef COMPILE_PCRE8
5424 c = *cc;
5425 if (c < 128)
5426 {
5427 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5428 if (type == OP_NOT || !char_has_othercase(common, cc))
5429 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5430 else
5431 {
5432 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5433 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5434 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5435 }
5436 /* Skip the variable-length character. */
5437 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5438 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5439 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5440 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5441 JUMPHERE(jump[0]);
5442 return cc + 1;
5443 }
5444 else
5445 #endif /* COMPILE_PCRE8 */
5446 {
5447 GETCHARLEN(c, cc, length);
5448 }
5449 }
5450 else
5451 #endif /* SUPPORT_UTF */
5452 c = *cc;
5453
5454 if (type == OP_NOT || !char_has_othercase(common, cc))
5455 {
5456 read_char_range(common, c, c, TRUE);
5457 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5458 }
5459 else
5460 {
5461 oc = char_othercase(common, c);
5462 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5463 bit = c ^ oc;
5464 if (is_powerof2(bit))
5465 {
5466 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5467 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5468 }
5469 else
5470 {
5471 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5472 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5473 }
5474 }
5475 return cc + length;
5476
5477 case OP_CLASS:
5478 case OP_NCLASS:
5479 detect_partial_match(common, backtracks);
5480
5481 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5482 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5483 read_char_range(common, 0, bit, type == OP_NCLASS);
5484 #else
5485 read_char_range(common, 0, 255, type == OP_NCLASS);
5486 #endif
5487
5488 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5489 return cc + 32 / sizeof(pcre_uchar);
5490
5491 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5492 jump[0] = NULL;
5493 if (common->utf)
5494 {
5495 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5496 if (type == OP_CLASS)
5497 {
5498 add_jump(compiler, backtracks, jump[0]);
5499 jump[0] = NULL;
5500 }
5501 }
5502 #elif !defined COMPILE_PCRE8
5503 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5504 if (type == OP_CLASS)
5505 {
5506 add_jump(compiler, backtracks, jump[0]);
5507 jump[0] = NULL;
5508 }
5509 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5510
5511 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5512 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5513 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5514 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5515 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5516 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5517
5518 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5519 if (jump[0] != NULL)
5520 JUMPHERE(jump[0]);
5521 #endif
5522
5523 return cc + 32 / sizeof(pcre_uchar);
5524
5525 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5526 case OP_XCLASS:
5527 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5528 return cc + GET(cc, 0) - 1;
5529 #endif
5530
5531 case OP_REVERSE:
5532 length = GET(cc, 0);
5533 if (length == 0)
5534 return cc + LINK_SIZE;
5535 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5536 #ifdef SUPPORT_UTF
5537 if (common->utf)
5538 {
5539 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5540 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5541 label = LABEL();
5542 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5543 skip_char_back(common);
5544 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5545 JUMPTO(SLJIT_C_NOT_ZERO, label);
5546 }
5547 else
5548 #endif
5549 {
5550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5551 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5552 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5553 }
5554 check_start_used_ptr(common);
5555 return cc + LINK_SIZE;
5556 }
5557 SLJIT_ASSERT_STOP();
5558 return cc;
5559 }
5560
5561 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5562 {
5563 /* This function consumes at least one input character. */
5564 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5565 DEFINE_COMPILER;
5566 pcre_uchar *ccbegin = cc;
5567 compare_context context;
5568 int size;
5569
5570 context.length = 0;
5571 do
5572 {
5573 if (cc >= ccend)
5574 break;
5575
5576 if (*cc == OP_CHAR)
5577 {
5578 size = 1;
5579 #ifdef SUPPORT_UTF
5580 if (common->utf && HAS_EXTRALEN(cc[1]))
5581 size += GET_EXTRALEN(cc[1]);
5582 #endif
5583 }
5584 else if (*cc == OP_CHARI)
5585 {
5586 size = 1;
5587 #ifdef SUPPORT_UTF
5588 if (common->utf)
5589 {
5590 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5591 size = 0;
5592 else if (HAS_EXTRALEN(cc[1]))
5593 size += GET_EXTRALEN(cc[1]);
5594 }
5595 else
5596 #endif
5597 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5598 size = 0;
5599 }
5600 else
5601 size = 0;
5602
5603 cc += 1 + size;
5604 context.length += IN_UCHARS(size);
5605 }
5606 while (size > 0 && context.length <= 128);
5607
5608 cc = ccbegin;
5609 if (context.length > 0)
5610 {
5611 /* We have a fixed-length byte sequence. */
5612 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5613 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5614
5615 context.sourcereg = -1;
5616 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5617 context.ucharptr = 0;
5618 #endif
5619 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5620 return cc;
5621 }
5622
5623 /* A non-fixed length character will be checked if length == 0. */
5624 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5625 }
5626
5627 /* Forward declarations. */
5628 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5629 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5630
/* Allocates a zero-filled backtrack record of (size) bytes with
sljit_alloc_memory, stores (ccstart) in its "cc" field and pushes it on the
list headed by parent->top (the previous head is kept in "prev").

The macro relies on three local variables of the enclosing function:
"compiler", "parent" and "backtrack" (which receives the new record).
If the compiler reports an error after the allocation, the enclosing
function returns (error). Note that (size) is evaluated twice, so it must
not have side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5658
5659 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5660 {
5661 /* The OVECTOR offset goes to TMP2. */
5662 DEFINE_COMPILER;
5663 int count = GET2(cc, 1 + IMM2_SIZE);
5664 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5665 unsigned int offset;
5666 jump_list *found = NULL;
5667
5668 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5669
5670 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5671
5672 count--;
5673 while (count-- > 0)
5674 {
5675 offset = GET2(slot, 0) << 1;
5676 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5677 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5678 slot += common->name_entry_size;
5679 }
5680
5681 offset = GET2(slot, 0) << 1;
5682 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5683 if (backtracks != NULL && !common->jscript_compat)
5684 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5685
5686 set_jumps(found, LABEL());
5687 }
5688
5689 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5690 {
5691 DEFINE_COMPILER;
5692 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5693 int offset = 0;
5694 struct sljit_jump *jump = NULL;
5695 struct sljit_jump *partial;
5696 struct sljit_jump *nopartial;
5697
5698 if (ref)
5699 {
5700 offset = GET2(cc, 1) << 1;
5701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5702 /* OVECTOR(1) contains the "string begin - 1" constant. */
5703 if (withchecks && !common->jscript_compat)
5704 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5705 }
5706 else
5707 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5708
5709 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5710 if (common->utf && *cc == OP_REFI)
5711 {
5712 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5713 if (ref)
5714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5715 else
5716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5717
5718 if (withchecks)
5719 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5720
5721 /* Needed to save important temporary registers. */
5722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5723 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5725 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5726 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5727 if (common->mode == JIT_COMPILE)
5728 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5729 else
5730 {
5731 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5732 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5733 check_partial(common, FALSE);
5734 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5735 JUMPHERE(nopartial);
5736 }
5737 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5738 }
5739 else
5740 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5741 {
5742 if (ref)
5743 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5744 else
5745 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5746
5747 if (withchecks)
5748 jump = JUMP(SLJIT_C_ZERO);
5749
5750 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5751 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5752 if (common->mode == JIT_COMPILE)
5753 add_jump(compiler, backtracks, partial);
5754
5755 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5756 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5757
5758 if (common->mode != JIT_COMPILE)
5759 {
5760 nopartial = JUMP(SLJIT_JUMP);
5761 JUMPHERE(partial);
5762 /* TMP2 -= STR_END - STR_PTR */
5763 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5764 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5765 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5766 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5767 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5768 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5769 JUMPHERE(partial);
5770 check_partial(common, FALSE);
5771 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5772 JUMPHERE(nopartial);
5773 }
5774 }
5775
5776 if (jump != NULL)
5777 {
5778 if (emptyfail)
5779 add_jump(compiler, backtracks, jump);
5780 else
5781 JUMPHERE(jump);
5782 }
5783 }
5784
5785 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5786 {
5787 DEFINE_COMPILER;
5788 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5789 backtrack_common *backtrack;
5790 pcre_uchar type;
5791 int offset = 0;
5792 struct sljit_label *label;
5793 struct sljit_jump *zerolength;
5794 struct sljit_jump *jump = NULL;
5795 pcre_uchar *ccbegin = cc;
5796 int min = 0, max = 0;
5797 BOOL minimize;
5798
5799 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5800
5801 if (ref)
5802 offset = GET2(cc, 1) << 1;
5803 else
5804 cc += IMM2_SIZE;
5805 type = cc[1 + IMM2_SIZE];
5806
5807 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5808 minimize = (type & 0x1) != 0;
5809 switch(type)
5810 {
5811 case OP_CRSTAR:
5812 case OP_CRMINSTAR:
5813 min = 0;
5814 max = 0;
5815 cc += 1 + IMM2_SIZE + 1;
5816 break;
5817 case OP_CRPLUS:
5818 case OP_CRMINPLUS:
5819 min = 1;
5820 max = 0;
5821 cc += 1 + IMM2_SIZE + 1;
5822 break;
5823 case OP_CRQUERY:
5824 case OP_CRMINQUERY:
5825 min = 0;
5826 max = 1;
5827 cc += 1 + IMM2_SIZE + 1;
5828 break;
5829 case OP_CRRANGE:
5830 case OP_CRMINRANGE:
5831 min = GET2(cc, 1 + IMM2_SIZE + 1);
5832 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5833 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5834 break;
5835 default:
5836 SLJIT_ASSERT_STOP();
5837 break;
5838 }
5839
5840 if (!minimize)
5841 {
5842 if (min == 0)
5843 {
5844 allocate_stack(common, 2);
5845 if (ref)
5846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5849 /* Temporary release of STR_PTR. */
5850 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5851 /* Handles both invalid and empty cases. Since the minimum repeat
5852 is zero, the invalid case is basically the same as an empty case. */
5853 if (ref)
5854 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5855 else
5856 {
5857 compile_dnref_search(common, ccbegin, NULL);
5858 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5860 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5861 }
5862 /* Restore if not zero length. */
5863 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5864 }
5865 else
5866 {
5867 allocate_stack(common, 1);
5868 if (ref)
5869 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5870 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5871 if (ref)
5872 {
5873 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5874 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5875 }
5876 else
5877 {
5878 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5879 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5880 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5881 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5882 }
5883 }
5884
5885 if (min > 1 || max > 1)
5886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5887
5888 label = LABEL();
5889 if (!ref)
5890 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5891 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5892
5893 if (min > 1 || max > 1)
5894 {
5895 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5896 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5897 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5898 if (min > 1)
5899 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5900 if (max > 1)
5901 {
5902 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5903 allocate_stack(common, 1);
5904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5905 JUMPTO(SLJIT_JUMP, label);
5906 JUMPHERE(jump);
5907 }
5908 }
5909
5910 if (max == 0)
5911 {
5912 /* Includes min > 1 case as well. */
5913 allocate_stack(common, 1);
5914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5915 JUMPTO(SLJIT_JUMP, label);
5916 }
5917
5918 JUMPHERE(zerolength);
5919 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5920
5921 count_match(common);
5922 return cc;
5923 }
5924
5925 allocate_stack(common, ref ? 2 : 3);
5926 if (ref)
5927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5928 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5929 if (type != OP_CRMINSTAR)
5930 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5931
5932 if (min == 0)
5933 {
5934 /* Handles both invalid and empty cases. Since the minimum repeat,
5935 is zero the invalid case is basically the same as an empty case. */
5936 if (ref)
5937 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5938 else
5939 {
5940 compile_dnref_search(common, ccbegin, NULL);
5941 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5942 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5943 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5944 }
5945 /* Length is non-zero, we can match real repeats. */
5946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5947 jump = JUMP(SLJIT_JUMP);
5948 }
5949 else
5950 {
5951 if (ref)
5952 {
5953 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5954 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5955 }
5956 else
5957 {
5958 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5959 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5961 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5962 }
5963 }
5964
5965 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5966 if (max > 0)
5967 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5968
5969 if (!ref)
5970 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5971 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5973
5974 if (min > 1)
5975 {
5976 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5977 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5978 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5979 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5980 }
5981 else if (max > 0)
5982 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5983
5984 if (jump != NULL)
5985 JUMPHERE(jump);
5986 JUMPHERE(zerolength);
5987
5988 count_match(common);
5989 return cc;
5990 }
5991
5992 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5993 {
5994 DEFINE_COMPILER;
5995 backtrack_common *backtrack;
5996 recurse_entry *entry = common->entries;
5997 recurse_entry *prev = NULL;
5998 sljit_sw start = GET(cc, 1);
5999 pcre_uchar *start_cc;
6000 BOOL needs_control_head;
6001
6002 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6003
6004 /* Inlining simple patterns. */
6005 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6006 {
6007 start_cc = common->start + start;
6008 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6009 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6010 return cc + 1 + LINK_SIZE;
6011 }
6012
6013 while (entry != NULL)
6014 {
6015 if (entry->start == start)
6016 break;
6017 prev = entry;
6018 entry = entry->next;
6019 }
6020
6021 if (entry == NULL)
6022 {
6023 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6024 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6025 return NULL;
6026 entry->next = NULL;
6027 entry->entry = NULL;
6028 entry->calls = NULL;
6029 entry->start = start;
6030
6031 if (prev != NULL)
6032 prev->next = entry;
6033 else
6034 common->entries = entry;
6035 }
6036
6037 if (common->has_set_som && common->mark_ptr != 0)
6038 {
6039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6040 allocate_stack(common, 2);
6041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6044 }
6045 else if (common->has_set_som || common->mark_ptr != 0)
6046 {
6047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6048 allocate_stack(common, 1);
6049 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6050 }
6051
6052 if (entry->entry == NULL)
6053 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6054 else
6055 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6056 /* Leave if the match is failed. */
6057 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6058 return cc + 1 + LINK_SIZE;
6059 }
6060
6061 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6062 {
6063 const pcre_uchar *begin = arguments->begin;
6064 int *offset_vector = arguments->offsets;
6065 int offset_count = arguments->offset_count;
6066 int i;
6067
6068 if (PUBL(callout) == NULL)
6069 return 0;
6070
6071 callout_block->version = 2;
6072 callout_block->callout_data = arguments->callout_data;
6073
6074 /* Offsets in subject. */
6075 callout_block->subject_length = arguments->end - arguments->begin;
6076 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6077 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6078 #if defined COMPILE_PCRE8
6079 callout_block->subject = (PCRE_SPTR)begin;
6080 #elif defined COMPILE_PCRE16
6081 callout_block->subject = (PCRE_SPTR16)begin;
6082 #elif defined COMPILE_PCRE32
6083 callout_block->subject = (PCRE_SPTR32)begin;
6084 #endif
6085
6086 /* Convert and copy the JIT offset vector to the offset_vector array. */
6087 callout_block->capture_top = 0;
6088 callout_block->offset_vector = offset_vector;
6089 for (i = 2; i < offset_count; i += 2)
6090 {
6091 offset_vector[i] = jit_ovector[i] - begin;
6092 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6093 if (jit_ovector[i] >= begin)
6094 callout_block->capture_top = i;
6095 }
6096
6097 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6098 if (offset_count > 0)
6099 offset_vector[0] = -1;
6100 if (offset_count > 1)
6101 offset_vector[1] = -1;
6102 return (*PUBL(callout))(callout_block);
6103 }
6104
6105 /* Aligning to 8 byte. */
6106 #define CALLOUT_ARG_SIZE \
6107 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6108
6109 #define CALLOUT_ARG_OFFSET(arg) \
6110 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6111
6112 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6113 {
6114 DEFINE_COMPILER;
6115 backtrack_common *backtrack;
6116
6117 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6118
6119 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6120
6121 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6122 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6123 SLJIT_ASSERT(common->capture_last_ptr != 0);
6124 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6125 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6126
6127 /* These pointer sized fields temporarly stores internal variables. */
6128 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6130 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6131
6132 if (common->mark_ptr != 0)
6133 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6134 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6135 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6136 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6137
6138 /* Needed to save important temporary registers. */
6139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6140 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6141 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6142 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6143 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6144 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6145 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6146
6147 /* Check return value. */
6148 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6149 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6150 if (common->forced_quit_label == NULL)
6151 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6152 else
6153 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6154 return cc + 2 + 2 * LINK_SIZE;
6155 }
6156
6157 #undef CALLOUT_ARG_SIZE
6158 #undef CALLOUT_ARG_OFFSET
6159
/* Emits the matching path of an assertion (OP_ASSERT ... OP_ASSERTBACK_NOT),
which may be preceded by OP_BRAZERO or OP_BRAMINZERO.

Every alternative of the assertion is compiled in turn, each followed by its
own backtracking path. The accept / quit / then_trap related members of
"common" are saved on entry and restored on every exit path.

When "conditional" is set, the failure jumps are collected in
backtrack->condfailed instead of backtrack->common.topbacktracks.

Returns the address of the item following the closing ket of the assertion
(cc + 1 + LINK_SIZE), or NULL when a compiler error is detected. */
6160 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6161 {
6162 DEFINE_COMPILER;
6163 int framesize;
6164 int extrasize;
6165 BOOL needs_control_head;
6166 int private_data_ptr;
6167 backtrack_common altbacktrack;
6168 pcre_uchar *ccbegin;
6169 pcre_uchar opcode;
6170 pcre_uchar bra = OP_BRA;
6171 jump_list *tmp = NULL;
6172 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6173 jump_list **found;
6174 /* Saving previous accept variables. */
6175 BOOL save_local_exit = common->local_exit;
6176 BOOL save_positive_assert = common->positive_assert;
6177 then_trap_backtrack *save_then_trap = common->then_trap;
6178 struct sljit_label *save_quit_label = common->quit_label;
6179 struct sljit_label *save_accept_label = common->accept_label;
6180 jump_list *save_quit = common->quit;
6181 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6182 jump_list *save_accept = common->accept;
6183 struct sljit_jump *jump;
6184 struct sljit_jump *brajump = NULL;
6185
6186 /* Assert captures then. */
6187 common->then_trap = NULL;
6188
6189 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6190 {
6191 SLJIT_ASSERT(!conditional);
6192 bra = *cc;
6193 cc++;
6194 }
6195 private_data_ptr = PRIVATE_DATA(cc);
6196 SLJIT_ASSERT(private_data_ptr != 0);
6197 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6198 backtrack->framesize = framesize;
6199 backtrack->private_data_ptr = private_data_ptr;
6200 opcode = *cc;
6201 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* The jump emitted after an alternative has matched is added to "found":
for positive asserts this is the local list "tmp", for negative asserts
it is the failure list "target". */
6202 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6203 ccbegin = cc;
6204 cc += GET(cc, 1);
6205
6206 if (bra == OP_BRAMINZERO)
6207 {
6208 /* This is a braminzero backtrack path. */
6209 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6210 free_stack(common, 1);
6211 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6212 }
6213
/* Save STR_PTR (and the control head, when needed) on the stack. When
framesize is non-negative, the previous value of private_data_ptr is saved
as well and a frame is initialized by init_frame(). */
6214 if (framesize < 0)
6215 {
6216 extrasize = needs_control_head ? 2 : 1;
6217 if (framesize == no_frame)
6218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6219 allocate_stack(common, extrasize);
6220 if (needs_control_head)
6221 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6223 if (needs_control_head)
6224 {
6225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6227 }
6228 }
6229 else
6230 {
6231 extrasize = needs_control_head ? 3 : 2;
6232 allocate_stack(common, framesize + extrasize);
6233 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6234 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6236 if (needs_control_head)
6237 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6239 if (needs_control_head)
6240 {
6241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6244 }
6245 else
6246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6247 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6248 }
6249
6250 memset(&altbacktrack, 0, sizeof(backtrack_common));
6251 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6252 {
6253 /* Negative assert is stronger than positive assert. */
6254 common->local_exit = TRUE;
6255 common->quit_label = NULL;
6256 common->quit = NULL;
6257 common->positive_assert = FALSE;
6258 }
6259 else
6260 common->positive_assert = TRUE;
6261 common->positive_assert_quit = NULL;
6262
/* One iteration per alternative; the loop is left when the item at cc is
not OP_ALT. */
6263 while (1)
6264 {
6265 common->accept_label = NULL;
6266 common->accept = NULL;
6267 altbacktrack.top = NULL;
6268 altbacktrack.topbacktracks = NULL;
6269
/* Alternatives after the first one start from the saved STR_PTR. */
6270 if (*ccbegin == OP_ALT)
6271 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6272
6273 altbacktrack.cc = ccbegin;
6274 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6275 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6276 {
/* Compiler error: restore the saved state and give up. */
6277 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6278 {
6279 common->local_exit = save_local_exit;
6280 common->quit_label = save_quit_label;
6281 common->quit = save_quit;
6282 }
6283 common->positive_assert = save_positive_assert;
6284 common->then_trap = save_then_trap;
6285 common->accept_label = save_accept_label;
6286 common->positive_assert_quit = save_positive_assert_quit;
6287 common->accept = save_accept;
6288 return NULL;
6289 }
6290 common->accept_label = LABEL();
6291 if (common->accept != NULL)
6292 set_jumps(common->accept, common->accept_label);
6293
6294 /* Reset stack. */
6295 if (framesize < 0)
6296 {
6297 if (framesize == no_frame)
6298 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6299 else
6300 free_stack(common, extrasize);
6301 if (needs_control_head)
6302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6303 }
6304 else
6305 {
6306 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6307 {
6308 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6309 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6310 if (needs_control_head)
6311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6312 }
6313 else
6314 {
6315 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6316 if (needs_control_head)
6317 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6318 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6319 }
6320 }
6321
6322 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6323 {
6324 /* We know that STR_PTR was stored on the top of the stack. */
6325 if (conditional)
6326 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6327 else if (bra == OP_BRAZERO)
6328 {
6329 if (framesize < 0)
6330 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6331 else
6332 {
6333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6334 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6335 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6336 }
6337 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6339 }
6340 else if (framesize >= 0)
6341 {
6342 /* For OP_BRA and OP_BRAMINZERO. */
6343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6344 }
6345 }
/* This alternative matched: leave through the "found" list. */
6346 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6347
6348 compile_backtrackingpath(common, altbacktrack.top);
6349 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6350 {
/* Compiler error: restore the saved state and give up. */
6351 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6352 {
6353 common->local_exit = save_local_exit;
6354 common->quit_label = save_quit_label;
6355 common->quit = save_quit;
6356 }
6357 common->positive_assert = save_positive_assert;
6358 common->then_trap = save_then_trap;
6359 common->accept_label = save_accept_label;
6360 common->positive_assert_quit = save_positive_assert_quit;
6361 common->accept = save_accept;
6362 return NULL;
6363 }
6364 set_jumps(altbacktrack.topbacktracks, LABEL());
6365
6366 if (*cc != OP_ALT)
6367 break;
6368
6369 ccbegin = cc;
6370 cc += GET(cc, 1);
6371 }
6372
6373 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6374 {
6375 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6376 /* Makes the check less complicated below. */
6377 common->positive_assert_quit = common->quit;
6378 }
6379
6380 /* None of them matched. */
6381 if (common->positive_assert_quit != NULL)
6382 {
6383 jump = JUMP(SLJIT_JUMP);
6384 set_jumps(common->positive_assert_quit, LABEL());
6385 SLJIT_ASSERT(framesize != no_stack);
6386 if (framesize < 0)
6387 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6388 else
6389 {
6390 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6391 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6392 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6393 }
6394 JUMPHERE(jump);
6395 }
6396
6397 if (needs_control_head)
6398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6399
6400 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6401 {
6402 /* Assert is failed. */
6403 if (conditional || bra == OP_BRAZERO)
6404 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6405
6406 if (framesize < 0)
6407 {
6408 /* The topmost item should be 0. */
6409 if (bra == OP_BRAZERO)
6410 {
6411 if (extrasize == 2)
6412 free_stack(common, 1);
6413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6414 }
6415 else
6416 free_stack(common, extrasize);
6417 }
6418 else
6419 {
6420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6421 /* The topmost item should be 0. */
6422 if (bra == OP_BRAZERO)
6423 {
6424 free_stack(common, framesize + extrasize - 1);
6425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6426 }
6427 else
6428 free_stack(common, framesize + extrasize);
6429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6430 }
/* With OP_BRAZERO this jump is bound to the matching path label below;
in every other case it is added to the failure list. */
6431 jump = JUMP(SLJIT_JUMP);
6432 if (bra != OP_BRAZERO)
6433 add_jump(compiler, target, jump);
6434
6435 /* Assert is successful. */
6436 set_jumps(tmp, LABEL());
6437 if (framesize < 0)
6438 {
6439 /* We know that STR_PTR was stored on the top of the stack. */
6440 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6441 /* Keep the STR_PTR on the top of the stack. */
6442 if (bra == OP_BRAZERO)
6443 {
6444 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6445 if (extrasize == 2)
6446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6447 }
6448 else if (bra == OP_BRAMINZERO)
6449 {
6450 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6452 }
6453 }
6454 else
6455 {
6456 if (bra == OP_BRA)
6457 {
6458 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6459 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6460 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6461 }
6462 else
6463 {
6464 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6465 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6466 if (extrasize == 2)
6467 {
6468 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6469 if (bra == OP_BRAMINZERO)
6470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6471 }
6472 else
6473 {
6474 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6476 }
6477 }
6478 }
6479
6480 if (bra == OP_BRAZERO)
6481 {
6482 backtrack->matchingpath = LABEL();
6483 SET_LABEL(jump, backtrack->matchingpath);
6484 }
6485 else if (bra == OP_BRAMINZERO)
6486 {
6487 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6488 JUMPHERE(brajump);
6489 if (framesize >= 0)
6490 {
6491 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6492 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6494 }
6495 set_jumps(backtrack->common.topbacktracks, LABEL());
6496 }
6497 }
6498 else
6499 {
6500 /* AssertNot is successful. */
6501 if (framesize < 0)
6502 {
6503 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6504 if (bra != OP_BRA)
6505 {
6506 if (extrasize == 2)
6507 free_stack(common, 1);
6508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6509 }
6510 else
6511 free_stack(common, extrasize);
6512 }
6513 else
6514 {
6515 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6517 /* The topmost item should be 0. */
6518 if (bra != OP_BRA)
6519 {
6520 free_stack(common, framesize + extrasize - 1);
6521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6522 }
6523 else
6524 free_stack(common, framesize + extrasize);
6525 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6526 }
6527
6528 if (bra == OP_BRAZERO)
6529 backtrack->matchingpath = LABEL();
6530 else if (bra == OP_BRAMINZERO)
6531 {
6532 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6533 JUMPHERE(brajump);
6534 }
6535
6536 if (bra != OP_BRA)
6537 {
6538 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6539 set_jumps(backtrack->common.topbacktracks, LABEL());
6540 backtrack->common.topbacktracks = NULL;
6541 }
6542 }
6543
/* Normal exit: restore the state saved on entry. */
6544 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6545 {
6546 common->local_exit = save_local_exit;
6547 common->quit_label = save_quit_label;
6548 common->quit = save_quit;
6549 }
6550 common->positive_assert = save_positive_assert;
6551 common->then_trap = save_then_trap;
6552 common->accept_label = save_accept_label;
6553 common->positive_assert_quit = save_positive_assert_quit;
6554 common->accept = save_accept;
6555 return cc + 1 + LINK_SIZE;
6556 }
6557
6558 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6559 {
6560 DEFINE_COMPILER;
6561 int stacksize;
6562
6563 if (framesize < 0)
6564 {
6565 if (framesize == no_frame)
6566 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6567 else
6568 {
6569 stacksize = needs_control_head ? 1 : 0;
6570 if (ket != OP_KET || has_alternatives)
6571 stacksize++;
6572 free_stack(common, stacksize);
6573 }
6574
6575 if (needs_control_head)
6576 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6577
6578 /* TMP2 which is set here used by OP_KETRMAX below. */
6579 if (ket == OP_KETRMAX)
6580 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6581 else if (ket == OP_KETRMIN)
6582 {
6583 /* Move the STR_PTR to the private_data_ptr. */
6584 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6585 }
6586 }
6587 else
6588 {
6589 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6590 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6591 if (needs_control_head)
6592 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6593
6594 if (ket == OP_KETRMAX)
6595 {
6596 /* TMP2 which is set here used by OP_KETRMAX below. */
6597 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6598 }
6599 }
6600 if (needs_control_head)
6601 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6602 }
6603
6604 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6605 {
6606 DEFINE_COMPILER;
6607
6608 if (common->capture_last_ptr != 0)
6609 {
6610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6612 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6613 stacksize++;
6614 }
6615 if (common->optimized_cbracket[offset >> 1] == 0)
6616 {
6617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6619 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6624 stacksize += 2;
6625 }
6626 return stacksize;
6627 }
6628
6629 /*
6630 Handling bracketed expressions is probably the most complex part.
6631
6632 Stack layout naming characters:
6633 S - Push the current STR_PTR
6634 0 - Push a 0 (NULL)
6635 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6636 before the next alternative. Not pushed if there are no alternatives.
6637 M - Any values pushed by the current alternative. Can be empty, or anything.
6638 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6639 L - Push the previous local (pointed by localptr) to the stack
6640 () - optional values stored on the stack
6641 ()* - optional, can be stored multiple times
6642
6643 The following list shows the regular expression templates, their PCRE byte codes
6644 and stack layout supported by pcre-sljit.
6645
6646 (?:) OP_BRA | OP_KET A M
6647 () OP_CBRA | OP_KET C M
6648 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6649 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6650 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6651 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6652 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6653 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6654 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6655 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6656 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6657 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6658 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6659 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6660 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6661 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6662 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6663 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6664 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6665 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6666 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6667 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6668
6669
6670 Stack layout naming characters:
6671 A - Push the alternative index (starting from 0) on the stack.
6672 Not pushed if there are no alternatives.
6673 M - Any values pushed by the current alternative. Can be empty, or anything.
6674
6675 The next list shows the possible content of a bracket:
6676 (|) OP_*BRA | OP_ALT ... M A
6677 (?()|) OP_*COND | OP_ALT M A
6678 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6679 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6680 Or nothing, if trace is unnecessary
6681 */
6682
6683 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6684 {
6685 DEFINE_COMPILER;
6686 backtrack_common *backtrack;
6687 pcre_uchar opcode;
6688 int private_data_ptr = 0;
6689 int offset = 0;
6690 int i, stacksize;
6691 int repeat_ptr = 0, repeat_length = 0;
6692 int repeat_type = 0, repeat_count = 0;
6693 pcre_uchar *ccbegin;
6694 pcre_uchar *matchingpath;
6695 pcre_uchar *slot;
6696 pcre_uchar bra = OP_BRA;
6697 pcre_uchar ket;
6698 assert_backtrack *assert;
6699 BOOL has_alternatives;
6700 BOOL needs_control_head = FALSE;
6701 struct sljit_jump *jump;
6702 struct sljit_jump *skip;
6703 struct sljit_label *rmax_label = NULL;
6704 struct sljit_jump *braminzero = NULL;
6705
6706 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6707
6708 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6709 {
6710 bra = *cc;
6711 cc++;
6712 opcode = *cc;
6713 }
6714
6715 opcode = *cc;
6716 ccbegin = cc;
6717 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6718 ket = *matchingpath;
6719 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6720 {
6721 repeat_ptr = PRIVATE_DATA(matchingpath);
6722 repeat_length = PRIVATE_DATA(matchingpath + 1);
6723 repeat_type = PRIVATE_DATA(matchingpath + 2);
6724 repeat_count = PRIVATE_DATA(matchingpath + 3);
6725 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6726 if (repeat_type == OP_UPTO)
6727 ket = OP_KETRMAX;
6728 if (repeat_type == OP_MINUPTO)
6729 ket = OP_KETRMIN;
6730 }
6731
6732 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6733 {
6734 /* Drop this bracket_backtrack. */
6735 parent->top = backtrack->prev;
6736 return matchingpath + 1 + LINK_SIZE + repeat_length;
6737 }
6738
6739 matchingpath = ccbegin + 1 + LINK_SIZE;
6740 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6741 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6742 cc += GET(cc, 1);
6743