/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1427 - (show annotations)
Wed Jan 1 15:15:09 2014 UTC (5 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 325397 byte(s)
JIT: Optimize character range read based on minimum value in UTF-8 mode. Several tests were added for checking existing character range optimizations.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be the multiply
82 of page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
145
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chained list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list  *next;   /* Following node. */
} jump_list;
193
/* Node of a singly linked list of stubs; each node pairs a jump (start)
   with a label (quit). */
typedef struct stub_list {
  struct sljit_jump  *start;
  struct sljit_label *quit;
  struct stub_list   *next;   /* Following node. */
} stub_list;
199
/* Negative marker values for frame sizes. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
204
/* Kinds of control entries. */
enum control_types {
  type_mark      = 0,
  type_then_trap = 1
};
209
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to coninue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
309 #define MAX_RANGE_SIZE 4
310
311 typedef struct compiler_common {
312 /* The sljit ceneric compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 pcre_uint32 nlmax;
367 pcre_uint32 nlmin;
368 int newline;
369 int bsr_nltype;
370 pcre_uint32 bsr_nlmax;
371 pcre_uint32 bsr_nlmin;
372 /* Dollar endonly. */
373 int endonly;
374 /* Tables. */
375 sljit_sw ctypes;
376 /* Named capturing brackets. */
377 pcre_uchar *name_table;
378 sljit_sw name_count;
379 sljit_sw name_entry_size;
380
381 /* Labels and jump lists. */
382 struct sljit_label *partialmatchlabel;
383 struct sljit_label *quit_label;
384 struct sljit_label *forced_quit_label;
385 struct sljit_label *accept_label;
386 stub_list *stubs;
387 recurse_entry *entries;
388 recurse_entry *currententry;
389 jump_list *partialmatch;
390 jump_list *quit;
391 jump_list *positive_assert_quit;
392 jump_list *forced_quit;
393 jump_list *accept;
394 jump_list *calllimit;
395 jump_list *stackalloc;
396 jump_list *revertframes;
397 jump_list *wordboundary;
398 jump_list *anynewline;
399 jump_list *hspace;
400 jump_list *vspace;
401 jump_list *casefulcmp;
402 jump_list *caselesscmp;
403 jump_list *reset_match;
404 BOOL jscript_compat;
405 #ifdef SUPPORT_UTF
406 BOOL utf;
407 #ifdef SUPPORT_UCP
408 BOOL use_ucp;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadchar;
412 jump_list *utfreadchar16;
413 jump_list *utfreadtype8;
414 #endif
415 #endif /* SUPPORT_UTF */
416 #ifdef SUPPORT_UCP
417 jump_list *getucd;
418 #endif
419 } compiler_common;
420
/* Context for byte_sequence_compare. The two unions (c and oc) exist only
   when SLJIT_UNALIGNED is enabled; each overlays an int, an unsigned short
   and an array of character units whose element type and count depend on
   the compiled character width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
454
/* Drop the CMP macro coming from sljit; this file defines its own CMP
   among the shortcut macros. */
#undef CMP

/* Byte offset of the i-th element of the stack: element 0 lives one
   sljit_sw below the base, element 1 two below, and so on. */
#define STACK(i)        ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register aliases: scratch registers. */
#define TMP1            SLJIT_SCRATCH_REG1
#define STACK_TOP       SLJIT_SCRATCH_REG2
#define TMP2            SLJIT_SCRATCH_REG3
/* Register aliases: saved registers. */
#define STR_PTR         SLJIT_SAVED_REG1
#define STR_END         SLJIT_SAVED_REG2
#define STACK_LIMIT     SLJIT_SAVED_REG3
/* Register aliases: extra temporary / saved registers. */
#define RETURN_ADDR     SLJIT_TEMPORARY_EREG1
#define TMP3            SLJIT_TEMPORARY_EREG2
#define ARGUMENTS       SLJIT_SAVED_EREG1
#define COUNT_MATCH     SLJIT_SAVED_EREG2

/* Local space layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0         (0 * sizeof(sljit_sw))
#define LOCALS1         (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0     (2 * sizeof(sljit_sw))
#define POSSESSIVE1     (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH     (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
   to characters. The vector data is divided into two groups: the first
   group contains the start / end character pointers, and the second is
   the start pointers when the end of the capturing group has not yet
   been reached. */
#define OVECTOR_START   (common->ovector_start)
#define OVECTOR(i)      (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
489
490 #if defined COMPILE_PCRE8
491 #define MOV_UCHAR SLJIT_MOV_UB
492 #define MOVU_UCHAR SLJIT_MOVU_UB
493 #elif defined COMPILE_PCRE16
494 #define MOV_UCHAR SLJIT_MOV_UH
495 #define MOVU_UCHAR SLJIT_MOVU_UH
496 #elif defined COMPILE_PCRE32
497 #define MOV_UCHAR SLJIT_MOV_UI
498 #define MOVU_UCHAR SLJIT_MOVU_UI
499 #else
500 #error Unsupported compiling mode
501 #endif
502
/* Shortcuts for the sljit emitter calls. Apart from DEFINE_COMPILER, every
   macro below expects a local variable named `compiler` to be in scope;
   DEFINE_COMPILER declares it from `common`. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Compare-and-jump. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define READ_CHAR_MAX 0x7fffffff
530
531 static pcre_uchar* bracketend(pcre_uchar* cc)
532 {
533 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
534 do cc += GET(cc, 1); while (*cc == OP_ALT);
535 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
536 cc += 1 + LINK_SIZE;
537 return cc;
538 }
539
/* Lookup table: ones_in_half_byte[v] is the number of set bits in the
   4-bit value v (0..15). The table is never written, so it is declared
   const, which lets the compiler place it in read-only storage and
   rejects accidental modification. */
static const int ones_in_half_byte[16] = {
  /*  0 */ 0, 1, 1, 2, /*  4 */ 1, 2, 2, 3,
  /*  8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
544
/* Functions which might need modification for each newly supported opcode:
 next_opcode
 check_opcode_types
 set_private_data_ptrs
 get_framesize
 init_frame
 get_private_data_copy_length
 copy_private_data
 compile_matchingpath
 compile_backtrackingpath
*/
556
557 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
558 {
559 SLJIT_UNUSED_ARG(common);
560 switch(*cc)
561 {
562 case OP_SOD:
563 case OP_SOM:
564 case OP_SET_SOM:
565 case OP_NOT_WORD_BOUNDARY:
566 case OP_WORD_BOUNDARY:
567 case OP_NOT_DIGIT:
568 case OP_DIGIT:
569 case OP_NOT_WHITESPACE:
570 case OP_WHITESPACE:
571 case OP_NOT_WORDCHAR:
572 case OP_WORDCHAR:
573 case OP_ANY:
574 case OP_ALLANY:
575 case OP_NOTPROP:
576 case OP_PROP:
577 case OP_ANYNL:
578 case OP_NOT_HSPACE:
579 case OP_HSPACE:
580 case OP_NOT_VSPACE:
581 case OP_VSPACE:
582 case OP_EXTUNI:
583 case OP_EODN:
584 case OP_EOD:
585 case OP_CIRC:
586 case OP_CIRCM:
587 case OP_DOLL:
588 case OP_DOLLM:
589 case OP_CRSTAR:
590 case OP_CRMINSTAR:
591 case OP_CRPLUS:
592 case OP_CRMINPLUS:
593 case OP_CRQUERY:
594 case OP_CRMINQUERY:
595 case OP_CRRANGE:
596 case OP_CRMINRANGE:
597 case OP_CRPOSSTAR:
598 case OP_CRPOSPLUS:
599 case OP_CRPOSQUERY:
600 case OP_CRPOSRANGE:
601 case OP_CLASS:
602 case OP_NCLASS:
603 case OP_REF:
604 case OP_REFI:
605 case OP_DNREF:
606 case OP_DNREFI:
607 case OP_RECURSE:
608 case OP_CALLOUT:
609 case OP_ALT:
610 case OP_KET:
611 case OP_KETRMAX:
612 case OP_KETRMIN:
613 case OP_KETRPOS:
614 case OP_REVERSE:
615 case OP_ASSERT:
616 case OP_ASSERT_NOT:
617 case OP_ASSERTBACK:
618 case OP_ASSERTBACK_NOT:
619 case OP_ONCE:
620 case OP_ONCE_NC:
621 case OP_BRA:
622 case OP_BRAPOS:
623 case OP_CBRA:
624 case OP_CBRAPOS:
625 case OP_COND:
626 case OP_SBRA:
627 case OP_SBRAPOS:
628 case OP_SCBRA:
629 case OP_SCBRAPOS:
630 case OP_SCOND:
631 case OP_CREF:
632 case OP_DNCREF:
633 case OP_RREF:
634 case OP_DNRREF:
635 case OP_DEF:
636 case OP_BRAZERO:
637 case OP_BRAMINZERO:
638 case OP_BRAPOSZERO:
639 case OP_PRUNE:
640 case OP_SKIP:
641 case OP_THEN:
642 case OP_COMMIT:
643 case OP_FAIL:
644 case OP_ACCEPT:
645 case OP_ASSERT_ACCEPT:
646 case OP_CLOSE:
647 case OP_SKIPZERO:
648 return cc + PRIV(OP_lengths)[*cc];
649
650 case OP_CHAR:
651 case OP_CHARI:
652 case OP_NOT:
653 case OP_NOTI:
654 case OP_STAR:
655 case OP_MINSTAR:
656 case OP_PLUS:
657 case OP_MINPLUS:
658 case OP_QUERY:
659 case OP_MINQUERY:
660 case OP_UPTO:
661 case OP_MINUPTO:
662 case OP_EXACT:
663 case OP_POSSTAR:
664 case OP_POSPLUS:
665 case OP_POSQUERY:
666 case OP_POSUPTO:
667 case OP_STARI:
668 case OP_MINSTARI:
669 case OP_PLUSI:
670 case OP_MINPLUSI:
671 case OP_QUERYI:
672 case OP_MINQUERYI:
673 case OP_UPTOI:
674 case OP_MINUPTOI:
675 case OP_EXACTI:
676 case OP_POSSTARI:
677 case OP_POSPLUSI:
678 case OP_POSQUERYI:
679 case OP_POSUPTOI:
680 case OP_NOTSTAR:
681 case OP_NOTMINSTAR:
682 case OP_NOTPLUS:
683 case OP_NOTMINPLUS:
684 case OP_NOTQUERY:
685 case OP_NOTMINQUERY:
686 case OP_NOTUPTO:
687 case OP_NOTMINUPTO:
688 case OP_NOTEXACT:
689 case OP_NOTPOSSTAR:
690 case OP_NOTPOSPLUS:
691 case OP_NOTPOSQUERY:
692 case OP_NOTPOSUPTO:
693 case OP_NOTSTARI:
694 case OP_NOTMINSTARI:
695 case OP_NOTPLUSI:
696 case OP_NOTMINPLUSI:
697 case OP_NOTQUERYI:
698 case OP_NOTMINQUERYI:
699 case OP_NOTUPTOI:
700 case OP_NOTMINUPTOI:
701 case OP_NOTEXACTI:
702 case OP_NOTPOSSTARI:
703 case OP_NOTPOSPLUSI:
704 case OP_NOTPOSQUERYI:
705 case OP_NOTPOSUPTOI:
706 cc += PRIV(OP_lengths)[*cc];
707 #ifdef SUPPORT_UTF
708 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
709 #endif
710 return cc;
711
712 /* Special cases. */
713 case OP_TYPESTAR:
714 case OP_TYPEMINSTAR:
715 case OP_TYPEPLUS:
716 case OP_TYPEMINPLUS:
717 case OP_TYPEQUERY:
718 case OP_TYPEMINQUERY:
719 case OP_TYPEUPTO:
720 case OP_TYPEMINUPTO:
721 case OP_TYPEEXACT:
722 case OP_TYPEPOSSTAR:
723 case OP_TYPEPOSPLUS:
724 case OP_TYPEPOSQUERY:
725 case OP_TYPEPOSUPTO:
726 return cc + PRIV(OP_lengths)[*cc] - 1;
727
728 case OP_ANYBYTE:
729 #ifdef SUPPORT_UTF
730 if (common->utf) return NULL;
731 #endif
732 return cc + 1;
733
734 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
735 case OP_XCLASS:
736 return cc + GET(cc, 1);
737 #endif
738
739 case OP_MARK:
740 case OP_PRUNE_ARG:
741 case OP_SKIP_ARG:
742 case OP_THEN_ARG:
743 return cc + 1 + 2 + cc[1];
744
745 default:
746 /* All opcodes are supported now! */
747 SLJIT_ASSERT_STOP();
748 return NULL;
749 }
750 }
751
752 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
753 {
754 int count;
755 pcre_uchar *slot;
756
757 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
758 while (cc < ccend)
759 {
760 switch(*cc)
761 {
762 case OP_SET_SOM:
763 common->has_set_som = TRUE;
764 cc += 1;
765 break;
766
767 case OP_REF:
768 case OP_REFI:
769 common->optimized_cbracket[GET2(cc, 1)] = 0;
770 cc += 1 + IMM2_SIZE;
771 break;
772
773 case OP_CBRAPOS:
774 case OP_SCBRAPOS:
775 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
776 cc += 1 + LINK_SIZE + IMM2_SIZE;
777 break;
778
779 case OP_COND:
780 case OP_SCOND:
781 /* Only AUTO_CALLOUT can insert this opcode. We do
782 not intend to support this case. */
783 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
784 return FALSE;
785 cc += 1 + LINK_SIZE;
786 break;
787
788 case OP_CREF:
789 common->optimized_cbracket[GET2(cc, 1)] = 0;
790 cc += 1 + IMM2_SIZE;
791 break;
792
793 case OP_DNREF:
794 case OP_DNREFI:
795 case OP_DNCREF:
796 count = GET2(cc, 1 + IMM2_SIZE);
797 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
798 while (count-- > 0)
799 {
800 common->optimized_cbracket[GET2(slot, 0)] = 0;
801 slot += common->name_entry_size;
802 }
803 cc += 1 + 2 * IMM2_SIZE;
804 break;
805
806 case OP_RECURSE:
807 /* Set its value only once. */
808 if (common->recursive_head_ptr == 0)
809 {
810 common->recursive_head_ptr = common->ovector_start;
811 common->ovector_start += sizeof(sljit_sw);
812 }
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CALLOUT:
817 if (common->capture_last_ptr == 0)
818 {
819 common->capture_last_ptr = common->ovector_start;
820 common->ovector_start += sizeof(sljit_sw);
821 }
822 cc += 2 + 2 * LINK_SIZE;
823 break;
824
825 case OP_THEN_ARG:
826 common->has_then = TRUE;
827 common->control_head_ptr = 1;
828 /* Fall through. */
829
830 case OP_PRUNE_ARG:
831 common->needs_start_ptr = TRUE;
832 /* Fall through. */
833
834 case OP_MARK:
835 if (common->mark_ptr == 0)
836 {
837 common->mark_ptr = common->ovector_start;
838 common->ovector_start += sizeof(sljit_sw);
839 }
840 cc += 1 + 2 + cc[1];
841 break;
842
843 case OP_THEN:
844 common->has_then = TRUE;
845 common->control_head_ptr = 1;
846 /* Fall through. */
847
848 case OP_PRUNE:
849 case OP_SKIP:
850 common->needs_start_ptr = TRUE;
851 cc += 1;
852 break;
853
854 case OP_SKIP_ARG:
855 common->control_head_ptr = 1;
856 common->has_skip_arg = TRUE;
857 cc += 1 + 2 + cc[1];
858 break;
859
860 default:
861 cc = next_opcode(common, cc);
862 if (cc == NULL)
863 return FALSE;
864 break;
865 }
866 }
867 return TRUE;
868 }
869
870 static int get_class_iterator_size(pcre_uchar *cc)
871 {
872 switch(*cc)
873 {
874 case OP_CRSTAR:
875 case OP_CRPLUS:
876 return 2;
877
878 case OP_CRMINSTAR:
879 case OP_CRMINPLUS:
880 case OP_CRQUERY:
881 case OP_CRMINQUERY:
882 return 1;
883
884 case OP_CRRANGE:
885 case OP_CRMINRANGE:
886 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
887 return 0;
888 return 2;
889
890 default:
891 return 0;
892 }
893 }
894
895 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
896 {
897 pcre_uchar *end = bracketend(begin);
898 pcre_uchar *next;
899 pcre_uchar *next_end;
900 pcre_uchar *max_end;
901 pcre_uchar type;
902 sljit_sw length = end - begin;
903 int min, max, i;
904
905 /* Detect fixed iterations first. */
906 if (end[-(1 + LINK_SIZE)] != OP_KET)
907 return FALSE;
908
909 /* Already detected repeat. */
910 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
911 return TRUE;
912
913 next = end;
914 min = 1;
915 while (1)
916 {
917 if (*next != *begin)
918 break;
919 next_end = bracketend(next);
920 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
921 break;
922 next = next_end;
923 min++;
924 }
925
926 if (min == 2)
927 return FALSE;
928
929 max = 0;
930 max_end = next;
931 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
932 {
933 type = *next;
934 while (1)
935 {
936 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
937 break;
938 next_end = bracketend(next + 2 + LINK_SIZE);
939 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
940 break;
941 next = next_end;
942 max++;
943 }
944
945 if (next[0] == type && next[1] == *begin && max >= 1)
946 {
947 next_end = bracketend(next + 1);
948 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
949 {
950 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
951 if (*next_end != OP_KET)
952 break;
953
954 if (i == max)
955 {
956 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
957 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
958 /* +2 the original and the last. */
959 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
960 if (min == 1)
961 return TRUE;
962 min--;
963 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
964 }
965 }
966 }
967 }
968
969 if (min >= 3)
970 {
971 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
972 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
973 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
974 return TRUE;
975 }
976
977 return FALSE;
978 }
979
/* Case label groups for switch statements over iterator opcodes. Each
   macro expands to a run of `case` labels, so it must be followed by the
   statements handling that group. */

/* Character iterators. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1031
1032 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1033 {
1034 pcre_uchar *cc = common->start;
1035 pcre_uchar *alternative;
1036 pcre_uchar *end = NULL;
1037 int private_data_ptr = *private_data_start;
1038 int space, size, bracketlen;
1039
1040 while (cc < ccend)
1041 {
1042 space = 0;
1043 size = 0;
1044 bracketlen = 0;
1045 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1046 return;
1047
1048 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1049 if (detect_repeat(common, cc))
1050 {
1051 /* These brackets are converted to repeats, so no global
1052 based single character repeat is allowed. */
1053 if (cc >= end)
1054 end = bracketend(cc);
1055 }
1056
1057 switch(*cc)
1058 {
1059 case OP_KET:
1060 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1061 {
1062 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1063 private_data_ptr += sizeof(sljit_sw);
1064 cc += common->private_data_ptrs[cc + 1 - common->start];
1065 }
1066 cc += 1 + LINK_SIZE;
1067 break;
1068
1069 case OP_ASSERT:
1070 case OP_ASSERT_NOT:
1071 case OP_ASSERTBACK:
1072 case OP_ASSERTBACK_NOT:
1073 case OP_ONCE:
1074 case OP_ONCE_NC:
1075 case OP_BRAPOS:
1076 case OP_SBRA:
1077 case OP_SBRAPOS:
1078 case OP_SCOND:
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw);
1081 bracketlen = 1 + LINK_SIZE;
1082 break;
1083
1084 case OP_CBRAPOS:
1085 case OP_SCBRAPOS:
1086 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1087 private_data_ptr += sizeof(sljit_sw);
1088 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1089 break;
1090
1091 case OP_COND:
1092 /* Might be a hidden SCOND. */
1093 alternative = cc + GET(cc, 1);
1094 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1095 {
1096 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1097 private_data_ptr += sizeof(sljit_sw);
1098 }
1099 bracketlen = 1 + LINK_SIZE;
1100 break;
1101
1102 case OP_BRA:
1103 bracketlen = 1 + LINK_SIZE;
1104 break;
1105
1106 case OP_CBRA:
1107 case OP_SCBRA:
1108 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1109 break;
1110
1111 CASE_ITERATOR_PRIVATE_DATA_1
1112 space = 1;
1113 size = -2;
1114 break;
1115
1116 CASE_ITERATOR_PRIVATE_DATA_2A
1117 space = 2;
1118 size = -2;
1119 break;
1120
1121 CASE_ITERATOR_PRIVATE_DATA_2B
1122 space = 2;
1123 size = -(2 + IMM2_SIZE);
1124 break;
1125
1126 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1127 space = 1;
1128 size = 1;
1129 break;
1130
1131 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1132 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1133 space = 2;
1134 size = 1;
1135 break;
1136
1137 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1138 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1139 space = 2;
1140 size = 1 + IMM2_SIZE;
1141 break;
1142
1143 case OP_CLASS:
1144 case OP_NCLASS:
1145 size += 1 + 32 / sizeof(pcre_uchar);
1146 space = get_class_iterator_size(cc + size);
1147 break;
1148
1149 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1150 case OP_XCLASS:
1151 size = GET(cc, 1);
1152 space = get_class_iterator_size(cc + size);
1153 break;
1154 #endif
1155
1156 default:
1157 cc = next_opcode(common, cc);
1158 SLJIT_ASSERT(cc != NULL);
1159 break;
1160 }
1161
1162 /* Character iterators, which are not inside a repeated bracket,
1163 gets a private slot instead of allocating it on the stack. */
1164 if (space > 0 && cc >= end)
1165 {
1166 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1167 private_data_ptr += sizeof(sljit_sw) * space;
1168 }
1169
1170 if (size != 0)
1171 {
1172 if (size < 0)
1173 {
1174 cc += -size;
1175 #ifdef SUPPORT_UTF
1176 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1177 #endif
1178 }
1179 else
1180 cc += size;
1181 }
1182
1183 if (bracketlen > 0)
1184 {
1185 if (cc >= end)
1186 {
1187 end = bracketend(cc);
1188 if (end[-1 - LINK_SIZE] == OP_KET)
1189 end = NULL;
1190 }
1191 cc += bracketlen;
1192 }
1193 }
1194 *private_data_start = private_data_ptr;
1195 }
1196
1197 /* Returns with a frame_types (always < 0) if no need for frame. */
/* Otherwise the return value is the computed frame length plus one.

Arguments:
  common              compiler state (has_set_som, mark_ptr, capture_last_ptr
                      and control_head_ptr are consulted)
  cc                  first opcode to examine
  ccend               end of the examined range; when NULL, cc is treated as
                      a bracket and the range is derived from bracketend(cc)
  recursive           when TRUE the start-of-match and mark entries are
                      treated as already accounted for
  needs_control_head  output: set to TRUE when a (*MARK)/(*PRUNE:arg)/
                      (*THEN:arg) opcode is found and a control head exists

The opcode walk must stay in sync with init_frame(), which emits the stores
that this function only counts. */
1198 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1199 {
1200 int length = 0;
1201 int possessive = 0;
1202 BOOL stack_restore = FALSE;
1203 BOOL setsom_found = recursive;
1204 BOOL setmark_found = recursive;
1205 /* The last capture is a local variable even for recursions. */
1206 BOOL capture_last_found = FALSE;
1207
1208 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1209 SLJIT_ASSERT(common->control_head_ptr != 0);
1210 *needs_control_head = TRUE;
1211 #else
1212 *needs_control_head = FALSE;
1213 #endif
1214
1215 if (ccend == NULL)
1216 {
1217 ccend = bracketend(cc) - (1 + LINK_SIZE);
1218 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1219 {
/* Both values start equal; if nothing else is added below, the
"possessive == length" test at the end of the function succeeds. */
1220 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1221 /* This is correct regardless of common->capture_last_ptr. */
1222 capture_last_found = TRUE;
1223 }
1224 cc = next_opcode(common, cc);
1225 }
1226
1227 SLJIT_ASSERT(cc != NULL);
1228 while (cc < ccend)
1229 switch(*cc)
1230 {
1231 case OP_SET_SOM:
1232 SLJIT_ASSERT(common->has_set_som);
1233 stack_restore = TRUE;
/* Counted only once per frame. */
1234 if (!setsom_found)
1235 {
1236 length += 2;
1237 setsom_found = TRUE;
1238 }
1239 cc += 1;
1240 break;
1241
1242 case OP_MARK:
1243 case OP_PRUNE_ARG:
1244 case OP_THEN_ARG:
1245 SLJIT_ASSERT(common->mark_ptr != 0);
1246 stack_restore = TRUE;
1247 if (!setmark_found)
1248 {
1249 length += 2;
1250 setmark_found = TRUE;
1251 }
1252 if (common->control_head_ptr != 0)
1253 *needs_control_head = TRUE;
/* Skips the opcode, the length code unit, the name and (presumably) its
terminator. */
1254 cc += 1 + 2 + cc[1];
1255 break;
1256
1257 case OP_RECURSE:
/* A recursion accounts for every kind of entry that is enabled and has
not been counted yet. */
1258 stack_restore = TRUE;
1259 if (common->has_set_som && !setsom_found)
1260 {
1261 length += 2;
1262 setsom_found = TRUE;
1263 }
1264 if (common->mark_ptr != 0 && !setmark_found)
1265 {
1266 length += 2;
1267 setmark_found = TRUE;
1268 }
1269 if (common->capture_last_ptr != 0 && !capture_last_found)
1270 {
1271 length += 2;
1272 capture_last_found = TRUE;
1273 }
1274 cc += 1 + LINK_SIZE;
1275 break;
1276
1277 case OP_CBRA:
1278 case OP_CBRAPOS:
1279 case OP_SCBRA:
1280 case OP_SCBRAPOS:
1281 stack_restore = TRUE;
1282 if (common->capture_last_ptr != 0 && !capture_last_found)
1283 {
1284 length += 2;
1285 capture_last_found = TRUE;
1286 }
/* Every capturing bracket adds three entries of its own. */
1287 length += 3;
1288 cc += 1 + LINK_SIZE + IMM2_SIZE;
1289 break;
1290
1291 default:
1292 stack_restore = TRUE;
1293 /* Fall through. */
1294
/* The opcodes listed below are skipped without setting stack_restore. */
1295 case OP_NOT_WORD_BOUNDARY:
1296 case OP_WORD_BOUNDARY:
1297 case OP_NOT_DIGIT:
1298 case OP_DIGIT:
1299 case OP_NOT_WHITESPACE:
1300 case OP_WHITESPACE:
1301 case OP_NOT_WORDCHAR:
1302 case OP_WORDCHAR:
1303 case OP_ANY:
1304 case OP_ALLANY:
1305 case OP_ANYBYTE:
1306 case OP_NOTPROP:
1307 case OP_PROP:
1308 case OP_ANYNL:
1309 case OP_NOT_HSPACE:
1310 case OP_HSPACE:
1311 case OP_NOT_VSPACE:
1312 case OP_VSPACE:
1313 case OP_EXTUNI:
1314 case OP_EODN:
1315 case OP_EOD:
1316 case OP_CIRC:
1317 case OP_CIRCM:
1318 case OP_DOLL:
1319 case OP_DOLLM:
1320 case OP_CHAR:
1321 case OP_CHARI:
1322 case OP_NOT:
1323 case OP_NOTI:
1324
1325 case OP_EXACT:
1326 case OP_POSSTAR:
1327 case OP_POSPLUS:
1328 case OP_POSQUERY:
1329 case OP_POSUPTO:
1330
1331 case OP_EXACTI:
1332 case OP_POSSTARI:
1333 case OP_POSPLUSI:
1334 case OP_POSQUERYI:
1335 case OP_POSUPTOI:
1336
1337 case OP_NOTEXACT:
1338 case OP_NOTPOSSTAR:
1339 case OP_NOTPOSPLUS:
1340 case OP_NOTPOSQUERY:
1341 case OP_NOTPOSUPTO:
1342
1343 case OP_NOTEXACTI:
1344 case OP_NOTPOSSTARI:
1345 case OP_NOTPOSPLUSI:
1346 case OP_NOTPOSQUERYI:
1347 case OP_NOTPOSUPTOI:
1348
1349 case OP_TYPEEXACT:
1350 case OP_TYPEPOSSTAR:
1351 case OP_TYPEPOSPLUS:
1352 case OP_TYPEPOSQUERY:
1353 case OP_TYPEPOSUPTO:
1354
1355 case OP_CLASS:
1356 case OP_NCLASS:
1357 case OP_XCLASS:
1358
1359 cc = next_opcode(common, cc);
1360 SLJIT_ASSERT(cc != NULL);
1361 break;
1362 }
1363
1364 /* Possessive quantifiers can use a special case. */
1365 if (SLJIT_UNLIKELY(possessive == length))
1366 return stack_restore ? no_frame : no_stack;
1367
/* One more than the counted entries; presumably for the terminating zero
that init_frame() stores - TODO confirm. */
1368 if (length > 0)
1369 return length + 1;
1370 return stack_restore ? no_frame : no_stack;
1371 }
1372
/* Emits the machine code which stores a frame on the stack for the opcodes
in the given range. The opcode walk mirrors get_framesize().

Arguments:
  common     compiler state
  cc         first opcode to examine
  ccend      end of the examined range; when NULL, cc is treated as a
             bracket and the range is derived from bracketend(cc)
  stackpos   stack index where the first entry is stored
  stacktop   stack index which must be reached exactly after the last
             entry (only checked by an assertion)
  recursive  when TRUE the start-of-match and mark entries are not stored

Entries stored by the emitted code:
  - start of match, mark and last capture: the negated offset of the local,
    followed by the current value of that local (two words);
  - capturing bracket: OVECTOR(offset), followed by the current values of
    OVECTOR(offset) and OVECTOR(offset + 1) (three words);
  - a zero word after the last entry. */
1373 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1374 {
1375 DEFINE_COMPILER;
1376 BOOL setsom_found = recursive;
1377 BOOL setmark_found = recursive;
1378 /* The last capture is a local variable even for recursions. */
1379 BOOL capture_last_found = FALSE;
1380 int offset;
1381
1382 /* >= 1 + shortest item size (2) */
1383 SLJIT_UNUSED_ARG(stacktop);
1384 SLJIT_ASSERT(stackpos >= stacktop + 2);
1385
/* From here on stackpos is the value produced by STACK() and is advanced
by sizeof(sljit_sw) after every store. */
1386 stackpos = STACK(stackpos);
1387 if (ccend == NULL)
1388 {
1389 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive (S)CBRAPOS is not skipped, so the loop below stores an
entry for the bracket itself. */
1390 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1391 cc = next_opcode(common, cc);
1392 }
1393
1394 SLJIT_ASSERT(cc != NULL);
1395 while (cc < ccend)
1396 switch(*cc)
1397 {
1398 case OP_SET_SOM:
1399 SLJIT_ASSERT(common->has_set_som);
1400 if (!setsom_found)
1401 {
1402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1404 stackpos += (int)sizeof(sljit_sw);
1405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1406 stackpos += (int)sizeof(sljit_sw);
1407 setsom_found = TRUE;
1408 }
1409 cc += 1;
1410 break;
1411
1412 case OP_MARK:
1413 case OP_PRUNE_ARG:
1414 case OP_THEN_ARG:
1415 SLJIT_ASSERT(common->mark_ptr != 0);
1416 if (!setmark_found)
1417 {
1418 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1419 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1420 stackpos += (int)sizeof(sljit_sw);
1421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1422 stackpos += (int)sizeof(sljit_sw);
1423 setmark_found = TRUE;
1424 }
1425 cc += 1 + 2 + cc[1];
1426 break;
1427
1428 case OP_RECURSE:
/* A recursion stores every kind of entry that is enabled and has not
been stored yet. */
1429 if (common->has_set_som && !setsom_found)
1430 {
1431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1433 stackpos += (int)sizeof(sljit_sw);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1435 stackpos += (int)sizeof(sljit_sw);
1436 setsom_found = TRUE;
1437 }
1438 if (common->mark_ptr != 0 && !setmark_found)
1439 {
1440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1442 stackpos += (int)sizeof(sljit_sw);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444 stackpos += (int)sizeof(sljit_sw);
1445 setmark_found = TRUE;
1446 }
1447 if (common->capture_last_ptr != 0 && !capture_last_found)
1448 {
1449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1451 stackpos += (int)sizeof(sljit_sw);
1452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1453 stackpos += (int)sizeof(sljit_sw);
1454 capture_last_found = TRUE;
1455 }
1456 cc += 1 + LINK_SIZE;
1457 break;
1458
1459 case OP_CBRA:
1460 case OP_CBRAPOS:
1461 case OP_SCBRA:
1462 case OP_SCBRAPOS:
1463 if (common->capture_last_ptr != 0 && !capture_last_found)
1464 {
1465 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1467 stackpos += (int)sizeof(sljit_sw);
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469 stackpos += (int)sizeof(sljit_sw);
1470 capture_last_found = TRUE;
1471 }
/* The bracket number is doubled to index its pair of ovector entries. */
1472 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1476 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1478 stackpos += (int)sizeof(sljit_sw);
1479 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1480 stackpos += (int)sizeof(sljit_sw);
1481
1482 cc += 1 + LINK_SIZE + IMM2_SIZE;
1483 break;
1484
1485 default:
1486 cc = next_opcode(common, cc);
1487 SLJIT_ASSERT(cc != NULL);
1488 break;
1489 }
1490
/* Zero word after the last entry; the assertion checks that exactly the
expected number of words was written. */
1491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1492 SLJIT_ASSERT(stackpos == STACK(stacktop));
1493 }
1494
1495 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1496 {
1497 int private_data_length = needs_control_head ? 3 : 2;
1498 int size;
1499 pcre_uchar *alternative;
1500 /* Calculate the sum of the private machine words. */
1501 while (cc < ccend)
1502 {
1503 size = 0;
1504 switch(*cc)
1505 {
1506 case OP_KET:
1507 if (PRIVATE_DATA(cc) != 0)
1508 private_data_length++;
1509 cc += 1 + LINK_SIZE;
1510 break;
1511
1512 case OP_ASSERT:
1513 case OP_ASSERT_NOT:
1514 case OP_ASSERTBACK:
1515 case OP_ASSERTBACK_NOT:
1516 case OP_ONCE:
1517 case OP_ONCE_NC:
1518 case OP_BRAPOS:
1519 case OP_SBRA:
1520 case OP_SBRAPOS:
1521 case OP_SCOND:
1522 private_data_length++;
1523 cc += 1 + LINK_SIZE;
1524 break;
1525
1526 case OP_CBRA:
1527 case OP_SCBRA:
1528 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1529 private_data_length++;
1530 cc += 1 + LINK_SIZE + IMM2_SIZE;
1531 break;
1532
1533 case OP_CBRAPOS:
1534 case OP_SCBRAPOS:
1535 private_data_length += 2;
1536 cc += 1 + LINK_SIZE + IMM2_SIZE;
1537 break;
1538
1539 case OP_COND:
1540 /* Might be a hidden SCOND. */
1541 alternative = cc + GET(cc, 1);
1542 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1543 private_data_length++;
1544 cc += 1 + LINK_SIZE;
1545 break;
1546
1547 CASE_ITERATOR_PRIVATE_DATA_1
1548 if (PRIVATE_DATA(cc))
1549 private_data_length++;
1550 cc += 2;
1551 #ifdef SUPPORT_UTF
1552 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1553 #endif
1554 break;
1555
1556 CASE_ITERATOR_PRIVATE_DATA_2A
1557 if (PRIVATE_DATA(cc))
1558 private_data_length += 2;
1559 cc += 2;
1560 #ifdef SUPPORT_UTF
1561 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1562 #endif
1563 break;
1564
1565 CASE_ITERATOR_PRIVATE_DATA_2B
1566 if (PRIVATE_DATA(cc))
1567 private_data_length += 2;
1568 cc += 2 + IMM2_SIZE;
1569 #ifdef SUPPORT_UTF
1570 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1571 #endif
1572 break;
1573
1574 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1575 if (PRIVATE_DATA(cc))
1576 private_data_length++;
1577 cc += 1;
1578 break;
1579
1580 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1581 if (PRIVATE_DATA(cc))
1582 private_data_length += 2;
1583 cc += 1;
1584 break;
1585
1586 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1587 if (PRIVATE_DATA(cc))
1588 private_data_length += 2;
1589 cc += 1 + IMM2_SIZE;
1590 break;
1591
1592 case OP_CLASS:
1593 case OP_NCLASS:
1594 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1595 case OP_XCLASS:
1596 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1597 #else
1598 size = 1 + 32 / (int)sizeof(pcre_uchar);
1599 #endif
1600 if (PRIVATE_DATA(cc))
1601 private_data_length += get_class_iterator_size(cc + size);
1602 cc += size;
1603 break;
1604
1605 default:
1606 cc = next_opcode(common, cc);
1607 SLJIT_ASSERT(cc != NULL);
1608 break;
1609 }
1610 }
1611 SLJIT_ASSERT(cc == ccend);
1612 return private_data_length;
1613 }
1614
/* Emits the machine code which copies private data between the locals and
the stack for the opcodes in the range [cc, ccend).

Arguments:
  common              compiler state
  cc, ccend           opcode range; the walk must finish exactly at ccend
  save                TRUE: the locals are read and stored on the stack;
                      FALSE: the stack slots are read and written back into
                      the locals
  stackptr, stacktop  stack indexes (converted by STACK() below) bounding
                      the copied slots
  needs_control_head  TRUE when common->control_head_ptr is part of the
                      copied data

The opcode walk selects the same locals that get_private_data_copy_length()
counts. TMP1 and TMP2 are used alternately as a two stage pipeline: tmp1next
tells which register is used next, and tmp1empty / tmp2empty tell whether
that register currently holds a value which has not been stored yet. */
1615 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1616 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1617 {
1618 DEFINE_COMPILER;
/* Offsets of at most two locals selected by one step of the walk. */
1619 int srcw[2];
1620 int count, size;
1621 BOOL tmp1next = TRUE;
1622 BOOL tmp1empty = TRUE;
1623 BOOL tmp2empty = TRUE;
1624 pcre_uchar *alternative;
1625 enum {
1626 start,
1627 loop,
1628 end
1629 } status;
1630
/* The "start" step, which selects the recursive head (and the control
head), is performed only when saving. */
1631 status = save ? start : loop;
1632 stackptr = STACK(stackptr - 2);
1633 stacktop = STACK(stacktop - 1);
1634
1635 if (!save)
1636 {
/* One slot (two with a control head) is skipped, then the pipeline is
primed by loading up to two values from the stack. */
1637 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1638 if (stackptr < stacktop)
1639 {
1640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1641 stackptr += sizeof(sljit_sw);
1642 tmp1empty = FALSE;
1643 }
1644 if (stackptr < stacktop)
1645 {
1646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1647 stackptr += sizeof(sljit_sw);
1648 tmp2empty = FALSE;
1649 }
1650 /* The tmp1next must be TRUE in either way. */
1651 }
1652
1653 do
1654 {
/* Each iteration selects "count" locals (0, 1 or 2) in srcw[], which are
then transferred by the inner while loop below. */
1655 count = 0;
1656 switch(status)
1657 {
1658 case start:
1659 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1660 count = 1;
1661 srcw[0] = common->recursive_head_ptr;
1662 if (needs_control_head)
1663 {
1664 SLJIT_ASSERT(common->control_head_ptr != 0);
1665 count = 2;
1666 srcw[1] = common->control_head_ptr;
1667 }
1668 status = loop;
1669 break;
1670
1671 case loop:
1672 if (cc >= ccend)
1673 {
1674 status = end;
1675 break;
1676 }
1677
1678 switch(*cc)
1679 {
1680 case OP_KET:
1681 if (PRIVATE_DATA(cc) != 0)
1682 {
1683 count = 1;
1684 srcw[0] = PRIVATE_DATA(cc);
1685 }
1686 cc += 1 + LINK_SIZE;
1687 break;
1688
1689 case OP_ASSERT:
1690 case OP_ASSERT_NOT:
1691 case OP_ASSERTBACK:
1692 case OP_ASSERTBACK_NOT:
1693 case OP_ONCE:
1694 case OP_ONCE_NC:
1695 case OP_BRAPOS:
1696 case OP_SBRA:
1697 case OP_SBRAPOS:
1698 case OP_SCOND:
1699 count = 1;
1700 srcw[0] = PRIVATE_DATA(cc);
1701 SLJIT_ASSERT(srcw[0] != 0);
1702 cc += 1 + LINK_SIZE;
1703 break;
1704
1705 case OP_CBRA:
1706 case OP_SCBRA:
1707 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1708 {
1709 count = 1;
1710 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1711 }
1712 cc += 1 + LINK_SIZE + IMM2_SIZE;
1713 break;
1714
1715 case OP_CBRAPOS:
1716 case OP_SCBRAPOS:
1717 count = 2;
1718 srcw[0] = PRIVATE_DATA(cc);
1719 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1720 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1721 cc += 1 + LINK_SIZE + IMM2_SIZE;
1722 break;
1723
1724 case OP_COND:
1725 /* Might be a hidden SCOND. */
1726 alternative = cc + GET(cc, 1);
1727 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1728 {
1729 count = 1;
1730 srcw[0] = PRIVATE_DATA(cc);
1731 SLJIT_ASSERT(srcw[0] != 0);
1732 }
1733 cc += 1 + LINK_SIZE;
1734 break;
1735
1736 CASE_ITERATOR_PRIVATE_DATA_1
1737 if (PRIVATE_DATA(cc))
1738 {
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 }
1742 cc += 2;
1743 #ifdef SUPPORT_UTF
1744 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1745 #endif
1746 break;
1747
1748 CASE_ITERATOR_PRIVATE_DATA_2A
1749 if (PRIVATE_DATA(cc))
1750 {
/* Two consecutive words starting at the private data offset. */
1751 count = 2;
1752 srcw[0] = PRIVATE_DATA(cc);
1753 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1754 }
1755 cc += 2;
1756 #ifdef SUPPORT_UTF
1757 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1758 #endif
1759 break;
1760
1761 CASE_ITERATOR_PRIVATE_DATA_2B
1762 if (PRIVATE_DATA(cc))
1763 {
1764 count = 2;
1765 srcw[0] = PRIVATE_DATA(cc);
1766 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1767 }
1768 cc += 2 + IMM2_SIZE;
1769 #ifdef SUPPORT_UTF
1770 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1771 #endif
1772 break;
1773
1774 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1775 if (PRIVATE_DATA(cc))
1776 {
1777 count = 1;
1778 srcw[0] = PRIVATE_DATA(cc);
1779 }
1780 cc += 1;
1781 break;
1782
1783 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1784 if (PRIVATE_DATA(cc))
1785 {
1786 count = 2;
1787 srcw[0] = PRIVATE_DATA(cc);
1788 srcw[1] = srcw[0] + sizeof(sljit_sw);
1789 }
1790 cc += 1;
1791 break;
1792
1793 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1794 if (PRIVATE_DATA(cc))
1795 {
1796 count = 2;
1797 srcw[0] = PRIVATE_DATA(cc);
1798 srcw[1] = srcw[0] + sizeof(sljit_sw);
1799 }
1800 cc += 1 + IMM2_SIZE;
1801 break;
1802
1803 case OP_CLASS:
1804 case OP_NCLASS:
1805 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1806 case OP_XCLASS:
1807 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1808 #else
1809 size = 1 + 32 / (int)sizeof(pcre_uchar);
1810 #endif
/* The number of words depends on the iterator which follows the class;
only 1 and 2 are expected here. */
1811 if (PRIVATE_DATA(cc))
1812 switch(get_class_iterator_size(cc + size))
1813 {
1814 case 1:
1815 count = 1;
1816 srcw[0] = PRIVATE_DATA(cc);
1817 break;
1818
1819 case 2:
1820 count = 2;
1821 srcw[0] = PRIVATE_DATA(cc);
1822 srcw[1] = srcw[0] + sizeof(sljit_sw);
1823 break;
1824
1825 default:
1826 SLJIT_ASSERT_STOP();
1827 break;
1828 }
1829 cc += size;
1830 break;
1831
1832 default:
1833 cc = next_opcode(common, cc);
1834 SLJIT_ASSERT(cc != NULL);
1835 break;
1836 }
1837 break;
1838
1839 case end:
1840 SLJIT_ASSERT_STOP();
1841 break;
1842 }
1843
/* The selected locals are processed from the highest index down:
srcw[1] (if any) first, then srcw[0]. */
1844 while (count > 0)
1845 {
1846 count--;
1847 if (save)
1848 {
/* Saving: the value still held by the register is stored on the stack
first, then the register is reloaded from the next local. */
1849 if (tmp1next)
1850 {
1851 if (!tmp1empty)
1852 {
1853 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1854 stackptr += sizeof(sljit_sw);
1855 }
1856 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1857 tmp1empty = FALSE;
1858 tmp1next = FALSE;
1859 }
1860 else
1861 {
1862 if (!tmp2empty)
1863 {
1864 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1865 stackptr += sizeof(sljit_sw);
1866 }
1867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1868 tmp2empty = FALSE;
1869 tmp1next = TRUE;
1870 }
1871 }
1872 else
1873 {
/* Restoring: the register is written into the local, then refilled
from the stack unless the last slot has already been read. */
1874 if (tmp1next)
1875 {
1876 SLJIT_ASSERT(!tmp1empty);
1877 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1878 tmp1empty = stackptr >= stacktop;
1879 if (!tmp1empty)
1880 {
1881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1882 stackptr += sizeof(sljit_sw);
1883 }
1884 tmp1next = FALSE;
1885 }
1886 else
1887 {
1888 SLJIT_ASSERT(!tmp2empty);
1889 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1890 tmp2empty = stackptr >= stacktop;
1891 if (!tmp2empty)
1892 {
1893 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1894 stackptr += sizeof(sljit_sw);
1895 }
1896 tmp1next = TRUE;
1897 }
1898 }
1899 }
1900 }
1901 while (status != end);
1902
1903 if (save)
1904 {
/* Flush the values still held in the registers, oldest first: the
register which would be used next holds the older value. */
1905 if (tmp1next)
1906 {
1907 if (!tmp1empty)
1908 {
1909 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1910 stackptr += sizeof(sljit_sw);
1911 }
1912 if (!tmp2empty)
1913 {
1914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1915 stackptr += sizeof(sljit_sw);
1916 }
1917 }
1918 else
1919 {
1920 if (!tmp2empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 if (!tmp1empty)
1926 {
1927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1928 stackptr += sizeof(sljit_sw);
1929 }
1930 }
1931 }
/* Every slot between the initial stackptr and stacktop must have been
used, and when restoring both registers must have been drained. */
1932 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1933 }
1934
1935 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1936 {
1937 pcre_uchar *end = bracketend(cc);
1938 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1939
1940 /* Assert captures then. */
1941 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1942 current_offset = NULL;
1943 /* Conditional block does not. */
1944 if (*cc == OP_COND || *cc == OP_SCOND)
1945 has_alternatives = FALSE;
1946
1947 cc = next_opcode(common, cc);
1948 if (has_alternatives)
1949 current_offset = common->then_offsets + (cc - common->start);
1950
1951 while (cc < end)
1952 {
1953 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1954 cc = set_then_offsets(common, cc, current_offset);
1955 else
1956 {
1957 if (*cc == OP_ALT && has_alternatives)
1958 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1959 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1960 *current_offset = 1;
1961 cc = next_opcode(common, cc);
1962 }
1963 }
1964
1965 return end;
1966 }
1967
/* Retire the case-label helper macros; they cannot be used past this point. */
1968 #undef CASE_ITERATOR_PRIVATE_DATA_1
1969 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1970 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1971 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1972 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1973 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1974
1975 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1976 {
1977 return (value & (value - 1)) == 0;
1978 }
1979
1980 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1981 {
1982 while (list)
1983 {
1984 /* sljit_set_label is clever enough to do nothing
1985 if either the jump or the label is NULL. */
1986 SET_LABEL(list->jump, label);
1987 list = list->next;
1988 }
1989 }
1990
1991 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1992 {
1993 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1994 if (list_item)
1995 {
1996 list_item->next = *list;
1997 list_item->jump = jump;
1998 *list = list_item;
1999 }
2000 }
2001
2002 static void add_stub(compiler_common *common, struct sljit_jump *start)
2003 {
2004 DEFINE_COMPILER;
2005 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2006
2007 if (list_item)
2008 {
2009 list_item->start = start;
2010 list_item->quit = LABEL();
2011 list_item->next = common->stubs;
2012 common->stubs = list_item;
2013 }
2014 }
2015
2016 static void flush_stubs(compiler_common *common)
2017 {
2018 DEFINE_COMPILER;
2019 stub_list* list_item = common->stubs;
2020
2021 while (list_item)
2022 {
2023 JUMPHERE(list_item->start);
2024 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2025 JUMPTO(SLJIT_JUMP, list_item->quit);
2026 list_item = list_item->next;
2027 }
2028 common->stubs = NULL;
2029 }
2030
2031 static SLJIT_INLINE void count_match(compiler_common *common)
2032 {
2033 DEFINE_COMPILER;
2034
2035 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2036 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2037 }
2038
2039 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2040 {
2041 /* May destroy all locals and registers except TMP2. */
2042 DEFINE_COMPILER;
2043
2044 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2045 #ifdef DESTROY_REGISTERS
2046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2047 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2048 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2051 #endif
2052 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2053 }
2054
2055 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2056 {
2057 DEFINE_COMPILER;
2058 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2059 }
2060
2061 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2062 {
2063 DEFINE_COMPILER;
2064 struct sljit_label *loop;
2065 int i;
2066
2067 /* At this point we can freely use all temporary registers. */
2068 SLJIT_ASSERT(length > 1);
2069 /* TMP1 returns with begin - 1. */
2070 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2071 if (length < 8)
2072 {
2073 for (i = 1; i < length; i++)
2074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2075 }
2076 else
2077 {
2078 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2079 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2080 loop = LABEL();
2081 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2082 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2083 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2084 }
2085 }
2086
2087 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2088 {
2089 DEFINE_COMPILER;
2090 struct sljit_label *loop;
2091 int i;
2092
2093 SLJIT_ASSERT(length > 1);
2094 /* OVECTOR(1) contains the "string begin - 1" constant. */
2095 if (length > 2)
2096 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2097 if (length < 8)
2098 {
2099 for (i = 2; i < length; i++)
2100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2101 }
2102 else
2103 {
2104 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2105 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2106 loop = LABEL();
2107 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2108 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2109 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2110 }
2111
2112 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2113 if (common->mark_ptr != 0)
2114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2115 if (common->control_head_ptr != 0)
2116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2117 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2119 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2120 }
2121
2122 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2123 {
2124 while (current != NULL)
2125 {
2126 switch (current[-2])
2127 {
2128 case type_then_trap:
2129 break;
2130
2131 case type_mark:
2132 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2133 return current[-4];
2134 break;
2135
2136 default:
2137 SLJIT_ASSERT_STOP();
2138 break;
2139 }
2140 current = (sljit_sw*)current[-1];
2141 }
2142 return -1;
2143 }
2144
/* Emits the code which publishes a successful match: the ovector locals are
converted to integer offsets and copied into the "offsets" array of the
jit_arguments structure, the mark local (if any) is copied to its mark_ptr
field, and the return value is placed in SLJIT_RETURN_REG.

Arguments:
  common      compiler state
  topbracket  used as the starting point of the backward scan which
              computes the return value (see below) */
2145 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2146 {
2147 DEFINE_COMPILER;
2148 struct sljit_label *loop;
2149 struct sljit_jump *early_quit;
2150
2151 /* At this point we can freely use all registers. */
/* The previous content of OVECTOR(1) is kept in SLJIT_SAVED_REG3 (it is
compared against in the second loop), then OVECTOR(1) receives STR_PTR. */
2152 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2154
2155 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2156 if (common->mark_ptr != 0)
2157 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
/* SLJIT_SCRATCH_REG2 = number of offsets to copy (loop counter). */
2158 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2159 if (common->mark_ptr != 0)
2160 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* SLJIT_SCRATCH_REG3 = offsets - sizeof(int): the store in the loop uses
SLJIT_MOVU_SI with a sizeof(int) displacement. */
2161 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* SLJIT_SCRATCH_REG1 = begin; SLJIT_SAVED_REG1 = address of the ovector
locals. */
2162 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2163 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2164 /* Unlikely, but possible */
2165 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2166 loop = LABEL();
/* Each entry is made relative to begin before it is stored. */
2167 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2168 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2169 /* Copy the integer value to the output buffer */
2170 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* In the 16 and 32 bit libraries the difference is shifted right by
UCHAR_SHIFT before it is stored. */
2171 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2172 #endif
2173 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2175 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2176 JUMPHERE(early_quit);
2177
2178 /* Calculate the return value, which is the maximum ovector value. */
2179 if (topbracket > 1)
2180 {
/* Scans the ovector pairs backwards, starting from the pair of
topbracket, and stops at the first one whose first entry differs from the
value saved in SLJIT_SAVED_REG3. The counter starts at topbracket + 1 and
is decremented once per examined pair. */
2181 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2182 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2183
2184 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2185 loop = LABEL();
2186 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2187 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2188 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2189 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2190 }
2191 else
2192 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2193 }
2194
/* Emits the code which returns with PCRE_ERROR_PARTIAL. If the
real_offset_count field of the arguments is at least 2, the first two
entries of the "offsets" array are filled in, and the third one as well when
real_offset_count is at least 3. All stored values are relative to "begin".
The emitted code always continues at the "quit" label.

Arguments:
  common  compiler state; start_used_ptr, start_ptr, hit_start and mode
          select which locals are read
  quit    label of the common exit code */
2195 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2196 {
2197 DEFINE_COMPILER;
2198 struct sljit_jump *jump;
2199
2200 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2201 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2202 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2203
2204 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2205 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
/* With fewer than two available entries nothing is stored. */
2206 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2207 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2208
2209 /* Store match begin and end. */
/* SLJIT_SAVED_REG1 = begin, SLJIT_SCRATCH_REG2 = offsets array. */
2210 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2211 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2212
/* offsets[2] is written only if at least three entries are available. Its
source is the start_ptr local in hard partial mode, otherwise the word
which follows the hit_start local. */
2213 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2214 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2215 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2216 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2217 #endif
2218 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2219 JUMPHERE(jump);
2220
/* offsets[1] = STR_END - begin. The source of offsets[0] is loaded first,
because the subtraction overwrites SLJIT_SAVED_REG2, which is the same
register as STR_END (see the compile time assertion above). */
2221 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2222 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2225 #endif
2226 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2227
/* offsets[0] = (start_used_ptr or hit_start local) - begin. */
2228 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2229 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2230 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2231 #endif
2232 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2233
2234 JUMPTO(SLJIT_JUMP, quit);
2235 }
2236
2237 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2238 {
2239 /* May destroy TMP1. */
2240 DEFINE_COMPILER;
2241 struct sljit_jump *jump;
2242
2243 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2244 {
2245 /* The value of -1 must be kept for start_used_ptr! */
2246 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2247 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2248 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2249 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251 JUMPHERE(jump);
2252 }
2253 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2254 {
2255 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2257 JUMPHERE(jump);
2258 }
2259 }
2260
2261 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2262 {
2263 /* Detects if the character has an othercase. */
2264 unsigned int c;
2265
2266 #ifdef SUPPORT_UTF
2267 if (common->utf)
2268 {
2269 GETCHAR(c, cc);
2270 if (c > 127)
2271 {
2272 #ifdef SUPPORT_UCP
2273 return c != UCD_OTHERCASE(c);
2274 #else
2275 return FALSE;
2276 #endif
2277 }
2278 #ifndef COMPILE_PCRE8
2279 return common->fcc[c] != c;
2280 #endif
2281 }
2282 else
2283 #endif
2284 c = *cc;
2285 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2286 }
2287
2288 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2289 {
2290 /* Returns with the othercase. */
2291 #ifdef SUPPORT_UTF
2292 if (common->utf && c > 127)
2293 {
2294 #ifdef SUPPORT_UCP
2295 return UCD_OTHERCASE(c);
2296 #else
2297 return c;
2298 #endif
2299 }
2300 #endif
2301 return TABLE_GET(c, common->fcc, c);
2302 }
2303
2304 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2305 {
2306 /* Detects if the character and its othercase has only 1 bit difference. */
2307 unsigned int c, oc, bit;
2308 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2309 int n;
2310 #endif
2311
2312 #ifdef SUPPORT_UTF
2313 if (common->utf)
2314 {
2315 GETCHAR(c, cc);
2316 if (c <= 127)
2317 oc = common->fcc[c];
2318 else
2319 {
2320 #ifdef SUPPORT_UCP
2321 oc = UCD_OTHERCASE(c);
2322 #else
2323 oc = c;
2324 #endif
2325 }
2326 }
2327 else
2328 {
2329 c = *cc;
2330 oc = TABLE_GET(c, common->fcc, c);
2331 }
2332 #else
2333 c = *cc;
2334 oc = TABLE_GET(c, common->fcc, c);
2335 #endif
2336
2337 SLJIT_ASSERT(c != oc);
2338
2339 bit = c ^ oc;
2340 /* Optimized for English alphabet. */
2341 if (c <= 127 && bit == 0x20)
2342 return (0 << 8) | 0x20;
2343
2344 /* Since c != oc, they must have at least 1 bit difference. */
2345 if (!is_powerof2(bit))
2346 return 0;
2347
2348 #if defined COMPILE_PCRE8
2349
2350 #ifdef SUPPORT_UTF
2351 if (common->utf && c > 127)
2352 {
2353 n = GET_EXTRALEN(*cc);
2354 while ((bit & 0x3f) == 0)
2355 {
2356 n--;
2357 bit >>= 6;
2358 }
2359 return (n << 8) | bit;
2360 }
2361 #endif /* SUPPORT_UTF */
2362 return (0 << 8) | bit;
2363
2364 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2365
2366 #ifdef SUPPORT_UTF
2367 if (common->utf && c > 65535)
2368 {
2369 if (bit >= (1 << 10))
2370 bit >>= 10;
2371 else
2372 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2373 }
2374 #endif /* SUPPORT_UTF */
2375 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2376
2377 #endif /* COMPILE_PCRE[8|16|32] */
2378 }
2379
2380 static void check_partial(compiler_common *common, BOOL force)
2381 {
2382 /* Checks whether a partial matching is occurred. Does not modify registers. */
2383 DEFINE_COMPILER;
2384 struct sljit_jump *jump = NULL;
2385
2386 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2387
2388 if (common->mode == JIT_COMPILE)
2389 return;
2390
2391 if (!force)
2392 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2393 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2394 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2395
2396 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2398 else
2399 {
2400 if (common->partialmatchlabel != NULL)
2401 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2402 else
2403 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2404 }
2405
2406 if (jump != NULL)
2407 JUMPHERE(jump);
2408 }
2409
2410 static void check_str_end(compiler_common *common, jump_list **end_reached)
2411 {
2412 /* Does not affect registers. Usually used in a tight spot. */
2413 DEFINE_COMPILER;
2414 struct sljit_jump *jump;
2415
2416 if (common->mode == JIT_COMPILE)
2417 {
2418 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2419 return;
2420 }
2421
2422 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2423 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2424 {
2425 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2426 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2427 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2428 }
2429 else
2430 {
2431 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2432 if (common->partialmatchlabel != NULL)
2433 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2434 else
2435 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2436 }
2437 JUMPHERE(jump);
2438 }
2439
2440 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2441 {
2442 DEFINE_COMPILER;
2443 struct sljit_jump *jump;
2444
2445 if (common->mode == JIT_COMPILE)
2446 {
2447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2448 return;
2449 }
2450
2451 /* Partial matching mode. */
2452 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2453 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2454 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2455 {
2456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2457 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2458 }
2459 else
2460 {
2461 if (common->partialmatchlabel != NULL)
2462 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2463 else
2464 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2465 }
2466 JUMPHERE(jump);
2467 }
2468
2469 static void peek_char(compiler_common *common, pcre_uint32 max)
2470 {
2471 /* Reads the character into TMP1, keeps STR_PTR.
2472 Does not check STR_END. TMP2 Destroyed. */
2473 DEFINE_COMPILER;
2474 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475 struct sljit_jump *jump;
2476 #endif
2477
2478 SLJIT_UNUSED_ARG(max);
2479
2480 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2481 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2482 if (common->utf)
2483 {
2484 if (max < 128) return;
2485
2486 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2488 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2489 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2490 JUMPHERE(jump);
2491 }
2492 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2493
2494 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2495 if (common->utf)
2496 {
2497 if (max < 0xd800) return;
2498
2499 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2500 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2501 /* TMP2 contains the high surrogate. */
2502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2503 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2504 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2505 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2506 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2507 JUMPHERE(jump);
2508 }
2509 #endif
2510 }
2511
2512 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2513
2514 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2515 {
2516 /* Tells whether the character codes below 128 are enough
2517 to determine a match. */
2518 const pcre_uint8 value = nclass ? 0xff : 0;
2519 const pcre_uint8* end = bitset + 32;
2520
2521 bitset += 16;
2522 do
2523 {
2524 if (*bitset++ != value)
2525 return FALSE;
2526 }
2527 while (bitset < end);
2528 return TRUE;
2529 }
2530
2531 static void read_char7_type(compiler_common *common, BOOL full_read)
2532 {
2533 /* Reads the precise character type of a character into TMP1, if the character
2534 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2535 full_read argument tells whether characters above max are accepted or not. */
/* Only valid in UTF mode (see the assert). TMP2 is overwritten with the first
code unit, and with an utf8_table4 entry when full_read is set and the code
unit is a lead byte (>= 0xc0). */
2536 DEFINE_COMPILER;
2537 struct sljit_jump *jump;
2538
2539 SLJIT_ASSERT(common->utf);
2540
/* Load the first code unit and step over it. */
2541 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2543
/* The type is fetched from the ctypes table, indexed by the code unit. */
2544 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545
2546 if (full_read)
2547 {
/* For lead bytes the utf8_table4 entry (indexed by lead byte - 0xc0) is
added to STR_PTR; presumably this is the number of continuation bytes. */
2548 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2549 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2550 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2551 JUMPHERE(jump);
2552 }
2553 }
2554
2555 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2556
2557 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2558 {
2559 /* Reads the precise value of a character into TMP1, if the character is
2560 between min and max (c >= min && c <= max). Otherwise it returns with a value
2561 outside the range. Does not check STR_END. */
/* STR_PTR is always advanced past the first code unit. When update_str_ptr
is set, the emitted code also steps over the remaining code units of the
character on every path; otherwise only the paths which decode the character
do so. TMP2 is used as a temporary by the UTF paths, and RETURN_ADDR by some
of the UTF-8 paths. */
2562 DEFINE_COMPILER;
2563 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2564 struct sljit_jump *jump;
2565 #endif
2566 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2567 struct sljit_jump *jump2;
2568 #endif
2569
2570 SLJIT_UNUSED_ARG(update_str_ptr);
2571 SLJIT_UNUSED_ARG(min);
2572 SLJIT_UNUSED_ARG(max);
2573 SLJIT_ASSERT(min <= max);
2574
/* Load the first code unit and step over it. */
2575 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2576 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2577
2578 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2579 if (common->utf)
2580 {
/* The first byte alone is sufficient if only values below 128 matter and
STR_PTR does not need to be moved to the end of the character. */
2581 if (max < 128 && !update_str_ptr) return;
2582
/* Bytes below 0xc0 are not lead bytes: nothing more to do for them. */
2583 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2584 if (min >= 0x10000)
2585 {
/* Only lead bytes 0xf0..0xf7 are decoded (the unsigned comparison of
TMP1 - 0xf0 against 7 rejects everything else); three more bytes are
combined with the lead byte. With update_str_ptr the utf8_table4 entry of
the lead byte is kept in RETURN_ADDR and added to STR_PTR at the end. */
2586 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2587 if (update_str_ptr)
2588 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2589 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2590 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2591 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2592 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2593 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2594 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2595 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2596 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2597 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2598 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2599 if (!update_str_ptr)
2600 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2601 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2602 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2603 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2604 JUMPHERE(jump2);
2605 if (update_str_ptr)
2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2607 }
2608 else if (min >= 0x800 && max <= 0xffff)
2609 {
/* Same scheme for lead bytes 0xe0..0xef, with two more bytes combined. */
2610 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2611 if (update_str_ptr)
2612 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2613 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2614 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2615 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2616 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2617 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2618 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2619 if (!update_str_ptr)
2620 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2621 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2622 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2623 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2624 JUMPHERE(jump2);
2625 if (update_str_ptr)
2626 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2627 }
/* General case: call the shared out-of-line decoder. The utfreadchar16
variant is selected when max is below 0x10000. */
2628 else if (max >= 0x800)
2629 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2630 else if (max < 128)
2631 {
/* Because of the early return above, update_str_ptr is set here: the value
is not decoded, only the utf8_table4 entry is added to STR_PTR. */
2632 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2634 }
2635 else
2636 {
/* max is between 128 and 0x7ff: combine the lead byte with one more byte. */
2637 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2638 if (!update_str_ptr)
2639 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2640 else
2641 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2642 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2643 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2644 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2645 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2646 if (update_str_ptr)
2647 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2648 }
2649 JUMPHERE(jump);
2650 }
2651 #endif
2652
2653 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2654 if (common->utf)
2655 {
2656 if (max >= 0x10000)
2657 {
/* Full decoding: if the first unit is in 0xd800..0xdbff, the next unit
(STR_PTR has already been advanced, so it is at offset 0) is combined with
it and STR_PTR is advanced once more. */
2658 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2659 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2660 /* TMP2 contains the high surrogate. */
2661 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2662 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2663 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2665 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2666 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2667 JUMPHERE(jump);
2668 return;
2669 }
2670
/* A single code unit is sufficient if only values below 0xd800 matter and
STR_PTR does not need to be moved to the end of the character. */
2671 if (max < 0xd800 && !update_str_ptr) return;
2672
2673 /* Skip low surrogate if necessary. */
/* The character is not decoded here. When max >= 0xd800, TMP1 is replaced by
0x10000 for a high surrogate, which is above max on this path. */
2674 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2675 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2676 if (update_str_ptr)
2677 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2678 if (max >= 0xd800)
2679 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2680 JUMPHERE(jump);
2681 }
2682 #endif
2683 }
2684
2685 static SLJIT_INLINE void read_char(compiler_common *common)
2686 {
2687 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2688 }
2689
2690 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2691 {
2692 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the ctypes table; it is zero for values above 255,
which have no entry there. TMP2 is used as a temporary. */
2693 DEFINE_COMPILER;
2694 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2695 struct sljit_jump *jump;
2696 #endif
2697 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2698 struct sljit_jump *jump2;
2699 #endif
2700
2701 SLJIT_UNUSED_ARG(update_str_ptr);
2702
/* Load the first code unit into TMP2 and step over it. */
2703 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2704 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2705
2706 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2707 if (common->utf)
2708 {
2709 /* This can be an extra read in some situations, but hopefully
2710 it is needed in most cases. */
2711 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2712 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2713 if (!update_str_ptr)
2714 {
/* Inline path: the lead byte is combined with exactly one following byte
(and STR_PTR steps over that byte). The type is zero if the combined value
is above 255. */
2715 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2717 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2718 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2719 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2720 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2722 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2723 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2724 JUMPHERE(jump2);
2725 }
2726 else
/* Otherwise the shared utfreadtype8 helper is called. */
2727 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2728 JUMPHERE(jump);
2729 return;
2730 }
2731 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2732
2733 #if !defined COMPILE_PCRE8
2734 /* The ctypes array contains only 256 values. */
2735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2736 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2737 #endif
2738 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2739 #if !defined COMPILE_PCRE8
2740 JUMPHERE(jump);
2741 #endif
2742
2743 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2744 if (common->utf && update_str_ptr)
2745 {
2746 /* Skip low surrogate if necessary. */
/* STR_PTR is advanced once more if the first unit is in 0xd800..0xdbff. */
2747 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2748 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2749 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2750 JUMPHERE(jump);
2751 }
2752 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2753 }
2754
2755 static void skip_char_back(compiler_common *common)
2756 {
2757 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2758 DEFINE_COMPILER;
2759 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2760 #if defined COMPILE_PCRE8
2761 struct sljit_label *label;
2762
2763 if (common->utf)
2764 {
/* Step back one byte at a time while the byte just stepped over has the
form 10xxxxxx (its top two bits equal 0x80). */
2765 label = LABEL();
2766 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2767 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2768 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2769 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2770 return;
2771 }
2772 #elif defined COMPILE_PCRE16
2773 if (common->utf)
2774 {
2775 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2776 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2777 /* Skip low surrogate if necessary. */
/* Branch free form: TMP1 becomes 1 if the unit just stepped over is in
0xdc00..0xdfff, and is shifted left by one before being subtracted from
STR_PTR (presumably the size of one 16 bit code unit in bytes). */
2778 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2779 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2780 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2781 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2782 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2783 return;
2784 }
2785 #endif /* COMPILE_PCRE[8|16] */
2786 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Outside UTF-8 / UTF-16 mode a character is a single code unit. */
2787 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2788 }
2789
2790 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2791 {
2792 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2793 DEFINE_COMPILER;
2794 struct sljit_jump *jump;
2795
2796 if (nltype == NLTYPE_ANY)
2797 {
2798 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2799 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2800 }
2801 else if (nltype == NLTYPE_ANYCRLF)
2802 {
2803 if (jumpifmatch)
2804 {
2805 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2806 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2807 }
2808 else
2809 {
2810 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2811 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2812 JUMPHERE(jump);
2813 }
2814 }
2815 else
2816 {
2817 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2818 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2819 }
2820 }
2821
2822 #ifdef SUPPORT_UTF
2823
2824 #if defined COMPILE_PCRE8
2825 static void do_utfreadchar(compiler_common *common)
2826 {
2827 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2828 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emitted as a fast-call helper: the return address is kept in RETURN_ADDR.
On entry STR_PTR points to the byte after the first one; on return it has
been advanced by length - 1, i.e. past the whole character. */
2829 DEFINE_COMPILER;
2830 struct sljit_jump *jump;
2831
2832 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first two bytes. */
2833 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2835 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2836 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2837 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2838
2839 /* Searching for the first zero. */
/* Bit 0x800 of the combined value is bit 0x20 of the first byte. */
2840 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2841 jump = JUMP(SLJIT_C_NOT_ZERO);
2842 /* Two byte sequence. */
2843 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2845 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2846
2847 JUMPHERE(jump);
/* Clear the length marker bit and append the six payload bits of the
third byte. */
2848 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2849 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2850 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2851 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2852 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2853
/* Bit 0x10000 is now bit 0x10 of the first byte. */
2854 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2855 jump = JUMP(SLJIT_C_NOT_ZERO);
2856 /* Three byte sequence. */
2857 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2859 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2860
2861 /* Four byte sequence. */
2862 JUMPHERE(jump);
2863 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2864 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2865 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2867 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2868 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2870 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2871 }
2872
2873 static void do_utfreadchar16(compiler_common *common)
2874 {
2875 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2876 of the character (>= 0xc0). Return value in TMP1. */
/* Emitted as a fast-call helper: the return address is kept in RETURN_ADDR.
On entry STR_PTR points to the byte after the first one. Only two and three
byte sequences are decoded. TMP2 is used as a temporary and, unlike
do_utfreadchar, does not hold a length on return. */
2877 DEFINE_COMPILER;
2878 struct sljit_jump *jump;
2879
2880 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Combine the low six bits of the first two bytes. */
2881 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2882 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2883 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2884 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2885 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2886
2887 /* Searching for the first zero. */
2888 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2889 jump = JUMP(SLJIT_C_NOT_ZERO);
2890 /* Two byte sequence. */
2891 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2892 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2893
2894 JUMPHERE(jump);
/* Bit 0x400 of the combined value is bit 0x10 of the first byte. When it is
set, STR_PTR is advanced by one extra byte before the third byte is read.
NOTE(review): this presumably handles the first byte of a four byte sequence,
whose decoded value is of no interest to the callers of this helper -
confirm. */
2895 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2896 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2897 /* This code runs only in 8 bit mode. No need to shift the value. */
2898 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2899 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2900 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2901 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2902 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2903 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2904 /* Three byte sequence. */
2905 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2906 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2907 }
2908
2909 static void do_utfreadtype8(compiler_common *common)
2910 {
2911 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2912 of the character (>= 0xc0). Return value in TMP1. */
/* Emitted as a fast-call helper: the return address is kept in RETURN_ADDR.
On entry STR_PTR points to the byte after the first one; on return it has
been advanced past the rest of the character. The result is zero for every
character above 255. */
2913 DEFINE_COMPILER;
2914 struct sljit_jump *jump;
2915 struct sljit_jump *compare;
2916
2917 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2918
/* Bit 0x20 of the first byte is clear only for two byte sequences. */
2919 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2920 jump = JUMP(SLJIT_C_NOT_ZERO);
2921 /* Two byte sequence. */
2922 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2923 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2924 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2925 /* The upper 5 bits are known at this point. */
/* If they are above 3, the character is above 255 (3 << 6 | 0x3f == 255). */
2926 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2927 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2928 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2929 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2930 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2931 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2932
2933 JUMPHERE(compare);
2934 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2935 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2936
2937 /* We only have types for characters less than 256. */
/* Longer sequences: add the utf8_table4 entry of the first byte to STR_PTR
and return zero. */
2938 JUMPHERE(jump);
2939 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2941 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2942 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2943 }
2944
2945 #endif /* COMPILE_PCRE8 */
2946
2947 #endif /* SUPPORT_UTF */
2948
2949 #ifdef SUPPORT_UCP
2950
2951 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2952 #define UCD_BLOCK_MASK 127
2953 #define UCD_BLOCK_SHIFT 7
2954
2955 static void do_getucd(compiler_common *common)
2956 {
2957 /* Search the UCD record for the character comes in TMP1.
2958 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emitted as a fast-call helper: the return address is kept in RETURN_ADDR. */
2959 DEFINE_COMPILER;
2960
/* The shift amounts used below (UCD_BLOCK_SHIFT and 3) depend on these
sizes. */
2961 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2962
2963 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* First stage: a byte table indexed by the character divided by the block
size. */
2964 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2965 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Second stage: a table of 16 bit entries (index scaled by 2) indexed by
stage1 value * block size + offset of the character inside its block. */
2966 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2967 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2968 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2969 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2970 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* The stage2 value (left in TMP2) selects a ucd_record; it is scaled by 8,
the asserted record size, to load the chartype byte. */
2971 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2972 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2973 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2974 }
2975 #endif
2976
/* Emits the code which advances STR_PTR by one character and returns the
label of that code (mainloop). The first entry jumps over the advancing code,
because the start jump is bound at the very end. When firstline is set, code
which locates the end of the first line and stores it in the first_line_end
local is emitted in front of it. TMP1 and TMP2 are used as temporaries; TMP3
as well when firstline is set. */
2977 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2978 {
2979 DEFINE_COMPILER;
2980 struct sljit_label *mainloop;
2981 struct sljit_label *newlinelabel = NULL;
2982 struct sljit_jump *start;
2983 struct sljit_jump *end = NULL;
2984 struct sljit_jump *nl = NULL;
2985 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2986 struct sljit_jump *singlechar;
2987 #endif
2988 jump_list *newline = NULL;
2989 BOOL newlinecheck = FALSE;
2990 BOOL readuchar = FALSE;
2991
/* The extra newline handling below is emitted only when neither hascrorlf
nor firstline is set, and the newline is of the "any" or "anycrlf" type or
is a two character sequence (newline > 255). */
2992 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2993 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2994 newlinecheck = TRUE;
2995
2996 if (firstline)
2997 {
2998 /* Search for the end of the first line. */
2999 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 and restored after the search. */
3000 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3001
3002 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3003 {
/* Two character newline: loop until the previous code unit equals the high
byte and the current one the low byte of common->newline, or until STR_END
is reached. STR_PTR - 1 is stored as the end of the first line. */
3004 mainloop = LABEL();
3005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3006 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3007 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3008 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3009 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3010 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3011 JUMPHERE(end);
3012 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3013 }
3014 else
3015 {
/* Other newline types: the position is stored before each character is
read, so when a newline is found first_line_end holds the position of the
newline itself. If STR_END is reached, STR_PTR is stored instead. */
3016 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3017 mainloop = LABEL();
3018 /* Continual stores does not cause data dependency. */
3019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3020 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3021 check_newlinechar(common, common->nltype, &newline, TRUE);
3022 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3023 JUMPHERE(end);
3024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3025 set_jumps(newline, LABEL());
3026 }
3027
3028 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3029 }
3030
/* Bound at the end of this function: the first pass skips the advance. */
3031 start = JUMP(SLJIT_JUMP);
3032
3033 if (newlinecheck)
3034 {
/* Reached from the main loop when the current code unit equals the high byte
of common->newline. STR_PTR is advanced by one unit; unless STR_END has been
reached, it is advanced by one more unit when the next code unit equals the
low byte of common->newline. */
3035 newlinelabel = LABEL();
3036 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3037 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3038 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3040 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3041 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3042 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3043 #endif
3044 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3045 nl = JUMP(SLJIT_JUMP);
3046 }
3047
3048 mainloop = LABEL();
3049
3050 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current code unit is loaded into TMP1 only when it is needed: for the
newline check or for finding the length of a UTF character. */
3051 #ifdef SUPPORT_UTF
3052 if (common->utf) readuchar = TRUE;
3053 #endif
3054 if (newlinecheck) readuchar = TRUE;
3055
3056 if (readuchar)
3057 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3058
3059 if (newlinecheck)
3060 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3061
3062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3063 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3064 #if defined COMPILE_PCRE8
3065 if (common->utf)
3066 {
/* For lead bytes (>= 0xc0) the utf8_table4 entry is added to STR_PTR. */
3067 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3068 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3069 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3070 JUMPHERE(singlechar);
3071 }
3072 #elif defined COMPILE_PCRE16
3073 if (common->utf)
3074 {
/* TMP1 becomes 1 if the code unit is in 0xd800..0xdbff; it is shifted left
by one and added to STR_PTR, which skips the following code unit. */
3075 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3076 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3077 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3078 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3079 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3080 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3081 JUMPHERE(singlechar);
3082 }
3083 #endif /* COMPILE_PCRE[8|16] */
3084 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3085 JUMPHERE(start);
3086
/* Both exits of the newline handling code continue here as well. */
3087 if (newlinecheck)
3088 {
3089 JUMPHERE(end);
3090 JUMPHERE(nl);
3091 }
3092
3093 return mainloop;
3094 }
3095
/* Walks the compiled pattern starting at cc and records what is known about
the first characters (code units) every match must start with.

Each position uses two consecutive entries of chars: chars[0] is the
character value with all uncertain bits set, chars[1] is the mask of the
uncertain bits. A position whose value is still NOTACHAR is taken over as is;
otherwise the new character is merged into the existing one by adding every
differing bit to the mask. Because the recursive calls for alternatives write
into the same array, this merge is how alternatives are combined.

At most max_chars positions are written. Returns the number of positions
counted as consumed. Note that the position which uses up the last of
max_chars is written but returned before being counted. */
3096 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3097 {
3098 /* Recursive function, which scans prefix literals. */
3099 int len, repeat, len_save, consumed = 0;
3100 pcre_uint32 caseless, chr, mask;
3101 pcre_uchar *alternative, *cc_save;
3102 BOOL last, any;
3103
/* repeat is the number of times the next item has to be recorded. It is
raised by OP_EXACT / OP_TYPEEXACT and reset to 1 after every recorded item. */
3104 repeat = 1;
3105 while (TRUE)
3106 {
/* last: stop scanning after this item has been recorded.
any: the item matches a set of characters, so nothing is known about it.
caseless: non-zero when the literal is matched caselessly. */
3107 last = TRUE;
3108 any = FALSE;
3109 caseless = 0;
3110 switch (*cc)
3111 {
3112 case OP_CHARI:
3113 caseless = 1;
/* Fall through. */
3114 case OP_CHAR:
3115 last = FALSE;
3116 cc++;
3117 break;
3118
3119 case OP_SOD:
3120 case OP_SOM:
3121 case OP_SET_SOM:
3122 case OP_NOT_WORD_BOUNDARY:
3123 case OP_WORD_BOUNDARY:
3124 case OP_EODN:
3125 case OP_EOD:
3126 case OP_CIRC:
3127 case OP_CIRCM:
3128 case OP_DOLL:
3129 case OP_DOLLM:
3130 /* Zero width assertions. */
3131 cc++;
3132 continue;
3133
/* One-or-more repeats: the literal is recorded once and, since last stays
TRUE, the scan ends right after it. */
3134 case OP_PLUS:
3135 case OP_MINPLUS:
3136 case OP_POSPLUS:
3137 cc++;
3138 break;
3139
3140 case OP_EXACTI:
3141 caseless = 1;
/* Fall through. */
3142 case OP_EXACT:
/* The literal is recorded GET2(cc, 1) times and scanning goes on after it. */
3143 repeat = GET2(cc, 1);
3144 last = FALSE;
3145 cc += 1 + IMM2_SIZE;
3146 break;
3147
3148 case OP_PLUSI:
3149 case OP_MINPLUSI:
3150 case OP_POSPLUSI:
3151 caseless = 1;
3152 cc++;
3153 break;
3154
3155 case OP_KET:
3156 cc += 1 + LINK_SIZE;
3157 continue;
3158
3159 case OP_ALT:
/* Follow the link stored after the opcode; presumably this leads to the next
OP_ALT or the closing OP_KET of the group - confirm against the opcode layout. */
3160 cc += GET(cc, 1);
3161 continue;
3162
3163 case OP_ONCE:
3164 case OP_ONCE_NC:
3165 case OP_BRA:
3166 case OP_BRAPOS:
3167 case OP_CBRA:
3168 case OP_CBRAPOS:
/* Every further alternative of the group is scanned recursively into the
same chars array. The value returned for an alternative becomes the new
budget, so the remaining scan never goes beyond what that alternative
provided; an alternative that provides nothing ends the scan. */
3169 alternative = cc + GET(cc, 1);
3170 while (*alternative == OP_ALT)
3171 {
3172 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3173 if (max_chars == 0)
3174 return consumed;
3175 alternative += GET(alternative, 1);
3176 }
3177
/* Step into the first alternative; capturing brackets carry an extra
IMM2_SIZE units after the link. */
3178 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3179 cc += IMM2_SIZE;
3180 cc += 1 + LINK_SIZE;
3181 continue;
3182
3183 case OP_CLASS:
3184 case OP_NCLASS:
/* Skip the opcode and the 32 byte bitmap that follows it. */
3185 any = TRUE;
3186 cc += 1 + 32 / sizeof(pcre_uchar);
3187 break;
3188
3189 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3190 case OP_XCLASS:
3191 any = TRUE;
3192 cc += GET(cc, 1);
3193 break;
3194 #endif
3195
3196 case OP_NOT_DIGIT:
3197 case OP_DIGIT:
3198 case OP_NOT_WHITESPACE:
3199 case OP_WHITESPACE:
3200 case OP_NOT_WORDCHAR:
3201 case OP_WORDCHAR:
3202 case OP_ANY:
3203 case OP_ALLANY:
3204 any = TRUE;
3205 cc++;
3206 break;
3207
3208 #ifdef SUPPORT_UCP
3209 case OP_NOTPROP:
3210 case OP_PROP:
3211 any = TRUE;
3212 cc += 1 + 2;
3213 break;
3214 #endif
3215
3216 case OP_TYPEEXACT:
/* Only sets the repeat count; the character type opcode that follows is
handled by the next iteration. */
3217 repeat = GET2(cc, 1);
3218 cc += 1 + IMM2_SIZE;
3219 continue;
3220
3221 default:
/* Any other opcode ends the prefix. */
3222 return consumed;
3223 }
3224
3225 if (any)
3226 {
/* A character set may match characters of different lengths in UTF mode,
so later positions would be unknown: stop here. */
3227 #ifdef SUPPORT_UTF
3228 if (common->utf) return consumed;
3229 #endif
3230 #if defined COMPILE_PCRE8
3231 mask = 0xff;
3232 #elif defined COMPILE_PCRE16
3233 mask = 0xffff;
3234 #elif defined COMPILE_PCRE32
3235 mask = 0xffffffff;
3236 #else
3237 SLJIT_ASSERT_STOP();
3238 #endif
3239
/* Mark repeat positions as "every bit uncertain". */
3240 do
3241 {
3242 chars[0] = mask;
3243 chars[1] = mask;
3244
3245 if (--max_chars == 0)
3246 return consumed;
3247 consumed++;
3248 chars += 2;
3249 }
3250 while (--repeat > 0);
3251
3252 repeat = 1;
3253 continue;
3254 }
3255
/* A literal character: len is its length in code units. */
3256 len = 1;
3257 #ifdef SUPPORT_UTF
3258 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3259 #endif
3260
3261 if (caseless != 0 && char_has_othercase(common, cc))
3262 {
/* Repack the helper's result for the copy loop below: the low byte becomes
the value of len at which the code unit holding the case bit is reached, the
bits above it become the mask to apply to that code unit. A zero result from
the helper ends the scan. NOTE(review): the exact return format of
char_get_othercase_bit is not visible here - confirm. */
3263 caseless = char_get_othercase_bit(common, cc);
3264 if (caseless == 0)
3265 return consumed;
3266 #ifdef COMPILE_PCRE8
3267 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3268 #else
3269 if ((caseless & 0x100) != 0)
3270 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3271 else
3272 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3273 #endif
3274 }
3275 else
3276 caseless = 0;
3277
/* Record the code units of the character, repeat times. */
3278 len_save = len;
3279 cc_save = cc;
3280 while (TRUE)
3281 {
3282 do
3283 {
3284 chr = *cc;
3285 #ifdef COMPILE_PCRE32
3286 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3287 return consumed;
3288 #endif
3289 mask = 0;
/* This is the code unit that differs between the two cases. */
3290 if ((pcre_uint32)len == (caseless & 0xff))
3291 {
3292 mask = caseless >> 8;
3293 chr |= mask;
3294 }
3295
3296 if (chars[0] == NOTACHAR)
3297 {
/* First character seen at this position. */
3298 chars[0] = chr;
3299 chars[1] = mask;
3300 }
3301 else
3302 {
/* Merge with the character recorded earlier: every bit in which the two
differ becomes uncertain. */
3303 mask |= chars[0] ^ chr;
3304 chr |= mask;
3305 chars[0] = chr;
3306 chars[1] |= mask;
3307 }
3308
3309 len--;
3310 if (--max_chars == 0)
3311 return consumed;
3312 consumed++;
3313 chars += 2;
3314 cc++;
3315 }
3316 while (len > 0);
3317
3318 if (--repeat == 0)
3319 break;
3320
/* Rewind to the start of the character for the next repetition. */
3321 len = len_save;
3322 cc = cc_save;
3323 }
3324
3325 repeat = 1;
3326 if (last)
3327 return consumed;
3328 }
3329 }
3330
3331 #define MAX_N_CHARS 16
3332
3333 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3334 {
3335 DEFINE_COMPILER;
3336 struct sljit_label *start;
3337 struct sljit_jump *quit;
3338 pcre_uint32 chars[MAX_N_CHARS * 2];
3339 pcre_uint8 ones[MAX_N_CHARS];
3340 pcre_uint32 mask;
3341 int i, max;
3342 int offsets[3];
3343
3344 for (i = 0; i < MAX_N_CHARS; i++)
3345 {
3346 chars[i << 1] = NOTACHAR;
3347 chars[(i << 1) + 1] = 0;
3348 }
3349
3350 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3351
3352 if (max <= 1)
3353 return FALSE;
3354
3355 for (i = 0; i < max; i++)
3356 {
3357 mask = chars[(i << 1) + 1];
3358 ones[i] = ones_in_half_byte[mask & 0xf];
3359 mask >>= 4;
3360 while (mask != 0)
3361 {
3362 ones[i] += ones_in_half_byte[mask & 0xf];
3363 mask >>= 4;
3364 }
3365 }
3366
3367 offsets[0] = -1;
3368 /* Scan forward. */
3369 for (i = 0; i < max; i++)
3370 if (ones[i] <= 2) {
3371 offsets[0] = i;
3372 break;
3373 }
3374
3375 if (offsets[0] == -1)
3376 return FALSE;
3377
3378 /* Scan backward. */
3379 offsets[1] = -1;
3380 for (i = max - 1; i > offsets[0]; i--)
3381 if (ones[i] <= 2) {
3382 offsets[1] = i;
3383 break;
3384 }
3385
3386 offsets[2] = -1;
3387 if (offsets[1] >= 0)
3388 {
3389 /* Scan from middle. */
3390 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3391 if (ones[i] <= 2)
3392 {
3393 offsets[2] = i;
3394 break;
3395 }
3396
3397 if (offsets[2] == -1)
3398 {
3399 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3400 if (ones[i] <= 2)
3401 {
3402 offsets[2] = i;
3403 break;
3404 }
3405 }
3406 }
3407
3408 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3409 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3410
3411 chars[0] = chars[offsets[0] << 1];
3412 chars[1] = chars[(offsets[0] << 1) + 1];
3413 if (offsets[2] >= 0)
3414 {
3415 chars[2] = chars[offsets[2] << 1];
3416 chars[3] = chars[(offsets[2] << 1) + 1];
3417 }
3418 if (offsets[1] >= 0)
3419 {
3420 chars[4] = chars[offsets[1] << 1];
3421 chars[5] = chars[(offsets[1] << 1) + 1];
3422 }
3423
3424 max -= 1;
3425 if (firstline)
3426 {
3427 SLJIT_ASSERT(common->first_line_end != 0);
3428 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3429 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3430 }
3431 else
3432 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3433
3434 start = LABEL();
3435 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3436
3437 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3438 if (offsets[1] >= 0)
3439 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3440 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3441
3442 if (chars[1] != 0)
3443 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3444 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3445 if (offsets[2] >= 0)
3446 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3447
3448 if (offsets[1] >= 0)
3449 {
3450 if (chars[5] != 0)
3451 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3452 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3453 }
3454
3455 if (offsets[2] >= 0)
3456 {
3457 if (chars[3] != 0)
3458 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3459 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3460 }
3461 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3462
3463 JUMPHERE(quit);
3464
3465 if (firstline)
3466 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3467 else
3468 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3469 return TRUE;
3470 }
3471
3472 #undef MAX_N_CHARS
3473
3474 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3475 {
3476 DEFINE_COMPILER;
3477 struct sljit_label *start;
3478 struct sljit_jump *quit;
3479 struct sljit_jump *found;
3480 pcre_uchar oc, bit;
3481
3482 if (firstline)
3483 {
3484 SLJIT_ASSERT(common->first_line_end != 0);
3485 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3486 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3487 }
3488
3489 start = LABEL();
3490 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3491 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3492
3493 oc = first_char;
3494 if (caseless)
3495 {
3496 oc = TABLE_GET(first_char, common->fcc, first_char);
3497 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3498 if (first_char > 127 && common->utf)
3499 oc = UCD_OTHERCASE(first_char);
3500 #endif
3501 }
3502 if (first_char == oc)
3503 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3504 else
3505 {
3506 bit = first_char ^ oc;
3507 if (is_powerof2(bit))
3508 {
3509 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3510 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3511 }
3512 else
3513 {
3514 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3515 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3516 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3517 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3518 found = JUMP(SLJIT_C_NOT_ZERO);
3519 }
3520 }
3521
3522 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3523 JUMPTO(SLJIT_JUMP, start);
3524 JUMPHERE(found);
3525 JUMPHERE(quit);
3526
3527 if (firstline)
3528 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3529 }
3530
3531 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3532 {
3533 DEFINE_COMPILER;
3534 struct sljit_label *loop;
3535 struct sljit_jump *lastchar;
3536 struct sljit_jump *firstchar;
3537 struct sljit_jump *quit;
3538 struct sljit_jump *foundcr = NULL;
3539 struct sljit_jump *notfoundnl;
3540 jump_list *newline = NULL;
3541
3542 if (firstline)
3543 {
3544 SLJIT_ASSERT(common->first_line_end != 0);
3545 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3546 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3547 }
3548
3549 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3550 {
3551 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3552 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3553 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3554 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3555 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3556
3557 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3558 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3559 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3560 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3561 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3562 #endif
3563 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3564
3565 loop = LABEL();
3566 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3567 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3569 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3570 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3571 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3572
3573 JUMPHERE(quit);
3574 JUMPHERE(firstchar);
3575 JUMPHERE(lastchar);
3576
3577 if (firstline)
3578 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3579 return;
3580 }
3581
3582 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3583 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3584 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3585 skip_char_back(common);
3586
3587 loop = LABEL();
3588 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3589 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3590 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3591 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3592 check_newlinechar(common, common->nltype, &newline, FALSE);
3593 set_jumps(newline, loop);
3594
3595 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3596 {
3597 quit = JUMP(SLJIT_JUMP);
3598 JUMPHERE(foundcr);
3599 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3600 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3601 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3602 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3603 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3604 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3605 #endif
3606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3607 JUMPHERE(notfoundnl);
3608 JUMPHERE(quit);
3609 }
3610 JUMPHERE(lastchar);
3611 JUMPHERE(firstchar);
3612
3613 if (firstline)
3614 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3615 }
3616
3617 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3618
/* Emits a loop which advances STR_PTR to the next character that is accepted
by the start_bits bitmap, or to STR_END when there is none.

check_class_ranges is tried first; when it can express the bitmap as a few
compare instructions, its jumps (requested with invert set, so they are taken
for accepted characters) leave the loop. Otherwise the bit of the character
is looked up in the bitmap itself.

When firstline is set the search is limited by the first_line_end local. The
original STR_END is kept in RETURN_ADDR rather than TMP3 here, because TMP3
holds a copy of the character in UTF mode. */
3619 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3620 {
3621 DEFINE_COMPILER;
3622 struct sljit_label *start;
3623 struct sljit_jump *quit;
3624 struct sljit_jump *found = NULL;
3625 jump_list *matches = NULL;
3626 #ifndef COMPILE_PCRE8
3627 struct sljit_jump *jump;
3628 #endif
3629
3630 if (firstline)
3631 {
3632 SLJIT_ASSERT(common->first_line_end != 0);
3633 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3634 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3635 }
3636
3637 start = LABEL();
3638 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3639 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The tests below modify TMP1, but the UTF skip code at the end needs the
original code unit again. */
3640 #ifdef SUPPORT_UTF
3641 if (common->utf)
3642 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3643 #endif
3644
/* The top bit of the bitmap (character 255) is passed as the nclass
argument. */
3645 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3646 {
/* Bitmap lookup. In the 16 and 32 bit libraries every value of 255 or above
is tested as character 255. */
3647 #ifndef COMPILE_PCRE8
3648 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3649 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3650 JUMPHERE(jump);
3651 #endif
/* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]. */
3652 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3653 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3654 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3655 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3656 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3657 found = JUMP(SLJIT_C_NOT_ZERO);
3658 }
3659
/* Not accepted: step over the whole character and try again. */
3660 #ifdef SUPPORT_UTF
3661 if (common->utf)
3662 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3663 #endif
3664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3665 #ifdef SUPPORT_UTF
3666 #if defined COMPILE_PCRE8
3667 if (common->utf)
3668 {
/* For a lead byte of 0xc0 or above, utf8_table4 supplies the number of
additional bytes to skip. */
3669 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3670 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3671 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3672 }
3673 #elif defined COMPILE_PCRE16
3674 if (common->utf)
3675 {
/* A code unit in the range 0xd800-0xdbff is followed by a second unit
(two more bytes) which is skipped as well. */
3676 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3677 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3678 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3679 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3680 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3681 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3682 }
3683 #endif /* COMPILE_PCRE[8|16] */
3684 #endif /* SUPPORT_UTF */
3685 JUMPTO(SLJIT_JUMP, start);
/* All ways out of the loop meet here. */
3686 if (found != NULL)
3687 JUMPHERE(found);
3688 if (matches != NULL)
3689 set_jumps(matches, LABEL());
3690 JUMPHERE(quit);
3691
3692 if (firstline)
3693 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3694 }
3695
3696 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3697 {
3698 DEFINE_COMPILER;
3699 struct sljit_label *loop;
3700 struct sljit_jump *toolong;
3701 struct sljit_jump *alreadyfound;
3702 struct sljit_jump *found;
3703 struct sljit_jump *foundoc = NULL;
3704 struct sljit_jump *notfound;
3705 pcre_uint32 oc, bit;
3706
3707 SLJIT_ASSERT(common->req_char_ptr != 0);
3708 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3709 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3710 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3711 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3712
3713 if (has_firstchar)
3714 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3715 else
3716 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3717
3718 loop = LABEL();
3719 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3720
3721 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3722 oc = req_char;
3723 if (caseless)
3724 {
3725 oc = TABLE_GET(req_char, common->fcc, req_char);
3726 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3727 if (req_char > 127 && common->utf)
3728 oc = UCD_OTHERCASE(req_char);
3729 #endif
3730 }
3731 if (req_char == oc)
3732 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3733 else
3734 {
3735 bit = req_char ^ oc;
3736 if (is_powerof2(bit))
3737 {
3738 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3739 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3740 }
3741 else
3742 {
3743 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3744 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3745 }
3746 }
3747 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3748 JUMPTO(SLJIT_JUMP, loop);
3749
3750 JUMPHERE(found);
3751 if (foundoc)
3752 JUMPHERE(foundoc);
3753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3754 JUMPHERE(alreadyfound);
3755 JUMPHERE(toolong);
3756 return notfound;
3757 }
3758
/* Emits a fast-call helper which walks the records that start at STACK_TOP
and writes saved values back relative to the local base (TMP3). The first
word of each record selects what happens:
  > 0 : the word is an offset from the local base; the next two words are
        copied to that location and the one after it (record of 3 words);
  = 0 : end of the records, the helper returns;
  < 0 : the negated word is an offset from the local base; the next word is
        copied there (record of 2 words).
TMP1 is the read pointer; TMP2 and TMP3 are used as scratch. STACK_TOP itself
is not changed by this helper. */
3759 static void do_revertframes(compiler_common *common)
3760 {
3761 DEFINE_COMPILER;
3762 struct sljit_jump *jump;
3763 struct sljit_label *mainloop;
3764
3765 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3766 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3767 GET_LOCAL_BASE(TMP3, 0, 0);
3768
3769 /* Drop frames until we reach STACK_TOP. */
3770 mainloop = LABEL();
3771 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the record header with zero. */
3772 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3773 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3774
/* Positive header: restore two words. */
3775 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3776 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3777 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3778 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3779 JUMPTO(SLJIT_JUMP, mainloop);
3780
3781 JUMPHERE(jump);
/* Tells zero from negative. NOTE(review): no flag-setting instruction is
emitted between the SUB above and this jump, so it presumably still tests the
flags of that SUB - confirm. */
3782 jump = JUMP(SLJIT_C_SIG_LESS);
3783 /* End of dropping frames. */
3784 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3785
3786 JUMPHERE(jump);
/* Negative header: restore one word. */
3787 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3788 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3789 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3790 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3791 JUMPTO(SLJIT_JUMP, mainloop);
3792 }
3793
/* Emits a fast-call helper for the word boundary assertions.

The helper determines whether the character before STR_PTR and the character
at STR_PTR are word characters and XORs the two results. The flags set by
that final XOR are its result: the outcome is non-zero when exactly one of
the two is a word character. The start of the subject (the begin field of the
jit arguments) and its end both count as "not a word character".

The return address is kept in LOCALS0 and the result for the previous
character in LOCALS1; TMP1 and TMP2 are used as scratch. */
3794 static void check_wordboundary(compiler_common *common)
3795 {
3796 DEFINE_COMPILER;
3797 struct sljit_jump *skipread;
3798 jump_list *skipread_list = NULL;
3799 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3800 struct sljit_jump *jump;
3801 #endif
3802
/* The non-UCP code below shifts the ctypes entry right by 4 and relies on
this value. */
3803 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3804
3805 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3806 /* Get type of the previous char, and put it to LOCALS1. */
3807 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3808 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* Nothing precedes the start of the subject: LOCALS1 stays 0. */
3810 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and read the previous character again, which leaves STR_PTR
where it was. */
3811 skip_char_back(common);
3812 check_start_used_ptr(common);
3813 read_char(common);
3814
3815 /* Testing char type. */
3816 #ifdef SUPPORT_UCP
3817 if (common->use_ucp)
3818 {
/* The underscore is a word character; otherwise the value left in TMP1 by
the getucd helper is tested against the ranges ucp_Ll..ucp_Lu and
ucp_Nd..ucp_No. NOTE(review): getucd presumably returns the Unicode
character type in TMP1 - confirm. */
3819 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3820 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3821 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3822 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3823 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3824 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3825 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3826 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3827 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3828 JUMPHERE(jump);
3829 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3830 }
3831 else
3832 #endif
3833 {
/* Table lookup: characters above 255 skip the lookup, so LOCALS1 stays 0
for them. */
3834 #ifndef COMPILE_PCRE8
3835 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3836 #elif defined SUPPORT_UTF
3837 /* Here LOCALS1 has already been zeroed. */
3838 jump = NULL;
3839 if (common->utf)
3840 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3841 #endif /* COMPILE_PCRE8 */
3842 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3843 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3844 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3845 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3846 #ifndef COMPILE_PCRE8
3847 JUMPHERE(jump);
3848 #elif defined SUPPORT_UTF
3849 if (jump != NULL)
3850 JUMPHERE(jump);
3851 #endif /* COMPILE_PCRE8 */
3852 }
3853 JUMPHERE(skipread);
3854
/* Now the character at STR_PTR; its result is built in TMP2, which stays 0
when the end of the subject has been reached. */
3855 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3856 check_str_end(common, &skipread_list);
3857 peek_char(common, READ_CHAR_MAX);
3858
3859 /* Testing char type. This is a code duplication. */
3860 #ifdef SUPPORT_UCP
3861 if (common->use_ucp)
3862 {
3863 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3864 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3865 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3866 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3867 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3868 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3869 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3870 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3871 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3872 JUMPHERE(jump);
3873 }
3874 else
3875 #endif
3876 {
3877 #ifndef COMPILE_PCRE8
3878 /* TMP2 may be destroyed by peek_char. */
3879 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3880 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3881 #elif defined SUPPORT_UTF
3882 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3883 jump = NULL;
3884 if (common->utf)
3885 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3886 #endif
3887 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3888 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3889 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3890 #ifndef COMPILE_PCRE8
3891 JUMPHERE(jump);
3892 #elif defined SUPPORT_UTF
3893 if (jump != NULL)
3894 JUMPHERE(jump);
3895 #endif /* COMPILE_PCRE8 */
3896 }
3897 set_jumps(skipread_list, LABEL());
3898
/* Combine both results; only the flags of the XOR are kept. */
3899 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3900 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3901 }
3902
/* Tries to test the character in TMP1 against the 256 bit class bitmap with
a few compare instructions instead of a bitmap lookup.

The bitmap is converted to a list of boundaries: the character codes at which
membership changes, starting from the membership of character 0. nclass gives
the membership assumed for characters of 256 and above; when it differs from
the membership of character 255, 256 is added as a final boundary.

Returns FALSE without emitting any code when more than four boundaries (or
more than MAX_RANGE_SIZE) would be needed; the caller then has to test the
bitmap in another way. Otherwise the test is emitted and TRUE is returned.

The emitted jumps are added to backtracks. With invert == FALSE they are
taken for characters outside the class, with invert == TRUE for characters
inside it. The emitted code may modify TMP1. */
3903 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3904 {
3905 DEFINE_COMPILER;
3906 int ranges[MAX_RANGE_SIZE];
3907 pcre_uint8 bit, cbit, all;
3908 int i, byte, length = 0;
3909
/* bit is the membership of the run being scanned; all is the value of a
bitmap byte whose eight characters all belong to that run (0x00 or 0xff). */
3910 bit = bits[0] & 0x1;
3911 /* All bits will be zero or one (since bit is zero or one). */
3912 all = -bit;
3913
3914 for (i = 0; i < 256; )
3915 {
3916 byte = i >> 3;
/* Whole bytes which continue the current run are skipped at once. */
3917 if ((i & 0x7) == 0 && bits[byte] == all)
3918 i += 8;
3919 else
3920 {
3921 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3922 if (cbit != bit)
3923 {
/* Membership changes at character i: record a boundary. */
3924 if (length >= MAX_RANGE_SIZE)
3925 return FALSE;
3926 ranges[length] = i;
3927 length++;
3928 bit = cbit;
3929 all = -cbit;
3930 }
3931 i++;
3932 }
3933 }
3934
/* Close the last run when characters from 256 upwards are treated
differently from character 255. */
3935 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3936 {
3937 if (length >= MAX_RANGE_SIZE)
3938 return FALSE;
3939 ranges[length] = 256;
3940 length++;
3941 }
3942
3943 if (length < 0 || length > 4)
3944 return FALSE;
3945
/* From here on bit tells which side takes the jump: it is the membership of
character 0, flipped when invert is set. */
3946 bit = bits[0] & 0x1;
3947 if (invert) bit ^= 0x1;
3948
3949 /* No character is accepted. */
3950 if (length == 0 && bit == 0)
3951 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3952
3953 switch(length)
3954 {
3955 case 0:
3956 /* When bit != 0, all characters are accepted. */
3957 return TRUE;
3958
3959 case 1:
/* A single boundary: one comparison against it. */
3960 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3961 return TRUE;
3962
3963 case 2:
/* One range [ranges[0], ranges[1]): an unsigned range check after
subtracting the lower bound, or an equality test for a single character. */
3964 if (ranges[0] + 1 != ranges[1])
3965 {
3966 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3967 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3968 }
3969 else
3970 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3971 return TRUE;
3972
3973 case 3:
/* Three boundaries: the jump is taken on one open ended side and inside one
range. */
3974 if (bit != 0)
3975 {
3976 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3977 if (ranges[0] + 1 != ranges[1])
3978 {
3979 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3980 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3981 }
3982 else
3983 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3984 return TRUE;
3985 }
3986
3987 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3988 if (ranges[1] + 1 != ranges[2])
3989 {
3990 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3991 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3992 }
3993 else
3994 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3995 return TRUE;
3996
3997 case 4:
/* Two ranges of equal length whose start points differ in exactly one bit:
OR that bit into the character and test the upper range only. */
3998 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3999 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4000 && is_powerof2(ranges[2] - ranges[0]))
4001 {
4002 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4003 if (ranges[2] + 1 != ranges[3])
4004 {
4005 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4006 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4007 }
4008 else
4009 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4010 return TRUE;
4011 }
4012
4013 if (bit != 0)
4014 {
/* Jump inside either range. i is the amount already subtracted from TMP1 by
the first test, so the second test can compensate for it. */
4015 i = 0;
4016 if (ranges[0] + 1 != ranges[1])
4017 {
4018 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4019 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4020 i = ranges[0];
4021 }
4022 else
4023 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4024
4025 if (ranges[2] + 1 != ranges[3])
4026 {
4027 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4028 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4029 }
4030 else
4031 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4032 return TRUE;
4033 }
4034
/* Jump outside [ranges[0], ranges[3]) and inside the gap between the two
ranges. */
4035 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4036 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4037 if (ranges[1] + 1 != ranges[2])
4038 {
4039 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4040 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4041 }
4042 else
4043 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4044 return TRUE;
4045
4046 default:
4047 SLJIT_ASSERT_STOP();
4048 return FALSE;
4049 }
4050 }
4051
/* Emits a fast-call helper which tests whether the character in TMP1 is one
of 0x0a-0x0d or 0x85, and additionally 0x2028 or 0x2029 in the 16 and 32 bit
libraries and in 8 bit UTF mode. The result is collected in TMP2 and in the
flags set by the final OP_FLAGS. */
4052 static void check_anynewline(compiler_common *common)
4053 {
4054 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
TMP1 is modified as well. */
4055 DEFINE_COMPILER;
4056
4057 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4058
/* Rebase TMP1 so that 0x0a-0x0d becomes 0-3 and can be tested with a single
unsigned comparison. */
4059 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4060 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4061 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4062 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4063 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4064 #ifdef COMPILE_PCRE8
4065 if (common->utf)
4066 {
4067 #endif
/* Record the 0x85 result, then test 0x2028 and 0x2029 together: setting the
lowest bit maps both rebased values to 0x2029 - 0x0a. */
4068 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4069 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4071 #ifdef COMPILE_PCRE8
4072 }
4073 #endif
4074 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and leave the overall result in the flags. */
4075 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4076 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4077 }
4078
/* Emits a fast-call helper which tests whether the character in TMP1 is one
of 0x09, 0x20 or 0xa0, and additionally 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f or 0x3000 in the 16 and 32 bit libraries and in 8 bit UTF
mode. The result is collected in TMP2 and in the flags set by the final
OP_FLAGS. */
4079 static void check_hspace(compiler_common *common)
4080 {
4081 /* Check whether TMP1 contains a horizontal whitespace character. TMP2
destroyed; TMP1 is modified in the wide character / UTF case. */
4082 DEFINE_COMPILER;
4083
4084 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4085
/* Each comparison sets the equal flag; OP_FLAGS accumulates it in TMP2. */
4086 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4087 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4088 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4089 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4090 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4091 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4092 #ifdef COMPILE_PCRE8
4093 if (common->utf)
4094 {
4095 #endif
4096 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4097 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4098 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4099 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4100 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 to 0x2000: the range 0x2000-0x200a becomes one unsigned
comparison and the remaining constants are given relative to 0x2000. */
4101 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4103 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4105 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4106 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4107 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4109 #ifdef COMPILE_PCRE8
4110 }
4111 #endif
4112 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and leave the overall result in the flags. */
4113 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4114
4115 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4116 }
4117
/* Emits a fast-call helper which tests whether the character in TMP1 is one
of 0x0a-0x0d or 0x85, and additionally 0x2028 or 0x2029 in the 16 and 32 bit
libraries and in 8 bit UTF mode. The result is collected in TMP2 and in the
flags set by the final OP_FLAGS. */
4118 static void check_vspace(compiler_common *common)
4119 {
4120 /* Check whether TMP1 contains a vertical whitespace character. TMP2
destroyed. TMP1 is modified as well. */
4121 DEFINE_COMPILER;
4122
4123 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4124
/* Rebase TMP1 so that 0x0a-0x0d becomes 0-3 and can be tested with a single
unsigned comparison. */
4125 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4126 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4127 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4128 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4129 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4130 #ifdef COMPILE_PCRE8
4131 if (common->utf)
4132 {
4133 #endif
/* Record the 0x85 result, then test 0x2028 and 0x2029 together: setting the
lowest bit maps both rebased values to 0x2029 - 0x0a. */
4134 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4135 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4136 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4137 #ifdef COMPILE_PCRE8
4138 }
4139 #endif
4140 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and leave the overall result in the flags. */
4141 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4142
4143 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4144 }
4145
4146 #define CHAR1 STR_END
4147 #define CHAR2 STACK_TOP
4148
4149 static void do_casefulcmp(compiler_common *common)
4150 {
4151 DEFINE_COMPILER;
4152 struct sljit_jump *jump;
4153 struct sljit_label *label;
4154
4155 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4156 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4157 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4159 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4160 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4161
4162 label = LABEL();
4163 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4164 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4165 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4166 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4167 JUMPTO(SLJIT_C_NOT_ZERO, label);
4168
4169 JUMPHERE(jump);
4170 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4171 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4172 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4173 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4174 }
4175
4176 #define LCC_TABLE STACK_LIMIT
4177
4178 static void do_caselesscmp(compiler_common *common)
4179 {
4180 DEFINE_COMPILER;
4181 struct sljit_jump *jump;
4182 struct sljit_label *label;
4183
4184 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4185 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4186
4187 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4190 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4191 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4192 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4193
4194 label = LABEL();
4195 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4196 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4197 #ifndef COMPILE_PCRE8
4198 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4199 #endif
4200 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4201 #ifndef COMPILE_PCRE8
4202 JUMPHERE(jump);
4203 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4204 #endif
4205 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4206 #ifndef COMPILE_PCRE8
4207 JUMPHERE(jump);
4208 #endif
4209 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4210 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4211 JUMPTO(SLJIT_C_NOT_ZERO, label);
4212
4213 JUMPHERE(jump);
4214 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4215 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4216 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4217 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4218 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4219 }
4220
4221 #undef LCC_TABLE
4222 #undef CHAR1
4223 #undef CHAR2
4224
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings, done in C because generating the
equivalent machine code would not pay off.

Arguments:
  src1    start of the first string
  args    supplies the second string: args->uchar_ptr is its start and
          args->end is its limit
  end1    end of the first string

Returns:  NULL when a character pair does not match,
          (pcre_uchar*)1 when the second string is exhausted before the
            first one,
          otherwise the position reached in the second string. */

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
pcre_uint32 c1, c2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *ur;
const pcre_uint32 *pp;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  ur = GET_UCD(c2);

  /* Equal characters and simple case pairs need no further work. */
  if (c1 == c2 || c1 == c2 + ur->other_case)
    continue;

  /* Otherwise c1 must be listed in the caseless set of c2. The scan gives
  up as soon as an entry greater than c1 is seen. */
  pp = PRIV(ucd_caseless_sets) + ur->caseset;
  while (c1 != *pp)
    {
    if (c1 < *pp) return NULL;
    pp++;
    }
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4257
4258 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4259 compare_context* context, jump_list **backtracks)
4260 {
4261 DEFINE_COMPILER;
4262 unsigned int othercasebit = 0;
4263 pcre_uchar *othercasechar = NULL;
4264 #ifdef SUPPORT_UTF
4265 int utflength;
4266 #endif
4267
4268 if (caseless && char_has_othercase(common, cc))
4269 {
4270 othercasebit = char_get_othercase_bit(common, cc);
4271 SLJIT_ASSERT(othercasebit);
4272 /* Extracting bit difference info. */
4273 #if defined COMPILE_PCRE8
4274 othercasechar = cc + (othercasebit >> 8);
4275 othercasebit &= 0xff;
4276 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4277 /* Note that this code only handles characters in the BMP. If there
4278 ever are characters outside the BMP whose othercase differs in only one
4279 bit from itself (there currently are none), this code will need to be
4280 revised for COMPILE_PCRE32. */
4281 othercasechar = cc + (othercasebit >> 9);
4282 if ((othercasebit & 0x100) != 0)
4283 othercasebit = (othercasebit & 0xff) << 8;
4284 else
4285 othercasebit &= 0xff;
4286 #endif /* COMPILE_PCRE[8|16|32] */
4287 }
4288
4289 if (context->sourcereg == -1)
4290 {
4291 #if defined COMPILE_PCRE8
4292 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4293 if (context->length >= 4)
4294 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4295 else if (context->length >= 2)
4296 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4297 else
4298 #endif
4299 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4300 #elif defined COMPILE_PCRE16
4301 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4302 if (context->length >= 4)
4303 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4304 else
4305 #endif
4306 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4307 #elif defined COMPILE_PCRE32
4308 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4309 #endif /* COMPILE_PCRE[8|16|32] */
4310 context->sourcereg = TMP2;
4311 }
4312
4313 #ifdef SUPPORT_UTF
4314 utflength = 1;
4315 if (common->utf && HAS_EXTRALEN(*cc))
4316 utflength += GET_EXTRALEN(*cc);
4317
4318 do
4319 {
4320 #endif
4321
4322 context->length -= IN_UCHARS(1);
4323 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4324
4325 /* Unaligned read is supported. */
4326 if (othercasebit != 0 && othercasechar == cc)
4327 {
4328 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4329 context->oc.asuchars[context->ucharptr] = othercasebit;
4330 }
4331 else
4332 {
4333 context->c.asuchars[context->ucharptr] = *cc;
4334 context->oc.asuchars[context->ucharptr] = 0;
4335 }
4336 context->ucharptr++;
4337
4338 #if defined COMPILE_PCRE8
4339 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4340 #else
4341 if (context->ucharptr >= 2 || context->length == 0)
4342 #endif
4343 {
4344 if (context->length >= 4)
4345 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4346 else if (context->length >= 2)
4347 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4348 #if defined COMPILE_PCRE8
4349 else if (context->length >= 1)
4350 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4351 #endif /* COMPILE_PCRE8 */
4352 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4353
4354 switch(context->ucharptr)
4355 {
4356 case 4 / sizeof(pcre_uchar):
4357 if (context->oc.asint != 0)
4358 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4359 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4360 break;
4361
4362 case 2 / sizeof(pcre_uchar):
4363 if (context->oc.asushort != 0)
4364 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4365 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4366 break;
4367
4368 #ifdef COMPILE_PCRE8
4369 case 1:
4370 if (context->oc.asbyte != 0)
4371 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4372 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4373 break;
4374 #endif
4375
4376 default:
4377 SLJIT_ASSERT_STOP();
4378 break;
4379 }
4380 context->ucharptr = 0;
4381 }
4382
4383 #else
4384
4385 /* Unaligned read is unsupported or in 32 bit mode. */
4386 if (context->length >= 1)
4387 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4388
4389 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4390
4391 if (othercasebit != 0 && othercasechar == cc)
4392 {
4393 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4394 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4395 }
4396 else
4397 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4398
4399 #endif
4400
4401 cc++;
4402 #ifdef SUPPORT_UTF
4403 utflength--;
4404 }
4405 while (utflength > 0);
4406 #endif
4407
4408 return cc;
4409 }
4410
4411 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4412
4413 #define SET_TYPE_OFFSET(value) \
4414 if ((value) != typeoffset) \
4415 { \
4416 if ((value) < typeoffset) \
4417 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4418 else \
4419 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4420 } \
4421 typeoffset = (value);
4422
4423 #define SET_CHAR_OFFSET(value) \
4424 if ((value) != charoffset) \
4425 { \
4426 if ((value) < charoffset) \
4427 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4428 else \
4429 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4430 } \
4431 charoffset = (value);
4432
4433 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4434 {
4435 DEFINE_COMPILER;
4436 jump_list *found = NULL;
4437 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4438 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4439 struct sljit_jump *jump = NULL;
4440 pcre_uchar *ccbegin;
4441 int compares, invertcmp, numberofcmps;
4442 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4443 BOOL utf = common->utf;
4444 #endif
4445
4446 #ifdef SUPPORT_UCP
4447 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4448 BOOL charsaved = FALSE;
4449 int typereg = TMP1, scriptreg = TMP1;
4450 const pcre_uint32 *other_cases;
4451 sljit_uw typeoffset;
4452 #endif
4453
4454 /* Scanning the necessary info. */
4455 cc++;
4456 ccbegin = cc;
4457 compares = 0;
4458 if (cc[-1] & XCL_MAP)
4459 {
4460 min = 0;
4461 cc += 32 / sizeof(pcre_uchar);
4462 }
4463
4464 while (*cc != XCL_END)
4465 {
4466 compares++;
4467 if (*cc == XCL_SINGLE)
4468 {
4469 cc ++;
4470 GETCHARINCTEST(c, cc);
4471 if (c > max) max = c;
4472 if (c < min) min = c;
4473 #ifdef SUPPORT_UCP
4474 needschar = TRUE;
4475 #endif
4476 }
4477 else if (*cc == XCL_RANGE)
4478 {
4479 cc ++;
4480 GETCHARINCTEST(c, cc);
4481 if (c < min) min = c;
4482 GETCHARINCTEST(c, cc);
4483 if (c > max) max = c;
4484 #ifdef SUPPORT_UCP
4485 needschar = TRUE;
4486 #endif
4487 }
4488 #ifdef SUPPORT_UCP
4489 else
4490 {
4491 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4492 cc++;
4493 if (*cc == PT_CLIST)
4494 {
4495 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4496 while (*other_cases != NOTACHAR)
4497 {
4498 if (*other_cases > max) max = *other_cases;
4499 if (*other_cases < min) min = *other_cases;
4500 other_cases++;
4501 }
4502 }
4503 else
4504 {
4505 max = READ_CHAR_MAX;
4506 min = 0;
4507 }
4508
4509 switch(*cc)
4510 {
4511 case PT_ANY:
4512 break;
4513
4514 case PT_LAMP:
4515 case PT_GC:
4516 case PT_PC:
4517 case PT_ALNUM:
4518 needstype = TRUE;
4519 break;
4520
4521 case PT_SC:
4522 needsscript = TRUE;
4523 break;
4524
4525 case PT_SPACE:
4526 case PT_PXSPACE:
4527 case PT_WORD:
4528 case PT_PXGRAPH:
4529 case PT_PXPRINT:
4530 case PT_PXPUNCT:
4531 needstype = TRUE;
4532 needschar = TRUE;
4533 break;
4534
4535 case PT_CLIST:
4536 case PT_UCNC:
4537 needschar = TRUE;
4538 break;
4539
4540 default:
4541 SLJIT_ASSERT_STOP();
4542 break;
4543 }
4544 cc += 2;
4545 }
4546 #endif
4547 }
4548
4549 /* We are not necessary in utf mode even in 8 bit mode. */
4550 cc = ccbegin;
4551 detect_partial_match(common, backtracks);
4552 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4553
4554 if ((cc[-1] & XCL_HASPROP) == 0)
4555 {
4556 if ((cc[-1] & XCL_MAP) != 0)
4557 {
4558 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4559 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4560 {
4561 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4562 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4563 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4564 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4565 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4566 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4567 }
4568
4569 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4570 JUMPHERE(jump);
4571
4572 cc += 32 / sizeof(pcre_uchar);
4573 }
4574 else
4575 {
4576 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4577 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4578 }
4579 }
4580 else if ((cc[-1] & XCL_MAP) != 0)
4581 {
4582 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4583 #ifdef SUPPORT_UCP
4584 charsaved = TRUE;
4585 #endif
4586 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4587 {
4588 #ifdef COMPILE_PCRE8
4589 SLJIT_ASSERT(common->utf);
4590 #endif
4591 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4592
4593 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4594 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4595 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4596 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4597 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4598 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4599
4600 JUMPHERE(jump);
4601 }
4602
4603 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4604 cc += 32 / sizeof(pcre_uchar);
4605 }
4606
4607 #ifdef SUPPORT_UCP
4608 /* Simple register allocation. TMP1 is preferred if possible. */
4609 if (needstype || needsscript)
4610 {
4611 if (needschar && !charsaved)
4612 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4613 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4614 if (needschar)
4615 {
4616 if (needstype)
4617 {
4618 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4619 typereg = RETURN_ADDR;
4620 }
4621
4622 if (needsscript)
4623 scriptreg = TMP3;
4624 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4625 }
4626 else if (needstype && needsscript)
4627 scriptreg = TMP3;
4628 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4629
4630 if (needsscript)
4631 {
4632 if (scriptreg == TMP1)
4633 {
4634 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4635 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4636 }
4637 else
4638 {
4639 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4640 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4641 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4642 }
4643 }
4644 }
4645 #endif
4646
4647 /* Generating code. */
4648 charoffset = 0;
4649 numberofcmps = 0;
4650 #ifdef SUPPORT_UCP
4651 typeoffset = 0;
4652 #endif
4653
4654 while (*cc != XCL_END)
4655 {
4656 compares--;
4657 invertcmp = (compares == 0 && list != backtracks);
4658 jump = NULL;
4659
4660 if (*cc == XCL_SINGLE)
4661 {
4662 cc ++;
4663 GETCHARINCTEST(c, cc);
4664
4665 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4666 {
4667 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4668 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4669 numberofcmps++;
4670 }
4671 else if (numberofcmps > 0)
4672 {
4673 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4674 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4675 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4676 numberofcmps = 0;
4677 }
4678 else
4679 {
4680 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4681 numberofcmps = 0;
4682 }
4683 }
4684 else if (*cc == XCL_RANGE)
4685 {
4686 cc ++;
4687 GETCHARINCTEST(c, cc);
4688 SET_CHAR_OFFSET(c);
4689 GETCHARINCTEST(c, cc);
4690
4691 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4692 {
4693 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4694 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4695 numberofcmps++;
4696 }
4697 else if (numberofcmps > 0)
4698 {
4699 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4700 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4701 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4702 numberofcmps = 0;
4703 }
4704 else
4705 {
4706 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4707 numberofcmps = 0;
4708 }
4709 }
4710 #ifdef SUPPORT_UCP
4711 else
4712 {
4713 if (*cc == XCL_NOTPROP)
4714 invertcmp ^= 0x1;
4715 cc++;
4716 switch(*cc)
4717 {
4718 case PT_ANY:
4719 if (list != backtracks)
4720 {
4721 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4722 continue;
4723 }
4724 else if (cc[-1] == XCL_NOTPROP)
4725 continue;
4726 jump = JUMP(SLJIT_JUMP);
4727 break;
4728
4729 case PT_LAMP:
4730 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4731 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4732 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4733 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4734 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4735 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4736 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4737 break;
4738
4739 case PT_GC:
4740 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4741 SET_TYPE_OFFSET(c);
4742 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4743 break;
4744
4745 case PT_PC:
4746 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4747 break;
4748
4749 case PT_SC:
4750 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4751 break;
4752
4753 case PT_SPACE:
4754 case PT_PXSPACE:
4755 SET_CHAR_OFFSET(9);
4756 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4757 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4758
4759 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4760 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4761
4762 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4763 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4764
4765 SET_TYPE_OFFSET(ucp_Zl);
4766 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4767 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4768 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4769 break;
4770
4771 case PT_WORD:
4772 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4773 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4774 /* Fall through. */
4775
4776 case PT_ALNUM:
4777 SET_TYPE_OFFSET(ucp_Ll);
4778 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4779 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4780 SET_TYPE_OFFSET(ucp_Nd);
4781 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4782 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4783 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4784 break;
4785
4786 case PT_CLIST:
4787 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4788
4789 /* At least three characters are required.
4790 Otherwise this case would be handled by the normal code path. */
4791 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4792 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4793
4794 /* Optimizing character pairs, if their difference is power of 2. */
4795 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4796 {
4797 if (charoffset == 0)
4798 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4799 else
4800 {
4801 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4802 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4803 }
4804 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4805 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4806 other_cases += 2;
4807 }
4808 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4809 {
4810 if (charoffset == 0)
4811 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4812 else
4813 {
4814 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4815 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4816 }
4817 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4818 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4819
4820 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4821 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4822
4823 other_cases += 3;
4824 }
4825 else
4826 {
4827 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4828 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4829 }
4830
4831 while (*other_cases != NOTACHAR)
4832 {
4833 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4834 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4835 }
4836 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4837 break;
4838
4839 case PT_UCNC:
4840 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4841 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4842 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4843 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4844 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4845 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4846
4847 SET_CHAR_OFFSET(0xa0);
4848 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4849 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4850 SET_CHAR_OFFSET(0);
4851 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4852 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4853 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4854 break;
4855
4856 case PT_PXGRAPH:
4857 /* C and Z groups are the farthest two groups. */
4858 SET_TYPE_OFFSET(ucp_Ll);
4859 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4860 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4861
4862 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4863
4864 /* In case of ucp_Cf, we overwrite the result. */
4865 SET_CHAR_OFFSET(0x2066);
4866 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4867 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4868
4869 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4870 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4871
4872 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4873 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4874
4875 JUMPHERE(jump);
4876 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4877 break;
4878
4879 case PT_PXPRINT:
4880 /* C and Z groups are the farthest two groups. */
4881 SET_TYPE_OFFSET(ucp_Ll);
4882 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4883 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4884
4885 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4886 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4887
4888 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4889
4890 /* In case of ucp_Cf, we overwrite the result. */
4891 SET_CHAR_OFFSET(0x2066);
4892 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4893 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4894
4895 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4896 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4897
4898 JUMPHERE(jump);
4899 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4900 break;
4901
4902 case PT_PXPUNCT:
4903 SET_TYPE_OFFSET(ucp_Sc);
4904 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4905 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4906
4907 SET_CHAR_OFFSET(0);
4908 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4909 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4910
4911 SET_TYPE_OFFSET(ucp_Pc);
4912 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4913 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4914 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4915 break;
4916 }
4917 cc += 2;
4918 }
4919 #endif
4920
4921 if (jump != NULL)
4922 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4923 }
4924
4925 if (found != NULL)
4926 set_jumps(found, LABEL());
4927 }
4928
4929 #undef SET_TYPE_OFFSET
4930 #undef SET_CHAR_OFFSET
4931
4932 #endif
4933
4934 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4935 {
4936 DEFINE_COMPILER;
4937 int length;
4938 unsigned int c, oc, bit;
4939 compare_context context;
4940 struct sljit_jump *jump[4];
4941 jump_list *end_list;
4942 #ifdef SUPPORT_UTF
4943 struct sljit_label *label;
4944 #ifdef SUPPORT_UCP
4945 pcre_uchar propdata[5];
4946 #endif
4947 #endif /* SUPPORT_UTF */
4948
4949 switch(type)
4950 {
4951 case OP_SOD:
4952 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4954 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4955 return cc;
4956
4957 case OP_SOM:
4958 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4959 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4960 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4961 return cc;
4962
4963 case OP_NOT_WORD_BOUNDARY:
4964 case OP_WORD_BOUNDARY:
4965 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4966 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4967 return cc;
4968
4969 case OP_NOT_DIGIT:
4970 case OP_DIGIT:
4971 /* Digits are usually 0-9, so it is worth to optimize them. */
4972 detect_partial_match(common, backtracks);
4973 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4974 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4975 read_char7_type(common, type == OP_NOT_DIGIT);
4976 else
4977 #endif
4978 read_char8_type(common, type == OP_NOT_DIGIT);
4979 /* Flip the starting bit in the negative case. */
4980 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4981 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4982 return cc;
4983
4984 case OP_NOT_WHITESPACE:
4985 case OP_WHITESPACE:
4986 detect_partial_match(common, backtracks);
4987 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4988 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4989 read_char7_type(common, type == OP_NOT_WHITESPACE);
4990 else
4991 #endif
4992 read_char8_type(common, type == OP_NOT_WHITESPACE);
4993 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4994 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4995 return cc;
4996
4997 case OP_NOT_WORDCHAR:
4998 case OP_WORDCHAR:
4999 detect_partial_match(common, backtracks);
5000 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5001 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5002 read_char7_type(common, type == OP_NOT_WORDCHAR);
5003 else
5004 #endif
5005 read_char8_type(common, type == OP_NOT_WORDCHAR);
5006 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5007 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5008 return cc;
5009
5010 case OP_ANY:
5011 detect_partial_match(common, backtracks);
5012 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5013 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5014 {
5015 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5016 end_list = NULL;
5017 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5018 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5019 else
5020 check_str_end(common, &end_list);
5021
5022 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5023 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5024 set_jumps(end_list, LABEL());
5025 JUMPHERE(jump[0]);
5026 }
5027 else
5028 check_newlinechar(common, common->nltype, backtracks, TRUE);
5029 return cc;
5030
5031 case OP_ALLANY:
5032 detect_partial_match(common, backtracks);
5033 #ifdef SUPPORT_UTF
5034 if (common->utf)
5035 {
5036 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5037 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5038 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5039 #if defined COMPILE_PCRE8
5040 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5041 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5042 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5043 #elif defined COMPILE_PCRE16
5044 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5045 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5047 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5048 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5050 #endif
5051 JUMPHERE(jump[0]);
5052 #endif /* COMPILE_PCRE[8|16] */
5053 return cc;
5054 }
5055 #endif
5056 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5057 return cc;
5058
5059 case OP_ANYBYTE:
5060 detect_partial_match(common, backtracks);
5061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5062 return cc;
5063
5064 #ifdef SUPPORT_UTF
5065 #ifdef SUPPORT_UCP
5066 case OP_NOTPROP:
5067 case OP_PROP:
5068 propdata[0] = XCL_HASPROP;
5069 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5070 propdata[2] = cc[0];
5071 propdata[3] = cc[1];
5072 propdata[4] = XCL_END;
5073 compile_xclass_matchingpath(common, propdata, backtracks);
5074 return cc + 2;
5075 #endif
5076 #endif
5077
5078 case OP_ANYNL:
5079 detect_partial_match(common, backtracks);
5080 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5081 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5082 /* We don't need to handle soft partial matching case. */
5083 end_list = NULL;
5084 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5085 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5086 else
5087 check_str_end(common, &end_list);
5088 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5089 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5090 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5091 jump[2] = JUMP(SLJIT_JUMP);
5092 JUMPHERE(jump[0]);
5093 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5094 set_jumps(end_list, LABEL());
5095 JUMPHERE(jump[1]);
5096 JUMPHERE(jump[2]);
5097 return cc;
5098
5099 case OP_NOT_HSPACE:
5100 case OP_HSPACE:
5101 detect_partial_match(common, backtracks);
5102 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5103 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5104 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5105 return cc;
5106
5107 case OP_NOT_VSPACE:
5108 case OP_VSPACE:
5109 detect_partial_match(common, backtracks);
5110 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5111 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5112 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5113 return cc;
5114
5115 #ifdef SUPPORT_UCP
5116 case OP_EXTUNI:
5117 detect_partial_match(common, backtracks);
5118 read_char(common);
5119 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5121 /* Optimize register allocation: use a real register. */
5122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5123 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5124
5125 label = LABEL();
5126 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5127 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5128 read_char(common);
5129 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5131 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5132
5133 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5134 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5135 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5136 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5137 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5138 JUMPTO(SLJIT_C_NOT_ZERO, label);
5139
5140 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5141 JUMPHERE(jump[0]);
5142 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5143
5144 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5145 {
5146 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5147 /* Since we successfully read a char above, partial matching must occur. */
5148 check_partial(common, TRUE);
5149 JUMPHERE(jump[0]);
5150 }
5151 return cc;
5152 #endif
5153
5154 case OP_EODN:
5155 /* Requires rather complex checks. */
5156 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5157 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5158 {
5159 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5160 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5161 if (common->mode == JIT_COMPILE)
5162 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5163 else
5164 {
5165 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5166 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5169 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5170 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5171 check_partial(common, TRUE);
5172 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5173 JUMPHERE(jump[1]);
5174 }
5175 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5176 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5177 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5178 }
5179 else if (common->nltype == NLTYPE_FIXED)
5180 {
5181 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5182 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5183 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5184 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5185 }
5186 else
5187 {
5188 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5189 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5190 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5191 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5192 jump[2] = JUMP(SLJIT_C_GREATER);
5193 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5194 /* Equal. */
5195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5196 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5197 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5198
5199 JUMPHERE(jump[1]);
5200 if (common->nltype == NLTYPE_ANYCRLF)
5201 {
5202 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5203 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5204 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5205 }
5206 else
5207 {
5208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5209 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5210 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5211 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5212 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5213 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5214 }
5215 JUMPHERE(jump[2]);
5216 JUMPHERE(jump[3]);
5217 }
5218 JUMPHERE(jump[0]);
5219 check_partial(common, FALSE);
5220 return cc;
5221
5222 case OP_EOD:
5223 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5224 check_partial(common, FALSE);
5225 return cc;
5226
5227 case OP_CIRC:
5228 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5229 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5230 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5231 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5232 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5233 return cc;
5234
5235 case OP_CIRCM:
5236 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5238 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5239 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5240 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5241 jump[0] = JUMP(SLJIT_JUMP);
5242 JUMPHERE(jump[1]);
5243
5244 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5245 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5246 {
5247 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5248 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5249 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5250 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5251 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5252 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5253 }
5254 else
5255 {
5256 skip_char_back(common);
5257 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5258 check_newlinechar(common, common->nltype, backtracks, FALSE);
5259 }
5260 JUMPHERE(jump[0]);
5261 return cc;
5262
5263 case OP_DOLL:
5264 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5265 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5266 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5267
5268 if (!common->endonly)
5269 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5270 else
5271 {
5272 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5273 check_partial(common, FALSE);
5274 }
5275 return cc;
5276
5277 case OP_DOLLM:
5278 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5279 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5280 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5281 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5282 check_partial(common, FALSE);
5283 jump[0] = JUMP(SLJIT_JUMP);
5284 JUMPHERE(jump[1]);
5285
5286 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5287 {
5288 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5289 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5290 if (common->mode == JIT_COMPILE)
5291 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5292 else
5293 {
5294 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5295 /* STR_PTR = STR_END - IN_UCHARS(1) */
5296 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5297 check_partial(common, TRUE);
5298 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5299 JUMPHERE(jump[1]);
5300 }
5301
5302 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5303 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5304 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5305 }
5306 else
5307 {
5308 peek_char(common, common->nlmax);
5309 check_newlinechar(common, common->nltype, backtracks, FALSE);
5310 }
5311 JUMPHERE(jump[0]);
5312 return cc;
5313
5314 case OP_CHAR:
5315 case OP_CHARI:
5316 length = 1;
5317 #ifdef SUPPORT_UTF
5318 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5319 #endif
5320 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5321 {
5322 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5323 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5324
5325 context.length = IN_UCHARS(length);
5326 context.sourcereg = -1;
5327 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5328 context.ucharptr = 0;
5329 #endif
5330 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5331 }
5332
5333 detect_partial_match(common, backtracks);
5334 #ifdef SUPPORT_UTF
5335 if (common->utf)
5336 {
5337 GETCHAR(c, cc);
5338 }
5339 else
5340 #endif
5341 c = *cc;
5342
5343 if (type == OP_CHAR || !char_has_othercase(common, cc))
5344 {
5345 read_char_range(common, c, c, FALSE);
5346 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5347 return cc + length;
5348 }
5349 oc = char_othercase(common, c);
5350 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5351 bit = c ^ oc;
5352 if (is_powerof2(bit))
5353 {
5354 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5355 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5356 return cc + length;
5357 }
5358 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5359 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5360 JUMPHERE(jump[0]);
5361 return cc + length;
5362
5363 case OP_NOT:
5364 case OP_NOTI:
5365 detect_partial_match(common, backtracks);
5366 length = 1;
5367 #ifdef SUPPORT_UTF
5368 if (common->utf)
5369 {
5370 #ifdef COMPILE_PCRE8
5371 c = *cc;
5372 if (c < 128)
5373 {
5374 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5375 if (type == OP_NOT || !char_has_othercase(common, cc))
5376 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5377 else
5378 {
5379 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5380 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5381 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5382 }
5383 /* Skip the variable-length character. */
5384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5385 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5386 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5387 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5388 JUMPHERE(jump[0]);
5389 return cc + 1;
5390 }
5391 else
5392 #endif /* COMPILE_PCRE8 */
5393 {
5394 GETCHARLEN(c, cc, length);
5395 }
5396 }
5397 else
5398 #endif /* SUPPORT_UTF */
5399 c = *cc;
5400
5401 if (type == OP_NOT || !char_has_othercase(common, cc))
5402 {
5403 read_char_range(common, c, c, TRUE);
5404 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5405 }
5406 else
5407 {
5408 oc = char_othercase(common, c);
5409 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5410 bit = c ^ oc;
5411 if (is_powerof2(bit))
5412 {
5413 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5414 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5415 }
5416 else
5417 {
5418 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5419 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5420 }
5421 }
5422 return cc + length;
5423
5424 case OP_CLASS:
5425 case OP_NCLASS:
5426 detect_partial_match(common, backtracks);
5427
5428 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5429 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5430 read_char_range(common, 0, bit, type == OP_NCLASS);
5431 #else
5432 read_char_range(common, 0, 255, type == OP_NCLASS);
5433 #endif
5434
5435 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5436 return cc + 32 / sizeof(pcre_uchar);
5437
5438 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5439 jump[0] = NULL;
5440 if (common->utf)
5441 {
5442 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5443 if (type == OP_CLASS)
5444 {
5445 add_jump(compiler, backtracks, jump[0]);
5446 jump[0] = NULL;
5447 }
5448 }
5449 #elif !defined COMPILE_PCRE8
5450 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5451 if (type == OP_CLASS)
5452 {
5453 add_jump(compiler, backtracks, jump[0]);
5454 jump[0] = NULL;
5455 }
5456 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5457
5458 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5459 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5460 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5461 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5462 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5463 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5464
5465 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5466 if (jump[0] != NULL)
5467 JUMPHERE(jump[0]);
5468 #endif
5469
5470 return cc + 32 / sizeof(pcre_uchar);
5471
5472 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5473 case OP_XCLASS:
5474 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5475 return cc + GET(cc, 0) - 1;
5476 #endif
5477
5478 case OP_REVERSE:
5479 length = GET(cc, 0);
5480 if (length == 0)
5481 return cc + LINK_SIZE;
5482 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5483 #ifdef SUPPORT_UTF
5484 if (common->utf)
5485 {
5486 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5487 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5488 label = LABEL();
5489 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5490 skip_char_back(common);
5491 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5492 JUMPTO(SLJIT_C_NOT_ZERO, label);
5493 }
5494 else
5495 #endif
5496 {
5497 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5498 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5499 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5500 }
5501 check_start_used_ptr(common);
5502 return cc + LINK_SIZE;
5503 }
5504 SLJIT_ASSERT_STOP();
5505 return cc;
5506 }
5507
5508 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5509 {
5510 /* This function consumes at least one input character. */
5511 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5512 DEFINE_COMPILER;
5513 pcre_uchar *ccbegin = cc;
5514 compare_context context;
5515 int size;
5516
5517 context.length = 0;
5518 do
5519 {
5520 if (cc >= ccend)
5521 break;
5522
5523 if (*cc == OP_CHAR)
5524 {
5525 size = 1;
5526 #ifdef SUPPORT_UTF
5527 if (common->utf && HAS_EXTRALEN(cc[1]))
5528 size += GET_EXTRALEN(cc[1]);
5529 #endif
5530 }
5531 else if (*cc == OP_CHARI)
5532 {
5533 size = 1;
5534 #ifdef SUPPORT_UTF
5535 if (common->utf)
5536 {
5537 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5538 size = 0;
5539 else if (HAS_EXTRALEN(cc[1]))
5540 size += GET_EXTRALEN(cc[1]);
5541 }
5542 else
5543 #endif
5544 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5545 size = 0;
5546 }
5547 else
5548 size = 0;
5549
5550 cc += 1 + size;
5551 context.length += IN_UCHARS(size);
5552 }
5553 while (size > 0 && context.length <= 128);
5554
5555 cc = ccbegin;
5556 if (context.length > 0)
5557 {
5558 /* We have a fixed-length byte sequence. */
5559 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5560 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5561
5562 context.sourcereg = -1;
5563 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5564 context.ucharptr = 0;
5565 #endif
5566 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5567 return cc;
5568 }
5569
5570 /* A non-fixed length character will be checked if length == 0. */
5571 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5572 }
5573
5574 /* Forward definitions. */
5575 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5576 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5577
/* Shared body of the PUSH_BACKTRACK macros. The expansion site must have
`compiler`, `parent` and `backtrack` in scope. A record of `size` bytes is
requested from the compiler, zero filled, linked in front of parent->top and
tagged with `ccstart`. If the compiler reports an error after the allocation,
the `bail_out` statement is executed instead of the initialisation. */
#define PUSH_BACKTRACK_COMMON(size, ccstart, bail_out) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      bail_out; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Variant for functions returning a value: `error` is returned on failure. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  PUSH_BACKTRACK_COMMON(size, ccstart, return error)

/* Variant for functions returning void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  PUSH_BACKTRACK_COMMON(size, ccstart, return)

/* Views the current `backtrack` record as a pointer to the given type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5605
5606 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5607 {
5608 /* The OVECTOR offset goes to TMP2. */
5609 DEFINE_COMPILER;
5610 int count = GET2(cc, 1 + IMM2_SIZE);
5611 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5612 unsigned int offset;
5613 jump_list *found = NULL;
5614
5615 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5616
5617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5618
5619 count--;
5620 while (count-- > 0)
5621 {
5622 offset = GET2(slot, 0) << 1;
5623 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5624 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5625 slot += common->name_entry_size;
5626 }
5627
5628 offset = GET2(slot, 0) << 1;
5629 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5630 if (backtracks != NULL && !common->jscript_compat)
5631 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5632
5633 set_jumps(found, LABEL());
5634 }
5635
5636 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5637 {
5638 DEFINE_COMPILER;
5639 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5640 int offset = 0;
5641 struct sljit_jump *jump = NULL;
5642 struct sljit_jump *partial;
5643 struct sljit_jump *nopartial;
5644
5645 if (ref)
5646 {
5647 offset = GET2(cc, 1) << 1;
5648 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5649 /* OVECTOR(1) contains the "string begin - 1" constant. */
5650 if (withchecks && !common->jscript_compat)
5651 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5652 }
5653 else
5654 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5655
5656 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5657 if (common->utf && *cc == OP_REFI)
5658 {
5659 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5660 if (ref)
5661 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5662 else
5663 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5664
5665 if (withchecks)
5666 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5667
5668 /* Needed to save important temporary registers. */
5669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5670 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5672 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5673 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5674 if (common->mode == JIT_COMPILE)
5675 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5676 else
5677 {
5678 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5679 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5680 check_partial(common, FALSE);
5681 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5682 JUMPHERE(nopartial);
5683 }
5684 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5685 }
5686 else
5687 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5688 {
5689 if (ref)
5690 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5691 else
5692 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5693
5694 if (withchecks)
5695 jump = JUMP(SLJIT_C_ZERO);
5696
5697 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5698 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5699 if (common->mode == JIT_COMPILE)
5700 add_jump(compiler, backtracks, partial);
5701
5702 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5703 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5704
5705 if (common->mode != JIT_COMPILE)
5706 {
5707 nopartial = JUMP(SLJIT_JUMP);
5708 JUMPHERE(partial);
5709 /* TMP2 -= STR_END - STR_PTR */
5710 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5711 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5712 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5713 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5714 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5715 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5716 JUMPHERE(partial);
5717 check_partial(common, FALSE);
5718 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5719 JUMPHERE(nopartial);
5720 }
5721 }
5722
5723 if (jump != NULL)
5724 {
5725 if (emptyfail)
5726 add_jump(compiler, backtracks, jump);
5727 else
5728 JUMPHERE(jump);
5729 }
5730 }
5731
/* Emits the matching path of a back reference that is followed by a repeat
operator (OP_CRSTAR ... OP_CRMINRANGE). cc points at the reference opcode:
OP_REF / OP_REFI for a numbered group, otherwise the form that is resolved
through compile_dnref_search. An iterator_backtrack record is pushed onto
parent; NULL is returned when that push fails. On success the position just
after the repeat operator is returned. */
5732 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5733 {
5734 DEFINE_COMPILER;
5735 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5736 backtrack_common *backtrack;
5737 pcre_uchar type;
5738 int offset = 0;
5739 struct sljit_label *label;
5740 struct sljit_jump *zerolength;
5741 struct sljit_jump *jump = NULL;
5742 pcre_uchar *ccbegin = cc;
5743 int min = 0, max = 0;
5744 BOOL minimize;
5745
5746 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5747
/* The non-ref form is IMM2_SIZE units longer; after this adjustment
cc[1 + IMM2_SIZE] addresses the repeat opcode for both forms. */
5748 if (ref)
5749 offset = GET2(cc, 1) << 1;
5750 else
5751 cc += IMM2_SIZE;
5752 type = cc[1 + IMM2_SIZE];
5753
/* OP_CRSTAR is asserted to be even; an odd repeat opcode is treated as the
minimizing variant. */
5754 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5755 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc over the repeat operator. In the
code below max == 0 is handled as "no upper limit". */
5756 switch(type)
5757 {
5758 case OP_CRSTAR:
5759 case OP_CRMINSTAR:
5760 min = 0;
5761 max = 0;
5762 cc += 1 + IMM2_SIZE + 1;
5763 break;
5764 case OP_CRPLUS:
5765 case OP_CRMINPLUS:
5766 min = 1;
5767 max = 0;
5768 cc += 1 + IMM2_SIZE + 1;
5769 break;
5770 case OP_CRQUERY:
5771 case OP_CRMINQUERY:
5772 min = 0;
5773 max = 1;
5774 cc += 1 + IMM2_SIZE + 1;
5775 break;
5776 case OP_CRRANGE:
5777 case OP_CRMINRANGE:
5778 min = GET2(cc, 1 + IMM2_SIZE + 1);
5779 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5780 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5781 break;
5782 default:
5783 SLJIT_ASSERT_STOP();
5784 break;
5785 }
5786
/* Non-minimizing repeat: match as many copies as allowed, pushing the
subject position after each one. */
5787 if (!minimize)
5788 {
5789 if (min == 0)
5790 {
/* Two stack slots: STACK(0) = current STR_PTR, STACK(1) = 0. */
5791 allocate_stack(common, 2);
5792 if (ref)
5793 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5795 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5796 /* Temporary release of STR_PTR. */
5797 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5798 /* Handles both invalid and empty cases. Since the minimum repeat,
5799 is zero the invalid case is basically the same as an empty case. */
5800 if (ref)
5801 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5802 else
5803 {
/* No backtrack list is passed: the search itself never fails here. The
selected pair address is kept in POSSESSIVE1 for the loop below. */
5804 compile_dnref_search(common, ccbegin, NULL);
5805 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5807 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5808 }
5809 /* Restore if not zero length. */
5810 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5811 }
5812 else
5813 {
/* At least one copy is required: one stack slot, initialised to 0. */
5814 allocate_stack(common, 1);
5815 if (ref)
5816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5818 if (ref)
5819 {
/* A start slot equal to OVECTOR(1) fails; an empty capture skips the loop. */
5820 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5821 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5822 }
5823 else
5824 {
5825 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5826 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5827 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5828 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5829 }
5830 }
5831
/* POSSESSIVE0 serves as the iteration counter whenever a count must be
enforced. */
5832 if (min > 1 || max > 1)
5833 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5834
/* Loop head. For the non-ref form TMP2 is reloaded from POSSESSIVE1 before
every comparison. withchecks is FALSE because the checks were emitted above. */
5835 label = LABEL();
5836 if (!ref)
5837 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5838 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5839
5840 if (min > 1 || max > 1)
5841 {
/* Increment the counter; repeat without pushing while below min, and
leave the loop once max is reached. */
5842 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5843 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5845 if (min > 1)
5846 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5847 if (max > 1)
5848 {
5849 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5850 allocate_stack(common, 1);
5851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5852 JUMPTO(SLJIT_JUMP, label);
5853 JUMPHERE(jump);
5854 }
5855 }
5856
5857 if (max == 0)
5858 {
5859 /* Includes min > 1 case as well. */
5860 allocate_stack(common, 1);
5861 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5862 JUMPTO(SLJIT_JUMP, label);
5863 }
5864
5865 JUMPHERE(zerolength);
5866 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5867
5868 count_match(common);
5869 return cc;
5870 }
5871
/* Minimizing repeat. Stack slots as used by the stores below: STACK(0) holds
a subject position (0 until one is stored), STACK(1) the iteration counter
(not initialised for OP_CRMINSTAR) and, for the non-ref form only, STACK(2)
the TMP2 value produced by compile_dnref_search. */
5872 allocate_stack(common, ref ? 2 : 3);
5873 if (ref)
5874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5875 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5876 if (type != OP_CRMINSTAR)
5877 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5878
5879 if (min == 0)
5880 {
5881 /* Handles both invalid and empty cases. Since the minimum repeat,
5882 is zero the invalid case is basically the same as an empty case. */
5883 if (ref)
5884 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5885 else
5886 {
5887 compile_dnref_search(common, ccbegin, NULL);
5888 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5889 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5890 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5891 }
5892 /* Length is non-zero, we can match real repeats. */
5893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* Zero copies are tried first: jump over the code that matches a copy. */
5894 jump = JUMP(SLJIT_JUMP);
5895 }
5896 else
5897 {
5898 if (ref)
5899 {
5900 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5901 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5902 }
5903 else
5904 {
5905 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5908 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5909 }
5910 }
5911
/* The label recorded here is where one more copy gets matched; with an upper
limit the attempt fails once the counter in STACK(1) has reached max. */
5912 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5913 if (max > 0)
5914 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5915
5916 if (!ref)
5917 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
/* Both flags are TRUE here: an empty capture is reported as a failure. */
5918 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5920
5921 if (min > 1)
5922 {
/* Keep matching copies until the counter reaches min. */
5923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5924 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5926 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5927 }
5928 else if (max > 0)
5929 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5930
5931 if (jump != NULL)
5932 JUMPHERE(jump);
5933 JUMPHERE(zerolength);
5934
5935 count_match(common);
5936 return cc;
5937 }
5938
/* Emits the matching path for the recursion item at cc.

   The recursion target is read with GET(cc, 1) and is used as an offset
   from common->start. If get_framesize() reports no_stack for the target,
   the target's body is compiled inline. Otherwise a recurse_entry with the
   same start offset is looked up in common->entries (and appended when it
   is missing), and a fast call to that entry is emitted.

   Arguments:
     common - compiler state shared by the code generators
     cc     - pointer to the recursion item in the compiled pattern
     parent - parent backtrack (not referenced directly in this body;
              presumably consumed by PUSH_BACKTRACK - confirm in the macro)

   Returns cc + 1 + LINK_SIZE, or NULL when sljit reports a compiler error
   after a new entry has been allocated. */
5939 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5940 {
5941 DEFINE_COMPILER;
5942 backtrack_common *backtrack;
5943 recurse_entry *entry = common->entries;
5944 recurse_entry *prev = NULL;
5945 sljit_sw start = GET(cc, 1);
5946 pcre_uchar *start_cc;
5947 BOOL needs_control_head;
5948
5949 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5950
5951 /* Inlining simple patterns: when get_framesize() reports no_stack for the
   target, its body is compiled in place and no call is emitted. */
5952 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5953 {
5954 start_cc = common->start + start;
5955 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5956 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5957 return cc + 1 + LINK_SIZE;
5958 }
5959
/* Search the list for an entry with the same start offset. When none is
   found, prev is left pointing at the last list element (or NULL for an
   empty list). */
5960 while (entry != NULL)
5961 {
5962 if (entry->start == start)
5963 break;
5964 prev = entry;
5965 entry = entry->next;
5966 }
5967
/* First reference to this target: allocate a new entry and append it. */
5968 if (entry == NULL)
5969 {
5970 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5971 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5972 return NULL;
5973 entry->next = NULL;
5974 entry->entry = NULL;
5975 entry->calls = NULL;
5976 entry->start = start;
5977
5978 if (prev != NULL)
5979 prev->next = entry;
5980 else
5981 common->entries = entry;
5982 }
5983
/* Before the call, push OVECTOR(0) and/or the value stored at
   common->mark_ptr: two stack slots when both has_set_som and mark_ptr are
   set, one slot when only one of them is, nothing otherwise. */
5984 if (common->has_set_som && common->mark_ptr != 0)
5985 {
5986 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5987 allocate_stack(common, 2);
5988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5989 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5990 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5991 }
5992 else if (common->has_set_som || common->mark_ptr != 0)
5993 {
5994 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5995 allocate_stack(common, 1);
5996 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5997 }
5998
/* When entry->entry is still NULL the call is queued on entry->calls;
   otherwise the call targets entry->entry directly.
   NOTE(review): the queued calls are presumably bound once the code of the
   recursion target is generated - confirm where entry->entry is set. */
5999 if (entry->entry == NULL)
6000 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6001 else
6002 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6003 /* Leave (take the backtrack path) if the match failed: TMP1 is 0 after the call. */
6004 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6005 return cc + 1 + LINK_SIZE;
6006 }
6007
/* Helper reached from generated code (compile_callout_matchingpath emits
   an SLJIT_CALL3 to it) that completes a callout block and runs the user
   callout function.

   Arguments:
     arguments     - JIT arguments; begin, end, offsets, offset_count and
                     callout_data are read here
     callout_block - partially filled block. On entry its subject and
                     offset_vector fields are read as pointers and are
                     converted to start_match / current_position offsets
                     before being overwritten with their final values
     jit_ovector   - capture vector of the JIT code, as pointers; entries
                     are converted to offsets relative to begin

   Returns 0 when no callout function is installed, otherwise the value
   returned by the callout function. */
6008 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6009 {
6010 const pcre_uchar *begin = arguments->begin;
6011 int *offset_vector = arguments->offsets;
6012 int offset_count = arguments->offset_count;
6013 int i;
6014
6015 if (PUBL(callout) == NULL)
6016 return 0;
6017
6018 callout_block->version = 2;
6019 callout_block->callout_data = arguments->callout_data;
6020
6021 /* Offsets in subject. The subject and offset_vector fields must be read
   here, before the assignments below replace them. */
6022 callout_block->subject_length = arguments->end - arguments->begin;
6023 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6024 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6025 #if defined COMPILE_PCRE8
6026 callout_block->subject = (PCRE_SPTR)begin;
6027 #elif defined COMPILE_PCRE16
6028 callout_block->subject = (PCRE_SPTR16)begin;
6029 #elif defined COMPILE_PCRE32
6030 callout_block->subject = (PCRE_SPTR32)begin;
6031 #endif
6032
6033 /* Convert and copy the JIT offset vector to the offset_vector array.
   Pair 0 is skipped by the loop; capture_top remembers the last even index
   whose start pointer is not below begin. */
6034 callout_block->capture_top = 0;
6035 callout_block->offset_vector = offset_vector;
6036 for (i = 2; i < offset_count; i += 2)
6037 {
6038 offset_vector[i] = jit_ovector[i] - begin;
6039 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6040 if (jit_ovector[i] >= begin)
6041 callout_block->capture_top = i;
6042 }
6043
/* Convert the remembered index into a pair number plus one. */
6044 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
/* The first pair is reported as -1, as far as offset_count allows. */
6045 if (offset_count > 0)
6046 offset_vector[0] = -1;
6047 if (offset_count > 1)
6048 offset_vector[1] = -1;
6049 return (*PUBL(callout))(callout_block);
6050 }
6051
6052 /* Size of the callout block in bytes, rounded up to a multiple of 8. */
6053 #define CALLOUT_ARG_SIZE \
6054 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6055
/* Offset of the callout block field arg, measured back from the end of the
   rounded-up block: the field offset minus CALLOUT_ARG_SIZE. */
6056 #define CALLOUT_ARG_OFFSET(arg) \
6057 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6058
/* Emits the matching path for the callout item at cc.

   The generated code reserves CALLOUT_ARG_SIZE bytes of stack, fills in a
   callout block there, calls do_callout(), releases the stack again and
   then branches on the signed result: a value greater than zero takes the
   backtrack path, a value less than zero jumps to the forced quit
   label/list, and zero falls through to the code that follows.

   Returns cc + 2 + 2 * LINK_SIZE. */
6059 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6060 {
6061 DEFINE_COMPILER;
6062 backtrack_common *backtrack;
6063
6064 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6065
6066 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6067
6068 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6069 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6070 SLJIT_ASSERT(common->capture_last_ptr != 0);
6071 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6072 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6073
6074 /* These pointer sized fields temporarily store internal variables:
   offset_vector holds STR_PTR and subject holds OVECTOR(0). do_callout()
   reads both as pointers and then overwrites them. */
6075 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6076 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6078
/* The mark field is loaded from the mark_ptr member of the jit_arguments
   (via TMP1) when common->mark_ptr is non-zero; otherwise it is set to 0. */
6079 if (common->mark_ptr != 0)
6080 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6081 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6082 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6083 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6084
6085 /* Needed to save important temporary registers: STACK_TOP is kept in
   LOCALS0 across the call and reloaded afterwards.
   NOTE(review): SLJIT_SCRATCH_REG2 and SLJIT_SCRATCH_REG3 presumably carry
   the callout_block and jit_ovector arguments of do_callout() - confirm
   against the SLJIT_CALL3 calling convention. */
6086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6087 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6088 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6089 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): the SLJIT_MOV_SI of the return register onto itself
   presumably sign-extends the int result - confirm in the sljit docs. */
6090 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6091 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6092 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6093
6094 /* Check return value: greater than zero backtracks, less than zero
   leaves through the forced quit path, zero continues matching. */
6095 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6096 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6097 if (common->forced_quit_label == NULL)
6098 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6099 else
6100 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6101 return cc + 2 + 2 * LINK_SIZE;
6102 }
6103
/* The callout helper macros are not needed past this point. */
6104 #undef CALLOUT_ARG_SIZE
6105 #undef CALLOUT_ARG_OFFSET
6106
/* Emits the matching path of an assertion whose opcode lies between
   OP_ASSERT and OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
   OP_BRAMINZERO.

   Arguments:
     common      - compiler state; its accept/quit related fields and
                   then_trap are saved on entry and restored before every
                   return
     cc          - pointer to the assertion (or to the preceding
                   OP_BRAZERO / OP_BRAMINZERO)
     backtrack   - assert backtrack; framesize, private_data_ptr and, for
                   the zero-repeat forms, matchingpath are filled in here
     conditional - when set, failure jumps are collected on
                   backtrack->condfailed instead of
                   backtrack->common.topbacktracks

   Every alternative is compiled in turn using a local backtrack_common.

   Returns the address just past the closing item (cc + 1 + LINK_SIZE), or
   NULL when sljit reports a compiler error. */
6107 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6108 {
6109 DEFINE_COMPILER;
6110 int framesize;
6111 int extrasize;
6112 BOOL needs_control_head;
6113 int private_data_ptr;
6114 backtrack_common altbacktrack;
6115 pcre_uchar *ccbegin;
6116 pcre_uchar opcode;
6117 pcre_uchar bra = OP_BRA;
6118 jump_list *tmp = NULL;
6119 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6120 jump_list **found;
6121 /* Saving previous accept variables. */
6122 BOOL save_local_exit = common->local_exit;
6123 BOOL save_positive_assert = common->positive_assert;
6124 then_trap_backtrack *save_then_trap = common->then_trap;
6125 struct sljit_label *save_quit_label = common->quit_label;
6126 struct sljit_label *save_accept_label = common->accept_label;
6127 jump_list *save_quit = common->quit;
6128 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6129 jump_list *save_accept = common->accept;
6130 struct sljit_jump *jump;
6131 struct sljit_jump *brajump = NULL;
6132
6133 /* Assert captures then. */
6134 common->then_trap = NULL;
6135
6136 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6137 {
6138 SLJIT_ASSERT(!conditional);
6139 bra = *cc;
6140 cc++;
6141 }
6142 private_data_ptr = PRIVATE_DATA(cc);
6143 SLJIT_ASSERT(private_data_ptr != 0);
6144 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6145 backtrack->framesize = framesize;
6146 backtrack->private_data_ptr = private_data_ptr;
6147 opcode = *cc;
6148 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative jumps to the list selected here: the local list
   tmp for positive asserts (bound at "Assert is successful" below), or
   target itself for negative asserts. */
6149 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6150 ccbegin = cc;
6151 cc += GET(cc, 1);
6152
6153 if (bra == OP_BRAMINZERO)
6154 {
6155 /* This is a braminzero backtrack path. brajump is taken when the value
   popped from the stack is 0. */
6156 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6157 free_stack(common, 1);
6158 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6159 }
6160
/* Reserve the stack slots. STR_PTR is always stored at STACK(0); the
   previous control head is saved as well when needs_control_head is set,
   and the non-negative framesize case additionally saves the previous
   private_data_ptr value and initializes a frame. */
6161 if (framesize < 0)
6162 {
6163 extrasize = needs_control_head ? 2 : 1;
6164 if (framesize == no_frame)
6165 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6166 allocate_stack(common, extrasize);
6167 if (needs_control_head)
6168 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6170 if (needs_control_head)
6171 {
6172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6173 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6174 }
6175 }
6176 else
6177 {
6178 extrasize = needs_control_head ? 3 : 2;
6179 allocate_stack(common, framesize + extrasize);
6180 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6181 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6182 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6183 if (needs_control_head)
6184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6186 if (needs_control_head)
6187 {
6188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6191 }
6192 else
6193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6194 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6195 }
6196
6197 memset(&altbacktrack, 0, sizeof(backtrack_common));
6198 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6199 {
6200 /* Negative assert is stronger than positive assert. */
6201 common->local_exit = TRUE;
6202 common->quit_label = NULL;
6203 common->quit = NULL;
6204 common->positive_assert = FALSE;
6205 }
6206 else
6207 common->positive_assert = TRUE;
6208 common->positive_assert_quit = NULL;
6209
/* Compile the alternatives one by one; the loop ends at the first item
   after an alternative that is not OP_ALT. */
6210 while (1)
6211 {
6212 common->accept_label = NULL;
6213 common->accept = NULL;
6214 altbacktrack.top = NULL;
6215 altbacktrack.topbacktracks = NULL;
6216
/* Later alternatives restart from the STR_PTR saved at STACK(0). */
6217 if (*ccbegin == OP_ALT)
6218 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6219
6220 altbacktrack.cc = ccbegin;
6221 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6222 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6223 {
/* Compilation failed: restore the saved state and give up. */
6224 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6225 {
6226 common->local_exit = save_local_exit;
6227 common->quit_label = save_quit_label;
6228 common->quit = save_quit;
6229 }
6230 common->positive_assert = save_positive_assert;
6231 common->then_trap = save_then_trap;
6232 common->accept_label = save_accept_label;
6233 common->positive_assert_quit = save_positive_assert_quit;
6234 common->accept = save_accept;
6235 return NULL;
6236 }
6237 common->accept_label = LABEL();
6238 if (common->accept != NULL)
6239 set_jumps(common->accept, common->accept_label);
6240
6241 /* Reset stack. */
6242 if (framesize < 0)
6243 {
6244 if (framesize == no_frame)
6245 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6246 else
6247 free_stack(common, extrasize);
6248 if (needs_control_head)
6249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6250 }
6251 else
6252 {
6253 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6254 {
6255 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6256 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6257 if (needs_control_head)
6258 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6259 }
6260 else
6261 {
6262 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6263 if (needs_control_head)
6264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6265 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6266 }
6267 }
6268
6269 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6270 {
6271 /* We know that STR_PTR was stored on the top of the stack. */
6272 if (conditional)
6273 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6274 else if (bra == OP_BRAZERO)
6275 {
6276 if (framesize < 0)
6277 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6278 else
6279 {
6280 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6281 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6283 }
6284 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6285 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6286 }
6287 else if (framesize >= 0)
6288 {
6289 /* For OP_BRA and OP_BRAMINZERO. */
6290 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6291 }
6292 }
/* The alternative matched: leave through the list selected by found. */
6293 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6294
6295 compile_backtrackingpath(common, altbacktrack.top);
6296 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6297 {
/* Compilation failed: restore the saved state and give up. */
6298 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6299 {
6300 common->local_exit = save_local_exit;
6301 common->quit_label = save_quit_label;
6302 common->quit = save_quit;
6303 }
6304 common->positive_assert = save_positive_assert;
6305 common->then_trap = save_then_trap;
6306 common->accept_label = save_accept_label;
6307 common->positive_assert_quit = save_positive_assert_quit;
6308 common->accept = save_accept;
6309 return NULL;
6310 }
6311 set_jumps(altbacktrack.topbacktracks, LABEL());
6312
6313 if (*cc != OP_ALT)
6314 break;
6315
6316 ccbegin = cc;
6317 cc += GET(cc, 1);
6318 }
6319
6320 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6321 {
6322 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6323 /* Makes the check less complicated below. */
6324 common->positive_assert_quit = common->quit;
6325 }
6326
6327 /* None of them matched. */
6328 if (common->positive_assert_quit != NULL)
6329 {
6330 jump = JUMP(SLJIT_JUMP);
6331 set_jumps(common->positive_assert_quit, LABEL());
6332 SLJIT_ASSERT(framesize != no_stack);
6333 if (framesize < 0)
6334 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6335 else
6336 {
6337 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6338 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6339 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6340 }
6341 JUMPHERE(jump);
6342 }
6343
6344 if (needs_control_head)
6345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6346
6347 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6348 {
6349 /* Assert is failed. */
6350 if (conditional || bra == OP_BRAZERO)
6351 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6352
6353 if (framesize < 0)
6354 {
6355 /* The topmost item should be 0. */
6356 if (bra == OP_BRAZERO)
6357 {
6358 if (extrasize == 2)
6359 free_stack(common, 1);
6360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6361 }
6362 else
6363 free_stack(common, extrasize);
6364 }
6365 else
6366 {
6367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6368 /* The topmost item should be 0. */
6369 if (bra == OP_BRAZERO)
6370 {
6371 free_stack(common, framesize + extrasize - 1);
6372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6373 }
6374 else
6375 free_stack(common, framesize + extrasize);
6376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6377 }
/* Failure leaves through target, except for OP_BRAZERO where the jump is
   bound to the matching path label further below. */
6378 jump = JUMP(SLJIT_JUMP);
6379 if (bra != OP_BRAZERO)
6380 add_jump(compiler, target, jump);
6381
6382 /* Assert is successful. */
6383 set_jumps(tmp, LABEL());
6384 if (framesize < 0)
6385 {
6386 /* We know that STR_PTR was stored on the top of the stack. */
6387 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6388 /* Keep the STR_PTR on the top of the stack. */
6389 if (bra == OP_BRAZERO)
6390 {
6391 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6392 if (extrasize == 2)
6393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6394 }
6395 else if (bra == OP_BRAMINZERO)
6396 {
6397 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6399 }
6400 }
6401 else
6402 {
6403 if (bra == OP_BRA)
6404 {
6405 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6406 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6407 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6408 }
6409 else
6410 {
6411 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6412 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6413 if (extrasize == 2)
6414 {
6415 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6416 if (bra == OP_BRAMINZERO)
6417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6418 }
6419 else
6420 {
6421 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6423 }
6424 }
6425 }
6426
6427 if (bra == OP_BRAZERO)
6428 {
6429 backtrack->matchingpath = LABEL();
6430 SET_LABEL(jump, backtrack->matchingpath);
6431 }
6432 else if (bra == OP_BRAMINZERO)
6433 {
6434 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6435 JUMPHERE(brajump);
6436 if (framesize >= 0)
6437 {
6438 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6439 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6440 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6441 }
6442 set_jumps(backtrack->common.topbacktracks, LABEL());
6443 }
6444 }
6445 else
6446 {
6447 /* AssertNot is successful. */
6448 if (framesize < 0)
6449 {
6450 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6451 if (bra != OP_BRA)
6452 {
6453 if (extrasize == 2)
6454 free_stack(common, 1);
6455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6456 }
6457 else
6458 free_stack(common, extrasize);
6459 }
6460 else
6461 {
6462 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6464 /* The topmost item should be 0. */
6465 if (bra != OP_BRA)
6466 {
6467 free_stack(common, framesize + extrasize - 1);
6468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6469 }
6470 else
6471 free_stack(common, framesize + extrasize);
6472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6473 }
6474
6475 if (bra == OP_BRAZERO)
6476 backtrack->matchingpath = LABEL();
6477 else if (bra == OP_BRAMINZERO)
6478 {
6479 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6480 JUMPHERE(brajump);
6481 }
6482
6483 if (bra != OP_BRA)
6484 {
6485 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6486 set_jumps(backtrack->common.topbacktracks, LABEL());
6487 backtrack->common.topbacktracks = NULL;
6488 }
6489 }
6490
/* Restore the state saved on entry. */
6491 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6492 {
6493 common->local_exit = save_local_exit;
6494 common->quit_label = save_quit_label;
6495 common->quit = save_quit;
6496 }
6497 common->positive_assert = save_positive_assert;
6498 common->then_trap = save_then_trap;
6499 common->accept_label = save_accept_label;
6500 common->positive_assert_quit = save_positive_assert_quit;
6501 common->accept = save_accept;
6502 return cc + 1 + LINK_SIZE;
6503 }
6504
/* Emits the stack reset performed when the end of a bracket is reached
   (NOTE(review): judging by the name this serves OP_ONCE brackets -
   confirm at the call sites).

   Arguments:
     common             - compiler state
     ket                - closing opcode of the bracket (OP_KET, OP_KETRMAX
                          or OP_KETRMIN are the values tested here)
     framesize          - frame size of the bracket; negative values select
                          the frameless handling
     private_data_ptr   - offset of the bracket's local variable
     has_alternatives   - whether the bracket has alternatives
     needs_control_head - when set, the saved control head is reloaded from
                          the stack into TMP1 and written back to
                          common->control_head_ptr

   For OP_KETRMAX, TMP2 is loaded from the top of the stack as a side
   effect of the emitted code. */
6505 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6506 {
6507 DEFINE_COMPILER;
6508 int stacksize;
6509
6510 if (framesize < 0)
6511 {
/* no_frame: STACK_TOP is reloaded from the local variable. Otherwise one
   slot is freed for the control head (if saved) and one more when the
   bracket repeats or has alternatives. */
6512 if (framesize == no_frame)
6513 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6514 else
6515 {
6516 stacksize = needs_control_head ? 1 : 0;
6517 if (ket != OP_KET || has_alternatives)
6518 stacksize++;
6519 free_stack(common, stacksize);
6520 }
6521
6522 if (needs_control_head)
6523 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6524
6525 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6526 if (ket == OP_KETRMAX)
6527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6528 else if (ket == OP_KETRMIN)
6529 {
6530 /* Move the STR_PTR to the private_data_ptr. */
6531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6532 }
6533 }
6534 else
6535 {
/* With a frame, STACK_TOP is recomputed from the local variable, skipping
   the frame plus one or two extra slots. */
6536 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6537 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6538 if (needs_control_head)
6539 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6540
6541 if (ket == OP_KETRMAX)
6542 {
6543 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6544 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6545 }
6546 }
/* Write back the control head value loaded into TMP1 above. */
6547 if (needs_control_head)
6548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6549 }
6550
/* Emits the code that saves the previous capture state on the stack and
   records the new capture.

   Arguments:
     common           - compiler state
     stacksize        - index of the first stack slot to use
     offset           - offset vector index of the capture start; offset >> 1
                        is used as the bracket number
     private_data_ptr - offset of the local variable whose value becomes the
                        new OVECTOR(offset)

   When common->capture_last_ptr is non-zero, the previous value of that
   local is pushed and replaced by offset >> 1 (one slot). When
   common->optimized_cbracket[offset >> 1] is 0, the previous
   OVECTOR(offset) and OVECTOR(offset + 1) are pushed (two slots), then
   OVECTOR(offset) is set from the private_data_ptr local and
   OVECTOR(offset + 1) from STR_PTR.

   No stack is allocated here; the slots must already be available.

   Returns the index following the last slot written. */
6551 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6552 {
6553 DEFINE_COMPILER;
6554
6555 if (common->capture_last_ptr != 0)
6556 {
6557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6558 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6559 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6560 stacksize++;
6561 }
6562 if (common->optimized_cbracket[offset >> 1] == 0)
6563 {
/* TMP1 is reused: first for the old OVECTOR(offset), then for the value of
   the private_data_ptr local. */
6564 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6565 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6569 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6570 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6571 stacksize += 2;
6572 }
6573 return stacksize;
6574 }
6575
6576 /*
6577 Handling bracketed expressions is probably the most complex part.
6578
6579 Stack layout naming characters:
6580 S - Push the current STR_PTR
6581 0 - Push a 0 (NULL)
6582 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6583 before the next alternative. Not pushed if there are no alternatives.
6584 M - Any values pushed by the current alternative. Can be empty, or anything.
6585 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6586 L - Push the previous local (pointed by localptr) to the stack
6587 () - optional values stored on the stack
6588 ()* - optional, can be stored multiple times
6589
6590 The following list shows the regular expression templates, their PCRE byte codes
6591 and stack layout supported by pcre-sljit.
6592
6593 (?:) OP_BRA | OP_KET A M
6594 () OP_CBRA | OP_KET C M
6595 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6596 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6597 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6598 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6599 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6600 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6601 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6602 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6603 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6604 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6605 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6606 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6607 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6608 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6609 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6610 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6611 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6612 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6613 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6614 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6615
6616
6617 Stack layout naming characters:
6618 A - Push the alternative index (starting from 0) on the stack.
6619 Not pushed if there are no alternatives.
6620 M - Any values pushed by the current alternative. Can be empty, or anything.
6621
6622 The next list shows the possible content of a bracket:
6623 (|) OP_*BRA | OP_ALT ... M A
6624 (?()|) OP_*COND | OP_ALT M A
6625 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6626 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6627 Or nothing, if trace is unnecessary
6628 */
6629
6630 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6631 {
6632 DEFINE_COMPILER;
6633 backtrack_common *backtrack;
6634 pcre_uchar opcode;
6635 int private_data_ptr = 0;
6636 int offset = 0;
6637 int i, stacksize;
6638 int repeat_ptr = 0, repeat_length = 0;
6639 int repeat_type = 0, repeat_count = 0;
6640 pcre_uchar *ccbegin;
6641 pcre_uchar *matchingpath;
6642 pcre_uchar *slot;
6643 pcre_uchar bra = OP_BRA;
6644 pcre_uchar ket;
6645 assert_backtrack *assert;
6646 BOOL has_alternatives;
6647 BOOL needs_control_head = FALSE;
6648 struct sljit_jump *jump;
6649 struct sljit_jump *skip;
6650 struct sljit_label *rmax_label = NULL;
6651 struct sljit_jump *braminzero = NULL;
6652
6653 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6654
6655 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6656 {
6657 bra = *cc;
6658 cc++;
6659 opcode = *cc;
6660 }
6661
6662 opcode = *cc;
6663 ccbegin = cc;
6664 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6665 ket = *matchingpath;
6666 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6667 {
6668 repeat_ptr = PRIVATE_DATA(matchingpath);
6669 repeat_length = PRIVATE_DATA(matchingpath + 1);
6670 repeat_type = PRIVATE_DATA(matchingpath + 2);
6671 repeat_count = PRIVATE_DATA(matchingpath + 3);
6672 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6673 if (repeat_type == OP_UPTO)
6674 ket = OP_KETRMAX;
6675 if (repeat_type == OP_MINUPTO)
6676 ket = OP_KETRMIN;
6677 }
6678
6679 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6680 {
6681 /* Drop this bracket_backtrack. */
6682 parent->top = backtrack->prev;
6683 return matchingpath + 1 + LINK_SIZE + repeat_length;
6684 }
6685
6686 matchingpath = ccbegin + 1 + LINK_SIZE;
6687 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6688 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6689 cc += GET(cc, 1);
6690
6691 has_alternatives = *cc == OP_ALT;
6692 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6693 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6694
6695 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6696 opcode = OP_SCOND;
6697 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6698 opcode = OP_ONCE;
6699
6700 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6701 {
6702 /* Capturing brackets has a pre-allocated space. */
6703 offset = GET2(ccbegin, 1 + LINK_SIZE);
6704 if (common->optimized_cbracket[offset] == 0)
6705 {
6706 private_data_ptr = OVECTOR_PRIV(offset);
6707 offset <<= 1;
6708 }
6709 else
6710 {
6711 offset <<= 1;
6712 private_data_ptr = OVECTOR(offset);
6713 }
6714 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6715 matchingpath += IMM2_SIZE;
6716 }
6717 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6718 {
6719 /* Other brackets simply allocate the next entry. */
6720 private_data_ptr = PRIVATE_DATA(ccbegin);
6721 SLJIT_ASSERT(private_data_ptr != 0);
6722 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6723 if (opcode == OP_ONCE)
6724 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6725 }
6726
6727 /* Instructions before the first alternative. */
6728 stacksize = 0;
6729 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6730 stacksize++;
6731 if (bra == OP_BRAZERO)
6732 stacksize++;
6733
6734 if (stacksize > 0)
6735 allocate_stack(common, stacksize);
6736
6737 stacksize = 0;
6738 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6739 {
6740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6741