/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1587 - (show annotations)
Sat Aug 8 06:02:16 2015 UTC (4 years, 2 months ago) by zherczeg
File MIME type: text/plain
File size: 331380 byte(s)
The JIT compiler did not restore the control verb head in case of *THEN control verbs.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined to a non-zero value. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called as the matching path (expected path), and the other is called as
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 suppose we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block passed to the generated code: the jit_function type
declared later in this file takes a pointer to this structure as its
only parameter.
NOTE(review): the "pointers first" grouping suggests the generated code
depends on the member layout - confirm before reordering any member. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  pcre_uint32 limit_match;
  int real_offset_count;
  int offset_count;
  /* Single byte flags. NOTE(review): presumably mirror the PCRE_NOTBOL,
  PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART options - confirm. */
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
179
/* Bookkeeping for the compiled code of one pattern. The three arrays
have one entry for each of the JIT_NUMBER_OF_COMPILE_MODES modes. */
typedef struct executable_functions {
  /* Generated function of each compile mode. */
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  /* Head of the read-only data chain of each compile mode. */
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  /* Size of the generated code of each compile mode. */
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  /* User supplied callback and the opaque pointer stored with it. */
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  pcre_uint32 limit_match;
} executable_functions;
189
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  /* Next node of the list. */
  struct jump_list *next;
} jump_list;
194
/* Node of a singly linked list of stubs. Each node pairs a jump (start)
with a label (quit).
NOTE(review): presumably start enters the out-of-line stub code and quit
is where execution resumes afterwards - confirm at the place of use. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  /* Next node of the list. */
  struct stub_list *next;
} stub_list;
200
/* Node of a singly linked list which pairs a label with the location of
a machine word (update_addr).
NOTE(review): presumably the address of the label is written to
*update_addr once the code is generated - confirm at the place of use. */
typedef struct label_addr_list {
  struct sljit_label *label;
  sljit_uw *update_addr;
  /* Next node of the list. */
  struct label_addr_list *next;
} label_addr_list;
206
/* Negative marker values.
NOTE(review): presumably used in place of a real frame size (the
framesize members of the *_backtrack structures in this file are
documented as "less than 0" when no frame is needed) - confirm. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
211
/* Kinds of control entries.
NOTE(review): presumably the entry types stored on the control verb
chain (see control_head_ptr in compiler_common) - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
216
/* Type of a generated function: it takes a pointer to the argument
block and returns an int, using the SLJIT_CALL calling convention. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. It must be the first member
of its descendants (the *_backtrack structures), so a pointer to a
descendant can also be used as a pointer to backtrack_common. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
233
/* Backtrack data of assertions. */
typedef struct assert_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
244
/* Backtrack data of brackets. */
typedef struct bracket_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. Only one member of
  the union is meaningful at a time. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
264
/* Backtrack data of possessive brackets.
NOTE(review): the name suggests the OP_*POS bracket opcodes - confirm. */
typedef struct bracketpos_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
274
/* Backtrack data which only adds a matching path label to the common
part. NOTE(review): the name suggests OP_BRAMINZERO - confirm. */
typedef struct braminzero_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
279
/* Backtrack data of iterators. */
typedef struct iterator_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
285
/* Node of the singly linked list of recursion entry points (see the
entries and currententry members of compiler_common). */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
295
/* Backtrack data of recursions. */
typedef struct recurse_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  /* NOTE(review): presumably TRUE when the recursed pattern was compiled
  in place instead of being called as a function - confirm. */
  BOOL inlined_pattern;
} recurse_backtrack;
300
/* Extra opcode value for then traps. It is defined as OP_TABLE_LENGTH.
NOTE(review): presumably chosen because that is the first value which
is not a real opcode - confirm. */
#define OP_THEN_TRAP OP_TABLE_LENGTH
302
/* Backtrack data of then traps (see the then_trap member of
compiler_common: the current position where a THEN must jump). */
typedef struct then_trap_backtrack {
  /* Must be the first member. */
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
315
/* Upper limit of the size of a range.
NOTE(review): the code using this limit is outside the part of the file
reviewed here - confirm what kind of range it bounds. */
#define MAX_RANGE_SIZE 4
317
/* Shared state of one compilation. A pointer to this structure (called
common) is passed to the code generator functions. The members whose
name ends with _ptr and which have int type hold offsets of private
data words (check_opcode_types assigns them from ovector_start), where
0 means that the word is not allocated. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* First byte code. */
  pcre_uchar *start;
  /* Maps private data offset to each opcode. */
  sljit_si *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  pcre_uint8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  pcre_uint8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  int cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. */
  int recursive_head_ptr;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. */
  int mark_ptr;
  /* Recursive control verb management chain. */
  int control_head_ptr;
  /* Points to the last matched capture block index. */
  int capture_last_ptr;
  /* Points to the starting position of the current match. */
  int start_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* TRUE, when minlength is greater than 0.
  NOTE(review): the comment above looks inverted compared to the name of
  the member (check_opcode_types also sets it to TRUE for OP_SET_SOM) -
  confirm against the place where the member is initialized. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* Needs to know the start position anytime. */
  BOOL needs_start_ptr;
  /* Currently in recurse or negative assert. */
  BOOL local_exit;
  /* Currently in a positive assert. */
  BOOL positive_assert;
  /* Newline control. */
  int nltype;
  pcre_uint32 nlmax;
  pcre_uint32 nlmin;
  int newline;
  int bsr_nltype;
  pcre_uint32 bsr_nlmax;
  pcre_uint32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  pcre_uchar *name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  label_addr_list *label_addrs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assert_quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL jscript_compat;
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadchar;
  jump_list *utfreadchar16;
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
433
/* For byte_sequence_compare. Everything after sourcereg exists only
when SLJIT_UNALIGNED is defined to a non-zero value. The c and oc
unions have the same layout: the same storage can be accessed as an
int, as an unsigned short, or as an array of character units whose
element type and count depend on the character width of the library
(four 8 bit, two 16 bit or one 32 bit unit). */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
467
/* Undefine sljit macros. CMP is redefined below with this file's own
meaning. */
#undef CMP

/* Used for accessing the elements of the stack. The result is a
negative byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is
-2 * sizeof(sljit_sw), and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Names of the sljit registers used by the code generator. */
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4

/* Local space layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. All of the macros below expect a compiler_common pointer
called common in the scope where they are used. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset assigned to the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])

/* Move operations for one character unit, selected by the character
width of the library (8, 16 or 32 bit). */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif

/* Shortcuts. DEFINE_COMPILER declares a local variable called compiler
(it needs a compiler_common pointer called common), and the other
shortcuts pass that variable to the sljit functions. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump whose target is an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label here and makes it the target of an earlier jump. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* NOTE(review): presumably the "no upper limit" value for the character
reading helpers - the users are outside the part reviewed here, confirm. */
#define READ_CHAR_MAX 0x7fffffff
543
544 static pcre_uchar *bracketend(pcre_uchar *cc)
545 {
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
549 cc += 1 + LINK_SIZE;
550 return cc;
551 }
552
553 static int no_alternatives(pcre_uchar *cc)
554 {
555 int count = 0;
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
557 do
558 {
559 cc += GET(cc, 1);
560 count++;
561 }
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
564 return count;
565 }
566
/* Number of set bits of every 4 bit value: ones_in_half_byte[i] is the
bit count of i (0 <= i <= 15). */
static int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
571
572 /* Functions which might need modification whenever a new opcode is supported:
573 next_opcode
574 check_opcode_types
575 set_private_data_ptrs
576 get_framesize
577 init_frame
578 get_private_data_copy_length
579 copy_private_data
580 compile_matchingpath
581 compile_backtrackingpath
582 */
583
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
585 {
586 SLJIT_UNUSED_ARG(common);
587 switch(*cc)
588 {
589 case OP_SOD:
590 case OP_SOM:
591 case OP_SET_SOM:
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
594 case OP_NOT_DIGIT:
595 case OP_DIGIT:
596 case OP_NOT_WHITESPACE:
597 case OP_WHITESPACE:
598 case OP_NOT_WORDCHAR:
599 case OP_WORDCHAR:
600 case OP_ANY:
601 case OP_ALLANY:
602 case OP_NOTPROP:
603 case OP_PROP:
604 case OP_ANYNL:
605 case OP_NOT_HSPACE:
606 case OP_HSPACE:
607 case OP_NOT_VSPACE:
608 case OP_VSPACE:
609 case OP_EXTUNI:
610 case OP_EODN:
611 case OP_EOD:
612 case OP_CIRC:
613 case OP_CIRCM:
614 case OP_DOLL:
615 case OP_DOLLM:
616 case OP_CRSTAR:
617 case OP_CRMINSTAR:
618 case OP_CRPLUS:
619 case OP_CRMINPLUS:
620 case OP_CRQUERY:
621 case OP_CRMINQUERY:
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 case OP_CRPOSSTAR:
625 case OP_CRPOSPLUS:
626 case OP_CRPOSQUERY:
627 case OP_CRPOSRANGE:
628 case OP_CLASS:
629 case OP_NCLASS:
630 case OP_REF:
631 case OP_REFI:
632 case OP_DNREF:
633 case OP_DNREFI:
634 case OP_RECURSE:
635 case OP_CALLOUT:
636 case OP_ALT:
637 case OP_KET:
638 case OP_KETRMAX:
639 case OP_KETRMIN:
640 case OP_KETRPOS:
641 case OP_REVERSE:
642 case OP_ASSERT:
643 case OP_ASSERT_NOT:
644 case OP_ASSERTBACK:
645 case OP_ASSERTBACK_NOT:
646 case OP_ONCE:
647 case OP_ONCE_NC:
648 case OP_BRA:
649 case OP_BRAPOS:
650 case OP_CBRA:
651 case OP_CBRAPOS:
652 case OP_COND:
653 case OP_SBRA:
654 case OP_SBRAPOS:
655 case OP_SCBRA:
656 case OP_SCBRAPOS:
657 case OP_SCOND:
658 case OP_CREF:
659 case OP_DNCREF:
660 case OP_RREF:
661 case OP_DNRREF:
662 case OP_DEF:
663 case OP_BRAZERO:
664 case OP_BRAMINZERO:
665 case OP_BRAPOSZERO:
666 case OP_PRUNE:
667 case OP_SKIP:
668 case OP_THEN:
669 case OP_COMMIT:
670 case OP_FAIL:
671 case OP_ACCEPT:
672 case OP_ASSERT_ACCEPT:
673 case OP_CLOSE:
674 case OP_SKIPZERO:
675 return cc + PRIV(OP_lengths)[*cc];
676
677 case OP_CHAR:
678 case OP_CHARI:
679 case OP_NOT:
680 case OP_NOTI:
681 case OP_STAR:
682 case OP_MINSTAR:
683 case OP_PLUS:
684 case OP_MINPLUS:
685 case OP_QUERY:
686 case OP_MINQUERY:
687 case OP_UPTO:
688 case OP_MINUPTO:
689 case OP_EXACT:
690 case OP_POSSTAR:
691 case OP_POSPLUS:
692 case OP_POSQUERY:
693 case OP_POSUPTO:
694 case OP_STARI:
695 case OP_MINSTARI:
696 case OP_PLUSI:
697 case OP_MINPLUSI:
698 case OP_QUERYI:
699 case OP_MINQUERYI:
700 case OP_UPTOI:
701 case OP_MINUPTOI:
702 case OP_EXACTI:
703 case OP_POSSTARI:
704 case OP_POSPLUSI:
705 case OP_POSQUERYI:
706 case OP_POSUPTOI:
707 case OP_NOTSTAR:
708 case OP_NOTMINSTAR:
709 case OP_NOTPLUS:
710 case OP_NOTMINPLUS:
711 case OP_NOTQUERY:
712 case OP_NOTMINQUERY:
713 case OP_NOTUPTO:
714 case OP_NOTMINUPTO:
715 case OP_NOTEXACT:
716 case OP_NOTPOSSTAR:
717 case OP_NOTPOSPLUS:
718 case OP_NOTPOSQUERY:
719 case OP_NOTPOSUPTO:
720 case OP_NOTSTARI:
721 case OP_NOTMINSTARI:
722 case OP_NOTPLUSI:
723 case OP_NOTMINPLUSI:
724 case OP_NOTQUERYI:
725 case OP_NOTMINQUERYI:
726 case OP_NOTUPTOI:
727 case OP_NOTMINUPTOI:
728 case OP_NOTEXACTI:
729 case OP_NOTPOSSTARI:
730 case OP_NOTPOSPLUSI:
731 case OP_NOTPOSQUERYI:
732 case OP_NOTPOSUPTOI:
733 cc += PRIV(OP_lengths)[*cc];
734 #ifdef SUPPORT_UTF
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
736 #endif
737 return cc;
738
739 /* Special cases. */
740 case OP_TYPESTAR:
741 case OP_TYPEMINSTAR:
742 case OP_TYPEPLUS:
743 case OP_TYPEMINPLUS:
744 case OP_TYPEQUERY:
745 case OP_TYPEMINQUERY:
746 case OP_TYPEUPTO:
747 case OP_TYPEMINUPTO:
748 case OP_TYPEEXACT:
749 case OP_TYPEPOSSTAR:
750 case OP_TYPEPOSPLUS:
751 case OP_TYPEPOSQUERY:
752 case OP_TYPEPOSUPTO:
753 return cc + PRIV(OP_lengths)[*cc] - 1;
754
755 case OP_ANYBYTE:
756 #ifdef SUPPORT_UTF
757 if (common->utf) return NULL;
758 #endif
759 return cc + 1;
760
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
762 case OP_XCLASS:
763 return cc + GET(cc, 1);
764 #endif
765
766 case OP_MARK:
767 case OP_PRUNE_ARG:
768 case OP_SKIP_ARG:
769 case OP_THEN_ARG:
770 return cc + 1 + 2 + cc[1];
771
772 default:
773 /* All opcodes are supported now! */
774 SLJIT_ASSERT_STOP();
775 return NULL;
776 }
777 }
778
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
780 {
781 int count;
782 pcre_uchar *slot;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 switch(*cc)
788 {
789 case OP_SET_SOM:
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
805 break;
806
807 case OP_COND:
808 case OP_SCOND:
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
812 return FALSE;
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CREF:
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
818 cc += 1 + IMM2_SIZE;
819 break;
820
821 case OP_DNREF:
822 case OP_DNREFI:
823 case OP_DNCREF:
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
826 while (count-- > 0)
827 {
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
830 }
831 cc += 1 + 2 * IMM2_SIZE;
832 break;
833
834 case OP_RECURSE:
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
837 {
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CALLOUT:
845 if (common->capture_last_ptr == 0)
846 {
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
849 }
850 cc += 2 + 2 * LINK_SIZE;
851 break;
852
853 case OP_THEN_ARG:
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
856 /* Fall through. */
857
858 case OP_PRUNE_ARG:
859 common->needs_start_ptr = TRUE;
860 /* Fall through. */
861
862 case OP_MARK:
863 if (common->mark_ptr == 0)
864 {
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + 2 + cc[1];
869 break;
870
871 case OP_THEN:
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
874 /* Fall through. */
875
876 case OP_PRUNE:
877 case OP_SKIP:
878 common->needs_start_ptr = TRUE;
879 cc += 1;
880 break;
881
882 case OP_SKIP_ARG:
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
885 cc += 1 + 2 + cc[1];
886 break;
887
888 default:
889 cc = next_opcode(common, cc);
890 if (cc == NULL)
891 return FALSE;
892 break;
893 }
894 }
895 return TRUE;
896 }
897
898 static int get_class_iterator_size(pcre_uchar *cc)
899 {
900 switch(*cc)
901 {
902 case OP_CRSTAR:
903 case OP_CRPLUS:
904 return 2;
905
906 case OP_CRMINSTAR:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 return 1;
911
912 case OP_CRRANGE:
913 case OP_CRMINRANGE:
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
915 return 0;
916 return 2;
917
918 default:
919 return 0;
920 }
921 }
922
/* Checks whether the bracket which starts at begin is followed by
identical copies of itself, and records the detected repeat in
common->private_data_ptrs.

A record consists of three consecutive private_data_ptrs entries which
start right after the opcode of a closing OP_KET:
  [0] distance from the end of the recorded bracket to the end of the
      copies covered by the repeat
  [1] OP_EXACT, OP_UPTO or OP_MINUPTO
  [2] repeat count

Returns TRUE if an OP_EXACT record is stored for the bracket at begin
(now or by an earlier call), or if an OP_UPTO / OP_MINUPTO record is
stored and the bracket is not followed by further mandatory copies.
Returns FALSE in every other case. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
/* Length of the whole bracket, including the closing ket. */
sljit_sw length = end - begin;
int min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the mandatory part: the bracket itself plus the copies which
directly follow it and match it unit by unit. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are not converted. */
if (min == 2)
  return FALSE;

max = 0;
max_end = next;
/* Optional part: a chain of nested "zero opcode, OP_BRA, copy" groups.
Every iteration steps one level deeper. */
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost optional copy has no OP_BRA wrapper. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Every OP_BRA wrapper opened above must be closed by an OP_KET. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last mandatory copy is counted by the record above, so it
        is removed from the mandatory part. NOTE(review): presumably the
        link value read here is the length of that copy up to its ket -
        confirm. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Record the remaining mandatory copies as an exact repeat. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1007
/* Groups of case labels for the single character and character type
repeat opcodes. Each macro expands to a sequence of "case OP_...:"
labels and must be followed by the statements of the case. In
set_private_data_ptrs the _1 groups get space = 1, while the
PRIVATE_DATA_2A and PRIVATE_DATA_2B groups get space = 2 (2B also uses
a different size, since it contains the UPTO opcodes). */

/* Lazy star / plus and both query forms of the character repeats. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Greedy star / plus of the character repeats. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Greedy and lazy upto forms of the character repeats. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Lazy star / plus and both query forms of the type repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Greedy star / plus of the type repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Greedy and lazy upto forms of the type repeats. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1059
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1061 {
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067
1068 while (cc < ccend)
1069 {
1070 space = 0;
1071 size = 0;
1072 bracketlen = 0;
1073 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1074 break;
1075
1076 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1077 if (detect_repeat(common, cc))
1078 {
1079 /* These brackets are converted to repeats, so no global
1080 based single character repeat is allowed. */
1081 if (cc >= end)
1082 end = bracketend(cc);
1083 }
1084
1085 switch(*cc)
1086 {
1087 case OP_KET:
1088 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 cc += common->private_data_ptrs[cc + 1 - common->start];
1093 }
1094 cc += 1 + LINK_SIZE;
1095 break;
1096
1097 case OP_ASSERT:
1098 case OP_ASSERT_NOT:
1099 case OP_ASSERTBACK:
1100 case OP_ASSERTBACK_NOT:
1101 case OP_ONCE:
1102 case OP_ONCE_NC:
1103 case OP_BRAPOS:
1104 case OP_SBRA:
1105 case OP_SBRAPOS:
1106 case OP_SCOND:
1107 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1108 private_data_ptr += sizeof(sljit_sw);
1109 bracketlen = 1 + LINK_SIZE;
1110 break;
1111
1112 case OP_CBRAPOS:
1113 case OP_SCBRAPOS:
1114 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1115 private_data_ptr += sizeof(sljit_sw);
1116 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1117 break;
1118
1119 case OP_COND:
1120 /* Might be a hidden SCOND. */
1121 alternative = cc + GET(cc, 1);
1122 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1123 {
1124 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1125 private_data_ptr += sizeof(sljit_sw);
1126 }
1127 bracketlen = 1 + LINK_SIZE;
1128 break;
1129
1130 case OP_BRA:
1131 bracketlen = 1 + LINK_SIZE;
1132 break;
1133
1134 case OP_CBRA:
1135 case OP_SCBRA:
1136 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1137 break;
1138
1139 CASE_ITERATOR_PRIVATE_DATA_1
1140 space = 1;
1141 size = -2;
1142 break;
1143
1144 CASE_ITERATOR_PRIVATE_DATA_2A
1145 space = 2;
1146 size = -2;
1147 break;
1148
1149 CASE_ITERATOR_PRIVATE_DATA_2B
1150 space = 2;
1151 size = -(2 + IMM2_SIZE);
1152 break;
1153
1154 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1155 space = 1;
1156 size = 1;
1157 break;
1158
1159 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1160 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1161 space = 2;
1162 size = 1;
1163 break;
1164
1165 case OP_TYPEUPTO:
1166 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1167 space = 2;
1168 size = 1 + IMM2_SIZE;
1169 break;
1170
1171 case OP_TYPEMINUPTO:
1172 space = 2;
1173 size = 1 + IMM2_SIZE;
1174 break;
1175
1176 case OP_CLASS:
1177 case OP_NCLASS:
1178 size += 1 + 32 / sizeof(pcre_uchar);
1179 space = get_class_iterator_size(cc + size);
1180 break;
1181
1182 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1183 case OP_XCLASS:
1184 size = GET(cc, 1);
1185 space = get_class_iterator_size(cc + size);
1186 break;
1187 #endif
1188
1189 default:
1190 cc = next_opcode(common, cc);
1191 SLJIT_ASSERT(cc != NULL);
1192 break;
1193 }
1194
1195 /* Character iterators, which are not inside a repeated bracket,
1196 gets a private slot instead of allocating it on the stack. */
1197 if (space > 0 && cc >= end)
1198 {
1199 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1200 private_data_ptr += sizeof(sljit_sw) * space;
1201 }
1202
1203 if (size != 0)
1204 {
1205 if (size < 0)
1206 {
1207 cc += -size;
1208 #ifdef SUPPORT_UTF
1209 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1210 #endif
1211 }
1212 else
1213 cc += size;
1214 }
1215
1216 if (bracketlen > 0)
1217 {
1218 if (cc >= end)
1219 {
1220 end = bracketend(cc);
1221 if (end[-1 - LINK_SIZE] == OP_KET)
1222 end = NULL;
1223 }
1224 cc += bracketlen;
1225 }
1226 }
1227 *private_data_start = private_data_ptr;
1228 }
1229
1230 /* Returns with a frame_types (always < 0) if no need for frame. */
1231 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1232 {
1233 int length = 0;
1234 int possessive = 0;
1235 BOOL stack_restore = FALSE;
1236 BOOL setsom_found = recursive;
1237 BOOL setmark_found = recursive;
1238 /* The last capture is a local variable even for recursions. */
1239 BOOL capture_last_found = FALSE;
1240
1241 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1242 SLJIT_ASSERT(common->control_head_ptr != 0);
1243 *needs_control_head = TRUE;
1244 #else
1245 *needs_control_head = FALSE;
1246 #endif
1247
1248 if (ccend == NULL)
1249 {
1250 ccend = bracketend(cc) - (1 + LINK_SIZE);
1251 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1252 {
1253 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1254 /* This is correct regardless of common->capture_last_ptr. */
1255 capture_last_found = TRUE;
1256 }
1257 cc = next_opcode(common, cc);
1258 }
1259
1260 SLJIT_ASSERT(cc != NULL);
1261 while (cc < ccend)
1262 switch(*cc)
1263 {
1264 case OP_SET_SOM:
1265 SLJIT_ASSERT(common->has_set_som);
1266 stack_restore = TRUE;
1267 if (!setsom_found)
1268 {
1269 length += 2;
1270 setsom_found = TRUE;
1271 }
1272 cc += 1;
1273 break;
1274
1275 case OP_MARK:
1276 case OP_PRUNE_ARG:
1277 case OP_THEN_ARG:
1278 SLJIT_ASSERT(common->mark_ptr != 0);
1279 stack_restore = TRUE;
1280 if (!setmark_found)
1281 {
1282 length += 2;
1283 setmark_found = TRUE;
1284 }
1285 if (common->control_head_ptr != 0)
1286 *needs_control_head = TRUE;
1287 cc += 1 + 2 + cc[1];
1288 break;
1289
1290 case OP_RECURSE:
1291 stack_restore = TRUE;
1292 if (common->has_set_som && !setsom_found)
1293 {
1294 length += 2;
1295 setsom_found = TRUE;
1296 }
1297 if (common->mark_ptr != 0 && !setmark_found)
1298 {
1299 length += 2;
1300 setmark_found = TRUE;
1301 }
1302 if (common->capture_last_ptr != 0 && !capture_last_found)
1303 {
1304 length += 2;
1305 capture_last_found = TRUE;
1306 }
1307 cc += 1 + LINK_SIZE;
1308 break;
1309
1310 case OP_CBRA:
1311 case OP_CBRAPOS:
1312 case OP_SCBRA:
1313 case OP_SCBRAPOS:
1314 stack_restore = TRUE;
1315 if (common->capture_last_ptr != 0 && !capture_last_found)
1316 {
1317 length += 2;
1318 capture_last_found = TRUE;
1319 }
1320 length += 3;
1321 cc += 1 + LINK_SIZE + IMM2_SIZE;
1322 break;
1323
1324 case OP_THEN:
1325 stack_restore = TRUE;
1326 if (common->control_head_ptr != 0)
1327 *needs_control_head = TRUE;
1328 cc ++;
1329 break;
1330
1331 default:
1332 stack_restore = TRUE;
1333 /* Fall through. */
1334
1335 case OP_NOT_WORD_BOUNDARY:
1336 case OP_WORD_BOUNDARY:
1337 case OP_NOT_DIGIT:
1338 case OP_DIGIT:
1339 case OP_NOT_WHITESPACE:
1340 case OP_WHITESPACE:
1341 case OP_NOT_WORDCHAR:
1342 case OP_WORDCHAR:
1343 case OP_ANY:
1344 case OP_ALLANY:
1345 case OP_ANYBYTE:
1346 case OP_NOTPROP:
1347 case OP_PROP:
1348 case OP_ANYNL:
1349 case OP_NOT_HSPACE:
1350 case OP_HSPACE:
1351 case OP_NOT_VSPACE:
1352 case OP_VSPACE:
1353 case OP_EXTUNI:
1354 case OP_EODN:
1355 case OP_EOD:
1356 case OP_CIRC:
1357 case OP_CIRCM:
1358 case OP_DOLL:
1359 case OP_DOLLM:
1360 case OP_CHAR:
1361 case OP_CHARI:
1362 case OP_NOT:
1363 case OP_NOTI:
1364
1365 case OP_EXACT:
1366 case OP_POSSTAR:
1367 case OP_POSPLUS:
1368 case OP_POSQUERY:
1369 case OP_POSUPTO:
1370
1371 case OP_EXACTI:
1372 case OP_POSSTARI:
1373 case OP_POSPLUSI:
1374 case OP_POSQUERYI:
1375 case OP_POSUPTOI:
1376
1377 case OP_NOTEXACT:
1378 case OP_NOTPOSSTAR:
1379 case OP_NOTPOSPLUS:
1380 case OP_NOTPOSQUERY:
1381 case OP_NOTPOSUPTO:
1382
1383 case OP_NOTEXACTI:
1384 case OP_NOTPOSSTARI:
1385 case OP_NOTPOSPLUSI:
1386 case OP_NOTPOSQUERYI:
1387 case OP_NOTPOSUPTOI:
1388
1389 case OP_TYPEEXACT:
1390 case OP_TYPEPOSSTAR:
1391 case OP_TYPEPOSPLUS:
1392 case OP_TYPEPOSQUERY:
1393 case OP_TYPEPOSUPTO:
1394
1395 case OP_CLASS:
1396 case OP_NCLASS:
1397 case OP_XCLASS:
1398
1399 cc = next_opcode(common, cc);
1400 SLJIT_ASSERT(cc != NULL);
1401 break;
1402 }
1403
1404 /* Possessive quantifiers can use a special case. */
1405 if (SLJIT_UNLIKELY(possessive == length))
1406 return stack_restore ? no_frame : no_stack;
1407
1408 if (length > 0)
1409 return length + 1;
1410 return stack_restore ? no_frame : no_stack;
1411 }
1412
1413 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1414 {
1415 DEFINE_COMPILER;
1416 BOOL setsom_found = recursive;
1417 BOOL setmark_found = recursive;
1418 /* The last capture is a local variable even for recursions. */
1419 BOOL capture_last_found = FALSE;
1420 int offset;
1421
1422 /* >= 1 + shortest item size (2) */
1423 SLJIT_UNUSED_ARG(stacktop);
1424 SLJIT_ASSERT(stackpos >= stacktop + 2);
1425
1426 stackpos = STACK(stackpos);
1427 if (ccend == NULL)
1428 {
1429 ccend = bracketend(cc) - (1 + LINK_SIZE);
1430 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1431 cc = next_opcode(common, cc);
1432 }
1433
1434 SLJIT_ASSERT(cc != NULL);
1435 while (cc < ccend)
1436 switch(*cc)
1437 {
1438 case OP_SET_SOM:
1439 SLJIT_ASSERT(common->has_set_som);
1440 if (!setsom_found)
1441 {
1442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1444 stackpos += (int)sizeof(sljit_sw);
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1446 stackpos += (int)sizeof(sljit_sw);
1447 setsom_found = TRUE;
1448 }
1449 cc += 1;
1450 break;
1451
1452 case OP_MARK:
1453 case OP_PRUNE_ARG:
1454 case OP_THEN_ARG:
1455 SLJIT_ASSERT(common->mark_ptr != 0);
1456 if (!setmark_found)
1457 {
1458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1460 stackpos += (int)sizeof(sljit_sw);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1462 stackpos += (int)sizeof(sljit_sw);
1463 setmark_found = TRUE;
1464 }
1465 cc += 1 + 2 + cc[1];
1466 break;
1467
1468 case OP_RECURSE:
1469 if (common->has_set_som && !setsom_found)
1470 {
1471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1473 stackpos += (int)sizeof(sljit_sw);
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1475 stackpos += (int)sizeof(sljit_sw);
1476 setsom_found = TRUE;
1477 }
1478 if (common->mark_ptr != 0 && !setmark_found)
1479 {
1480 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1482 stackpos += (int)sizeof(sljit_sw);
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1484 stackpos += (int)sizeof(sljit_sw);
1485 setmark_found = TRUE;
1486 }
1487 if (common->capture_last_ptr != 0 && !capture_last_found)
1488 {
1489 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1491 stackpos += (int)sizeof(sljit_sw);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1493 stackpos += (int)sizeof(sljit_sw);
1494 capture_last_found = TRUE;
1495 }
1496 cc += 1 + LINK_SIZE;
1497 break;
1498
1499 case OP_CBRA:
1500 case OP_CBRAPOS:
1501 case OP_SCBRA:
1502 case OP_SCBRAPOS:
1503 if (common->capture_last_ptr != 0 && !capture_last_found)
1504 {
1505 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1506 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1507 stackpos += (int)sizeof(sljit_sw);
1508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1509 stackpos += (int)sizeof(sljit_sw);
1510 capture_last_found = TRUE;
1511 }
1512 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1513 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1514 stackpos += (int)sizeof(sljit_sw);
1515 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1518 stackpos += (int)sizeof(sljit_sw);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521
1522 cc += 1 + LINK_SIZE + IMM2_SIZE;
1523 break;
1524
1525 default:
1526 cc = next_opcode(common, cc);
1527 SLJIT_ASSERT(cc != NULL);
1528 break;
1529 }
1530
1531 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1532 SLJIT_ASSERT(stackpos == STACK(stacktop));
1533 }
1534
1535 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1536 {
1537 int private_data_length = needs_control_head ? 3 : 2;
1538 int size;
1539 pcre_uchar *alternative;
1540 /* Calculate the sum of the private machine words. */
1541 while (cc < ccend)
1542 {
1543 size = 0;
1544 switch(*cc)
1545 {
1546 case OP_KET:
1547 if (PRIVATE_DATA(cc) != 0)
1548 {
1549 private_data_length++;
1550 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1551 cc += PRIVATE_DATA(cc + 1);
1552 }
1553 cc += 1 + LINK_SIZE;
1554 break;
1555
1556 case OP_ASSERT:
1557 case OP_ASSERT_NOT:
1558 case OP_ASSERTBACK:
1559 case OP_ASSERTBACK_NOT:
1560 case OP_ONCE:
1561 case OP_ONCE_NC:
1562 case OP_BRAPOS:
1563 case OP_SBRA:
1564 case OP_SBRAPOS:
1565 case OP_SCOND:
1566 private_data_length++;
1567 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1568 cc += 1 + LINK_SIZE;
1569 break;
1570
1571 case OP_CBRA:
1572 case OP_SCBRA:
1573 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1574 private_data_length++;
1575 cc += 1 + LINK_SIZE + IMM2_SIZE;
1576 break;
1577
1578 case OP_CBRAPOS:
1579 case OP_SCBRAPOS:
1580 private_data_length += 2;
1581 cc += 1 + LINK_SIZE + IMM2_SIZE;
1582 break;
1583
1584 case OP_COND:
1585 /* Might be a hidden SCOND. */
1586 alternative = cc + GET(cc, 1);
1587 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1588 private_data_length++;
1589 cc += 1 + LINK_SIZE;
1590 break;
1591
1592 CASE_ITERATOR_PRIVATE_DATA_1
1593 if (PRIVATE_DATA(cc))
1594 private_data_length++;
1595 cc += 2;
1596 #ifdef SUPPORT_UTF
1597 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1598 #endif
1599 break;
1600
1601 CASE_ITERATOR_PRIVATE_DATA_2A
1602 if (PRIVATE_DATA(cc))
1603 private_data_length += 2;
1604 cc += 2;
1605 #ifdef SUPPORT_UTF
1606 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1607 #endif
1608 break;
1609
1610 CASE_ITERATOR_PRIVATE_DATA_2B
1611 if (PRIVATE_DATA(cc))
1612 private_data_length += 2;
1613 cc += 2 + IMM2_SIZE;
1614 #ifdef SUPPORT_UTF
1615 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1616 #endif
1617 break;
1618
1619 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1620 if (PRIVATE_DATA(cc))
1621 private_data_length++;
1622 cc += 1;
1623 break;
1624
1625 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1626 if (PRIVATE_DATA(cc))
1627 private_data_length += 2;
1628 cc += 1;
1629 break;
1630
1631 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1632 if (PRIVATE_DATA(cc))
1633 private_data_length += 2;
1634 cc += 1 + IMM2_SIZE;
1635 break;
1636
1637 case OP_CLASS:
1638 case OP_NCLASS:
1639 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1640 case OP_XCLASS:
1641 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1642 #else
1643 size = 1 + 32 / (int)sizeof(pcre_uchar);
1644 #endif
1645 if (PRIVATE_DATA(cc))
1646 private_data_length += get_class_iterator_size(cc + size);
1647 cc += size;
1648 break;
1649
1650 default:
1651 cc = next_opcode(common, cc);
1652 SLJIT_ASSERT(cc != NULL);
1653 break;
1654 }
1655 }
1656 SLJIT_ASSERT(cc == ccend);
1657 return private_data_length;
1658 }
1659
1660 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1661 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1662 {
1663 DEFINE_COMPILER;
1664 int srcw[2];
1665 int count, size;
1666 BOOL tmp1next = TRUE;
1667 BOOL tmp1empty = TRUE;
1668 BOOL tmp2empty = TRUE;
1669 pcre_uchar *alternative;
1670 enum {
1671 start,
1672 loop,
1673 end
1674 } status;
1675
1676 status = save ? start : loop;
1677 stackptr = STACK(stackptr - 2);
1678 stacktop = STACK(stacktop - 1);
1679
1680 if (!save)
1681 {
1682 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1683 if (stackptr < stacktop)
1684 {
1685 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1686 stackptr += sizeof(sljit_sw);
1687 tmp1empty = FALSE;
1688 }
1689 if (stackptr < stacktop)
1690 {
1691 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1692 stackptr += sizeof(sljit_sw);
1693 tmp2empty = FALSE;
1694 }
1695 /* The tmp1next must be TRUE in either way. */
1696 }
1697
1698 do
1699 {
1700 count = 0;
1701 switch(status)
1702 {
1703 case start:
1704 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1705 count = 1;
1706 srcw[0] = common->recursive_head_ptr;
1707 if (needs_control_head)
1708 {
1709 SLJIT_ASSERT(common->control_head_ptr != 0);
1710 count = 2;
1711 srcw[1] = common->control_head_ptr;
1712 }
1713 status = loop;
1714 break;
1715
1716 case loop:
1717 if (cc >= ccend)
1718 {
1719 status = end;
1720 break;
1721 }
1722
1723 switch(*cc)
1724 {
1725 case OP_KET:
1726 if (PRIVATE_DATA(cc) != 0)
1727 {
1728 count = 1;
1729 srcw[0] = PRIVATE_DATA(cc);
1730 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1731 cc += PRIVATE_DATA(cc + 1);
1732 }
1733 cc += 1 + LINK_SIZE;
1734 break;
1735
1736 case OP_ASSERT:
1737 case OP_ASSERT_NOT:
1738 case OP_ASSERTBACK:
1739 case OP_ASSERTBACK_NOT:
1740 case OP_ONCE:
1741 case OP_ONCE_NC:
1742 case OP_BRAPOS:
1743 case OP_SBRA:
1744 case OP_SBRAPOS:
1745 case OP_SCOND:
1746 count = 1;
1747 srcw[0] = PRIVATE_DATA(cc);
1748 SLJIT_ASSERT(srcw[0] != 0);
1749 cc += 1 + LINK_SIZE;
1750 break;
1751
1752 case OP_CBRA:
1753 case OP_SCBRA:
1754 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1755 {
1756 count = 1;
1757 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1758 }
1759 cc += 1 + LINK_SIZE + IMM2_SIZE;
1760 break;
1761
1762 case OP_CBRAPOS:
1763 case OP_SCBRAPOS:
1764 count = 2;
1765 srcw[0] = PRIVATE_DATA(cc);
1766 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1767 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1768 cc += 1 + LINK_SIZE + IMM2_SIZE;
1769 break;
1770
1771 case OP_COND:
1772 /* Might be a hidden SCOND. */
1773 alternative = cc + GET(cc, 1);
1774 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1775 {
1776 count = 1;
1777 srcw[0] = PRIVATE_DATA(cc);
1778 SLJIT_ASSERT(srcw[0] != 0);
1779 }
1780 cc += 1 + LINK_SIZE;
1781 break;
1782
1783 CASE_ITERATOR_PRIVATE_DATA_1
1784 if (PRIVATE_DATA(cc))
1785 {
1786 count = 1;
1787 srcw[0] = PRIVATE_DATA(cc);
1788 }
1789 cc += 2;
1790 #ifdef SUPPORT_UTF
1791 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1792 #endif
1793 break;
1794
1795 CASE_ITERATOR_PRIVATE_DATA_2A
1796 if (PRIVATE_DATA(cc))
1797 {
1798 count = 2;
1799 srcw[0] = PRIVATE_DATA(cc);
1800 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1801 }
1802 cc += 2;
1803 #ifdef SUPPORT_UTF
1804 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1805 #endif
1806 break;
1807
1808 CASE_ITERATOR_PRIVATE_DATA_2B
1809 if (PRIVATE_DATA(cc))
1810 {
1811 count = 2;
1812 srcw[0] = PRIVATE_DATA(cc);
1813 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1814 }
1815 cc += 2 + IMM2_SIZE;
1816 #ifdef SUPPORT_UTF
1817 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1818 #endif
1819 break;
1820
1821 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1822 if (PRIVATE_DATA(cc))
1823 {
1824 count = 1;
1825 srcw[0] = PRIVATE_DATA(cc);
1826 }
1827 cc += 1;
1828 break;
1829
1830 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1831 if (PRIVATE_DATA(cc))
1832 {
1833 count = 2;
1834 srcw[0] = PRIVATE_DATA(cc);
1835 srcw[1] = srcw[0] + sizeof(sljit_sw);
1836 }
1837 cc += 1;
1838 break;
1839
1840 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1841 if (PRIVATE_DATA(cc))
1842 {
1843 count = 2;
1844 srcw[0] = PRIVATE_DATA(cc);
1845 srcw[1] = srcw[0] + sizeof(sljit_sw);
1846 }
1847 cc += 1 + IMM2_SIZE;
1848 break;
1849
1850 case OP_CLASS:
1851 case OP_NCLASS:
1852 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1853 case OP_XCLASS:
1854 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1855 #else
1856 size = 1 + 32 / (int)sizeof(pcre_uchar);
1857 #endif
1858 if (PRIVATE_DATA(cc))
1859 switch(get_class_iterator_size(cc + size))
1860 {
1861 case 1:
1862 count = 1;
1863 srcw[0] = PRIVATE_DATA(cc);
1864 break;
1865
1866 case 2:
1867 count = 2;
1868 srcw[0] = PRIVATE_DATA(cc);
1869 srcw[1] = srcw[0] + sizeof(sljit_sw);
1870 break;
1871
1872 default:
1873 SLJIT_ASSERT_STOP();
1874 break;
1875 }
1876 cc += size;
1877 break;
1878
1879 default:
1880 cc = next_opcode(common, cc);
1881 SLJIT_ASSERT(cc != NULL);
1882 break;
1883 }
1884 break;
1885
1886 case end:
1887 SLJIT_ASSERT_STOP();
1888 break;
1889 }
1890
1891 while (count > 0)
1892 {
1893 count--;
1894 if (save)
1895 {
1896 if (tmp1next)
1897 {
1898 if (!tmp1empty)
1899 {
1900 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1901 stackptr += sizeof(sljit_sw);
1902 }
1903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1904 tmp1empty = FALSE;
1905 tmp1next = FALSE;
1906 }
1907 else
1908 {
1909 if (!tmp2empty)
1910 {
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1912 stackptr += sizeof(sljit_sw);
1913 }
1914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1915 tmp2empty = FALSE;
1916 tmp1next = TRUE;
1917 }
1918 }
1919 else
1920 {
1921 if (tmp1next)
1922 {
1923 SLJIT_ASSERT(!tmp1empty);
1924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1925 tmp1empty = stackptr >= stacktop;
1926 if (!tmp1empty)
1927 {
1928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1929 stackptr += sizeof(sljit_sw);
1930 }
1931 tmp1next = FALSE;
1932 }
1933 else
1934 {
1935 SLJIT_ASSERT(!tmp2empty);
1936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1937 tmp2empty = stackptr >= stacktop;
1938 if (!tmp2empty)
1939 {
1940 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1941 stackptr += sizeof(sljit_sw);
1942 }
1943 tmp1next = TRUE;
1944 }
1945 }
1946 }
1947 }
1948 while (status != end);
1949
1950 if (save)
1951 {
1952 if (tmp1next)
1953 {
1954 if (!tmp1empty)
1955 {
1956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1957 stackptr += sizeof(sljit_sw);
1958 }
1959 if (!tmp2empty)
1960 {
1961 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1962 stackptr += sizeof(sljit_sw);
1963 }
1964 }
1965 else
1966 {
1967 if (!tmp2empty)
1968 {
1969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1970 stackptr += sizeof(sljit_sw);
1971 }
1972 if (!tmp1empty)
1973 {
1974 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1975 stackptr += sizeof(sljit_sw);
1976 }
1977 }
1978 }
1979 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1980 }
1981
1982 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1983 {
1984 pcre_uchar *end = bracketend(cc);
1985 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1986
1987 /* Assert captures then. */
1988 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1989 current_offset = NULL;
1990 /* Conditional block does not. */
1991 if (*cc == OP_COND || *cc == OP_SCOND)
1992 has_alternatives = FALSE;
1993
1994 cc = next_opcode(common, cc);
1995 if (has_alternatives)
1996 current_offset = common->then_offsets + (cc - common->start);
1997
1998 while (cc < end)
1999 {
2000 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2001 cc = set_then_offsets(common, cc, current_offset);
2002 else
2003 {
2004 if (*cc == OP_ALT && has_alternatives)
2005 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2006 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2007 *current_offset = 1;
2008 cc = next_opcode(common, cc);
2009 }
2010 }
2011
2012 return end;
2013 }
2014
/* The case label groups are not used after this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
2021
2022 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2023 {
2024 return (value & (value - 1)) == 0;
2025 }
2026
2027 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2028 {
2029 while (list)
2030 {
2031 /* sljit_set_label is clever enough to do nothing
2032 if either the jump or the label is NULL. */
2033 SET_LABEL(list->jump, label);
2034 list = list->next;
2035 }
2036 }
2037
2038 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2039 {
2040 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2041 if (list_item)
2042 {
2043 list_item->next = *list;
2044 list_item->jump = jump;
2045 *list = list_item;
2046 }
2047 }
2048
2049 static void add_stub(compiler_common *common, struct sljit_jump *start)
2050 {
2051 DEFINE_COMPILER;
2052 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2053
2054 if (list_item)
2055 {
2056 list_item->start = start;
2057 list_item->quit = LABEL();
2058 list_item->next = common->stubs;
2059 common->stubs = list_item;
2060 }
2061 }
2062
2063 static void flush_stubs(compiler_common *common)
2064 {
2065 DEFINE_COMPILER;
2066 stub_list *list_item = common->stubs;
2067
2068 while (list_item)
2069 {
2070 JUMPHERE(list_item->start);
2071 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2072 JUMPTO(SLJIT_JUMP, list_item->quit);
2073 list_item = list_item->next;
2074 }
2075 common->stubs = NULL;
2076 }
2077
2078 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2079 {
2080 DEFINE_COMPILER;
2081 label_addr_list *label_addr;
2082
2083 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2084 if (label_addr == NULL)
2085 return;
2086 label_addr->label = LABEL();
2087 label_addr->update_addr = update_addr;
2088 label_addr->next = common->label_addrs;
2089 common->label_addrs = label_addr;
2090 }
2091
2092 static SLJIT_INLINE void count_match(compiler_common *common)
2093 {
2094 DEFINE_COMPILER;
2095
2096 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2097 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2098 }
2099
2100 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2101 {
2102 /* May destroy all locals and registers except TMP2. */
2103 DEFINE_COMPILER;
2104
2105 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2106 #ifdef DESTROY_REGISTERS
2107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2108 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2109 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2112 #endif
2113 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2114 }
2115
2116 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2117 {
2118 DEFINE_COMPILER;
2119 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2120 }
2121
2122 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2123 {
2124 DEFINE_COMPILER;
2125 sljit_uw *result;
2126
2127 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2128 return NULL;
2129
2130 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2131 if (SLJIT_UNLIKELY(result == NULL))
2132 {
2133 sljit_set_compiler_memory_error(compiler);
2134 return NULL;
2135 }
2136
2137 *(void**)result = common->read_only_data_head;
2138 common->read_only_data_head = (void *)result;
2139 return result + 1;
2140 }
2141
2142 static void free_read_only_data(void *current, void *allocator_data)
2143 {
2144 void *next;
2145
2146 SLJIT_UNUSED_ARG(allocator_data);
2147
2148 while (current != NULL)
2149 {
2150 next = *(void**)current;
2151 SLJIT_FREE(current, allocator_data);
2152 current = next;
2153 }
2154 }
2155
2156 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2157 {
2158 DEFINE_COMPILER;
2159 struct sljit_label *loop;
2160 int i;
2161
2162 /* At this point we can freely use all temporary registers. */
2163 SLJIT_ASSERT(length > 1);
2164 /* TMP1 returns with begin - 1. */
2165 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2166 if (length < 8)
2167 {
2168 for (i = 1; i < length; i++)
2169 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2170 }
2171 else
2172 {
2173 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2174 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2175 loop = LABEL();
2176 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2177 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2178 JUMPTO(SLJIT_NOT_ZERO, loop);
2179 }
2180 }
2181
2182 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2183 {
2184 DEFINE_COMPILER;
2185 struct sljit_label *loop;
2186 int i;
2187
2188 SLJIT_ASSERT(length > 1);
2189 /* OVECTOR(1) contains the "string begin - 1" constant. */
2190 if (length > 2)
2191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2192 if (length < 8)
2193 {
2194 for (i = 2; i < length; i++)
2195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2196 }
2197 else
2198 {
2199 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2200 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2201 loop = LABEL();
2202 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2203 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2204 JUMPTO(SLJIT_NOT_ZERO, loop);
2205 }
2206
2207 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2208 if (common->mark_ptr != 0)
2209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2210 if (common->control_head_ptr != 0)
2211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2212 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2214 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2215 }
2216
2217 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2218 {
2219 while (current != NULL)
2220 {
2221 switch (current[-2])
2222 {
2223 case type_then_trap:
2224 break;
2225
2226 case type_mark:
2227 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2228 return current[-4];
2229 break;
2230
2231 default:
2232 SLJIT_ASSERT_STOP();
2233 break;
2234 }
2235 current = (sljit_sw*)current[-1];
2236 }
2237 return -1;
2238 }
2239
/* Emits the code that hands the match result back to the caller: the
   pointers kept in the local OVECTOR area are turned into offsets relative
   to jit_arguments.begin and written through jit_arguments.offsets, the mark
   pointer is stored back when common->mark_ptr is in use, and the return
   value is derived from topbracket. */
2240 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2241 {
2242 DEFINE_COMPILER;
2243 struct sljit_label *loop;
2244 struct sljit_jump *early_quit;
2245
2246 /* At this point we can freely use all registers. */
/* S2 remembers the old content of OVECTOR(1) (it is the value compared
   against in the counting loop at the end) before OVECTOR(1) is overwritten
   with the current STR_PTR. */
2247 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2249
/* R0 = arguments, R1 = offset_count. R2 first carries the mark pointer into
   jit_arguments.mark_ptr and is then reused as (offsets - sizeof(int)).
   Finally R0 is replaced by jit_arguments.begin. */
2250 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2251 if (common->mark_ptr != 0)
2252 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2253 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2254 if (common->mark_ptr != 0)
2255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2256 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2257 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
/* S0 walks the local OVECTOR area, starting at OVECTOR_START. */
2258 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2259 /* Unlikely, but possible */
2260 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Copy loop, executed offset_count times: S1 = stored pointer - begin. */
2261 loop = LABEL();
2262 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2263 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2264 /* Copy the integer value to the output buffer */
/* In the 16 and 32 bit builds the difference is shifted right by UCHAR_SHIFT
   first (presumably converting a byte distance into code units). */
2265 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2266 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2267 #endif
/* NOTE(review): SLJIT_MOVU_SI presumably also moves R2 to the address it
   wrote, which would explain why R2 starts one int before offsets - confirm
   against the sljit documentation. */
2268 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2269 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2270 JUMPTO(SLJIT_NOT_ZERO, loop);
2271 JUMPHERE(early_quit);
2272
2273 /* Calculate the return value, which is the maximum ovector value. */
2274 if (topbracket > 1)
2275 {
/* R0 starts at OVECTOR_START + topbracket * 2 words and R1 at
   topbracket + 1. Each iteration loads the word two machine words below,
   decrements R1, and repeats while the loaded word equals the value saved
   in S2. R1 is the generated function's return value. */
2276 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2277 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2278
2279 /* OVECTOR(0) is never equal to SLJIT_S2. */
2280 loop = LABEL();
2281 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2282 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2283 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2284 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2285 }
2286 else
/* With topbracket <= 1 the return value is the constant 1. */
2287 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2288 }
2289
/* Emits the code that reports a partial match: the return register is set
   to PCRE_ERROR_PARTIAL, up to three entries of jit_arguments.offsets are
   filled in (depending on real_offset_count), and control is transferred to
   the quit label. All stored values are relative to jit_arguments.begin. */
2290 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2291 {
2292 DEFINE_COMPILER;
2293 struct sljit_jump *jump;
2294
/* S1 is overwritten below while STR_END is still read from it, hence the
   compile-time check that STR_END lives in SLJIT_S1. */
2295 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2296 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2297 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2298
/* With fewer than two output slots (real_offset_count < 2) nothing is
   stored; only the error code is returned. */
2299 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2300 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2301 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2302 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2303
2304 /* Store match begin and end. */
/* S0 = begin, R1 = offsets (R1 no longer points to the arguments). */
2305 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2306 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2307
/* offsets[2] is written only when at least three slots are available. Its
   source is the start_ptr local in hard partial mode, otherwise the word
   that follows the hit_start local. */
2308 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2309 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2310 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2311 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2312 #endif
2313 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2314 JUMPHERE(jump);
2315
/* offsets[1] = STR_END - begin. R2 is loaded with the value for offsets[0]
   before S1 (STR_END) is overwritten. */
2316 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2317 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2318 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2319 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2320 #endif
2321 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2322
/* offsets[0] = (start_used_ptr in hard mode, hit_start otherwise) - begin. */
2323 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2324 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2325 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2326 #endif
2327 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2328
2329 JUMPTO(SLJIT_JUMP, quit);
2330 }
2331
2332 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2333 {
2334 /* May destroy TMP1. */
2335 DEFINE_COMPILER;
2336 struct sljit_jump *jump;
2337
2338 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2339 {
2340 /* The value of -1 must be kept for start_used_ptr! */
2341 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2342 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2343 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2344 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2346 JUMPHERE(jump);
2347 }
2348 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2349 {
2350 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2351 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2352 JUMPHERE(jump);
2353 }
2354 }
2355
2356 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2357 {
2358 /* Detects if the character has an othercase. */
2359 unsigned int c;
2360
2361 #ifdef SUPPORT_UTF
2362 if (common->utf)
2363 {
2364 GETCHAR(c, cc);
2365 if (c > 127)
2366 {
2367 #ifdef SUPPORT_UCP
2368 return c != UCD_OTHERCASE(c);
2369 #else
2370 return FALSE;
2371 #endif
2372 }
2373 #ifndef COMPILE_PCRE8
2374 return common->fcc[c] != c;
2375 #endif
2376 }
2377 else
2378 #endif
2379 c = *cc;
2380 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2381 }
2382
2383 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2384 {
2385 /* Returns with the othercase. */
2386 #ifdef SUPPORT_UTF
2387 if (common->utf && c > 127)
2388 {
2389 #ifdef SUPPORT_UCP
2390 return UCD_OTHERCASE(c);
2391 #else
2392 return c;
2393 #endif
2394 }
2395 #endif
2396 return TABLE_GET(c, common->fcc, c);
2397 }
2398
2399 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2400 {
2401 /* Detects if the character and its othercase has only 1 bit difference. */
2402 unsigned int c, oc, bit;
2403 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2404 int n;
2405 #endif
2406
2407 #ifdef SUPPORT_UTF
2408 if (common->utf)
2409 {
2410 GETCHAR(c, cc);
2411 if (c <= 127)
2412 oc = common->fcc[c];
2413 else
2414 {
2415 #ifdef SUPPORT_UCP
2416 oc = UCD_OTHERCASE(c);
2417 #else
2418 oc = c;
2419 #endif
2420 }
2421 }
2422 else
2423 {
2424 c = *cc;
2425 oc = TABLE_GET(c, common->fcc, c);
2426 }
2427 #else
2428 c = *cc;
2429 oc = TABLE_GET(c, common->fcc, c);
2430 #endif
2431
2432 SLJIT_ASSERT(c != oc);
2433
2434 bit = c ^ oc;
2435 /* Optimized for English alphabet. */
2436 if (c <= 127 && bit == 0x20)
2437 return (0 << 8) | 0x20;
2438
2439 /* Since c != oc, they must have at least 1 bit difference. */
2440 if (!is_powerof2(bit))
2441 return 0;
2442
2443 #if defined COMPILE_PCRE8
2444
2445 #ifdef SUPPORT_UTF
2446 if (common->utf && c > 127)
2447 {
2448 n = GET_EXTRALEN(*cc);
2449 while ((bit & 0x3f) == 0)
2450 {
2451 n--;
2452 bit >>= 6;
2453 }
2454 return (n << 8) | bit;
2455 }
2456 #endif /* SUPPORT_UTF */
2457 return (0 << 8) | bit;
2458
2459 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2460
2461 #ifdef SUPPORT_UTF
2462 if (common->utf && c > 65535)
2463 {
2464 if (bit >= (1 << 10))
2465 bit >>= 10;
2466 else
2467 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2468 }
2469 #endif /* SUPPORT_UTF */
2470 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2471
2472 #endif /* COMPILE_PCRE[8|16|32] */
2473 }
2474
2475 static void check_partial(compiler_common *common, BOOL force)
2476 {
2477 /* Checks whether a partial matching is occurred. Does not modify registers. */
2478 DEFINE_COMPILER;
2479 struct sljit_jump *jump = NULL;
2480
2481 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2482
2483 if (common->mode == JIT_COMPILE)
2484 return;
2485
2486 if (!force)
2487 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2488 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2489 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2490
2491 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2492 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2493 else
2494 {
2495 if (common->partialmatchlabel != NULL)
2496 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2497 else
2498 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2499 }
2500
2501 if (jump != NULL)
2502 JUMPHERE(jump);
2503 }
2504
2505 static void check_str_end(compiler_common *common, jump_list **end_reached)
2506 {
2507 /* Does not affect registers. Usually used in a tight spot. */
2508 DEFINE_COMPILER;
2509 struct sljit_jump *jump;
2510
2511 if (common->mode == JIT_COMPILE)
2512 {
2513 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2514 return;
2515 }
2516
2517 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2518 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2519 {
2520 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2522 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2523 }
2524 else
2525 {
2526 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2527 if (common->partialmatchlabel != NULL)
2528 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2529 else
2530 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2531 }
2532 JUMPHERE(jump);
2533 }
2534
2535 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2536 {
2537 DEFINE_COMPILER;
2538 struct sljit_jump *jump;
2539
2540 if (common->mode == JIT_COMPILE)
2541 {
2542 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2543 return;
2544 }
2545
2546 /* Partial matching mode. */
2547 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2548 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2549 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2550 {
2551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2552 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2553 }
2554 else
2555 {
2556 if (common->partialmatchlabel != NULL)
2557 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2558 else
2559 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2560 }
2561 JUMPHERE(jump);
2562 }
2563
2564 static void peek_char(compiler_common *common, pcre_uint32 max)
2565 {
2566 /* Reads the character into TMP1, keeps STR_PTR.
2567 Does not check STR_END. TMP2 Destroyed. */
2568 DEFINE_COMPILER;
2569 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2570 struct sljit_jump *jump;
2571 #endif
2572
2573 SLJIT_UNUSED_ARG(max);
2574
2575 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2576 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2577 if (common->utf)
2578 {
2579 if (max < 128) return;
2580
2581 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2583 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2584 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2585 JUMPHERE(jump);
2586 }
2587 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2588
2589 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2590 if (common->utf)
2591 {
2592 if (max < 0xd800) return;
2593
2594 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2595 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2596 /* TMP2 contains the high surrogate. */
2597 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2598 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2599 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2600 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2601 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2602 JUMPHERE(jump);
2603 }
2604 #endif
2605 }
2606
2607 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2608
2609 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2610 {
2611 /* Tells whether the character codes below 128 are enough
2612 to determine a match. */
2613 const pcre_uint8 value = nclass ? 0xff : 0;
2614 const pcre_uint8 *end = bitset + 32;
2615
2616 bitset += 16;
2617 do
2618 {
2619 if (*bitset++ != value)
2620 return FALSE;
2621 }
2622 while (bitset < end);
2623 return TRUE;
2624 }
2625
2626 static void read_char7_type(compiler_common *common, BOOL full_read)
2627 {
2628 /* Reads the precise character type of a character into TMP1, if the character
2629 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2630 full_read argument tells whether characters above max are accepted or not. */
/* In terms of the emitted code: the first byte is loaded into TMP2, STR_PTR
   is advanced by one byte and TMP1 is loaded from the common->ctypes table
   indexed by that byte. Only when full_read is set and the byte is 0xc0 or
   above, STR_PTR is additionally advanced by the utf8_table4 entry of the
   byte (presumably the number of trailing bytes of the sequence). The
   function asserts that UTF mode is active. */
2631 DEFINE_COMPILER;
2632 struct sljit_jump *jump;
2633
2634 SLJIT_ASSERT(common->utf);
2635
2636 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2637 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2638
2639 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2640
2641 if (full_read)
2642 {
/* The table address is biased by -0xc0 so that the byte value itself can be
   used as the index. */
2643 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2644 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2646 JUMPHERE(jump);
2647 }
2648 }
2649
2650 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2651
2652 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2653 {
2654 /* Reads the precise value of a character into TMP1, if the character is
2655 between min and max (c >= min && c <= max). Otherwise it returns with a value
2656 outside the range. Does not check STR_END. */
/* The first code unit is always loaded and STR_PTR is always advanced by one
   code unit. min and max only select how much decoding code is emitted for
   multi-unit characters in UTF mode. update_str_ptr requests that STR_PTR is
   moved past the whole character even when its value is not decoded. TMP2 is
   used as scratch, and so is RETURN_ADDR in some UTF-8 paths. */
2657 DEFINE_COMPILER;
2658 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2659 struct sljit_jump *jump;
2660 #endif
2661 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2662 struct sljit_jump *jump2;
2663 #endif
2664
2665 SLJIT_UNUSED_ARG(update_str_ptr);
2666 SLJIT_UNUSED_ARG(min);
2667 SLJIT_UNUSED_ARG(max);
2668 SLJIT_ASSERT(min <= max);
2669
2670 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2671 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2672
2673 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2674 if (common->utf)
2675 {
/* Nothing more to do when only values below 128 matter and the caller does
   not need STR_PTR moved past a multi-byte character. */
2676 if (max < 128 && !update_str_ptr) return;
2677
/* First bytes below 0xc0 need no further processing. */
2678 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2679 if (min >= 0x10000)
2680 {
/* Only first bytes 0xf0..0xf7 are decoded, using the next three bytes. With
   update_str_ptr, the utf8_table4 entry of the first byte is kept in
   RETURN_ADDR and added to STR_PTR at the end, for every first byte
   >= 0xc0. */
2681 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2682 if (update_str_ptr)
2683 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2684 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2685 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2686 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2687 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2688 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2689 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2690 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2691 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2692 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2694 if (!update_str_ptr)
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2696 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2697 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2698 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2699 JUMPHERE(jump2);
2700 if (update_str_ptr)
2701 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2702 }
2703 else if (min >= 0x800 && max <= 0xffff)
2704 {
/* Same scheme for first bytes 0xe0..0xef, using the next two bytes. */
2705 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2706 if (update_str_ptr)
2707 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2708 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2709 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2710 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2711 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2712 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2713 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2714 if (!update_str_ptr)
2715 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2716 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2717 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2718 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2719 JUMPHERE(jump2);
2720 if (update_str_ptr)
2721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2722 }
2723 else if (max >= 0x800)
/* General case: call a shared helper, utfreadchar16 when max is below
   0x10000 and utfreadchar otherwise. */
2724 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2725 else if (max < 128)
2726 {
/* Reached only with update_str_ptr set (see the early return above): the
   value is not decoded, STR_PTR is only advanced by the utf8_table4 entry. */
2727 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2728 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2729 }
2730 else
2731 {
/* 128 <= max < 0x800: decode as a two-byte sequence. */
2732 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2733 if (!update_str_ptr)
2734 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735 else
2736 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2737 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2738 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2739 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2740 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2741 if (update_str_ptr)
2742 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2743 }
2744 JUMPHERE(jump);
2745 }
2746 #endif
2747
2748 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2749 if (common->utf)
2750 {
2751 if (max >= 0x10000)
2752 {
/* Full surrogate pair decoding. STR_PTR already points at the second code
   unit, so the low surrogate is read from IN_UCHARS(0). */
2753 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2754 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2755 /* TMP2 contains the high surrogate. */
2756 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2757 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2758 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2760 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2761 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2762 JUMPHERE(jump);
2763 return;
2764 }
2765
2766 if (max < 0xd800 && !update_str_ptr) return;
2767
2768 /* Skip low surrogate if necessary. */
/* The value is not decoded here. After a high surrogate, STR_PTR is moved
   past the second unit when update_str_ptr is set, and TMP1 is replaced by
   0x10000 (a value above max) when max >= 0xd800. */
2769 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2770 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2771 if (update_str_ptr)
2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773 if (max >= 0xd800)
2774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2775 JUMPHERE(jump);
2776 }
2777 #endif
2778 }
2779
/* Reads a character without any range restriction: forwards to
   read_char_range() with the range 0..READ_CHAR_MAX and with update_str_ptr
   set to TRUE. */
2780 static SLJIT_INLINE void read_char(compiler_common *common)
2781 {
2782 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2783 }
2784
2785 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2786 {
2787 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the common->ctypes table, and TMP1 is set to zero for
   characters above 255. TMP2 is used as scratch. In the UTF-8 path,
   update_str_ptr selects the out-of-line utfreadtype8 helper; without it an
   inline two-byte decode is emitted. In the UTF-16 path the unit following a
   high surrogate is skipped only when update_str_ptr is set. */
2788 DEFINE_COMPILER;
2789 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2790 struct sljit_jump *jump;
2791 #endif
2792 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2793 struct sljit_jump *jump2;
2794 #endif
2795
2796 SLJIT_UNUSED_ARG(update_str_ptr);
2797
2798 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2799 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2800
2801 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2802 if (common->utf)
2803 {
2804 /* This can be an extra read in some situations, but hopefully
2805 it is needed in most cases. */
2806 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2807 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2808 if (!update_str_ptr)
2809 {
/* Inline two-byte decode: TMP2 = ((first & 0x3f) << 6) | (second & 0x3f).
   The type is zero unless the decoded value is at most 255. */
2810 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2812 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2813 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2814 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2815 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2817 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2818 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2819 JUMPHERE(jump2);
2820 }
2821 else
2822 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2823 JUMPHERE(jump);
2824 return;
2825 }
2826 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2827
2828 #if !defined COMPILE_PCRE8
2829 /* The ctypes array contains only 256 values. */
2830 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2831 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2832 #endif
2833 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2834 #if !defined COMPILE_PCRE8
2835 JUMPHERE(jump);
2836 #endif
2837
2838 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2839 if (common->utf && update_str_ptr)
2840 {
2841 /* Skip low surrogate if necessary. */
2842 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2843 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2844 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2845 JUMPHERE(jump);
2846 }
2847 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2848 }
2849
2850 static void skip_char_back(compiler_common *common)
2851 {
2852 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Without UTF (and in the 32 bit build) this is a single subtraction of one
   code unit; the UTF-8 and UTF-16 variants below step over a whole
   multi-unit character. */
2853 DEFINE_COMPILER;
2854 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2855 #if defined COMPILE_PCRE8
2856 struct sljit_label *label;
2857
2858 if (common->utf)
2859 {
/* UTF-8: step back one byte at a time for as long as the byte just stepped
   over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
2860 label = LABEL();
2861 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2862 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2863 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2864 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2865 return;
2866 }
2867 #elif defined COMPILE_PCRE16
2868 if (common->utf)
2869 {
2870 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2871 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2872 /* Skip low surrogate if necessary. */
/* Branch-free: TMP1 becomes the result of the equality test
   (unit & 0xfc00) == 0xdc00, is shifted left by one (two bytes per code
   unit) and subtracted from STR_PTR. */
2873 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2874 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2875 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2876 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2877 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2878 return;
2879 }
2880 #endif /* COMPILE_PCRE[8|16] */
2881 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2882 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2883 }
2884
2885 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2886 {
2887 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2888 DEFINE_COMPILER;
2889 struct sljit_jump *jump;
2890
2891 if (nltype == NLTYPE_ANY)
2892 {
2893 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2894 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2895 }
2896 else if (nltype == NLTYPE_ANYCRLF)
2897 {
2898 if (jumpifmatch)
2899 {
2900 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2901 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2902 }
2903 else
2904 {
2905 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2906 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2907 JUMPHERE(jump);
2908 }
2909 }
2910 else
2911 {
2912 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2913 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2914 }
2915 }
2916
2917 #ifdef SUPPORT_UTF
2918
2919 #if defined COMPILE_PCRE8
2920 static void do_utfreadchar(compiler_common *common)
2921 {
2922 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2923 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emitted as an out-of-line helper (fast enter / fast return through
   RETURN_ADDR). On entry STR_PTR addresses the second byte of the sequence;
   on return it has been advanced past the remaining bytes. */
2924 DEFINE_COMPILER;
2925 struct sljit_jump *jump;
2926
2927 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2928 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2929 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2930 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2931 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2932 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2933
2934 /* Searching for the first zero. */
/* Bit 0x20 of the first byte now sits at 0x800; it is clear only for a
   two-byte sequence. */
2935 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2936 jump = JUMP(SLJIT_NOT_ZERO);
2937 /* Two byte sequence. */
2938 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2939 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2940 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2941
2942 JUMPHERE(jump);
/* Remove the marker bit and merge in the six payload bits of the third
   byte. */
2943 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2944 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2946 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2947 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2948
/* Bit 0x10 of the first byte now sits at 0x10000; it is clear for a
   three-byte sequence. */
2949 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2950 jump = JUMP(SLJIT_NOT_ZERO);
2951 /* Three byte sequence. */
2952 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2953 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2954 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2955
2956 /* Four byte sequence. */
2957 JUMPHERE(jump);
2958 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2959 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2960 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2962 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2963 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2964 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2965 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2966 }
2967
2968 static void do_utfreadchar16(compiler_common *common)
2969 {
2970 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2971 of the character (>= 0xc0). Return value in TMP1. */
/* Helper selected by read_char_range() when max is below 0x10000, so only
   two and three byte sequences are decoded precisely. Unlike
   do_utfreadchar(), no length is returned in TMP2. On entry STR_PTR
   addresses the second byte of the sequence. */
2972 DEFINE_COMPILER;
2973 struct sljit_jump *jump;
2974
2975 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2976 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2977 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2978 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2979 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2980 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2981
2982 /* Searching for the first zero. */
2983 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2984 jump = JUMP(SLJIT_NOT_ZERO);
2985 /* Two byte sequence. */
2986 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2987 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988
2989 JUMPHERE(jump);
/* Bit 0x400 holds bit 0x10 of the first byte, which is set for first bytes
   of 0xf0 and above. In that case STR_PTR is advanced by one extra byte
   before the three-byte style decode below. NOTE(review): the value
   produced for such a sequence is presumably only required to fall outside
   the caller's range - confirm. */
2990 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2991 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
2992 /* This code runs only in 8 bit mode. No need to shift the value. */
2993 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2994 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2995 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2996 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2997 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2998 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2999 /* Three byte sequence. */
3000 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3001 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3002 }
3003
3004 static void do_utfreadtype8(compiler_common *common)
3005 {
3006 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3007 of the character (>= 0xc0). Return value in TMP1. */
/* Out-of-line helper with three exits: a two-byte sequence whose value is
   at most 255 returns its common->ctypes entry; a two-byte sequence with a
   larger value returns zero; any longer sequence returns zero after STR_PTR
   has been advanced by the utf8_table4 entry of the first byte. On entry
   STR_PTR addresses the second byte. */
3008 DEFINE_COMPILER;
3009 struct sljit_jump *jump;
3010 struct sljit_jump *compare;
3011
3012 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3013
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
3014 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3015 jump = JUMP(SLJIT_NOT_ZERO);
3016 /* Two byte sequence. */
3017 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3018 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3019 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3020 /* The upper 5 bits are known at this point. */
/* More than 3 in these bits means a character value of 256 or above. */
3021 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3022 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3023 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3024 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3025 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3026 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3027
3028 JUMPHERE(compare);
3029 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3030 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3031
3032 /* We only have types for characters less than 256. */
3033 JUMPHERE(jump);
3034 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3035 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3036 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3037 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3038 }
3039
3040 #endif /* COMPILE_PCRE8 */
3041
3042 #endif /* SUPPORT_UTF */
3043
3044 #ifdef SUPPORT_UCP
3045
3046 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3047 #define UCD_BLOCK_MASK 127
3048 #define UCD_BLOCK_SHIFT 7
3049
3050 static void do_getucd(compiler_common *common)
3051 {
3052 /* Looks up the UCD record of the character passed in TMP1.
3053 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Out-of-line helper performing a two stage table lookup:
     block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]
     record = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
   The index shifts used below (1 for the 16 bit stage2 entries, 3 for the
   records) rely on the sizes checked by the assertion. */
3054 DEFINE_COMPILER;
3055
3056 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3057
3058 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3059 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3060 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3061 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3062 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3063 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3064 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3065 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP2 now holds the record index; TMP1 receives the chartype byte of that
   record. */
3066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3067 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3068 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3069 }
3070 #endif
3071
/* Emits the entry of the main matching loop and returns its label.
   With firstline set, the code first scans the subject for the end of the
   first line and stores that position in the first_line_end local. The
   returned label marks the code that advances STR_PTR by one character (a
   whole multi-unit character in UTF mode, and over both characters of a two
   character newline when the newline check is enabled). The first pass
   jumps over that advance. */
3072 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3073 {
3074 DEFINE_COMPILER;
3075 struct sljit_label *mainloop;
3076 struct sljit_label *newlinelabel = NULL;
3077 struct sljit_jump *start;
3078 struct sljit_jump *end = NULL;
3079 struct sljit_jump *nl = NULL;
3080 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3081 struct sljit_jump *singlechar;
3082 #endif
3083 jump_list *newline = NULL;
3084 BOOL newlinecheck = FALSE;
3085 BOOL readuchar = FALSE;
3086
/* The newline check is enabled only when neither hascrorlf nor firstline is
   set and the newline type is ANY, ANYCRLF or a two character sequence
   (newline > 255). */
3087 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3088 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3089 newlinecheck = TRUE;
3090
3091 if (firstline)
3092 {
3093 /* Search for the end of the first line. */
3094 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 and restored after the search. */
3095 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3096
3097 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3098 {
/* Two character fixed newline: compare adjacent code units against the high
   and low byte of common->newline. first_line_end receives STR_PTR minus one
   code unit, both on a match and when STR_END is reached. */
3099 mainloop = LABEL();
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3101 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3102 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3103 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3104 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3105 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3106 JUMPHERE(end);
3107 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3108 }
3109 else
3110 {
/* Other newline types: the position before each character is stored, so
   that when a newline is found first_line_end holds the start of that
   newline. If the subject ends first, STR_PTR at that point is stored. */
3111 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3112 mainloop = LABEL();
3113 /* Continual stores does not cause data dependency. */
3114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3115 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3116 check_newlinechar(common, common->nltype, &newline, TRUE);
3117 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3118 JUMPHERE(end);
3119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3120 set_jumps(newline, LABEL());
3121 }
3122
3123 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3124 }
3125
/* The first pass skips the code that advances STR_PTR. */
3126 start = JUMP(SLJIT_JUMP);
3127
3128 if (newlinecheck)
3129 {
/* Reached from the main loop when the current unit equals the first newline
   byte: step over it and, unless STR_END is reached, also over the next unit
   when it equals the second newline byte. */
3130 newlinelabel = LABEL();
3131 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3132 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3133 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3135 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3136 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3137 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3138 #endif
3139 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3140 nl = JUMP(SLJIT_JUMP);
3141 }
3142
/* This is the label returned to the caller. */
3143 mainloop = LABEL();
3144
3145 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current code unit is loaded only when it is needed: for the UTF length
   computation or for the newline comparison. */
3146 #ifdef SUPPORT_UTF
3147 if (common->utf) readuchar = TRUE;
3148 #endif
3149 if (newlinecheck) readuchar = TRUE;
3150
3151 if (readuchar)
3152 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3153
3154 if (newlinecheck)
3155 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3156
3157 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3158 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3159 #if defined COMPILE_PCRE8
3160 if (common->utf)
3161 {
/* UTF-8: for first bytes of 0xc0 and above also add the utf8_table4 entry
   of the byte. */
3162 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3163 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3165 JUMPHERE(singlechar);
3166 }
3167 #elif defined COMPILE_PCRE16
3168 if (common->utf)
3169 {
/* UTF-16: add one more code unit (two bytes) when the unit is a high
   surrogate, i.e. (unit & 0xfc00) == 0xd800. */
3170 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3171 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3172 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3173 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3174 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3175 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3176 JUMPHERE(singlechar);
3177 }
3178 #endif /* COMPILE_PCRE[8|16] */
3179 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Landing point of the initial jump; the newline handling paths join here
   as well. */
3180 JUMPHERE(start);
3181
3182 if (newlinecheck)
3183 {
3184 JUMPHERE(end);
3185 JUMPHERE(nl);
3186 }
3187
3188 return mainloop;
3189 }
3190
/* Maximum number of leading character positions collected by scan_prefix();
   fast_forward_first_n_chars() sizes its chars/bytes/ones arrays with it. */
3191 #define MAX_N_CHARS 16
/* Number of pcre_uint8 slots reserved per position in the "bytes" array.
   Slot 0 is the count of stored bytes (255 means "too many, not tracked"),
   the following slots hold the distinct bytes (see add_prefix_byte()). */
3192 #define MAX_N_BYTES 8
3193
3194 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3195 {
3196 pcre_uint8 len = bytes[0];
3197 int i;
3198
3199 if (len == 255)
3200 return;
3201
3202 if (len == 0)
3203 {
3204 bytes[0] = 1;
3205 bytes[1] = byte;
3206 return;
3207 }
3208
3209 for (i = len; i > 0; i--)
3210 if (bytes[i] == byte)
3211 return;
3212
3213 if (len >= MAX_N_BYTES - 1)
3214 {
3215 bytes[0] = 255;
3216 return;
3217 }
3218
3219 len++;
3220 bytes[len] = byte;
3221 bytes[0] = len;
3222 }
3223
/* Walks the compiled pattern from "cc" and records which code units may occur
   at each of the leading character positions.

   chars     two entries per position: chars[0] is the character value with all
             "don't care" bits set, chars[1] is the mask of those bits. A
             position that accepts anything gets an all-ones value and mask.
   bytes     MAX_N_BYTES entries per position, maintained by add_prefix_byte().
   max_chars maximum number of positions this call may fill in.
   rec_count shared work budget; decremented on every loop iteration of every
             (recursive) invocation.

   Returns the number of positions filled in by this invocation, or 0 when the
   work budget is exhausted. Recursive callers store the result back into
   max_chars, so later positions are only merged up to that length. */
3224 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3225 {
3226 /* Recursive function, which scans prefix literals. */
3227 BOOL last, any, caseless;
3228 int len, repeat, len_save, consumed = 0;
3229 pcre_uint32 chr, mask;
3230 pcre_uchar *alternative, *cc_save, *oc;
     /* Holds the code units of the other-case form of a caseless character. */
3231 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3232 pcre_uchar othercase[8];
3233 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3234 pcre_uchar othercase[2];
3235 #else
3236 pcre_uchar othercase[1];
3237 #endif
3238
3239 repeat = 1;
3240 while (TRUE)
3241 {
     /* Give up once the shared work budget is used up. */
3242 if (*rec_count == 0)
3243 return 0;
3244 (*rec_count)--;
3245
     /* last: stop scanning after this item. any: the item accepts (nearly)
        any code unit. caseless: the literal must be merged with its other case. */
3246 last = TRUE;
3247 any = FALSE;
3248 caseless = FALSE;
3249
3250 switch (*cc)
3251 {
3252 case OP_CHARI:
3253 caseless = TRUE;
     /* Fall through. */
3254 case OP_CHAR:
3255 last = FALSE;
3256 cc++;
3257 break;
3258
3259 case OP_SOD:
3260 case OP_SOM:
3261 case OP_SET_SOM:
3262 case OP_NOT_WORD_BOUNDARY:
3263 case OP_WORD_BOUNDARY:
3264 case OP_EODN:
3265 case OP_EOD:
3266 case OP_CIRC:
3267 case OP_CIRCM:
3268 case OP_DOLL:
3269 case OP_DOLLM:
3270 /* Zero width assertions. */
3271 cc++;
3272 continue;
3273
     /* Lookaround assertions are skipped as a whole. */
3274 case OP_ASSERT:
3275 case OP_ASSERT_NOT:
3276 case OP_ASSERTBACK:
3277 case OP_ASSERTBACK_NOT:
3278 cc = bracketend(cc);
3279 continue;
3280
3281 case OP_PLUSI:
3282 case OP_MINPLUSI:
3283 case OP_POSPLUSI:
3284 caseless = TRUE;
     /* Fall through. */
     /* "last" stays TRUE here: one occurrence is recorded, then scanning stops. */
3285 case OP_PLUS:
3286 case OP_MINPLUS:
3287 case OP_POSPLUS:
3288 cc++;
3289 break;
3290
3291 case OP_EXACTI:
3292 caseless = TRUE;
     /* Fall through. */
3293 case OP_EXACT:
3294 repeat = GET2(cc, 1);
3295 last = FALSE;
3296 cc += 1 + IMM2_SIZE;
3297 break;
3298
3299 case OP_QUERYI:
3300 case OP_MINQUERYI:
3301 case OP_POSQUERYI:
3302 caseless = TRUE;
     /* Fall through. */
3303 case OP_QUERY:
3304 case OP_MINQUERY:
3305 case OP_POSQUERY:
3306 len = 1;
3307 cc++;
3308 #ifdef SUPPORT_UTF
3309 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3310 #endif
     /* The character is optional: first merge what follows it into the same
        positions, then (after the switch) merge the character itself. */
3311 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3312 if (max_chars == 0)
3313 return consumed;
3314 last = FALSE;
3315 break;
3316
3317 case OP_KET:
3318 cc += 1 + LINK_SIZE;
3319 continue;
3320
     /* Reaching OP_ALT means the current branch ended: follow its link. */
3321 case OP_ALT:
3322 cc += GET(cc, 1);
3323 continue;
3324
3325 case OP_ONCE:
3326 case OP_ONCE_NC:
3327 case OP_BRA:
3328 case OP_BRAPOS:
3329 case OP_CBRA:
3330 case OP_CBRAPOS:
     /* Merge every alternative after the first one into the same positions;
        the first alternative is scanned by continuing the main loop below. */
3331 alternative = cc + GET(cc, 1);
3332 while (*alternative == OP_ALT)
3333 {
3334 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3335 if (max_chars == 0)
3336 return consumed;
3337 alternative += GET(alternative, 1);
3338 }
3339
     /* Capturing brackets carry an extra IMM2_SIZE field after the link. */
3340 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3341 cc += IMM2_SIZE;
3342 cc += 1 + LINK_SIZE;
3343 continue;
3344
3345 case OP_CLASS:
3346 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3347 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3348 #endif
3349 any = TRUE;
3350 cc += 1 + 32 / sizeof(pcre_uchar);
3351 break;
3352
3353 case OP_NCLASS:
3354 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3355 if (common->utf) return consumed;
3356 #endif
3357 any = TRUE;
3358 cc += 1 + 32 / sizeof(pcre_uchar);
3359 break;
3360
3361 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3362 case OP_XCLASS:
3363 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3364 if (common->utf) return consumed;
3365 #endif
3366 any = TRUE;
3367 cc += GET(cc, 1);
3368 break;
3369 #endif
3370
3371 case OP_DIGIT:
3372 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3373 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3374 return consumed;
3375 #endif
3376 any = TRUE;
3377 cc++;
3378 break;
3379
3380 case OP_WHITESPACE:
3381 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3382 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3383 return consumed;
3384 #endif
3385 any = TRUE;
3386 cc++;
3387 break;
3388
3389 case OP_WORDCHAR:
3390 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3391 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3392 return consumed;
3393 #endif
3394 any = TRUE;
3395 cc++;
3396 break;
3397
3398 case OP_NOT:
3399 case OP_NOTI:
     /* Extra increment: together with the cc++ below this advances two units. */
3400 cc++;
3401 /* Fall through. */
3402 case OP_NOT_DIGIT:
3403 case OP_NOT_WHITESPACE:
3404 case OP_NOT_WORDCHAR:
3405 case OP_ANY:
3406 case OP_ALLANY:
3407 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3408 if (common->utf) return consumed;
3409 #endif
3410 any = TRUE;
3411 cc++;
3412 break;
3413
3414 #ifdef SUPPORT_UCP
3415 case OP_NOTPROP:
3416 case OP_PROP:
3417 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3418 if (common->utf) return consumed;
3419 #endif
3420 any = TRUE;
3421 cc += 1 + 2;
3422 break;
3423 #endif
3424
     /* Only remembers the count; the type opcode that follows is handled by
        the next loop iteration, which consumes "repeat". */
3425 case OP_TYPEEXACT:
3426 repeat = GET2(cc, 1);
3427 cc += 1 + IMM2_SIZE;
3428 continue;
3429
3430 case OP_NOTEXACT:
3431 case OP_NOTEXACTI:
3432 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3433 if (common->utf) return consumed;
3434 #endif
3435 any = TRUE;
3436 repeat = GET2(cc, 1);
3437 cc += 1 + IMM2_SIZE + 1;
3438 break;
3439
     /* Any other opcode ends the prefix. */
3440 default:
3441 return consumed;
3442 }
3443
3444 if (any)
3445 {
     /* All-ones mask covering one code unit of the current library width. */
3446 #if defined COMPILE_PCRE8
3447 mask = 0xff;
3448 #elif defined COMPILE_PCRE16
3449 mask = 0xffff;
3450 #elif defined COMPILE_PCRE32
3451 mask = 0xffffffff;
3452 #else
3453 SLJIT_ASSERT_STOP();
3454 #endif
3455
     /* Mark "repeat" consecutive positions as unconstrained. */
3456 do
3457 {
3458 chars[0] = mask;
3459 chars[1] = mask;
3460 bytes[0] = 255;
3461
3462 consumed++;
3463 if (--max_chars == 0)
3464 return consumed;
3465 chars += 2;
3466 bytes += MAX_N_BYTES;
3467 }
3468 while (--repeat > 0);
3469
3470 repeat = 1;
3471 continue;
3472 }
3473
     /* Literal character: cc now points at its first code unit. */
3474 len = 1;
3475 #ifdef SUPPORT_UTF
3476 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3477 #endif
3478
3479 if (caseless && char_has_othercase(common, cc))
3480 {
3481 #ifdef SUPPORT_UTF
3482 if (common->utf)
3483 {
3484 GETCHAR(chr, cc);
     /* Both cases must encode to the same number of code units, otherwise
        the positions cannot be merged one by one. */
3485 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3486 return consumed;
3487 }
3488 else
3489 #endif
3490 {
3491 chr = *cc;
3492 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3493 }
3494 }
3495 else
3496 caseless = FALSE;
3497
     /* Saved so that the same literal can be replayed "repeat" times. */
3498 len_save = len;
3499 cc_save = cc;
3500 while (TRUE)
3501 {
3502 oc = othercase;
3503 do
3504 {
3505 chr = *cc;
3506 #ifdef COMPILE_PCRE32
     /* NOTACHAR is used as the "position is empty" marker in chars[], so it
        cannot be recorded as a real character. */
3507 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3508 return consumed;
3509 #endif
3510 add_prefix_byte((pcre_uint8)chr, bytes);
3511
3512 mask = 0;
3513 if (caseless)
3514 {
3515 add_prefix_byte((pcre_uint8)*oc, bytes);
3516 mask = *cc ^ *oc;
3517 chr |= mask;
3518 }
3519
     /* An untouched position still holds its initial NOTACHAR / 0 pair. */
3520 #ifdef COMPILE_PCRE32
3521 if (chars[0] == NOTACHAR && chars[1] == 0)
3522 #else
3523 if (chars[0] == NOTACHAR)
3524 #endif
3525 {
3526 chars[0] = chr;
3527 chars[1] = mask;
3528 }
3529 else
3530 {
     /* Merge with what another path already stored: every differing bit
        becomes a "don't care" bit in both the value and the mask. */
3531 mask |= chars[0] ^ chr;
3532 chr |= mask;
3533 chars[0] = chr;
3534 chars[1] |= mask;
3535 }
3536
3537 len--;
3538 consumed++;
3539 if (--max_chars == 0)
3540 return consumed;
3541 chars += 2;
3542 bytes += MAX_N_BYTES;
3543 cc++;
3544 oc++;
3545 }
3546 while (len > 0);
3547
3548 if (--repeat == 0)
3549 break;
3550
3551 len = len_save;
3552 cc = cc_save;
3553 }
3554
3555 repeat = 1;
3556 if (last)
3557 return consumed;
3558 }
3559 }
3560
/* Emits a fast-forward loop based on the leading characters collected by
   scan_prefix() (at most MAX_N_CHARS positions).

   Two techniques may be combined in the emitted loop:
   - a 256-entry skip table ("update_table") built from a run of positions
     whose possible bytes are all known, indexed by the byte found at offset
     range_right;
   - direct comparison of up to three positions (offsets[0..2]) whose masks
     contain at most two "don't care" bits.

   Returns FALSE, without emitting code, when the prefix is not usable here
   (fewer than two positions, no suitable position or range, or the single
   position case left to fast_forward_first_char). Returns TRUE otherwise.
   Note that TRUE is also returned, before any code is emitted, when
   allocate_read_only_data() returns NULL; presumably the failure is reported
   through "common" - verify against allocate_read_only_data(). */
3561 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3562 {
3563 DEFINE_COMPILER;
3564 struct sljit_label *start;
3565 struct sljit_jump *quit;
3566 pcre_uint32 chars[MAX_N_CHARS * 2];
3567 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3568 pcre_uint8 ones[MAX_N_CHARS];
3569 int offsets[3];
3570 pcre_uint32 mask;
3571 pcre_uint8 *byte_set, *byte_set_end;
3572 int i, max, from;
3573 int range_right = -1, range_len = 3 - 1;
3574 sljit_ub *update_table = NULL;
3575 BOOL in_range;
3576 pcre_uint32 rec_count;
3577
     /* Mark every position as empty: NOTACHAR value, zero mask, empty byte set. */
3578 for (i = 0; i < MAX_N_CHARS; i++)
3579 {
3580 chars[i << 1] = NOTACHAR;
3581 chars[(i << 1) + 1] = 0;
3582 bytes[i * MAX_N_BYTES] = 0;
3583 }
3584
     /* Work budget shared by all recursive scan_prefix() invocations. */
3585 rec_count = 10000;
3586 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3587
3588 if (max <= 1)
3589 return FALSE;
3590
     /* ones[i] accumulates ones_in_half_byte[] over every 4-bit group of the
        mask of position i. */
3591 for (i = 0; i < max; i++)
3592 {
3593 mask = chars[(i << 1) + 1];
3594 ones[i] = ones_in_half_byte[mask & 0xf];
3595 mask >>= 4;
3596 while (mask != 0)
3597 {
3598 ones[i] += ones_in_half_byte[mask & 0xf];
3599 mask >>= 4;
3600 }
3601 }
3602
     /* Look for the longest run of consecutive positions with a tracked byte
        set (count < 255). A run is accepted only if it is longer than the
        current range_len (initially 2) and its last position has at most 4
        distinct bytes. The loop runs to i == max so a run ending at the last
        position is also closed. */
3603 in_range = FALSE;
3604 from = 0; /* Prevent compiler "uninitialized" warning */
3605 for (i = 0; i <= max; i++)
3606 {
3607 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3608 {
3609 range_len = i - from;
3610 range_right = i - 1;
3611 }
3612
3613 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3614 {
3615 if (!in_range)
3616 {
3617 in_range = TRUE;
3618 from = i;
3619 }
3620 }
3621 else if (in_range)
3622 in_range = FALSE;
3623 }
3624
3625 if (range_right >= 0)
3626 {
3627 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3628 if (update_table == NULL)
3629 return TRUE;
     /* Default skip: the whole range length. For a byte that can occur i
        positions before range_right the skip is lowered to i characters
        (0 for bytes that can occur at range_right itself). */
3630 memset(update_table, IN_UCHARS(range_len), 256);
3631
3632 for (i = 0; i < range_len; i++)
3633 {
3634 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3635 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3636 byte_set_end = byte_set + byte_set[0];
3637 byte_set++;
3638 while (byte_set <= byte_set_end)
3639 {
3640 if (update_table[*byte_set] > IN_UCHARS(i))
3641 update_table[*byte_set] = IN_UCHARS(i);
3642 byte_set++;
3643 }
3644 }
3645 }
3646
     /* offsets[0]: first position whose mask has at most two bits set. */
3647 offsets[0] = -1;
3648 /* Scan forward. */
3649 for (i = 0; i < max; i++)
3650 if (ones[i] <= 2) {
3651 offsets[0] = i;
3652 break;
3653 }
3654
3655 if (offsets[0] < 0 && range_right < 0)
3656 return FALSE;
3657
     /* offsets[1] and offsets[2] are only assigned (and only read later) when
        offsets[0] is valid. */
3658 if (offsets[0] >= 0)
3659 {
3660 /* Scan backward. */
3661 offsets[1] = -1;
3662 for (i = max - 1; i > offsets[0]; i--)
3663 if (ones[i] <= 2 && i != range_right)
3664 {
3665 offsets[1] = i;
3666 break;
3667 }
3668
3669 /* This case is handled better by fast_forward_first_char. */
3670 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3671 return FALSE;
3672
3673 offsets[2] = -1;
3674 /* We only search for a middle character if there is no range check. */
3675 if (offsets[1] >= 0 && range_right == -1)
3676 {
3677 /* Scan from middle. */
3678 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3679 if (ones[i] <= 2)
3680 {
3681 offsets[2] = i;
3682 break;
3683 }
3684
3685 if (offsets[2] == -1)
3686 {
3687 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3688 if (ones[i] <= 2)
3689 {
3690 offsets[2] = i;
3691 break;
3692 }
3693 }
3694 }
3695
3696 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3697 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3698
     /* Compact the selected value/mask pairs into chars[0..5]:
        [0,1] = offsets[0], [2,3] = offsets[2] (middle), [4,5] = offsets[1]. */
3699 chars[0] = chars[offsets[0] << 1];
3700 chars[1] = chars[(offsets[0] << 1) + 1];
3701 if (offsets[2] >= 0)
3702 {
3703 chars[2] = chars[offsets[2] << 1];
3704 chars[3] = chars[(offsets[2] << 1) + 1];
3705 }
3706 if (offsets[1] >= 0)
3707 {
3708 chars[4] = chars[offsets[1] << 1];
3709 chars[5] = chars[(offsets[1] << 1) + 1];
3710 }
3711 }
3712
     /* From here on "max" is the offset of the last prefix position. STR_END
        is lowered by that many characters while the loop runs, so the loads
        at the positive offsets below stay inside the subject. */
3713 max -= 1;
3714 if (firstline)
3715 {
3716 SLJIT_ASSERT(common->first_line_end != 0);
     /* The original STR_END is kept in TMP3; the working limit becomes the
        stored first_line_end when the lowered STR_END is above it. */
3717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3718 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3719 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3720 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3721 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3722 JUMPHERE(quit);
3723 }
3724 else
3725 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3726
     /* Except on x86-32, the table address is kept in RETURN_ADDR for the
        two-register addressing form used inside the loop. */
3727 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3728 if (range_right >= 0)
3729 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3730 #endif
3731
3732 start = LABEL();
3733 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3734
3735 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3736
3737 if (range_right >= 0)
3738 {
     /* Load a single byte of the character at offset range_right; the
        alternative offset is used for big endian 16/32 bit builds. */
3739 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3740 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3741 #else
3742 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3743 #endif
3744
3745 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3746 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3747 #else
3748 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3749 #endif
     /* Advance by the table value; a non-zero skip restarts the loop. */
3750 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3751 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3752 }
3753
3754 if (offsets[0] >= 0)
3755 {
3756 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3757 if (offsets[1] >= 0)
3758 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
     /* STR_PTR is advanced before the comparisons, so every failing compare
        can jump straight back to "start"; the advance is undone at the end
        of this block when all compares succeed. */
3759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3760
3761 if (chars[1] != 0)
3762 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3763 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
     /* The "- 1" compensates for the STR_PTR increment above. */
3764 if (offsets[2] >= 0)
3765 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3766
3767 if (offsets[1] >= 0)
3768 {
3769 if (chars[5] != 0)
3770 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3771 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3772 }
3773
3774 if (offsets[2] >= 0)
3775 {
3776 if (chars[3] != 0)
3777 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3778 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3779 }
3780 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3781 }
3782
3783 JUMPHERE(quit);
3784
     /* Restore the real STR_END. */
3785 if (firstline)
3786 {
3787 if (range_right >= 0)
3788 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3789 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3790 if (range_right >= 0)
3791 {
     /* Clamp STR_PTR to the stored first_line_end value. */
3792 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3793 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3794 JUMPHERE(quit);
3795 }
3796 }
3797 else
3798 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3799 return TRUE;
3800 }
3801
/* The prefix scanning limits are only needed by add_prefix_byte(),
   scan_prefix() and fast_forward_first_n_chars() above. */
3802 #undef MAX_N_CHARS
3803 #undef MAX_N_BYTES
3804
3805 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3806 {
3807 DEFINE_COMPILER;
3808 struct sljit_label *start;
3809 struct sljit_jump *quit;
3810 struct sljit_jump *found;
3811 pcre_uchar oc, bit;
3812
3813 if (firstline)
3814 {
3815 SLJIT_ASSERT(common->first_line_end != 0);
3816 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3817 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3818 }
3819
3820 start = LABEL();
3821 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3823
3824 oc = first_char;
3825 if (caseless)
3826 {
3827 oc = TABLE_GET(first_char, common->fcc, first_char);
3828 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3829 if (first_char > 127 && common->utf)
3830 oc = UCD_OTHERCASE(first_char);
3831 #endif
3832 }
3833 if (first_char == oc)
3834 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3835 else
3836 {
3837 bit = first_char ^ oc;
3838 if (is_powerof2(bit))
3839 {
3840 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3841 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3842 }
3843 else
3844 {
3845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3846 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3847 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3848 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3849 found = JUMP(SLJIT_NOT_ZERO);
3850 }
3851 }
3852
3853 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3854 JUMPTO(SLJIT_JUMP, start);
3855 JUMPHERE(found);
3856 JUMPHERE(quit);
3857
3858 if (firstline)
3859 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3860 }
3861
/* Emits code which moves STR_PTR forward to the position following the next
   newline, unless STR_PTR is at (or before) the "str" field of jit_arguments
   or already at STR_END.

   When "firstline" is set, STR_END is replaced by the stored first_line_end
   value while the emitted code runs; the original value is kept in TMP3 and
   restored at the end. */
3862 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3863 {
3864 DEFINE_COMPILER;
3865 struct sljit_label *loop;
3866 struct sljit_jump *lastchar;
3867 struct sljit_jump *firstchar;
3868 struct sljit_jump *quit;
3869 struct sljit_jump *foundcr = NULL;
3870 struct sljit_jump *notfoundnl;
3871 jump_list *newline = NULL;
3872
3873 if (firstline)
3874 {
3875 SLJIT_ASSERT(common->first_line_end != 0);
3876 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3877 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3878 }
3879
     /* Fixed newline made of two code units (common->newline > 255). */
3880 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3881 {
3882 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3883 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3884 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3885 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3886 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3887
     /* Step STR_PTR back by one code unit when it is at least two code units
        past "begin": TMP2 becomes 0 or 1, scaled to the code unit size. */
3888 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3889 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3890 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3891 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3892 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3893 #endif
3894 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3895
     /* Advance one code unit at a time until the two code units just before
        STR_PTR form the newline sequence, or STR_END is reached. */
3896 loop = LABEL();
3897 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3898 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3899 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3900 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3901 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3902 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3903
3904 JUMPHERE(quit);
3905 JUMPHERE(firstchar);
3906 JUMPHERE(lastchar);
3907
3908 if (firstline)
3909 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3910 return;
3911 }
3912
     /* All other newline types. */
3913 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3915 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3916 skip_char_back(common);
3917
     /* The loop head is also published in common->ff_newline_shortcut. */
3918 loop = LABEL();
3919 common->ff_newline_shortcut = loop;
3920
3921 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3922 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
     /* CR gets separate handling below because an NL may follow it. */
3923 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3924 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
     /* Jumps collected in "newline" are bound to the loop head. */
3925 check_newlinechar(common, common->nltype, &newline, FALSE);
3926 set_jumps(newline, loop);
3927
3928 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3929 {
3930 quit = JUMP(SLJIT_JUMP);
3931 JUMPHERE(foundcr);
     /* After a CR: unless at STR_END, skip one more code unit when the next
        character is NL (TMP1 is 0 or 1, scaled to the code unit size). */
3932 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3933 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3934 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3935 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3936 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3937 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3938 #endif
3939 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3940 JUMPHERE(notfoundnl);
3941 JUMPHERE(quit);
3942 }
3943 JUMPHERE(lastchar);
3944 JUMPHERE(firstchar);
3945
3946 if (firstline)
3947 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3948 }
3949
/* Forward declaration: fast_forward_start_bits() below calls this function
   before its definition. */
3950 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3951
/* Emits a loop which advances STR_PTR until the character it points at is
   accepted by the "start_bits" bitmap (one bit per value 0..255), or STR_PTR
   reaches STR_END.

   When "firstline" is set, STR_END is replaced by the stored first_line_end
   value while the loop runs. The original STR_END is kept in RETURN_ADDR here
   (not TMP3), because TMP3 is used to preserve the loaded character in UTF
   mode. */
3952 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3953 {
3954 DEFINE_COMPILER;
3955 struct sljit_label *start;
3956 struct sljit_jump *quit;
3957 struct sljit_jump *found = NULL;
3958 jump_list *matches = NULL;
3959 #ifndef COMPILE_PCRE8
3960 struct sljit_jump *jump;
3961 #endif
3962
3963 if (firstline)
3964 {
3965 SLJIT_ASSERT(common->first_line_end != 0);
3966 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3967 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3968 }
3969
3970 start = LABEL();
3971 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3972 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
     /* Keep a copy of the code unit: TMP1 is modified by the tests below but
        is needed again to compute the character length. */
3973 #ifdef SUPPORT_UTF
3974 if (common->utf)
3975 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3976 #endif
3977
     /* Try check_class_ranges() first (with invert == TRUE, its jumps are
        collected in "matches"); when it returns FALSE, fall back to a bitmap
        lookup. */
3978 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3979 {
     /* Wide code units: values of 255 and above all use bit 255. */
3980 #ifndef COMPILE_PCRE8
3981 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3982 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3983 JUMPHERE(jump);
3984 #endif
     /* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; "found" is taken when
        the bit is set. */
3985 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3986 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3987 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3988 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3989 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3990 found = JUMP(SLJIT_NOT_ZERO);
3991 }
3992
     /* Not accepted: restore the code unit and skip the whole character. */
3993 #ifdef SUPPORT_UTF
3994 if (common->utf)
3995 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3996 #endif
3997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3998 #ifdef SUPPORT_UTF
3999 #if defined COMPILE_PCRE8
4000 if (common->utf)
4001 {
     /* Code units of 0xc0 and above: add the extra length read from
        utf8_table4. */
4002 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4003 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4004 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4005 }
4006 #elif defined COMPILE_PCRE16
4007 if (common->utf)
4008 {
     /* Skip one more code unit (2 bytes) when (c & 0xfc00) == 0xd800. */
4009 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4010 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4011 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4012 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4013 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4014 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4015 }
4016 #endif /* COMPILE_PCRE[8|16] */
4017 #endif /* SUPPORT_UTF */
4018 JUMPTO(SLJIT_JUMP, start);
     /* Exit points: both kinds of "accepted" jumps and the end-of-input jump
        land here. */
4019 if (found != NULL)
4020 JUMPHERE(found);
4021 if (matches != NULL)
4022 set_jumps(matches, LABEL());
4023 JUMPHERE(quit);
4024
4025 if (firstline)
4026 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4027 }
4028
4029 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4030 {
4031 DEFINE_COMPILER;
4032 struct sljit_label *loop;
4033 struct sljit_jump *toolong;
4034 struct sljit_jump *alreadyfound;
4035 struct sljit_jump *found;
4036 struct sljit_jump *foundoc = NULL;
4037 struct sljit_jump *notfound;
4038 pcre_uint32 oc, bit;
4039
4040 SLJIT_ASSERT(common->req_char_ptr != 0);
4041 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4042 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4043 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4044 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4045
4046 if (has_firstchar)
4047 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4048 else
4049 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4050
4051 loop = LABEL();
4052 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4053
4054 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4055 oc = req_char;
4056 if (caseless)
4057 {
4058 oc = TABLE_GET(req_char, common->fcc, req_char);
4059 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4060 if (req_char > 127 && common->utf)
4061 oc = UCD_OTHERCASE(req_char);
4062 #endif
4063 }
4064 if (req_char == oc)
4065 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4066 else
4067 {
4068 bit = req_char ^ oc;
4069 if (is_powerof2(bit))
4070 {
4071 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4072 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4073 }
4074 else
4075 {
4076 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4077 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4078 }
4079 }
4080 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4081 JUMPTO(SLJIT_JUMP, loop);
4082
4083 JUMPHERE(found);
4084 if (foundoc)
4085 JUMPHERE(foundoc);
4086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4087 JUMPHERE(alreadyfound);
4088 JUMPHERE(toolong);
4089 return notfound;
4090 }
4091
/* Emits a fast-call helper which walks the records starting at STACK_TOP and
   copies the saved values back into the local area (base address in TMP3).

   Each record starts with an offset word:
     > 0  two saved words follow; they are written to base + offset and the
          word after it, then the cursor advances by three words;
     < 0  one saved word follows; it is written to base - offset and the
          cursor advances by two words;
     = 0  end of the records, the helper returns.
   TMP1 is the cursor and TMP2 holds the current offset. */
4092 static void do_revertframes(compiler_common *common)
4093 {
4094 DEFINE_COMPILER;
4095 struct sljit_jump *jump;
4096 struct sljit_label *mainloop;
4097
4098 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4099 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4100 GET_LOCAL_BASE(TMP3, 0, 0);
4101
4102 /* Drop frames until we reach STACK_TOP. */
4103 mainloop = LABEL();
4104 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4105 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4106 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4107
     /* Positive offset: restore two consecutive words. */
4108 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4109 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4110 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4111 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4112 JUMPTO(SLJIT_JUMP, mainloop);
4113
     /* Zero or negative offset. No flag-setting operation was emitted between
        the comparison above and this point, so the second conditional jump
        tests the result of that same comparison. */
4114 JUMPHERE(jump);
4115 jump = JUMP(SLJIT_SIG_LESS);
4116 /* End of dropping frames. */
4117 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4118
     /* Negative offset: restore a single word at base + (-offset). */
4119 JUMPHERE(jump);
4120 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4121 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4122 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4123 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4124 JUMPTO(SLJIT_JUMP, mainloop);
4125 }
4126
/* Emits a fast-call helper which tests for a word boundary at STR_PTR.

   The "word character" property (0 or 1) of the previous character is stored
   in LOCALS1 and that of the current character is computed in TMP2. The
   helper finishes with an XOR of the two values that sets the equality flag,
   so the caller receives the result in the CPU flags. Either value stays 0
   when there is no such character (STR_PTR at or before "begin", or the
   check_str_end() jumps are taken). The return address is kept in LOCALS0. */
4127 static void check_wordboundary(compiler_common *common)
4128 {
4129 DEFINE_COMPILER;
4130 struct sljit_jump *skipread;
4131 jump_list *skipread_list = NULL;
4132 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4133 struct sljit_jump *jump;
4134 #endif
4135
     /* The shift by 4 followed by "& 1" below relies on this value. */
4136 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4137
4138 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4139 /* Get type of the previous char, and put it to LOCALS1. */
4140 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
     /* No previous character: LOCALS1 keeps the 0 stored above. */
4143 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
     /* Step back and read the previous character; read_char() is expected to
        leave STR_PTR where it was on entry - verify against read_char(). */
4144 skip_char_back(common);
4145 check_start_used_ptr(common);
4146 read_char(common);
4147
4148 /* Testing char type. */
4149 #ifdef SUPPORT_UCP
4150 if (common->use_ucp)
4151 {
     /* Result is 1 for an underscore; otherwise it is built from two unsigned
        range checks on the value left in TMP1 by the getucd helper:
        [ucp_Ll, ucp_Lu] and [ucp_Nd, ucp_No]. */
4152 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4153 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4154 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4155 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4156 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4157 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4158 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4159 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4160 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4161 JUMPHERE(jump);
4162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4163 }
4164 else
4165 #endif
4166 {
     /* Table based test: characters above 255 skip the ctypes lookup. */
4167 #ifndef COMPILE_PCRE8
4168 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4169 #elif defined SUPPORT_UTF
4170 /* Here LOCALS1 has already been zeroed. */
4171 jump = NULL;
4172 if (common->utf)
4173 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4174 #endif /* COMPILE_PCRE8 */
4175 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4176 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4177 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4179 #ifndef COMPILE_PCRE8
4180 JUMPHERE(jump);
4181 #elif defined SUPPORT_UTF
4182 if (jump != NULL)
4183 JUMPHERE(jump);
4184 #endif /* COMPILE_PCRE8 */
4185 }
4186 JUMPHERE(skipread);
4187
     /* Now the current character: TMP2 defaults to 0 and keeps that value if
        the check_str_end() jumps are taken. */
4188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4189 check_str_end(common, &skipread_list);
4190 peek_char(common, READ_CHAR_MAX);
4191
4192 /* Testing char type. This is a code duplication. */
4193 #ifdef SUPPORT_UCP
4194 if (common->use_ucp)
4195 {
4196 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4197 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4198 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4199 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4200 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4201 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4202 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4203 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4204 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4205 JUMPHERE(jump);
4206 }
4207 else
4208 #endif
4209 {
4210 #ifndef COMPILE_PCRE8
4211 /* TMP2 may be destroyed by peek_char. */
4212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4213 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4214 #elif defined SUPPORT_UTF
4215 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4216 jump = NULL;
4217 if (common->utf)
4218 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4219 #endif
4220 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4221 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4222 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4223 #ifndef COMPILE_PCRE8
4224 JUMPHERE(jump);
4225 #elif defined SUPPORT_UTF
4226 if (jump != NULL)
4227 JUMPHERE(jump);
4228 #endif /* COMPILE_PCRE8 */
4229 }
4230 set_jumps(skipread_list, LABEL());
4231
     /* The two values differ exactly when their XOR is non-zero. */
4232 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4233 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4234 }
4235
4236 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4237 {
4238 DEFINE_COMPILER;
4239 int ranges[MAX_RANGE_SIZE];
4240 pcre_uint8 bit, cbit, all;
4241 int i, byte, length = 0;
4242
4243 bit = bits[0] & 0x1;
4244 /* All bits will be zero or one (since bit is zero or one). */
4245 all = -bit;
4246
4247 for (i = 0; i < 256; )
4248 {
4249 byte = i >> 3;
4250 if ((i & 0x7) == 0 && bits[byte] == all)
4251 i += 8;
4252 else
4253 {
4254 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4255 if (cbit != bit)
4256 {
4257 if (length >= MAX_RANGE_SIZE)
4258 return FALSE;
4259 ranges[length] = i;
4260 length++;
4261 bit = cbit;
4262 all = -cbit;
4263 }
4264 i++;
4265 }
4266 }
4267
4268 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4269 {
4270 if (length >= MAX_RANGE_SIZE)
4271 return FALSE;
4272 ranges[length] = 256;
4273 length++;
4274 }
4275
4276 if (length < 0 || length > 4)
4277 return FALSE;
4278
4279 bit = bits[0] & 0x1;
4280 if (invert) bit ^= 0x1;
4281
4282 /* No character is accepted. */
4283 if (length == 0 && bit == 0)
4284 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4285
4286 switch(length)
4287 {
4288 case 0:
4289 /* When bit != 0, all characters are accepted. */
4290 return TRUE;
4291
4292 case 1:
4293 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4294 return TRUE;
4295
4296 case 2:
4297 if (ranges[0] + 1 != ranges[1])
4298 {
4299 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4300 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4301 }
4302 else
4303 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4304 return TRUE;
4305
4306 case 3:
4307 if (bit != 0)
4308 {
4309 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4310 if (ranges[0] + 1 != ranges[1])
4311 {
4312 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4313 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4314 }
4315 else
4316 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4317 return TRUE;
4318 }
4319
4320 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4321 if (ranges[1] + 1 != ranges[2])
4322 {
4323 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4324 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4325 }
4326 else
4327 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4328 return TRUE;
4329
4330 case 4:
4331 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4332 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4333 && is_powerof2(ranges[2] - ranges[0]))
4334 {
4335 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4336 if (ranges[2] + 1 != ranges[3])
4337 {
4338 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4339 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4340 }
4341 else
4342 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4343 return TRUE;
4344 }
4345
4346 if (bit != 0)
4347 {
4348 i = 0;
4349 if (ranges[0] + 1 != ranges[1])
4350 {
4351 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4352 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4353 i = ranges[0];
4354 }
4355 else
4356 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4357
4358 if (ranges[2] + 1 != ranges[3])
4359 {
4360 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4361 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4362 }
4363 else
4364 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4365 return TRUE;
4366 }
4367
4368 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4369 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4370 if (ranges[1] + 1 != ranges[2])
4371 {
4372 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4373 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4374 }
4375 else
4376 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4377 return TRUE;
4378
4379 default:
4380 SLJIT_ASSERT_STOP();
4381 return FALSE;
4382 }
4383 }
4384
4385 static void check_anynewline(compiler_common *common)
4386 {
4387 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4388 DEFINE_COMPILER;
4389
4390 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4391
4392 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4393 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4394 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4395 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4396 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4397 #ifdef COMPILE_PCRE8
4398 if (common->utf)
4399 {
4400 #endif
4401 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4402 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4403 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4404 #ifdef COMPILE_PCRE8
4405 }
4406 #endif
4407 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4408 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4409 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4410 }
4411
4412 static void check_hspace(compiler_common *common)
4413 {
4414 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4415 DEFINE_COMPILER;
4416
4417 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4418
4419 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4420 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4421 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4422 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4423 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4424 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4425 #ifdef COMPILE_PCRE8
4426 if (common->utf)
4427 {
4428 #endif
4429 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4430 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4431 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4432 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4433 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4434 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4435 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4436 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4437 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4438 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4439 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4440 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4441 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4442 #ifdef COMPILE_PCRE8
4443 }
4444 #endif
4445 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4446 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4447
4448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4449 }
4450
4451 static void check_vspace(compiler_common *common)
4452 {
4453 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4454 DEFINE_COMPILER;
4455
4456 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4457
4458 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4459 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4460 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4461 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4462 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4463 #ifdef COMPILE_PCRE8
4464 if (common->utf)
4465 {
4466 #endif
4467 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4468 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4470 #ifdef COMPILE_PCRE8
4471 }
4472 #endif
4473 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4474 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4475
4476 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4477 }
4478
4479 #define CHAR1 STR_END
4480 #define CHAR2 STACK_TOP
4481
4482 static void do_casefulcmp(compiler_common *common)
4483 {
4484 DEFINE_COMPILER;
4485 struct sljit_jump *jump;
4486 struct sljit_label *label;
4487
4488 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4489 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4490 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
4492 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4493 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4494
4495 label = LABEL();
4496 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4497 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4498 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4499 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4500 JUMPTO(SLJIT_NOT_ZERO, label);
4501
4502 JUMPHERE(jump);
4503 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4504 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4505 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4506 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4507 }
4508
4509 #define LCC_TABLE STACK_LIMIT
4510
4511 static void do_caselesscmp(compiler_common *common)
4512 {
4513 DEFINE_COMPILER;
4514 struct sljit_jump *jump;
4515 struct sljit_label *label;
4516
4517 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4518 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4519
4520 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4522 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4523 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4524 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4525 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4526
4527 label = LABEL();
4528 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4529 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4530 #ifndef COMPILE_PCRE8
4531 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4532 #endif
4533 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4534 #ifndef COMPILE_PCRE8
4535 JUMPHERE(jump);
4536 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4537 #endif
4538 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4539 #ifndef COMPILE_PCRE8
4540 JUMPHERE(jump);
4541 #endif
4542 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4543 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4544 JUMPTO(SLJIT_NOT_ZERO, label);
4545
4546 JUMPHERE(jump);
4547 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4548 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4549 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4550 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4551 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4552 }
4553
4554 #undef LCC_TABLE
4555 #undef CHAR1
4556 #undef CHAR2
4557
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings, written in C because doing it at
JIT level would be ineffective.

Arguments:
  src1    start of the first string
  args    supplies the second string: args->uchar_ptr is its start and
          args->end is its limit
  end1    end of the first string

Returns:  the position reached in the second string if the whole first
          string matched,
          NULL if a character pair does not match,
          (pcre_uchar*)1 if the second string ended before the first one
*/
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
pcre_uint32 ch1, ch2;
const pcre_uchar *ptr2 = args->uchar_ptr;
const pcre_uchar *limit2 = args->end;
const ucd_record *prop;
const pcre_uint32 *set_item;

while (src1 < end1)
  {
  if (ptr2 >= limit2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, ptr2);
  prop = GET_UCD(ch2);

  /* Identical characters and simple other case pairs need no set lookup. */
  if (ch1 == ch2 || ch1 == ch2 + prop->other_case)
    continue;

  /* Search the caseless set of the second character. The early exit
  presumably relies on the set being in ascending order and closed by a
  value larger than any character. */
  set_item = PRIV(ucd_caseless_sets) + prop->caseset;
  while (ch1 != *set_item)
    {
    if (ch1 < *set_item)
      return NULL;
    set_item++;
    }
  }

return ptr2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4590
4591 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4592 compare_context *context, jump_list **backtracks)
4593 {
4594 DEFINE_COMPILER;
4595 unsigned int othercasebit = 0;
4596 pcre_uchar *othercasechar = NULL;
4597 #ifdef SUPPORT_UTF
4598 int utflength;
4599 #endif
4600
4601 if (caseless && char_has_othercase(common, cc))
4602 {
4603 othercasebit = char_get_othercase_bit(common, cc);
4604 SLJIT_ASSERT(othercasebit);
4605 /* Extracting bit difference info. */
4606 #if defined COMPILE_PCRE8
4607 othercasechar = cc + (othercasebit >> 8);
4608 othercasebit &= 0xff;
4609 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4610 /* Note that this code only handles characters in the BMP. If there
4611 ever are characters outside the BMP whose othercase differs in only one
4612 bit from itself (there currently are none), this code will need to be
4613 revised for COMPILE_PCRE32. */
4614 othercasechar = cc + (othercasebit >> 9);
4615 if ((othercasebit & 0x100) != 0)
4616 othercasebit = (othercasebit & 0xff) << 8;
4617 else
4618 othercasebit &= 0xff;
4619 #endif /* COMPILE_PCRE[8|16|32] */
4620 }
4621
4622 if (context->sourcereg == -1)
4623 {
4624 #if defined COMPILE_PCRE8
4625 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4626 if (context->length >= 4)
4627 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4628 else if (context->length >= 2)
4629 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4630 else
4631 #endif
4632 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4633 #elif defined COMPILE_PCRE16
4634 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4635 if (context->length >= 4)
4636 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4637 else
4638 #endif
4639 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4640 #elif defined COMPILE_PCRE32
4641 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4642 #endif /* COMPILE_PCRE[8|16|32] */
4643 context->sourcereg = TMP2;
4644 }
4645
4646 #ifdef SUPPORT_UTF
4647 utflength = 1;
4648 if (common->utf && HAS_EXTRALEN(*cc))
4649 utflength += GET_EXTRALEN(*cc);
4650
4651 do
4652 {
4653 #endif
4654
4655 context->length -= IN_UCHARS(1);
4656 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4657
4658 /* Unaligned read is supported. */
4659 if (othercasebit != 0 && othercasechar == cc)
4660 {
4661 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4662 context->oc.asuchars[context->ucharptr] = othercasebit;
4663 }
4664 else
4665 {
4666 context->c.asuchars[context->ucharptr] = *cc;
4667 context->oc.asuchars[context->ucharptr] = 0;
4668 }
4669 context->ucharptr++;
4670
4671 #if defined COMPILE_PCRE8
4672 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4673 #else
4674 if (context->ucharptr >= 2 || context->length == 0)
4675 #endif
4676 {
4677 if (context->length >= 4)
4678 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4679 else if (context->length >= 2)
4680 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4681 #if defined COMPILE_PCRE8
4682 else if (context->length >= 1)
4683 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4684 #endif /* COMPILE_PCRE8 */
4685 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4686
4687 switch(context->ucharptr)
4688 {
4689 case 4 / sizeof(pcre_uchar):
4690 if (context->oc.asint != 0)
4691 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4692 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4693 break;
4694
4695 case 2 / sizeof(pcre_uchar):
4696 if (context->oc.asushort != 0)
4697 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4698 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4699 break;
4700
4701 #ifdef COMPILE_PCRE8
4702 case 1:
4703 if (context->oc.asbyte != 0)
4704 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4705 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4706 break;
4707 #endif
4708
4709 default:
4710 SLJIT_ASSERT_STOP();
4711 break;
4712 }
4713 context->ucharptr = 0;
4714 }
4715
4716 #else
4717
4718 /* Unaligned read is unsupported or in 32 bit mode. */
4719 if (context->length >= 1)
4720 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4721
4722 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4723
4724 if (othercasebit != 0 && othercasechar == cc)
4725 {
4726 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4727 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4728 }
4729 else
4730 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4731
4732 #endif
4733
4734 cc++;
4735 #ifdef SUPPORT_UTF
4736 utflength--;
4737 }
4738 while (utflength > 0);
4739 #endif
4740
4741 return cc;
4742 }
4743
4744 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4745
/* Makes the value in typereg relative to (value) instead of the current
typeoffset: emits a single ADD or SUB of the difference (nothing when the two
offsets are equal), then records the new offset. Expands to several
statements, so it must be used as a statement of its own. */
#define SET_TYPE_OFFSET(value) \
  if ((value) < typeoffset) \
    OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
  else if ((value) > typeoffset) \
    OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
  typeoffset = (value);
4755
/* Makes the character in TMP1 relative to (value) instead of the current
charoffset: emits a single ADD or SUB of the difference (nothing when the two
offsets are equal), then records the new offset. Expands to several
statements, so it must be used as a statement of its own. */
#define SET_CHAR_OFFSET(value) \
  if ((value) < charoffset) \
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
  else if ((value) > charoffset) \
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
  charoffset = (value);
4765
4766 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4767 {
4768 DEFINE_COMPILER;
4769 jump_list *found = NULL;
4770 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4771 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4772 struct sljit_jump *jump = NULL;
4773 pcre_uchar *ccbegin;
4774 int compares, invertcmp, numberofcmps;
4775 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4776 BOOL utf = common->utf;
4777 #endif
4778
4779 #ifdef SUPPORT_UCP
4780 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4781 BOOL charsaved = FALSE;
4782 int typereg = TMP1, scriptreg = TMP1;
4783 const pcre_uint32 *other_cases;
4784 sljit_uw typeoffset;
4785 #endif
4786
4787 /* Scanning the necessary info. */
4788 cc++;
4789 ccbegin = cc;
4790 compares = 0;
4791 if (cc[-1] & XCL_MAP)
4792 {
4793 min = 0;
4794 cc += 32 / sizeof(pcre_uchar);
4795 }
4796
4797 while (*cc != XCL_END)
4798 {
4799 compares++;
4800 if (*cc == XCL_SINGLE)
4801 {
4802 cc ++;
4803 GETCHARINCTEST(c, cc);
4804 if (c > max) max = c;
4805 if (c < min) min = c;
4806 #ifdef SUPPORT_UCP
4807 needschar = TRUE;
4808 #endif
4809 }
4810 else if (*cc == XCL_RANGE)
4811 {
4812 cc ++;
4813 GETCHARINCTEST(c, cc);
4814 if (c < min) min = c;
4815 GETCHARINCTEST(c, cc);
4816 if (c > max) max = c;
4817 #ifdef SUPPORT_UCP
4818 needschar = TRUE;
4819 #endif
4820 }
4821 #ifdef SUPPORT_UCP
4822 else
4823 {
4824 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4825 cc++;
4826 if (*cc == PT_CLIST)
4827 {
4828 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4829 while (*other_cases != NOTACHAR)
4830 {
4831 if (*other_cases > max) max = *other_cases;
4832 if (*other_cases < min) min = *other_cases;
4833 other_cases++;
4834 }
4835 }
4836 else
4837 {
4838 max = READ_CHAR_MAX;
4839 min = 0;
4840 }
4841
4842 switch(*cc)
4843 {
4844 case PT_ANY:
4845 break;
4846
4847 case PT_LAMP:
4848 case PT_GC:
4849 case PT_PC:
4850 case PT_ALNUM:
4851 needstype = TRUE;
4852 break;
4853
4854 case PT_SC:
4855 needsscript = TRUE;
4856 break;
4857
4858 case PT_SPACE:
4859 case PT_PXSPACE:
4860 case PT_WORD:
4861 case PT_PXGRAPH:
4862 case PT_PXPRINT:
4863 case PT_PXPUNCT:
4864 needstype = TRUE;
4865 needschar = TRUE;
4866 break;
4867
4868 case PT_CLIST:
4869 case PT_UCNC:
4870 needschar = TRUE;
4871 break;
4872
4873 default:
4874 SLJIT_ASSERT_STOP();
4875 break;
4876 }
4877 cc += 2;
4878 }
4879 #endif
4880 }
4881
4882 /* We are not necessary in utf mode even in 8 bit mode. */
4883 cc = ccbegin;
4884 detect_partial_match(common, backtracks);
4885 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4886
4887 if ((cc[-1] & XCL_HASPROP) == 0)
4888 {
4889 if ((cc[-1] & XCL_MAP) != 0)
4890 {
4891 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4892 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4893 {
4894 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4895 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4896 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4897 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4898 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4899 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4900 }
4901
4902 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4903 JUMPHERE(jump);
4904
4905 cc += 32 / sizeof(pcre_uchar);
4906 }
4907 else
4908 {
4909 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4910 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4911 }
4912 }
4913 else if ((cc[-1] & XCL_MAP) != 0)
4914 {
4915 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4916 #ifdef SUPPORT_UCP
4917 charsaved = TRUE;
4918 #endif
4919 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4920 {
4921 #ifdef COMPILE_PCRE8
4922 SLJIT_ASSERT(common->utf);
4923 #endif
4924 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4925
4926 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4927 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4928 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4929 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4930 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4931 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4932
4933 JUMPHERE(jump);
4934 }
4935
4936 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4937 cc += 32 / sizeof(pcre_uchar);
4938 }
4939
4940 #ifdef SUPPORT_UCP
4941 /* Simple register allocation. TMP1 is preferred if possible. */
4942 if (needstype || needsscript)
4943 {
4944 if (needschar && !charsaved)
4945 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4946 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4947 if (needschar)
4948 {
4949 if (needstype)
4950 {
4951 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4952 typereg = RETURN_ADDR;
4953 }
4954
4955 if (needsscript)
4956 scriptreg = TMP3;
4957 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4958 }
4959 else if (needstype && needsscript)
4960 scriptreg = TMP3;
4961 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4962
4963 if (needsscript)
4964 {
4965 if (scriptreg == TMP1)
4966 {
4967 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4968 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4969 }
4970 else
4971 {
4972 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4973 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4974 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4975 }
4976 }
4977 }
4978 #endif
4979
4980 /* Generating code. */
4981 charoffset = 0;
4982 numberofcmps = 0;
4983 #ifdef SUPPORT_UCP
4984 typeoffset = 0;
4985 #endif
4986
4987 while (*cc != XCL_END)
4988 {
4989 compares--;
4990 invertcmp = (compares == 0 && list != backtracks);
4991 jump = NULL;
4992
4993 if (*cc == XCL_SINGLE)
4994 {
4995 cc ++;
4996 GETCHARINCTEST(c, cc);
4997
4998 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4999 {
5000 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5001 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5002 numberofcmps++;
5003 }
5004 else if (numberofcmps > 0)
5005 {
5006 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5007 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5008 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5009 numberofcmps = 0;
5010 }
5011 else
5012 {
5013 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5014 numberofcmps = 0;
5015 }
5016 }
5017 else if (*cc == XCL_RANGE)
5018 {
5019 cc ++;
5020 GETCHARINCTEST(c, cc);
5021 SET_CHAR_OFFSET(c);
5022 GETCHARINCTEST(c, cc);
5023
5024 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5025 {
5026 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5027 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5028 numberofcmps++;
5029 }
5030 else if (numberofcmps > 0)
5031 {
5032 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5033 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5034 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5035 numberofcmps = 0;
5036 }
5037 else
5038 {
5039 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5040 numberofcmps = 0;
5041 }
5042 }
5043 #ifdef SUPPORT_UCP
5044 else
5045 {
5046 if (*cc == XCL_NOTPROP)
5047 invertcmp ^= 0x1;
5048 cc++;
5049 switch(*cc)
5050 {
5051 case PT_ANY:
5052 if (list != backtracks)
5053 {
5054 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5055 continue;
5056 }
5057 else if (cc[-1] == XCL_NOTPROP)
5058 continue;
5059 jump = JUMP(SLJIT_JUMP);
5060 break;
5061
5062 case PT_LAMP:
5063 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5064 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5065 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5066 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5067 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5068 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5069 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5070 break;
5071
5072 case PT_GC:
5073 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5074 SET_TYPE_OFFSET(c);
5075 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5076 break;
5077
5078 case PT_PC:
5079 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5080 break;
5081
5082 case PT_SC:
5083 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5084 break;
5085
5086 case PT_SPACE:
5087 case PT_PXSPACE:
5088 SET_CHAR_OFFSET(9);
5089 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5090 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5091
5092 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5093 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5094
5095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5096 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5097
5098 SET_TYPE_OFFSET(ucp_Zl);
5099 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5100 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5101 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5102 break;
5103
5104 case PT_WORD:
5105 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5106 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5107 /* Fall through. */
5108
5109 case PT_ALNUM:
5110 SET_TYPE_OFFSET(ucp_Ll);
5111 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5112 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5113 SET_TYPE_OFFSET(ucp_Nd);
5114 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5115 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5116 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5117 break;
5118
5119 case PT_CLIST:
5120 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5121
5122 /* At least three characters are required.
5123 Otherwise this case would be handled by the normal code path. */
5124 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5125 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5126
5127 /* Optimizing character pairs, if their difference is power of 2. */
5128 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5129 {
5130 if (charoffset == 0)
5131 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5132 else
5133 {
5134 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5135 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5136 }
5137 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5138 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5139 other_cases += 2;
5140 }
5141 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5142 {
5143 if (charoffset == 0)
5144 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5145 else
5146 {
5147 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5148 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5149 }
5150 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5152
5153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5154 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5155
5156 other_cases += 3;
5157 }
5158 else
5159 {
5160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5161 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5162 }
5163
5164 while (*other_cases != NOTACHAR)
5165 {
5166 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5167 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5168 }
5169 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5170 break;
5171
5172 case PT_UCNC:
5173 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5174 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5175 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5176 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5177 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5178 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5179
5180 SET_CHAR_OFFSET(0xa0);
5181 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5182 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5183 SET_CHAR_OFFSET(0);
5184 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5185 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5186 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5187 break;
5188
5189 case PT_PXGRAPH:
5190 /* C and Z groups are the farthest two groups. */
5191 SET_TYPE_OFFSET(ucp_Ll);
5192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5193 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5194
5195 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5196
5197 /* In case of ucp_Cf, we overwrite the result. */
5198 SET_CHAR_OFFSET(0x2066);
5199 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5200 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5201
5202 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5203 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5204
5205 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5206 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5207
5208 JUMPHERE(jump);
5209 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5210 break;
5211
5212 case PT_PXPRINT:
5213 /* C and Z groups are the farthest two groups. */
5214 SET_TYPE_OFFSET(ucp_Ll);
5215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5216 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5217
5218 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5219 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5220
5221 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5222
5223 /* In case of ucp_Cf, we overwrite the result. */
5224 SET_CHAR_OFFSET(0x2066);
5225 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5226 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5227
5228 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5229 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5230
5231 JUMPHERE(jump);
5232 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5233 break;
5234
5235 case PT_PXPUNCT:
5236 SET_TYPE_OFFSET(ucp_Sc);
5237 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5238 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5239
5240 SET_CHAR_OFFSET(0);
5241 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5242 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5243
5244 SET_TYPE_OFFSET(ucp_Pc);
5245 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5246 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5247 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5248 break;
5249 }
5250 cc += 2;
5251 }
5252 #endif
5253
5254 if (jump != NULL)
5255 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5256 }
5257
5258 if (found != NULL)
5259 set_jumps(found, LABEL());
5260 }
5261
5262 #undef SET_TYPE_OFFSET
5263 #undef SET_CHAR_OFFSET
5264
5265 #endif
5266
/* Emits the matching-path code for one opcode that matches a single item:
an anchor or assertion, a character type, a literal character, a character
class, or OP_REVERSE.

  common      the compiler state (mode, utf flag, newline settings, ...)
  type        the opcode to compile
  cc          pattern pointer to the data that follows the opcode
  backtracks  list that receives every "no match" jump emitted here

Returns the pattern pointer after the opcode data consumed here. An opcode
that is not handled by the switch reaches SLJIT_ASSERT_STOP() and cc is
returned unchanged. */
5267 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5268 {
5269 DEFINE_COMPILER;
5270 int length;
5271 unsigned int c, oc, bit;
5272 compare_context context;
/* Scratch jumps; their meaning is local to each case below. */
5273 struct sljit_jump *jump[4];
5274 jump_list *end_list;
5275 #ifdef SUPPORT_UTF
5276 struct sljit_label *label;
5277 #ifdef SUPPORT_UCP
/* Temporary XCLASS-format item list built for OP_PROP / OP_NOTPROP. */
5278 pcre_uchar propdata[5];
5279 #endif
5280 #endif /* SUPPORT_UTF */
5281
5282 switch(type)
5283 {
/* Backtrack unless STR_PTR equals the "begin" field of jit_arguments. */
5284 case OP_SOD:
5285 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5286 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5287 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5288 return cc;
5289
/* Backtrack unless STR_PTR equals the "str" field of jit_arguments. */
5290 case OP_SOM:
5291 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5292 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5293 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5294 return cc;
5295
/* The shared wordboundary routine is called and its result is tested through
the zero flag: OP_WORD_BOUNDARY backtracks on a zero result,
OP_NOT_WORD_BOUNDARY on a non-zero one. */
5296 case OP_NOT_WORD_BOUNDARY:
5297 case OP_WORD_BOUNDARY:
5298 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5299 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5300 return cc;
5301
5302 case OP_NOT_DIGIT:
5303 case OP_DIGIT:
5304 /* Digits are usually 0-9, so it is worth optimizing them. */
5305 detect_partial_match(common, backtracks);
/* In 8-bit UTF mode the 7-bit type reader is used when is_char7_bitset()
accepts the digit bitmap (presumably: the set holds ASCII characters only -
confirm against is_char7_bitset). */
5306 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5307 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5308 read_char7_type(common, type == OP_NOT_DIGIT);
5309 else
5310 #endif
5311 read_char8_type(common, type == OP_NOT_DIGIT);
5312 /* Flip the starting bit in the negative case. */
5313 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5314 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5315 return cc;
5316
/* Same scheme as the digit case, testing the ctype_space bit. */
5317 case OP_NOT_WHITESPACE:
5318 case OP_WHITESPACE:
5319 detect_partial_match(common, backtracks);
5320 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5321 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5322 read_char7_type(common, type == OP_NOT_WHITESPACE);
5323 else
5324 #endif
5325 read_char8_type(common, type == OP_NOT_WHITESPACE);
5326 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5327 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5328 return cc;
5329
/* Same scheme as the digit case, testing the ctype_word bit. */
5330 case OP_NOT_WORDCHAR:
5331 case OP_WORDCHAR:
5332 detect_partial_match(common, backtracks);
5333 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5334 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5335 read_char7_type(common, type == OP_NOT_WORDCHAR);
5336 else
5337 #endif
5338 read_char8_type(common, type == OP_NOT_WORDCHAR);
5339 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5340 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
5341 return cc;
5342
5343 case OP_ANY:
5344 detect_partial_match(common, backtracks);
5345 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5346 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5347 {
/* Two-unit fixed newline: the character just read is accepted unless it is
the first newline unit AND the next unit (if there is one) is the second. */
5348 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5349 end_list = NULL;
5350 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5351 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5352 else
5353 check_str_end(common, &end_list);
5354
5355 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5356 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5357 set_jumps(end_list, LABEL());
5358 JUMPHERE(jump[0]);
5359 }
5360 else
5361 check_newlinechar(common, common->nltype, backtracks, TRUE);
5362 return cc;
5363
5364 case OP_ALLANY:
5365 detect_partial_match(common, backtracks);
5366 #ifdef SUPPORT_UTF
5367 if (common->utf)
5368 {
/* Step over the first code unit, then over the remaining units of the
character without decoding it. */
5369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5370 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5371 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5372 #if defined COMPILE_PCRE8
/* For a first byte >= 0xc0 the number of extra bytes is looked up in
utf8_table4 (indexed by byte - 0xc0). */
5373 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5374 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5375 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5376 #elif defined COMPILE_PCRE16
/* TMP1 becomes 1 when (unit & 0xfc00) == 0xd800, else 0; shifted left by one
it is the number of bytes of one extra 16-bit unit to skip. */
5377 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5378 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5379 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5380 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5381 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5382 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5383 #endif
5384 JUMPHERE(jump[0]);
5385 #endif /* COMPILE_PCRE[8|16] */
5386 return cc;
5387 }
5388 #endif
5389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5390 return cc;
5391
/* Always advances by exactly one code unit. */
5392 case OP_ANYBYTE:
5393 detect_partial_match(common, backtracks);
5394 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5395 return cc;
5396
5397 #ifdef SUPPORT_UTF
5398 #ifdef SUPPORT_UCP
/* A property test is compiled by wrapping the two data units of the opcode
in a one-item XCLASS list and delegating to compile_xclass_matchingpath. */
5399 case OP_NOTPROP:
5400 case OP_PROP:
5401 propdata[0] = XCL_HASPROP;
5402 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5403 propdata[2] = cc[0];
5404 propdata[3] = cc[1];
5405 propdata[4] = XCL_END;
5406 compile_xclass_matchingpath(common, propdata, backtracks);
5407 return cc + 2;
5408 #endif
5409 #endif
5410
5411 case OP_ANYNL:
5412 detect_partial_match(common, backtracks);
5413 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
/* jump[0]: the character is not CR, so it is checked by check_newlinechar
below. After a CR, a following NL (if any) is consumed as well. */
5414 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5415 /* We don't need to handle soft partial matching case. */
5416 end_list = NULL;
5417 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5418 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5419 else
5420 check_str_end(common, &end_list);
5421 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5422 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5423 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5424 jump[2] = JUMP(SLJIT_JUMP);
5425 JUMPHERE(jump[0]);
5426 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5427 set_jumps(end_list, LABEL());
5428 JUMPHERE(jump[1]);
5429 JUMPHERE(jump[2]);
5430 return cc;
5431
/* The character is read within the range 0x9..0x3000 and classified by the
shared hspace routine; the result is tested through the zero flag. */
5432 case OP_NOT_HSPACE:
5433 case OP_HSPACE:
5434 detect_partial_match(common, backtracks);
5435 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5436 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5437 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5438 return cc;
5439
/* As above, with the range 0xa..0x2029 and the shared vspace routine. */
5440 case OP_NOT_VSPACE:
5441 case OP_VSPACE:
5442 detect_partial_match(common, backtracks);
5443 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5444 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5445 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5446 return cc;
5447
5448 #ifdef SUPPORT_UCP
5449 case OP_EXTUNI:
5450 detect_partial_match(common, backtracks);
/* Read the first character and fetch the gbprop byte of its ucd record
(located by the shared getucd routine). */
5451 read_char(common);
5452 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5454 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the gbprop value of the
previous character while the loop runs. */
5455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5456 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5457
/* Loop: stop at the end of the subject (jump[0]); otherwise remember the
current position in TMP3, read the next character and its gbprop value. */
5458 label = LABEL();
5459 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5460 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5461 read_char(common);
5462 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5464 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5465
/* Test bit (1 << new gbprop) of the 32-bit ucp_gbtable entry selected by the
previous gbprop; while the bit is set, keep looping with the new value as
"previous". */
5466 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5467 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5468 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5469 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5470 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5471 JUMPTO(SLJIT_NOT_ZERO, label);
5472
/* The last character read is not part of the sequence: go back to the
position saved before it, then restore the real STACK_TOP. */
5473 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5474 JUMPHERE(jump[0]);
5475 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5476
5477 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5478 {
5479 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5480 /* Since we successfully read a char above, partial matching must occur. */
5481 check_partial(common, TRUE);
5482 JUMPHERE(jump[0]);
5483 }
5484 return cc;
5485 #endif
5486
5487 case OP_EODN:
5488 /* Requires rather complex checks. */
/* jump[0]: already at the end of the subject, which always succeeds. The
other paths accept only a newline that ends exactly at STR_END. */
5489 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5490 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5491 {
/* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = current unit. */
5492 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5493 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5494 if (common->mode == JIT_COMPILE)
5495 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5496 else
5497 {
/* Partial modes. jump[1]: exactly two units are left. Otherwise backtrack
when more than two units are left or the current unit is not the first
newline unit; what remains is a lone first newline unit at the end, which is
reported through check_partial before backtracking. */
5498 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5499 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5500 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5501 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5502 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5503 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5504 check_partial(common, TRUE);
5505 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5506 JUMPHERE(jump[1]);
5507 }
/* Both remaining units must be the two newline units. */
5508 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5509 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5510 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5511 }
5512 else if (common->nltype == NLTYPE_FIXED)
5513 {
/* One-unit fixed newline: it must be the last unit of the subject. */
5514 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5515 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5516 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5517 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5518 }
5519 else
5520 {
/* Other newline types. A CR is handled first: jump[2] when the CR is the
last unit, backtrack when more than two units are left, and with exactly two
units left the second one must be NL (jump[3]). jump[1]: not a CR. */
5521 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5522 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5523 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5524 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5525 jump[2] = JUMP(SLJIT_GREATER);
5526 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5527 /* Equal. */
5528 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5529 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5530 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5531
5532 JUMPHERE(jump[1]);
5533 if (common->nltype == NLTYPE_ANYCRLF)
5534 {
/* Not a CR: only an NL that is the last unit is accepted. */
5535 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5536 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5537 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5538 }
5539 else
5540 {
/* STR_PTR is saved in LOCALS1, one character is read and must end exactly at
STR_END and be accepted by the shared anynewline routine; STR_PTR is then
restored because this assertion consumes nothing. */
5541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5542 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5543 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5544 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5545 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5546 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5547 }
5548 JUMPHERE(jump[2]);
5549 JUMPHERE(jump[3]);
5550 }
5551 JUMPHERE(jump[0]);
5552 check_partial(common, FALSE);
5553 return cc;
5554
/* Backtrack unless STR_PTR has reached STR_END. */
5555 case OP_EOD:
5556 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5557 check_partial(common, FALSE);
5558 return cc;
5559
/* Backtrack when STR_PTR is beyond "begin" or when the notbol byte of
jit_arguments is non-zero. */
5560 case OP_CIRC:
5561 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5562 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5563 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5564 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5565 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5566 return cc;
5567
5568 case OP_CIRCM:
/* At "begin" only the notbol byte decides (jump[0] skips the newline test).
Past "begin" (jump[1]) the position must not be STR_END and must be preceded
by a newline. */
5569 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5570 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5571 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5572 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5573 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5574 jump[0] = JUMP(SLJIT_JUMP);
5575 JUMPHERE(jump[1]);
5576
5577 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5578 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5579 {
/* Two-unit fixed newline: both preceding units must exist (not before
"begin", still in TMP1) and equal the two newline units. */
5580 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5581 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5582 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5583 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5584 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5585 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5586 }
5587 else
5588 {
/* Step back one character, read it again and test it as a newline. */
5589 skip_char_back(common);
5590 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5591 check_newlinechar(common, common->nltype, backtracks, FALSE);
5592 }
5593 JUMPHERE(jump[0]);
5594 return cc;
5595
5596 case OP_DOLL:
/* Backtrack when the noteol byte is set. Then either reuse the OP_EODN code
(when common->endonly is not set) or require the end of the subject. */
5597 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5598 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5599 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5600
5601 if (!common->endonly)
5602 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5603 else
5604 {
5605 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5606 check_partial(common, FALSE);
5607 }
5608 return cc;
5609
5610 case OP_DOLLM:
/* At the end of the subject only the noteol byte decides (jump[0] skips the
newline test). Before the end (jump[1]) a newline must start at STR_PTR. */
5611 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5612 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5613 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5614 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5615 check_partial(common, FALSE);
5616 jump[0] = JUMP(SLJIT_JUMP);
5617 JUMPHERE(jump[1]);
5618
5619 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5620 {
5621 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5622 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5623 if (common->mode == JIT_COMPILE)
5624 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5625 else
5626 {
/* Partial modes. jump[1]: at least two units are left. Otherwise a lone
first newline unit is reported through check_partial before backtracking. */
5627 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5628 /* STR_PTR = STR_END - IN_UCHARS(1) */
5629 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5630 check_partial(common, TRUE);
5631 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5632 JUMPHERE(jump[1]);
5633 }
5634
5635 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5636 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5637 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5638 }
5639 else
5640 {
5641 peek_char(common, common->nlmax);
5642 check_newlinechar(common, common->nltype, backtracks, FALSE);
5643 }
5644 JUMPHERE(jump[0]);
5645 return cc;
5646
5647 case OP_CHAR:
5648 case OP_CHARI:
/* length is the number of code units of the pattern character. */
5649 length = 1;
5650 #ifdef SUPPORT_UTF
5651 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5652 #endif
/* Non-partial mode, and the character is caseful, has no other case, or
char_get_othercase_bit() returned a non-zero value: advance STR_PTR first,
check the bound once and compare the code units with byte_sequence_compare. */
5653 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5654 {
5655 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5656 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5657
5658 context.length = IN_UCHARS(length);
5659 context.sourcereg = -1;
5660 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5661 context.ucharptr = 0;
5662 #endif
5663 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5664 }
5665
/* Otherwise read one subject character into TMP1 and compare it with the
pattern character c (and with its other case oc when needed). */
5666 detect_partial_match(common, backtracks);
5667 #ifdef SUPPORT_UTF
5668 if (common->utf)
5669 {
5670 GETCHAR(c, cc);
5671 }
5672 else
5673 #endif
5674 c = *cc;
5675
5676 if (type == OP_CHAR || !char_has_othercase(common, cc))
5677 {
5678 read_char_range(common, c, c, FALSE);
5679 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5680 return cc + length;
5681 }
5682 oc = char_othercase(common, c);
5683 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5684 bit = c ^ oc;
/* When the two cases differ in a single bit, setting that bit in the subject
character allows a single comparison. */
5685 if (is_powerof2(bit))
5686 {
5687 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5688 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5689 return cc + length;
5690 }
5691 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5692 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5693 JUMPHERE(jump[0]);
5694 return cc + length;
5695
5696 case OP_NOT:
5697 case OP_NOTI:
5698 detect_partial_match(common, backtracks);
5699 length = 1;
5700 #ifdef SUPPORT_UTF
5701 if (common->utf)
5702 {
5703 #ifdef COMPILE_PCRE8
5704 c = *cc;
5705 if (c < 128)
5706 {
/* UTF-8 with a pattern character below 128: only the first subject byte has
to be compared; the rest of the subject character is skipped undecoded. */
5707 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5708 if (type == OP_NOT || !char_has_othercase(common, cc))
5709 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5710 else
5711 {
5712 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5713 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5714 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5715 }
5716 /* Skip the variable-length character. */
5717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5718 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5719 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5721 JUMPHERE(jump[0]);
5722 return cc + 1;
5723 }
5724 else
5725 #endif /* COMPILE_PCRE8 */
5726 {
5727 GETCHARLEN(c, cc, length);
5728 }
5729 }
5730 else
5731 #endif /* SUPPORT_UTF */
5732 c = *cc;
5733
/* General case: read the subject character and backtrack when it equals c
(or, for OP_NOTI, its other case). */
5734 if (type == OP_NOT || !char_has_othercase(common, cc))
5735 {
5736 read_char_range(common, c, c, TRUE);
5737 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5738 }
5739 else
5740 {
5741 oc = char_othercase(common, c);
5742 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5743 bit = c ^ oc;
5744 if (is_powerof2(bit))
5745 {
5746 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5747 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5748 }
5749 else
5750 {
5751 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5752 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5753 }
5754 }
5755 return cc + length;
5756
5757 case OP_CLASS:
5758 case OP_NCLASS:
5759 detect_partial_match(common, backtracks);
5760
/* "bit" is reused here as the largest character value that is looked up in
the bitmap (127 or 255). */
5761 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5762 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5763 read_char_range(common, 0, bit, type == OP_NCLASS);
5764 #else
5765 read_char_range(common, 0, 255, type == OP_NCLASS);
5766 #endif
5767
/* When check_class_ranges() returns TRUE it has emitted the whole test. The
bitmap occupies 32 bytes of the pattern. */
5768 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5769 return cc + 32 / sizeof(pcre_uchar);
5770
/* Characters above the bitmap range: OP_CLASS backtracks, OP_NCLASS matches
by jumping over the bitmap test (jump[0]). */
5771 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5772 jump[0] = NULL;
5773 if (common->utf)
5774 {
5775 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5776 if (type == OP_CLASS)
5777 {
5778 add_jump(compiler, backtracks, jump[0]);
5779 jump[0] = NULL;
5780 }
5781 }
5782 #elif !defined COMPILE_PCRE8
5783 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5784 if (type == OP_CLASS)
5785 {
5786 add_jump(compiler, backtracks, jump[0]);
5787 jump[0] = NULL;
5788 }
5789 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5790
/* Bitmap test: load byte (char >> 3) of the bitmap at cc and test bit
(1 << (char & 7)); backtrack when it is clear. */
5791 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5792 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5793 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5794 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5795 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5796 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5797
5798 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5799 if (jump[0] != NULL)
5800 JUMPHERE(jump[0]);
5801 #endif
5802
5803 return cc + 32 / sizeof(pcre_uchar);
5804
5805 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The class data starts after the LINK_SIZE length field; GET(cc, 0) is the
length used to find the end of the item. */
5806 case OP_XCLASS:
5807 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5808 return cc + GET(cc, 0) - 1;
5809 #endif
5810
5811 case OP_REVERSE:
/* Moves STR_PTR back by "length" characters, backtracking when that would
pass the "begin" field of jit_arguments. */
5812 length = GET(cc, 0);
5813 if (length == 0)
5814 return cc + LINK_SIZE;
5815 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5816 #ifdef SUPPORT_UTF
5817 if (common->utf)
5818 {
/* UTF: step back one character at a time, TMP2 counting down from length. */
5819 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5820 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5821 label = LABEL();
5822 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5823 skip_char_back(common);
5824 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5825 JUMPTO(SLJIT_NOT_ZERO, label);
5826 }
5827 else
5828 #endif
5829 {
5830 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5831 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5832 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5833 }
5834 check_start_used_ptr(common);
5835 return cc + LINK_SIZE;
5836 }
/* Not reached for the opcodes handled above. */
5837 SLJIT_ASSERT_STOP();
5838 return cc;
5839 }
5840
5841 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5842 {
5843 /* This function consumes at least one input character. */
5844 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5845 DEFINE_COMPILER;
5846 pcre_uchar *ccbegin = cc;
5847 compare_context context;
5848 int size;
5849
5850 context.length = 0;
5851 do
5852 {
5853 if (cc >= ccend)
5854 break;
5855
5856 if (*cc == OP_CHAR)
5857 {
5858 size = 1;
5859 #ifdef SUPPORT_UTF
5860 if (common->utf && HAS_EXTRALEN(cc[1]))
5861 size += GET_EXTRALEN(cc[1]);
5862 #endif
5863 }
5864 else if (*cc == OP_CHARI)
5865 {
5866 size = 1;
5867 #ifdef SUPPORT_UTF
5868 if (common->utf)
5869 {
5870 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5871 size = 0;
5872 else if (HAS_EXTRALEN(cc[1]))
5873 size += GET_EXTRALEN(cc[1]);
5874 }
5875 else
5876 #endif
5877 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5878 size = 0;
5879 }
5880 else
5881 size = 0;
5882
5883 cc += 1 + size;
5884 context.length += IN_UCHARS(size);
5885 }
5886 while (size > 0 && context.length <= 128);
5887
5888 cc = ccbegin;
5889 if (context.length > 0)
5890 {
5891 /* We have a fixed-length byte sequence. */
5892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5893 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5894
5895 context.sourcereg = -1;
5896 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5897 context.ucharptr = 0;
5898 #endif
5899 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5900 return cc;
5901 }
5902
5903 /* A non-fixed length character will be checked if length == 0. */
5904 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5905 }
5906
5907 /* Forward declarations. */
5908 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5909 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5910
/* Allocates a zero-filled backtrack record of "size" bytes from the compiler
and makes it the new top of parent's list (its prev is the old top, its cc is
"ccstart"). The enclosing function must have "compiler", "backtrack" and
"parent" in scope. If the compiler reports an error after the allocation, the
enclosing function returns "error". Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5923
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
zero-filled backtrack record of "size" bytes, links it as the new top of
parent's list with cc set to "ccstart", and performs a plain "return" when
the compiler reports an error after the allocation. The enclosing function
must have "compiler", "backtrack" and "parent" in scope. Note that "size" is
evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5936
/* Views the local variable "backtrack" as a pointer to the given record type. */
5937 #define BACKTRACK_AS(type) ((type *)backtrack)
5938
5939 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5940 {
5941 /* The OVECTOR offset goes to TMP2. */
5942 DEFINE_COMPILER;
5943 int count = GET2(cc, 1 + IMM2_SIZE);
5944 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5945 unsigned int offset;
5946 jump_list *found = NULL;
5947
5948 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5949
5950 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
5951
5952 count--;
5953 while (count-- > 0)
5954 {
5955 offset = GET2(slot, 0) << 1;
5956 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5957 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5958 slot += common->name_entry_size;
5959 }
5960
5961 offset = GET2(slot, 0) << 1;
5962 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5963 if (backtracks != NULL && !common->jscript_compat)
5964 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5965
5966 set_jumps(found, LABEL());
5967 }
5968
/* Emits code that matches the text of a captured group against the subject
at STR_PTR.

  common      the compiler state
  cc          points at the reference opcode. For OP_REF / OP_REFI the group
              number is read from the opcode; for any other opcode the
              OVECTOR pair is addressed through TMP2, which must already hold
              its address (compile_dnref_search leaves it there)
  backtracks  list that receives every "no match" jump emitted here
  withchecks  when TRUE, emit the test against OVECTOR(1) (OP_REF / OP_REFI
              only, and not in jscript_compat mode) and the test for an empty
              capture
  emptyfail   when TRUE an empty capture backtracks; otherwise it simply skips
              the comparison. Only has an effect together with withchecks. */
5969 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5970 {
5971 DEFINE_COMPILER;
5972 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5973 int offset = 0;
/* Taken when the capture is empty; stays NULL without withchecks. */
5974 struct sljit_jump *jump = NULL;
5975 struct sljit_jump *partial;
5976 struct sljit_jump *nopartial;
5977
/* TMP1 = start of the captured text. */
5978 if (ref)
5979 {
5980 offset = GET2(cc, 1) << 1;
5981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
5982 /* OVECTOR(1) contains the "string begin - 1" constant. */
5983 if (withchecks && !common->jscript_compat)
5984 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
5985 }
5986 else
5987 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5988
5989 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5990 if (common->utf && *cc == OP_REFI)
5991 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp.
The assert pins the registers used for the call: TMP1 (R0) is the start of
the capture, TMP2 (R2) its end, and R1 is loaded with ARGUMENTS below. */
5992 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
5993 if (ref)
5994 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
5995 else
5996 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5997
/* Empty capture: start equals end. */
5998 if (withchecks)
5999 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6000
6001 /* Needed to save important temporary registers. */
/* STACK_TOP shares a register with the second call argument, so it is kept
in LOCALS0 across the call; the current STR_PTR is passed through the
uchar_ptr field of jit_arguments. */
6002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6003 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6005 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6006 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return values 0 and 1 are special: both backtrack in non-partial mode; in
the partial modes 1 additionally goes through check_partial first. Any other
return value becomes the new STR_PTR. */
6007 if (common->mode == JIT_COMPILE)
6008 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6009 else
6010 {
6011 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6012 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6013 check_partial(common, FALSE);
6014 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6015 JUMPHERE(nopartial);
6016 }
6017 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6018 }
6019 else
6020 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6021 {
/* TMP2 = end - start of the capture; the subtraction also sets the zero
flag that is used for the empty-capture test. */
6022 if (ref)
6023 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6024 else
6025 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6026
6027 if (withchecks)
6028 jump = JUMP(SLJIT_ZERO);
6029
/* Advance STR_PTR past the candidate text first. "partial" is taken when
that runs past STR_END; in non-partial mode this is a plain backtrack. */
6030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6031 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6032 if (common->mode == JIT_COMPILE)
6033 add_jump(compiler, backtracks, partial);
6034
/* The shared compare routine is called; a non-zero TMP2 afterwards means a
mismatch. */
6035 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6036 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6037
6038 if (common->mode != JIT_COMPILE)
6039 {
/* Partial modes: when the capture is longer than the remaining subject,
compare only the part that is available, then report through check_partial
and backtrack. */
6040 nopartial = JUMP(SLJIT_JUMP);
6041 JUMPHERE(partial);
6042 /* TMP2 -= STR_PTR - STR_END, i.e. the length that fits before STR_END. */
6043 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6044 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6045 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6046 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6047 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6048 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6049 JUMPHERE(partial);
6050 check_partial(common, FALSE);
6051 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6052 JUMPHERE(nopartial);
6053 }
6054 }
6055
/* Resolve the empty-capture jump: fail or fall through as requested. */
6056 if (jump != NULL)
6057 {
6058 if (emptyfail)
6059 add_jump(compiler, backtracks, jump);
6060 else
6061 JUMPHERE(jump);
6062 }
6063 }
6064
/* Emits the matching path for a back reference that is followed by a repeat
quantifier (OP_CRSTAR ... OP_CRMINRANGE).

common   compiler state
cc       points to the reference opcode
parent   backtrack record that receives the newly pushed iterator_backtrack

Returns the code pointer just past the quantifier. PUSH_BACKTRACK is given NULL
as its error value, so presumably NULL is returned if that allocation fails
(the macro body is not visible here).

When the opcode is OP_REF or OP_REFI the group number is read directly from the
pattern; otherwise the item carries one more IMM2 field and the offset pair to
use is located at run time by the code emitted by compile_dnref_search(). */
6065 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6066 {
6067 DEFINE_COMPILER;
6068 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6069 backtrack_common *backtrack;
6070 pcre_uchar type;
6071 int offset = 0;
6072 struct sljit_label *label;
6073 struct sljit_jump *zerolength;
6074 struct sljit_jump *jump = NULL;
6075 pcre_uchar *ccbegin = cc;
6076 int min = 0, max = 0;
6077 BOOL minimize;
6078
6079 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6080
/* For OP_REF / OP_REFI the group number is doubled so that OVECTOR(offset) and
OVECTOR(offset + 1) address the two slots of that group. For the other opcodes
cc is advanced by IMM2_SIZE so that the quantifier opcode is found at the same
relative position in both cases. */
6081 if (ref)
6082 offset = GET2(cc, 1) << 1;
6083 else
6084 cc += IMM2_SIZE;
6085 type = cc[1 + IMM2_SIZE];
6086
/* The quantifier opcodes come in pairs: an odd opcode value selects the
minimizing variant. */
6087 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6088 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc past the quantifier. The star and plus
forms leave max at 0, which the code below treats as "no upper limit". */
6089 switch(type)
6090 {
6091 case OP_CRSTAR:
6092 case OP_CRMINSTAR:
6093 min = 0;
6094 max = 0;
6095 cc += 1 + IMM2_SIZE + 1;
6096 break;
6097 case OP_CRPLUS:
6098 case OP_CRMINPLUS:
6099 min = 1;
6100 max = 0;
6101 cc += 1 + IMM2_SIZE + 1;
6102 break;
6103 case OP_CRQUERY:
6104 case OP_CRMINQUERY:
6105 min = 0;
6106 max = 1;
6107 cc += 1 + IMM2_SIZE + 1;
6108 break;
6109 case OP_CRRANGE:
6110 case OP_CRMINRANGE:
6111 min = GET2(cc, 1 + IMM2_SIZE + 1);
6112 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6113 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6114 break;
6115 default:
6116 SLJIT_ASSERT_STOP();
6117 break;
6118 }
6119
/* Non-minimizing repeat. This branch emits its own code and returns. */
6120 if (!minimize)
6121 {
6122 if (min == 0)
6123 {
/* Two stack slots: STACK(0) receives the current STR_PTR, STACK(1) a zero. */
6124 allocate_stack(common, 2);
6125 if (ref)
6126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6127 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6128 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6129 /* Temporary release of STR_PTR. */
6130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6131 /* Handles both invalid and empty cases. Since the minimum repeat
6132 is zero, the invalid case is basically the same as an empty case. */
6133 if (ref)
6134 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6135 else
6136 {
/* TMP2 is set up by the code emitted by compile_dnref_search() (presumably the
address of the selected offset pair - confirm against that function). It is
kept in POSSESSIVE1 so that it can be reloaded at the top of every iteration. */
6137 compile_dnref_search(common, ccbegin, NULL);
6138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6140 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6141 }
6142 /* Restore if not zero length. */
6143 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6144 }
6145 else
6146 {
/* At least one match is required: a single stack slot holding zero is pushed.
For the ref case a group whose start slot equals OVECTOR(1) is sent to the
backtrack list; an empty group (start == end) skips the loop via zerolength. */
6147 allocate_stack(common, 1);
6148 if (ref)
6149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6150 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6151 if (ref)
6152 {
6153 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6154 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6155 }
6156 else
6157 {
6158 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6159 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6160 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6161 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6162 }
6163 }
6164
/* POSSESSIVE0 is used as the iteration counter whenever a numeric limit has to
be checked. */
6165 if (min > 1 || max > 1)
6166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6167
/* Top of the repeat loop. */
6168 label = LABEL();
6169 if (!ref)
6170 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6171 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6172
6173 if (min > 1 || max > 1)
6174 {
/* Increment the counter; loop without saving a position until min is reached. */
6175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6176 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6177 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6178 if (min > 1)
6179 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6180 if (max > 1)
6181 {
/* Below max: push the current STR_PTR and iterate again; at max fall through. */
6182 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6183 allocate_stack(common, 1);
6184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6185 JUMPTO(SLJIT_JUMP, label);
6186 JUMPHERE(jump);
6187 }
6188 }
6189
6190 if (max == 0)
6191 {
6192 /* Includes min > 1 case as well. */
6193 allocate_stack(common, 1);
6194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6195 JUMPTO(SLJIT_JUMP, label);
6196 }
6197
/* The zero-length exit lands here; the label is recorded in the backtrack
record as the matching path. */
6198 JUMPHERE(zerolength);
6199 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6200
6201 count_match(common);
6202 return cc;
6203 }
6204
/* Minimizing repeat. STACK(0) is initialised to zero and later receives
STR_PTR; STACK(1) is the iteration counter (it is not initialised for
OP_CRMINSTAR, which never reads it); for the non-ref case STACK(2) keeps TMP2
so that it can be reloaded before each iteration. */
6205 allocate_stack(common, ref ? 2 : 3);
6206 if (ref)
6207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6209 if (type != OP_CRMINSTAR)
6210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6211
6212 if (min == 0)
6213 {
6214 /* Handles both invalid and empty cases. Since the minimum repeat
6215 is zero, the invalid case is basically the same as an empty case. */
6216 if (ref)
6217 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6218 else
6219 {
6220 compile_dnref_search(common, ccbegin, NULL);
6221 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6223 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6224 }
6225 /* Length is non-zero, we can match real repeats. */
6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass jumps over the iteration code emitted below. */
6227 jump = JUMP(SLJIT_JUMP);
6228 }
6229 else
6230 {
6231 if (ref)
6232 {
6233 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6234 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6235 }
6236 else
6237 {
6238 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6241 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6242 }
6243 }
6244
/* Start of the code that matches one more repetition. The label is stored in
the backtrack record (presumably so that the backtracking path can re-enter
here - that code is not visible in this function). */
6245 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Fail once the counter in STACK(1) has reached max. */
6246 if (max > 0)
6247 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6248
6249 if (!ref)
6250 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6251 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
/* Remember the position reached after this repetition. */
6252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6253
6254 if (min > 1)
6255 {
/* Count the repetition and keep iterating until min has been reached. */
6256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6257 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6259 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6260 }
6261 else if (max > 0)
6262 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6263
/* Both the "skip the first iteration" jump and the zero-length exit continue
from here. */
6264 if (jump != NULL)
6265 JUMPHERE(jump);
6266 JUMPHERE(zerolength);
6267
6268 count_match(common);
6269 return cc;
6270 }
6271
6272 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6273 {
6274 DEFINE_COMPILER;
6275 backtrack_common *backtrack;
6276 recurse_entry *entry = common->entries;
6277 recurse_entry *prev = NULL;
6278 sljit_sw start = GET(cc, 1);
6279 pcre_uchar *start_cc;
6280 BOOL needs_control_head;
6281
6282 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6283
6284 /* Inlining simple patterns. */
6285 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6286 {
6287 start_cc = common->start + start;
6288 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6289 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6290 return cc + 1 + LINK_SIZE;
6291 }
6292
6293 while (entry != NULL)
6294 {
6295 if (entry->start == start)
6296 break;
6297 prev = entry;
6298 entry = entry->next;
6299 }
6300
6301 if (entry == NULL)
6302 {
6303 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6304 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6305 return NULL;
6306 entry->next = NULL;
6307 entry->entry = NULL;
6308 entry->calls = NULL;
6309 entry->start = start;
6310
6311 if (prev != NULL)
6312 prev->next = entry;
6313 else
6314 common->entries = entry;
6315 }
6316
6317 if (common->has_set_som && common->mark_ptr != 0)
6318 {
6319 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6320 allocate_stack(common, 2);
6321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6322 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6324 }
6325 else if (common->has_set_som || common->mark_ptr != 0)
6326 {
6327 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6328 allocate_stack(common, 1);
6329 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6330 }
6331
6332 if (entry->entry == NULL)
6333 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6334 else
6335 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6336 /* Leave if the match is failed. */
6337 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6338 return cc + 1 + LINK_SIZE;
6339 }
6340
6341 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6342 {
6343 const pcre_uchar *begin = arguments->begin;
6344 int *offset_vector = arguments->offsets;
6345 int offset_count = arguments->offset_count;
6346 int i;
6347
6348 if (PUBL(callout) == NULL)
6349 return 0;
6350
6351 callout_block->version = 2;
6352 callout_block->callout_data = arguments->callout_data;
6353
6354 /* Offsets in subject. */
6355 callout_block->subject_length = arguments->end - arguments->begin;
6356 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6357 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6358 #if defined COMPILE_PCRE8
6359 callout_block->subject = (PCRE_SPTR)begin;
6360 #elif defined COMPILE_PCRE16
6361 callout_block->subject = (PCRE_SPTR16)begin;
6362 #elif defined COMPILE_PCRE32
6363 callout_block->subject = (PCRE_SPTR32)begin;
6364 #endif
6365
6366 /* Convert and copy the JIT offset vector to the offset_vector array. */
6367 callout_block->capture_top = 0;
6368 callout_block->offset_vector = offset_vector;
6369 for (i = 2; i < offset_count; i += 2)
6370 {
6371 offset_vector[i] = jit_ovector[i] - begin;
6372 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6373 if (jit_ovector[i] >= begin)
6374 callout_block->capture_top = i;
6375 }
6376
6377 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6378 if (offset_count > 0)
6379 offset_vector[0] = -1;
6380 if (offset_count > 1)
6381 offset_vector[1] = -1;
6382 return (*PUBL(callout))(callout_block);
6383 }
6384
6385 /* Aligning to 8 byte. */
6386 #define CALLOUT_ARG_SIZE \
6387 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6388
6389 #define CALLOUT_ARG_OFFSET(arg) \
6390 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6391
6392 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6393 {
6394 DEFINE_COMPILER;
6395 backtrack_common *backtrack;
6396
6397 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6398
6399 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6400
6401 SLJIT_ASSERT(common->capture_last_ptr != 0);
6402 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6403 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6404 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6405 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6406
6407 /* These pointer sized fields temporarly stores internal variables. */
6408 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6411
6412 if (common->mark_ptr != 0)
6413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6414 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6415 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6417
6418 /* Needed to save important temporary registers. */
6419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6420 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6421 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6422 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6423 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6424 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6425 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6426
6427 /* Check return value. */
6428 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6429 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6430 if (common->forced_quit_label == NULL)
6431 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6432 else
6433 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6434 return cc + 2 + 2 * LINK_SIZE;
6435 }
6436
6437 #undef CALLOUT_ARG_SIZE
6438 #undef CALLOUT_ARG_OFFSET
6439
6440 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6441 {
6442 DEFINE_COMPILER;
6443 int framesize;
6444 int extrasize;
6445 BOOL needs_control_head;
6446 int private_data_ptr;
6447 backtrack_common altbacktrack;
6448 pcre_uchar *ccbegin;
6449 pcre_uchar opcode;
6450 pcre_uchar bra = OP_BRA;
6451 jump_list *tmp = NULL;
6452 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6453 jump_list **found;
6454 /* Saving previous accept variables. */
6455 BOOL save_local_exit = common->local_exit;
6456 BOOL save_positive_assert = common->positive_assert;
6457 then_trap_backtrack *save_then_trap = common->then_trap;
6458 struct sljit_label *save_quit_label = common->quit_label;
6459 struct sljit_label *save_accept_label = common->accept_label;
6460 jump_list *save_quit = common->quit;
6461 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6462 jump_list *save_accept = common->accept;
6463 struct sljit_jump *jump;
6464 struct sljit_jump *brajump = NULL;
6465
6466 /* Assert captures then. */
6467 common->then_trap = NULL;
6468
6469 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6470 {
6471 SLJIT_ASSERT(!conditional);
6472 bra = *cc;
6473 cc++;
6474 }
6475 private_data_ptr = PRIVATE_DATA(cc);
6476 SLJIT_ASSERT(private_data_ptr != 0);
6477 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6478 backtrack->framesize = framesize;
6479 backtrack->private_data_ptr = private_data_ptr;
6480 opcode = *cc;
6481 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6482 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6483 ccbegin = cc;
6484 cc += GET(cc, 1);
6485
6486 if (bra == OP_BRAMINZERO)
6487 {
6488 /* This is a braminzero backtrack path. */
6489 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6490 free_stack(common, 1);
6491 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6492 }
6493
6494 if (framesize < 0)
6495 {
6496 extrasize = needs_control_head ? 2 : 1;
6497 if (framesize == no_frame)
6498 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6499 allocate_stack(common, extrasize);
6500 if (needs_control_head)
6501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6502 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6503 if (needs_control_head)
6504 {
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6506 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6507 }
6508 }
6509 else
6510 {
6511 extrasize = needs_control_head ? 3 : 2;
6512 allocate_stack(common, framesize + extrasize);
6513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6514 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6516 if (needs_control_head)
6517 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6519 if (needs_control_head)
6520 {
6521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6522 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6524 }
6525 else
6526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6527 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6528 }
6529
6530 memset(&altbacktrack, 0, sizeof(backtrack_common));
6531 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6532 {
6533 /* Negative assert is stronger than positive assert. */
6534 common->local_exit = TRUE;
6535 common->quit_label = NULL;
6536 common->quit = NULL;
6537 common->positive_assert = FALSE;
6538 }
6539 else
6540 common->positive_assert = TRUE;
6541 common->positive_assert_quit = NULL;
6542
6543 while (1)
6544 {
6545 common->accept_label = NULL;
6546 common->accept = NULL;
6547 altbacktrack.top = NULL;
6548 altbacktrack.topbacktracks = NULL;
6549
6550 if (*ccbegin == OP_ALT)
6551 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6552
6553 altbacktrack.cc = ccbegin;
6554 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6555 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6556 {
6557 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6558 {
6559 common->local_exit = save_local_exit;
6560 common->quit_label = save_quit_label;
6561 common->quit = save_quit;
6562 }
6563 common->positive_assert = save_positive_assert;
6564 common->then_trap = save_then_trap;
6565 common->accept_label = save_accept_label;
6566 common->positive_assert_quit = save_positive_assert_quit;
6567 common->accept = save_accept;
6568 return NULL;
6569 }
6570 common->accept_label = LABEL();
6571 if (common->accept != NULL)
6572 set_jumps(common->accept, common->accept_label);
6573
6574 /* Reset stack. */
6575 if (framesize < 0)
6576 {
6577 if (framesize == no_frame)
6578 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6579 else
6580 free_stack(common, extrasize);
6581 if (needs_control_head)
6582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6583 }
6584 else
6585 {
6586 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6587 {
6588 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6589 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6590 if (needs_control_head)
6591 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6592 }
6593 else
6594 {
6595 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6596 if (needs_control_head)
6597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6598 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6599 }
6600 }
6601
6602 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6603 {
6604 /* We know that STR_PTR was stored on the top of the stack. */
6605 if (conditional)
6606 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6607 else if (bra == OP_BRAZERO)
6608 {
6609 if (framesize < 0)
6610 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6611 else
6612 {
6613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6614 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6616 }
6617 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6619 }
6620 else if (framesize >= 0)
6621 {
6622 /* For OP_BRA and OP_BRAMINZERO. */
6623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6624 }
6625 }
6626 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6627
6628 compile_backtrackingpath(common, altbacktrack.top);
6629 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6630 {
6631 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6632 {
6633 common->local_exit = save_local_exit;
6634 common->quit_label = save_quit_label;
6635 common->quit = save_quit;
6636 }
6637 common->positive_assert = save_positive_assert;
6638 common->then_trap = save_then_trap;
6639 common->accept_label = save_accept_label;
6640 common->positive_assert_quit = save_positive_assert_quit;
6641 common->accept = save_accept;
6642 return NULL;
6643 }
6644 set_jumps(altbacktrack.topbacktracks, LABEL());
6645
6646 if (*cc != OP_ALT)
6647 break;
6648
6649 ccbegin = cc;
6650 cc += GET(cc, 1);
6651 }
6652
6653 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6654 {
6655 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6656 /* Makes the check less complicated below. */
6657 common->positive_assert_quit = common->quit;
6658 }
6659
6660 /* None of them matched. */
6661 if (common->positive_assert_quit != NULL)
6662 {
6663 jump = JUMP(SLJIT_JUMP);
6664 set_jumps(common->positive_assert_quit, LABEL());
6665 SLJIT_ASSERT(framesize != no_stack);
6666 if (framesize < 0)
6667 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6668 else
6669 {
6670 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6671 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6672 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6673 }
6674 JUMPHERE(jump);
6675 }
6676
6677 if (needs_control_head)
6678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6679
6680 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6681 {
6682 /* Assert is failed. */
6683 if (conditional || bra == OP_BRAZERO)
6684 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6685
6686 if (framesize < 0)
6687 {
6688 /* The topmost item should be 0. */
6689 if (bra == OP_BRAZERO)
6690 {
6691 if (extrasize == 2)
6692 free_stack(common, 1);
6693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6694 }
6695 else
6696 free_stack(common, extrasize);
6697 }
6698 else
6699 {
6700 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6701 /* The topmost item should be 0. */
6702 if (bra == OP_BRAZERO)
6703 {
6704 free_stack(common, framesize + extrasize - 1);
6705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6706 }
6707 else
6708 free_stack(common, framesize + extrasize);
6709 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6710 }
6711 jump = JUMP(SLJIT_JUMP);
6712 if (bra != OP_BRAZERO)
6713 add_jump(compiler, target, jump);
6714
6715 /* Assert is successful. */
6716 set_jumps(tmp, LABEL());
6717 if (framesize < 0)
6718 {
6719 /* We know that STR_PTR was stored on the top of the stack. */
6720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6721 /* Keep the STR_PTR on the top of the stack. */
6722 if (bra == OP_BRAZERO)
6723 {
6724 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6725 if (extrasize == 2)
6726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6727 }
6728 else if (bra == OP_BRAMINZERO)
6729 {
6730 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6731 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6732 }
6733 }
6734 else
6735 {
6736 if (bra == OP_BRA)
6737 {
6738 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6739 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6740 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6741 }
6742 else
6743 {
6744 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6745 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6746 if (extrasize == 2)
6747 {
6748 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6749 if (bra == OP_BRAMINZERO)
6750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6751 }
6752 else
6753 {
6754 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6756 }
6757 }
6758 }
6759
6760 if (bra == OP_BRAZERO)
6761 {
6762 backtrack->matchingpath = LABEL();
6763 SET_LABEL(jump, backtrack->matchingpath);
6764 }
6765 else if (bra == OP_BRAMINZERO)
6766 {
6767 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6768 JUMPHERE(brajump);
6769 if (framesize >= 0)
6770 {
6771 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6772 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6773 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6774 }
6775 set_jumps(backtrack->common.topbacktracks, LABEL());
6776 }
6777 }
6778 else