/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1442 - (show annotations)
Sun Jan 12 17:17:29 2014 UTC (5 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 332882 byte(s)
JIT: refactoring some code in the forward search.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Debugging switches (disabled by default). */

/* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
     1 - Use unoptimized capturing brackets.
     2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* DEBUG_FORCE_CONTROL_HEAD:
     1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Size of the regex stack allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a
   multiple of the page size. */
#define STACK_GROWTH_RATE 8192

/* When sljit debugging is on, enable the check that an allocation
   could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
/* Singly linked list node carrying one sljit jump pointer. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
194
/* Singly linked list node pairing a start jump with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values used where a frame size is expected. */
enum frame_types {
  no_stack = -2,
  no_frame = -1
};
211
/* Tags distinguishing the two kinds of control entries. */
enum control_types {
  type_mark      = 0,
  type_then_trap = 1
};
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to coninue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
285
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is not created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
295
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
300
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
302
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
315
316 #define MAX_RANGE_SIZE 4
317
318 typedef struct compiler_common {
319 /* The sljit ceneric compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
359
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* TRUE, when minlength is greater than 0. */
366 BOOL might_be_empty;
367 /* \K is found in the pattern. */
368 BOOL has_set_som;
369 /* (*SKIP:arg) is found in the pattern. */
370 BOOL has_skip_arg;
371 /* (*THEN) is found in the pattern. */
372 BOOL has_then;
373 /* Needs to know the start position anytime. */
374 BOOL needs_start_ptr;
375 /* Currently in recurse or negative assert. */
376 BOOL local_exit;
377 /* Currently in a positive assert. */
378 BOOL positive_assert;
379 /* Newline control. */
380 int nltype;
381 pcre_uint32 nlmax;
382 pcre_uint32 nlmin;
383 int newline;
384 int bsr_nltype;
385 pcre_uint32 bsr_nlmax;
386 pcre_uint32 bsr_nlmin;
387 /* Dollar endonly. */
388 int endonly;
389 /* Tables. */
390 sljit_sw ctypes;
391 /* Named capturing brackets. */
392 pcre_uchar *name_table;
393 sljit_sw name_count;
394 sljit_sw name_entry_size;
395
396 /* Labels and jump lists. */
397 struct sljit_label *partialmatchlabel;
398 struct sljit_label *quit_label;
399 struct sljit_label *forced_quit_label;
400 struct sljit_label *accept_label;
401 stub_list *stubs;
402 label_addr_list *label_addrs;
403 recurse_entry *entries;
404 recurse_entry *currententry;
405 jump_list *partialmatch;
406 jump_list *quit;
407 jump_list *positive_assert_quit;
408 jump_list *forced_quit;
409 jump_list *accept;
410 jump_list *calllimit;
411 jump_list *stackalloc;
412 jump_list *revertframes;
413 jump_list *wordboundary;
414 jump_list *anynewline;
415 jump_list *hspace;
416 jump_list *vspace;
417 jump_list *casefulcmp;
418 jump_list *caselesscmp;
419 jump_list *reset_match;
420 BOOL jscript_compat;
421 #ifdef SUPPORT_UTF
422 BOOL utf;
423 #ifdef SUPPORT_UCP
424 BOOL use_ucp;
425 #endif
426 #ifdef COMPILE_PCRE8
427 jump_list *utfreadchar;
428 jump_list *utfreadchar16;
429 jump_list *utfreadtype8;
430 #endif
431 #endif /* SUPPORT_UTF */
432 #ifdef SUPPORT_UCP
433 jump_list *getucd;
434 #endif
435 } compiler_common;
436
/* Context used by byte_sequence_compare. The two unions (c and oc) have
   the same layout and exist only when SLJIT_UNALIGNED is enabled. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
470
/* Drop sljit's own CMP macro. */
#undef CMP

/* Byte offset used for accessing the i-th element of the stack:
   element 0 is at -sizeof(sljit_sw), element 1 one word below it, ... */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
476
/* Symbolic names for the sljit registers used by the generated code. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define COUNT_MATCH   SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
487
/* Layout of the local space. */

/* The first two locals are free for the current opcode to use. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two locals reserved for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))

/* The output vector lives on the stack and holds character pointers.
   Its data is split into two groups: the first holds the start / end
   character pointers, the second holds the start pointers for capturing
   groups whose end has not been reached yet. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))

/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
505
506 #if defined COMPILE_PCRE8
507 #define MOV_UCHAR SLJIT_MOV_UB
508 #define MOVU_UCHAR SLJIT_MOVU_UB
509 #elif defined COMPILE_PCRE16
510 #define MOV_UCHAR SLJIT_MOV_UH
511 #define MOVU_UCHAR SLJIT_MOVU_UH
512 #elif defined COMPILE_PCRE32
513 #define MOV_UCHAR SLJIT_MOV_UI
514 #define MOVU_UCHAR SLJIT_MOVU_UI
515 #else
516 #error Unsupported compiling mode
517 #endif
518
/* Shortcuts for the sljit emitter API. All of them except DEFINE_COMPILER
   expect a local variable named `compiler` to be in scope; DEFINE_COMPILER
   declares it from `common`. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))

/* Compare-and-jump. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define OP_FLAGS(op, dst, dstw, src, srcw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff
546
547 static pcre_uchar* bracketend(pcre_uchar* cc)
548 {
549 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
550 do cc += GET(cc, 1); while (*cc == OP_ALT);
551 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
552 cc += 1 + LINK_SIZE;
553 return cc;
554 }
555
556 static int no_alternatives(pcre_uchar* cc)
557 {
558 int count = 0;
559 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
560 do
561 {
562 cc += GET(cc, 1);
563 count++;
564 }
565 while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 return count;
568 }
569
/* Number of set bits in each possible 4-bit value (index 0..15).
   The table is pure lookup data, so it is declared const. */
static const int ones_in_half_byte[16] = {
  /*  0 */ 0, 1, 1, 2,
  /*  4 */ 1, 2, 2, 3,
  /*  8 */ 1, 2, 2, 3,
  /* 12 */ 2, 3, 3, 4
};
574
575 /* Functions which might need modification for all new supported opcodes:
576 next_opcode
577 check_opcode_types
578 set_private_data_ptrs
579 get_framesize
580 init_frame
581 get_private_data_copy_length
582 copy_private_data
583 compile_matchingpath
584 compile_backtrackingpath
585 */
586
587 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
588 {
589 SLJIT_UNUSED_ARG(common);
590 switch(*cc)
591 {
592 case OP_SOD:
593 case OP_SOM:
594 case OP_SET_SOM:
595 case OP_NOT_WORD_BOUNDARY:
596 case OP_WORD_BOUNDARY:
597 case OP_NOT_DIGIT:
598 case OP_DIGIT:
599 case OP_NOT_WHITESPACE:
600 case OP_WHITESPACE:
601 case OP_NOT_WORDCHAR:
602 case OP_WORDCHAR:
603 case OP_ANY:
604 case OP_ALLANY:
605 case OP_NOTPROP:
606 case OP_PROP:
607 case OP_ANYNL:
608 case OP_NOT_HSPACE:
609 case OP_HSPACE:
610 case OP_NOT_VSPACE:
611 case OP_VSPACE:
612 case OP_EXTUNI:
613 case OP_EODN:
614 case OP_EOD:
615 case OP_CIRC:
616 case OP_CIRCM:
617 case OP_DOLL:
618 case OP_DOLLM:
619 case OP_CRSTAR:
620 case OP_CRMINSTAR:
621 case OP_CRPLUS:
622 case OP_CRMINPLUS:
623 case OP_CRQUERY:
624 case OP_CRMINQUERY:
625 case OP_CRRANGE:
626 case OP_CRMINRANGE:
627 case OP_CRPOSSTAR:
628 case OP_CRPOSPLUS:
629 case OP_CRPOSQUERY:
630 case OP_CRPOSRANGE:
631 case OP_CLASS:
632 case OP_NCLASS:
633 case OP_REF:
634 case OP_REFI:
635 case OP_DNREF:
636 case OP_DNREFI:
637 case OP_RECURSE:
638 case OP_CALLOUT:
639 case OP_ALT:
640 case OP_KET:
641 case OP_KETRMAX:
642 case OP_KETRMIN:
643 case OP_KETRPOS:
644 case OP_REVERSE:
645 case OP_ASSERT:
646 case OP_ASSERT_NOT:
647 case OP_ASSERTBACK:
648 case OP_ASSERTBACK_NOT:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_CBRA:
654 case OP_CBRAPOS:
655 case OP_COND:
656 case OP_SBRA:
657 case OP_SBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 case OP_SCOND:
661 case OP_CREF:
662 case OP_DNCREF:
663 case OP_RREF:
664 case OP_DNRREF:
665 case OP_DEF:
666 case OP_BRAZERO:
667 case OP_BRAMINZERO:
668 case OP_BRAPOSZERO:
669 case OP_PRUNE:
670 case OP_SKIP:
671 case OP_THEN:
672 case OP_COMMIT:
673 case OP_FAIL:
674 case OP_ACCEPT:
675 case OP_ASSERT_ACCEPT:
676 case OP_CLOSE:
677 case OP_SKIPZERO:
678 return cc + PRIV(OP_lengths)[*cc];
679
680 case OP_CHAR:
681 case OP_CHARI:
682 case OP_NOT:
683 case OP_NOTI:
684 case OP_STAR:
685 case OP_MINSTAR:
686 case OP_PLUS:
687 case OP_MINPLUS:
688 case OP_QUERY:
689 case OP_MINQUERY:
690 case OP_UPTO:
691 case OP_MINUPTO:
692 case OP_EXACT:
693 case OP_POSSTAR:
694 case OP_POSPLUS:
695 case OP_POSQUERY:
696 case OP_POSUPTO:
697 case OP_STARI:
698 case OP_MINSTARI:
699 case OP_PLUSI:
700 case OP_MINPLUSI:
701 case OP_QUERYI:
702 case OP_MINQUERYI:
703 case OP_UPTOI:
704 case OP_MINUPTOI:
705 case OP_EXACTI:
706 case OP_POSSTARI:
707 case OP_POSPLUSI:
708 case OP_POSQUERYI:
709 case OP_POSUPTOI:
710 case OP_NOTSTAR:
711 case OP_NOTMINSTAR:
712 case OP_NOTPLUS:
713 case OP_NOTMINPLUS:
714 case OP_NOTQUERY:
715 case OP_NOTMINQUERY:
716 case OP_NOTUPTO:
717 case OP_NOTMINUPTO:
718 case OP_NOTEXACT:
719 case OP_NOTPOSSTAR:
720 case OP_NOTPOSPLUS:
721 case OP_NOTPOSQUERY:
722 case OP_NOTPOSUPTO:
723 case OP_NOTSTARI:
724 case OP_NOTMINSTARI:
725 case OP_NOTPLUSI:
726 case OP_NOTMINPLUSI:
727 case OP_NOTQUERYI:
728 case OP_NOTMINQUERYI:
729 case OP_NOTUPTOI:
730 case OP_NOTMINUPTOI:
731 case OP_NOTEXACTI:
732 case OP_NOTPOSSTARI:
733 case OP_NOTPOSPLUSI:
734 case OP_NOTPOSQUERYI:
735 case OP_NOTPOSUPTOI:
736 cc += PRIV(OP_lengths)[*cc];
737 #ifdef SUPPORT_UTF
738 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
739 #endif
740 return cc;
741
742 /* Special cases. */
743 case OP_TYPESTAR:
744 case OP_TYPEMINSTAR:
745 case OP_TYPEPLUS:
746 case OP_TYPEMINPLUS:
747 case OP_TYPEQUERY:
748 case OP_TYPEMINQUERY:
749 case OP_TYPEUPTO:
750 case OP_TYPEMINUPTO:
751 case OP_TYPEEXACT:
752 case OP_TYPEPOSSTAR:
753 case OP_TYPEPOSPLUS:
754 case OP_TYPEPOSQUERY:
755 case OP_TYPEPOSUPTO:
756 return cc + PRIV(OP_lengths)[*cc] - 1;
757
758 case OP_ANYBYTE:
759 #ifdef SUPPORT_UTF
760 if (common->utf) return NULL;
761 #endif
762 return cc + 1;
763
764 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
765 case OP_XCLASS:
766 return cc + GET(cc, 1);
767 #endif
768
769 case OP_MARK:
770 case OP_PRUNE_ARG:
771 case OP_SKIP_ARG:
772 case OP_THEN_ARG:
773 return cc + 1 + 2 + cc[1];
774
775 default:
776 /* All opcodes are supported now! */
777 SLJIT_ASSERT_STOP();
778 return NULL;
779 }
780 }
781
782 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
783 {
784 int count;
785 pcre_uchar *slot;
786
787 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
788 while (cc < ccend)
789 {
790 switch(*cc)
791 {
792 case OP_SET_SOM:
793 common->has_set_som = TRUE;
794 common->might_be_empty = TRUE;
795 cc += 1;
796 break;
797
798 case OP_REF:
799 case OP_REFI:
800 common->optimized_cbracket[GET2(cc, 1)] = 0;
801 cc += 1 + IMM2_SIZE;
802 break;
803
804 case OP_BRA:
805 case OP_CBRA:
806 case OP_SBRA:
807 case OP_SCBRA:
808 count = no_alternatives(cc);
809 if (count > 4)
810 common->read_only_data_size += count * sizeof(sljit_uw);
811 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
812 break;
813
814 case OP_CBRAPOS:
815 case OP_SCBRAPOS:
816 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
817 cc += 1 + LINK_SIZE + IMM2_SIZE;
818 break;
819
820 case OP_COND:
821 case OP_SCOND:
822 /* Only AUTO_CALLOUT can insert this opcode. We do
823 not intend to support this case. */
824 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
825 return FALSE;
826 cc += 1 + LINK_SIZE;
827 break;
828
829 case OP_CREF:
830 common->optimized_cbracket[GET2(cc, 1)] = 0;
831 cc += 1 + IMM2_SIZE;
832 break;
833
834 case OP_DNREF:
835 case OP_DNREFI:
836 case OP_DNCREF:
837 count = GET2(cc, 1 + IMM2_SIZE);
838 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
839 while (count-- > 0)
840 {
841 common->optimized_cbracket[GET2(slot, 0)] = 0;
842 slot += common->name_entry_size;
843 }
844 cc += 1 + 2 * IMM2_SIZE;
845 break;
846
847 case OP_RECURSE:
848 /* Set its value only once. */
849 if (common->recursive_head_ptr == 0)
850 {
851 common->recursive_head_ptr = common->ovector_start;
852 common->ovector_start += sizeof(sljit_sw);
853 }
854 cc += 1 + LINK_SIZE;
855 break;
856
857 case OP_CALLOUT:
858 if (common->capture_last_ptr == 0)
859 {
860 common->capture_last_ptr = common->ovector_start;
861 common->ovector_start += sizeof(sljit_sw);
862 }
863 cc += 2 + 2 * LINK_SIZE;
864 break;
865
866 case OP_THEN_ARG:
867 common->has_then = TRUE;
868 common->control_head_ptr = 1;
869 /* Fall through. */
870
871 case OP_PRUNE_ARG:
872 common->needs_start_ptr = TRUE;
873 /* Fall through. */
874
875 case OP_MARK:
876 if (common->mark_ptr == 0)
877 {
878 common->mark_ptr = common->ovector_start;
879 common->ovector_start += sizeof(sljit_sw);
880 }
881 cc += 1 + 2 + cc[1];
882 break;
883
884 case OP_THEN:
885 common->has_then = TRUE;
886 common->control_head_ptr = 1;
887 /* Fall through. */
888
889 case OP_PRUNE:
890 case OP_SKIP:
891 common->needs_start_ptr = TRUE;
892 cc += 1;
893 break;
894
895 case OP_SKIP_ARG:
896 common->control_head_ptr = 1;
897 common->has_skip_arg = TRUE;
898 cc += 1 + 2 + cc[1];
899 break;
900
901 default:
902 cc = next_opcode(common, cc);
903 if (cc == NULL)
904 return FALSE;
905 break;
906 }
907 }
908 return TRUE;
909 }
910
911 static int get_class_iterator_size(pcre_uchar *cc)
912 {
913 switch(*cc)
914 {
915 case OP_CRSTAR:
916 case OP_CRPLUS:
917 return 2;
918
919 case OP_CRMINSTAR:
920 case OP_CRMINPLUS:
921 case OP_CRQUERY:
922 case OP_CRMINQUERY:
923 return 1;
924
925 case OP_CRRANGE:
926 case OP_CRMINRANGE:
927 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
928 return 0;
929 return 2;
930
931 default:
932 return 0;
933 }
934 }
935
936 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
937 {
938 pcre_uchar *end = bracketend(begin);
939 pcre_uchar *next;
940 pcre_uchar *next_end;
941 pcre_uchar *max_end;
942 pcre_uchar type;
943 sljit_sw length = end - begin;
944 int min, max, i;
945
946 /* Detect fixed iterations first. */
947 if (end[-(1 + LINK_SIZE)] != OP_KET)
948 return FALSE;
949
950 /* Already detected repeat. */
951 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
952 return TRUE;
953
954 next = end;
955 min = 1;
956 while (1)
957 {
958 if (*next != *begin)
959 break;
960 next_end = bracketend(next);
961 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
962 break;
963 next = next_end;
964 min++;
965 }
966
967 if (min == 2)
968 return FALSE;
969
970 max = 0;
971 max_end = next;
972 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
973 {
974 type = *next;
975 while (1)
976 {
977 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
978 break;
979 next_end = bracketend(next + 2 + LINK_SIZE);
980 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
981 break;
982 next = next_end;
983 max++;
984 }
985
986 if (next[0] == type && next[1] == *begin && max >= 1)
987 {
988 next_end = bracketend(next + 1);
989 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
990 {
991 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
992 if (*next_end != OP_KET)
993 break;
994
995 if (i == max)
996 {
997 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
998 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
999 /* +2 the original and the last. */
1000 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1001 if (min == 1)
1002 return TRUE;
1003 min--;
1004 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1005 }
1006 }
1007 }
1008 }
1009
1010 if (min >= 3)
1011 {
1012 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1013 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1015 return TRUE;
1016 }
1017
1018 return FALSE;
1019 }
1020
/* Groups of case labels shared by several switch statements. Each macro
   expands to a run of `case X:` labels and is meant to be followed
   directly by the statements handling that group. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1072
1073 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1074 {
1075 pcre_uchar *cc = common->start;
1076 pcre_uchar *alternative;
1077 pcre_uchar *end = NULL;
1078 int private_data_ptr = *private_data_start;
1079 int space, size, bracketlen;
1080
1081 while (cc < ccend)
1082 {
1083 space = 0;
1084 size = 0;
1085 bracketlen = 0;
1086 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1087 return;
1088
1089 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1090 if (detect_repeat(common, cc))
1091 {
1092 /* These brackets are converted to repeats, so no global
1093 based single character repeat is allowed. */
1094 if (cc >= end)
1095 end = bracketend(cc);
1096 }
1097
1098 switch(*cc)
1099 {
1100 case OP_KET:
1101 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1102 {
1103 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1104 private_data_ptr += sizeof(sljit_sw);
1105 cc += common->private_data_ptrs[cc + 1 - common->start];
1106 }
1107 cc += 1 + LINK_SIZE;
1108 break;
1109
1110 case OP_ASSERT:
1111 case OP_ASSERT_NOT:
1112 case OP_ASSERTBACK:
1113 case OP_ASSERTBACK_NOT:
1114 case OP_ONCE:
1115 case OP_ONCE_NC:
1116 case OP_BRAPOS:
1117 case OP_SBRA:
1118 case OP_SBRAPOS:
1119 case OP_SCOND:
1120 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1121 private_data_ptr += sizeof(sljit_sw);
1122 bracketlen = 1 + LINK_SIZE;
1123 break;
1124
1125 case OP_CBRAPOS:
1126 case OP_SCBRAPOS:
1127 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1128 private_data_ptr += sizeof(sljit_sw);
1129 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1130 break;
1131
1132 case OP_COND:
1133 /* Might be a hidden SCOND. */
1134 alternative = cc + GET(cc, 1);
1135 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1136 {
1137 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1138 private_data_ptr += sizeof(sljit_sw);
1139 }
1140 bracketlen = 1 + LINK_SIZE;
1141 break;
1142
1143 case OP_BRA:
1144 bracketlen = 1 + LINK_SIZE;
1145 break;
1146
1147 case OP_CBRA:
1148 case OP_SCBRA:
1149 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1150 break;
1151
1152 CASE_ITERATOR_PRIVATE_DATA_1
1153 space = 1;
1154 size = -2;
1155 break;
1156
1157 CASE_ITERATOR_PRIVATE_DATA_2A
1158 space = 2;
1159 size = -2;
1160 break;
1161
1162 CASE_ITERATOR_PRIVATE_DATA_2B
1163 space = 2;
1164 size = -(2 + IMM2_SIZE);
1165 break;
1166
1167 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1168 space = 1;
1169 size = 1;
1170 break;
1171
1172 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1173 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1174 space = 2;
1175 size = 1;
1176 break;
1177
1178 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1179 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1180 space = 2;
1181 size = 1 + IMM2_SIZE;
1182 break;
1183
1184 case OP_CLASS:
1185 case OP_NCLASS:
1186 size += 1 + 32 / sizeof(pcre_uchar);
1187 space = get_class_iterator_size(cc + size);
1188 break;
1189
1190 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1191 case OP_XCLASS:
1192 size = GET(cc, 1);
1193 space = get_class_iterator_size(cc + size);
1194 break;
1195 #endif
1196
1197 default:
1198 cc = next_opcode(common, cc);
1199 SLJIT_ASSERT(cc != NULL);
1200 break;
1201 }
1202
1203 /* Character iterators, which are not inside a repeated bracket,
1204 gets a private slot instead of allocating it on the stack. */
1205 if (space > 0 && cc >= end)
1206 {
1207 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1208 private_data_ptr += sizeof(sljit_sw) * space;
1209 }
1210
1211 if (size != 0)
1212 {
1213 if (size < 0)
1214 {
1215 cc += -size;
1216 #ifdef SUPPORT_UTF
1217 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1218 #endif
1219 }
1220 else
1221 cc += size;
1222 }
1223
1224 if (bracketlen > 0)
1225 {
1226 if (cc >= end)
1227 {
1228 end = bracketend(cc);
1229 if (end[-1 - LINK_SIZE] == OP_KET)
1230 end = NULL;
1231 }
1232 cc += bracketlen;
1233 }
1234 }
1235 *private_data_start = private_data_ptr;
1236 }
1237
1238 /* Returns with a frame_types (always < 0) if no need for frame. */
1239 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1240 {
1241 int length = 0;
1242 int possessive = 0;
1243 BOOL stack_restore = FALSE;
1244 BOOL setsom_found = recursive;
1245 BOOL setmark_found = recursive;
1246 /* The last capture is a local variable even for recursions. */
1247 BOOL capture_last_found = FALSE;
1248
1249 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1250 SLJIT_ASSERT(common->control_head_ptr != 0);
1251 *needs_control_head = TRUE;
1252 #else
1253 *needs_control_head = FALSE;
1254 #endif
1255
1256 if (ccend == NULL)
1257 {
1258 ccend = bracketend(cc) - (1 + LINK_SIZE);
1259 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1260 {
1261 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1262 /* This is correct regardless of common->capture_last_ptr. */
1263 capture_last_found = TRUE;
1264 }
1265 cc = next_opcode(common, cc);
1266 }
1267
1268 SLJIT_ASSERT(cc != NULL);
1269 while (cc < ccend)
1270 switch(*cc)
1271 {
1272 case OP_SET_SOM:
1273 SLJIT_ASSERT(common->has_set_som);
1274 stack_restore = TRUE;
1275 if (!setsom_found)
1276 {
1277 length += 2;
1278 setsom_found = TRUE;
1279 }
1280 cc += 1;
1281 break;
1282
1283 case OP_MARK:
1284 case OP_PRUNE_ARG:
1285 case OP_THEN_ARG:
1286 SLJIT_ASSERT(common->mark_ptr != 0);
1287 stack_restore = TRUE;
1288 if (!setmark_found)
1289 {
1290 length += 2;
1291 setmark_found = TRUE;
1292 }
1293 if (common->control_head_ptr != 0)
1294 *needs_control_head = TRUE;
1295 cc += 1 + 2 + cc[1];
1296 break;
1297
1298 case OP_RECURSE:
1299 stack_restore = TRUE;
1300 if (common->has_set_som && !setsom_found)
1301 {
1302 length += 2;
1303 setsom_found = TRUE;
1304 }
1305 if (common->mark_ptr != 0 && !setmark_found)
1306 {
1307 length += 2;
1308 setmark_found = TRUE;
1309 }
1310 if (common->capture_last_ptr != 0 && !capture_last_found)
1311 {
1312 length += 2;
1313 capture_last_found = TRUE;
1314 }
1315 cc += 1 + LINK_SIZE;
1316 break;
1317
1318 case OP_CBRA:
1319 case OP_CBRAPOS:
1320 case OP_SCBRA:
1321 case OP_SCBRAPOS:
1322 stack_restore = TRUE;
1323 if (common->capture_last_ptr != 0 && !capture_last_found)
1324 {
1325 length += 2;
1326 capture_last_found = TRUE;
1327 }
1328 length += 3;
1329 cc += 1 + LINK_SIZE + IMM2_SIZE;
1330 break;
1331
1332 default:
1333 stack_restore = TRUE;
1334 /* Fall through. */
1335
1336 case OP_NOT_WORD_BOUNDARY:
1337 case OP_WORD_BOUNDARY:
1338 case OP_NOT_DIGIT:
1339 case OP_DIGIT:
1340 case OP_NOT_WHITESPACE:
1341 case OP_WHITESPACE:
1342 case OP_NOT_WORDCHAR:
1343 case OP_WORDCHAR:
1344 case OP_ANY:
1345 case OP_ALLANY:
1346 case OP_ANYBYTE:
1347 case OP_NOTPROP:
1348 case OP_PROP:
1349 case OP_ANYNL:
1350 case OP_NOT_HSPACE:
1351 case OP_HSPACE:
1352 case OP_NOT_VSPACE:
1353 case OP_VSPACE:
1354 case OP_EXTUNI:
1355 case OP_EODN:
1356 case OP_EOD:
1357 case OP_CIRC:
1358 case OP_CIRCM:
1359 case OP_DOLL:
1360 case OP_DOLLM:
1361 case OP_CHAR:
1362 case OP_CHARI:
1363 case OP_NOT:
1364 case OP_NOTI:
1365
1366 case OP_EXACT:
1367 case OP_POSSTAR:
1368 case OP_POSPLUS:
1369 case OP_POSQUERY:
1370 case OP_POSUPTO:
1371
1372 case OP_EXACTI:
1373 case OP_POSSTARI:
1374 case OP_POSPLUSI:
1375 case OP_POSQUERYI:
1376 case OP_POSUPTOI:
1377
1378 case OP_NOTEXACT:
1379 case OP_NOTPOSSTAR:
1380 case OP_NOTPOSPLUS:
1381 case OP_NOTPOSQUERY:
1382 case OP_NOTPOSUPTO:
1383
1384 case OP_NOTEXACTI:
1385 case OP_NOTPOSSTARI:
1386 case OP_NOTPOSPLUSI:
1387 case OP_NOTPOSQUERYI:
1388 case OP_NOTPOSUPTOI:
1389
1390 case OP_TYPEEXACT:
1391 case OP_TYPEPOSSTAR:
1392 case OP_TYPEPOSPLUS:
1393 case OP_TYPEPOSQUERY:
1394 case OP_TYPEPOSUPTO:
1395
1396 case OP_CLASS:
1397 case OP_NCLASS:
1398 case OP_XCLASS:
1399
1400 cc = next_opcode(common, cc);
1401 SLJIT_ASSERT(cc != NULL);
1402 break;
1403 }
1404
1405 /* Possessive quantifiers can use a special case. */
1406 if (SLJIT_UNLIKELY(possessive == length))
1407 return stack_restore ? no_frame : no_stack;
1408
1409 if (length > 0)
1410 return length + 1;
1411 return stack_restore ? no_frame : no_stack;
1412 }
1413
/* Emits the code which fills a frame on the stack for the opcodes between
cc and ccend (ccend == NULL: cc is a bracket and its body is scanned).
stackpos is the index of the first frame word; it is converted to an
offset with STACK(). Each saved item is written as an identifier word
followed by its current value(s), and the frame is closed by a zero word.
The final assert checks that the frame ends exactly at STACK(stacktop). */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
/* With recursive set, the start of match and the mark are never saved. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A non-recursive (S)CBRAPOS is not skipped: the scan starts on it,
  so its capture is saved by the OP_CBRAPOS branch below. */
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Two words: -OVECTOR(0) as identifier, then the value of OVECTOR(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      /* Two words: -mark_ptr as identifier, then the current mark. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion saves every enabled item that has not been saved yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* Three words: OVECTOR(offset) as identifier, then both ovector
    values of the capturing bracket. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos += (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* The frame is terminated by a zero word. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1535
1536 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1537 {
1538 int private_data_length = needs_control_head ? 3 : 2;
1539 int size;
1540 pcre_uchar *alternative;
1541 /* Calculate the sum of the private machine words. */
1542 while (cc < ccend)
1543 {
1544 size = 0;
1545 switch(*cc)
1546 {
1547 case OP_KET:
1548 if (PRIVATE_DATA(cc) != 0)
1549 private_data_length++;
1550 cc += 1 + LINK_SIZE;
1551 break;
1552
1553 case OP_ASSERT:
1554 case OP_ASSERT_NOT:
1555 case OP_ASSERTBACK:
1556 case OP_ASSERTBACK_NOT:
1557 case OP_ONCE:
1558 case OP_ONCE_NC:
1559 case OP_BRAPOS:
1560 case OP_SBRA:
1561 case OP_SBRAPOS:
1562 case OP_SCOND:
1563 private_data_length++;
1564 cc += 1 + LINK_SIZE;
1565 break;
1566
1567 case OP_CBRA:
1568 case OP_SCBRA:
1569 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1570 private_data_length++;
1571 cc += 1 + LINK_SIZE + IMM2_SIZE;
1572 break;
1573
1574 case OP_CBRAPOS:
1575 case OP_SCBRAPOS:
1576 private_data_length += 2;
1577 cc += 1 + LINK_SIZE + IMM2_SIZE;
1578 break;
1579
1580 case OP_COND:
1581 /* Might be a hidden SCOND. */
1582 alternative = cc + GET(cc, 1);
1583 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1584 private_data_length++;
1585 cc += 1 + LINK_SIZE;
1586 break;
1587
1588 CASE_ITERATOR_PRIVATE_DATA_1
1589 if (PRIVATE_DATA(cc))
1590 private_data_length++;
1591 cc += 2;
1592 #ifdef SUPPORT_UTF
1593 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1594 #endif
1595 break;
1596
1597 CASE_ITERATOR_PRIVATE_DATA_2A
1598 if (PRIVATE_DATA(cc))
1599 private_data_length += 2;
1600 cc += 2;
1601 #ifdef SUPPORT_UTF
1602 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1603 #endif
1604 break;
1605
1606 CASE_ITERATOR_PRIVATE_DATA_2B
1607 if (PRIVATE_DATA(cc))
1608 private_data_length += 2;
1609 cc += 2 + IMM2_SIZE;
1610 #ifdef SUPPORT_UTF
1611 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1612 #endif
1613 break;
1614
1615 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1616 if (PRIVATE_DATA(cc))
1617 private_data_length++;
1618 cc += 1;
1619 break;
1620
1621 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 1;
1625 break;
1626
1627 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1628 if (PRIVATE_DATA(cc))
1629 private_data_length += 2;
1630 cc += 1 + IMM2_SIZE;
1631 break;
1632
1633 case OP_CLASS:
1634 case OP_NCLASS:
1635 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1636 case OP_XCLASS:
1637 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1638 #else
1639 size = 1 + 32 / (int)sizeof(pcre_uchar);
1640 #endif
1641 if (PRIVATE_DATA(cc))
1642 private_data_length += get_class_iterator_size(cc + size);
1643 cc += size;
1644 break;
1645
1646 default:
1647 cc = next_opcode(common, cc);
1648 SLJIT_ASSERT(cc != NULL);
1649 break;
1650 }
1651 }
1652 SLJIT_ASSERT(cc == ccend);
1653 return private_data_length;
1654 }
1655
/* Emits the code which copies the private data words of the opcodes between
cc and ccend to the stack (save is TRUE) or back from the stack (save is
FALSE). The stack area lies between STACK(stackptr - 2) and
STACK(stacktop - 1). The values travel through TMP1 and TMP2 alternately:
a register is written out only when it is about to be reused, so each
memory access is separated from the previous use of the same register. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local offsets of the (at most two) words of the current opcode. */
int srcw[2];
int count, size;
/* tmp1next: TMP1 is the next register to use. tmp1empty / tmp2empty:
the register holds no value which is waiting to be stored. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  start,
  loop,
  end
} status;

/* The start state (recursive head / control head) is only visited
when saving. */
status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* Skip the leading one or two words: the start state is not visited
  when restoring, so these are not copied back here. Then preload up to
  two values from the stack. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  /* Each round selects the words of one item (count, srcw[]), which
  are then copied by the inner while loop below. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    switch(*cc)
      {
      case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    SLJIT_ASSERT_STOP();
    break;
    }

  /* Copy the selected words, last one first (srcw[count - 1] down to
  srcw[0]). */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Saving: flush the previous value of the chosen register to the
      stack, then load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Restoring: store the preloaded register into the local, then
      refill it from the stack if anything is left. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the values still held in the registers, oldest first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
/* All opcodes were visited and the whole stack area was consumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1975
1976 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1977 {
1978 pcre_uchar *end = bracketend(cc);
1979 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1980
1981 /* Assert captures then. */
1982 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1983 current_offset = NULL;
1984 /* Conditional block does not. */
1985 if (*cc == OP_COND || *cc == OP_SCOND)
1986 has_alternatives = FALSE;
1987
1988 cc = next_opcode(common, cc);
1989 if (has_alternatives)
1990 current_offset = common->then_offsets + (cc - common->start);
1991
1992 while (cc < end)
1993 {
1994 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1995 cc = set_then_offsets(common, cc, current_offset);
1996 else
1997 {
1998 if (*cc == OP_ALT && has_alternatives)
1999 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2000 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2001 *current_offset = 1;
2002 cc = next_opcode(common, cc);
2003 }
2004 }
2005
2006 return end;
2007 }
2008
/* The case label group macros are removed here, so they are not visible
in the rest of the file. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2015
2016 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2017 {
2018 return (value & (value - 1)) == 0;
2019 }
2020
2021 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2022 {
2023 while (list)
2024 {
2025 /* sljit_set_label is clever enough to do nothing
2026 if either the jump or the label is NULL. */
2027 SET_LABEL(list->jump, label);
2028 list = list->next;
2029 }
2030 }
2031
2032 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2033 {
2034 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2035 if (list_item)
2036 {
2037 list_item->next = *list;
2038 list_item->jump = jump;
2039 *list = list_item;
2040 }
2041 }
2042
2043 static void add_stub(compiler_common *common, struct sljit_jump *start)
2044 {
2045 DEFINE_COMPILER;
2046 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2047
2048 if (list_item)
2049 {
2050 list_item->start = start;
2051 list_item->quit = LABEL();
2052 list_item->next = common->stubs;
2053 common->stubs = list_item;
2054 }
2055 }
2056
2057 static void flush_stubs(compiler_common *common)
2058 {
2059 DEFINE_COMPILER;
2060 stub_list* list_item = common->stubs;
2061
2062 while (list_item)
2063 {
2064 JUMPHERE(list_item->start);
2065 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2066 JUMPTO(SLJIT_JUMP, list_item->quit);
2067 list_item = list_item->next;
2068 }
2069 common->stubs = NULL;
2070 }
2071
2072 static void add_label_addr(compiler_common *common)
2073 {
2074 DEFINE_COMPILER;
2075 label_addr_list *label_addr;
2076
2077 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2078 if (label_addr == NULL)
2079 return;
2080 label_addr->label = LABEL();
2081 label_addr->addr = common->read_only_data_ptr;
2082 label_addr->next = common->label_addrs;
2083 common->label_addrs = label_addr;
2084 common->read_only_data_ptr++;
2085 }
2086
2087 static SLJIT_INLINE void count_match(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090
2091 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093 }
2094
2095 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2096 {
2097 /* May destroy all locals and registers except TMP2. */
2098 DEFINE_COMPILER;
2099
2100 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2101 #ifdef DESTROY_REGISTERS
2102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2103 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2104 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2107 #endif
2108 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2109 }
2110
2111 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2112 {
2113 DEFINE_COMPILER;
2114 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2115 }
2116
2117 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2118 {
2119 DEFINE_COMPILER;
2120 struct sljit_label *loop;
2121 int i;
2122
2123 /* At this point we can freely use all temporary registers. */
2124 SLJIT_ASSERT(length > 1);
2125 /* TMP1 returns with begin - 1. */
2126 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2127 if (length < 8)
2128 {
2129 for (i = 1; i < length; i++)
2130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2131 }
2132 else
2133 {
2134 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2135 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2136 loop = LABEL();
2137 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2139 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2140 }
2141 }
2142
2143 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2144 {
2145 DEFINE_COMPILER;
2146 struct sljit_label *loop;
2147 int i;
2148
2149 SLJIT_ASSERT(length > 1);
2150 /* OVECTOR(1) contains the "string begin - 1" constant. */
2151 if (length > 2)
2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2153 if (length < 8)
2154 {
2155 for (i = 2; i < length; i++)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2157 }
2158 else
2159 {
2160 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2161 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2162 loop = LABEL();
2163 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2164 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2165 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2166 }
2167
2168 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2169 if (common->mark_ptr != 0)
2170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2171 if (common->control_head_ptr != 0)
2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2176 }
2177
2178 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2179 {
2180 while (current != NULL)
2181 {
2182 switch (current[-2])
2183 {
2184 case type_then_trap:
2185 break;
2186
2187 case type_mark:
2188 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2189 return current[-4];
2190 break;
2191
2192 default:
2193 SLJIT_ASSERT_STOP();
2194 break;
2195 }
2196 current = (sljit_sw*)current[-1];
2197 }
2198 return -1;
2199 }
2200
/* Emits the code which runs after a successful match: it stores STR_PTR
into OVECTOR(1), passes the mark (if marks are in use) back through the
arguments, converts the first offset_count ovector entries to integer
offsets relative to the subject begin and writes them to the offsets
array of the arguments, and finally computes the return value. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* The old OVECTOR(1) value is kept in SLJIT_SAVED_REG3; it is compared
against the ovector entries in the return value loop below. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
/* SLJIT_SCRATCH_REG2: the number of entries to copy. */
OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* SLJIT_SCRATCH_REG3: one int before the offsets array, since the store
in the loop uses a sizeof(int) displacement (NOTE(review): SLJIT_MOVU_SI
presumably updates the base register before the store - confirm). */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
loop = LABEL();
/* Entry minus the subject begin gives the offset in bytes. */
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte offset to a character unit offset. */
OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan the ovector pairs downwards from topbracket. The counter
  starts at topbracket + 1 and is decremented once per visited pair;
  the scan stops at the first entry which differs from the value
  saved in SLJIT_SAVED_REG3. */
  GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2250
2251 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2252 {
2253 DEFINE_COMPILER;
2254 struct sljit_jump *jump;
2255
2256 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2257 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2258 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2259
2260 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2262 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2263 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2264
2265 /* Store match begin and end. */
2266 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2267 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2268
2269 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2270 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2271 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2272 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2273 #endif
2274 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2275 JUMPHERE(jump);
2276
2277 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2278 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2279 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2280 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2281 #endif
2282 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2283
2284 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2285 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2286 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2287 #endif
2288 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2289
2290 JUMPTO(SLJIT_JUMP, quit);
2291 }
2292
2293 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2294 {
2295 /* May destroy TMP1. */
2296 DEFINE_COMPILER;
2297 struct sljit_jump *jump;
2298
2299 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2300 {
2301 /* The value of -1 must be kept for start_used_ptr! */
2302 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2303 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2304 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2305 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2307 JUMPHERE(jump);
2308 }
2309 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2310 {
2311 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2313 JUMPHERE(jump);
2314 }
2315 }
2316
2317 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2318 {
2319 /* Detects if the character has an othercase. */
2320 unsigned int c;
2321
2322 #ifdef SUPPORT_UTF
2323 if (common->utf)
2324 {
2325 GETCHAR(c, cc);
2326 if (c > 127)
2327 {
2328 #ifdef SUPPORT_UCP
2329 return c != UCD_OTHERCASE(c);
2330 #else
2331 return FALSE;
2332 #endif
2333 }
2334 #ifndef COMPILE_PCRE8
2335 return common->fcc[c] != c;
2336 #endif
2337 }
2338 else
2339 #endif
2340 c = *cc;
2341 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2342 }
2343
2344 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2345 {
2346 /* Returns with the othercase. */
2347 #ifdef SUPPORT_UTF
2348 if (common->utf && c > 127)
2349 {
2350 #ifdef SUPPORT_UCP
2351 return UCD_OTHERCASE(c);
2352 #else
2353 return c;
2354 #endif
2355 }
2356 #endif
2357 return TABLE_GET(c, common->fcc, c);
2358 }
2359
2360 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2361 {
2362 /* Detects if the character and its othercase has only 1 bit difference. */
2363 unsigned int c, oc, bit;
2364 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2365 int n;
2366 #endif
2367
2368 #ifdef SUPPORT_UTF
2369 if (common->utf)
2370 {
2371 GETCHAR(c, cc);
2372 if (c <= 127)
2373 oc = common->fcc[c];
2374 else
2375 {
2376 #ifdef SUPPORT_UCP
2377 oc = UCD_OTHERCASE(c);
2378 #else
2379 oc = c;
2380 #endif
2381 }
2382 }
2383 else
2384 {
2385 c = *cc;
2386 oc = TABLE_GET(c, common->fcc, c);
2387 }
2388 #else
2389 c = *cc;
2390 oc = TABLE_GET(c, common->fcc, c);
2391 #endif
2392
2393 SLJIT_ASSERT(c != oc);
2394
2395 bit = c ^ oc;
2396 /* Optimized for English alphabet. */
2397 if (c <= 127 && bit == 0x20)
2398 return (0 << 8) | 0x20;
2399
2400 /* Since c != oc, they must have at least 1 bit difference. */
2401 if (!is_powerof2(bit))
2402 return 0;
2403
2404 #if defined COMPILE_PCRE8
2405
2406 #ifdef SUPPORT_UTF
2407 if (common->utf && c > 127)
2408 {
2409 n = GET_EXTRALEN(*cc);
2410 while ((bit & 0x3f) == 0)
2411 {
2412 n--;
2413 bit >>= 6;
2414 }
2415 return (n << 8) | bit;
2416 }
2417 #endif /* SUPPORT_UTF */
2418 return (0 << 8) | bit;
2419
2420 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2421
2422 #ifdef SUPPORT_UTF
2423 if (common->utf && c > 65535)
2424 {
2425 if (bit >= (1 << 10))
2426 bit >>= 10;
2427 else
2428 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2429 }
2430 #endif /* SUPPORT_UTF */
2431 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2432
2433 #endif /* COMPILE_PCRE[8|16|32] */
2434 }
2435
2436 static void check_partial(compiler_common *common, BOOL force)
2437 {
2438 /* Checks whether a partial matching is occurred. Does not modify registers. */
2439 DEFINE_COMPILER;
2440 struct sljit_jump *jump = NULL;
2441
2442 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2443
2444 if (common->mode == JIT_COMPILE)
2445 return;
2446
2447 if (!force)
2448 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2449 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2451
2452 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2454 else
2455 {
2456 if (common->partialmatchlabel != NULL)
2457 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458 else
2459 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460 }
2461
2462 if (jump != NULL)
2463 JUMPHERE(jump);
2464 }
2465
2466 static void check_str_end(compiler_common *common, jump_list **end_reached)
2467 {
2468 /* Does not affect registers. Usually used in a tight spot. */
2469 DEFINE_COMPILER;
2470 struct sljit_jump *jump;
2471
2472 if (common->mode == JIT_COMPILE)
2473 {
2474 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2475 return;
2476 }
2477
2478 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2479 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2480 {
2481 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2483 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2484 }
2485 else
2486 {
2487 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2488 if (common->partialmatchlabel != NULL)
2489 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2490 else
2491 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2492 }
2493 JUMPHERE(jump);
2494 }
2495
2496 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2497 {
2498 DEFINE_COMPILER;
2499 struct sljit_jump *jump;
2500
2501 if (common->mode == JIT_COMPILE)
2502 {
2503 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2504 return;
2505 }
2506
2507 /* Partial matching mode. */
2508 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2509 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2510 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2511 {
2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2513 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2514 }
2515 else
2516 {
2517 if (common->partialmatchlabel != NULL)
2518 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2519 else
2520 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2521 }
2522 JUMPHERE(jump);
2523 }
2524
2525 static void peek_char(compiler_common *common, pcre_uint32 max)
2526 {
2527 /* Reads the character into TMP1, keeps STR_PTR.
2528 Does not check STR_END. TMP2 Destroyed. */
2529 DEFINE_COMPILER;
2530 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2531 struct sljit_jump *jump;
2532 #endif
2533
2534 SLJIT_UNUSED_ARG(max);
2535
2536 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2537 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2538 if (common->utf)
2539 {
2540 if (max < 128) return;
2541
2542 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2544 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2545 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546 JUMPHERE(jump);
2547 }
2548 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2549
2550 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2551 if (common->utf)
2552 {
2553 if (max < 0xd800) return;
2554
2555 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2556 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2557 /* TMP2 contains the high surrogate. */
2558 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2559 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2560 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2563 JUMPHERE(jump);
2564 }
2565 #endif
2566 }
2567
2568 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569
2570 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2571 {
2572 /* Tells whether the character codes below 128 are enough
2573 to determine a match. */
2574 const pcre_uint8 value = nclass ? 0xff : 0;
2575 const pcre_uint8* end = bitset + 32;
2576
2577 bitset += 16;
2578 do
2579 {
2580 if (*bitset++ != value)
2581 return FALSE;
2582 }
2583 while (bitset < end);
2584 return TRUE;
2585 }
2586
2587 static void read_char7_type(compiler_common *common, BOOL full_read)
2588 {
2589 /* Reads the precise character type of a character into TMP1, if the character
2590 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2591 full_read argument tells whether characters above max are accepted or not. */
2592 DEFINE_COMPILER;
2593 struct sljit_jump *jump;
2594
2595 SLJIT_ASSERT(common->utf);
2596
2597 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599
2600 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601
2602 if (full_read)
2603 {
2604 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2605 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2607 JUMPHERE(jump);
2608 }
2609 }
2610
2611 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612
2613 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2614 {
2615 /* Reads the precise value of a character into TMP1, if the character is
2616 between min and max (c >= min && c <= max). Otherwise it returns with a value
2617 outside the range. Does not check STR_END. */
/* The first code unit is always loaded into TMP1 and STR_PTR is always moved
past it. In UTF mode the amount of decoding emitted afterwards depends on
min and max. When update_str_ptr is set, the emitted code also moves STR_PTR
past a character which is outside the range; otherwise only the code units
consumed by the decoding are skipped. TMP2 is used as a temporary, and the
UTF-8 paths borrow RETURN_ADDR as a second temporary.
NOTE(review): utf8_table4 is defined elsewhere; the code below assumes that
indexing it with (lead byte - 0xc0) yields the number of bytes following
the lead byte. */
2618 DEFINE_COMPILER;
2619 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2620 struct sljit_jump *jump;
2621 #endif
2622 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623 struct sljit_jump *jump2;
2624 #endif
2625
2626 SLJIT_UNUSED_ARG(update_str_ptr);
2627 SLJIT_UNUSED_ARG(min);
2628 SLJIT_UNUSED_ARG(max);
2629 SLJIT_ASSERT(min <= max);
2630
2631 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633
2634 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2635 if (common->utf)
2636 {
/* Only single byte characters are interesting and STR_PTR need not be
exact: the first byte in TMP1 is enough. */
2637 if (max < 128 && !update_str_ptr) return;
2638
/* Bytes below 0xc0 are not lead bytes, nothing more to decode. */
2639 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2640 if (min >= 0x10000)
2641 {
/* Only four byte sequences (lead bytes 0xf0..0xf7) can be in range. */
2642 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2643 if (update_str_ptr)
2644 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
/* Any other lead byte: TMP1 is left holding the second byte. */
2646 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2652 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2655 if (!update_str_ptr)
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2657 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660 JUMPHERE(jump2);
/* Executed for every lead byte, whether it was decoded or not. */
2661 if (update_str_ptr)
2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2663 }
2664 else if (min >= 0x800 && max <= 0xffff)
2665 {
/* Only three byte sequences (lead bytes 0xe0..0xef) can be in range. */
2666 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2667 if (update_str_ptr)
2668 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
/* Any other lead byte: TMP1 is left holding the second byte. */
2670 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2675 if (!update_str_ptr)
2676 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 JUMPHERE(jump2);
2681 if (update_str_ptr)
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683 }
/* General case: call the shared decoder. The utfreadchar16 variant is
used when no value of 0x10000 or above is of interest. */
2684 else if (max >= 0x800)
2685 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2686 else if (max < 128)
2687 {
/* Reached only with update_str_ptr set (see the early return above).
TMP1 keeps the lead byte, which is above max; only STR_PTR is fixed. */
2688 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2690 }
2691 else
2692 {
/* 128 <= max < 0x800: combine the first two bytes. For lead bytes of
longer sequences bit 5 of the lead byte survives the 0x3f mask, so the
result is at least 0x800 and therefore above max. */
2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694 if (!update_str_ptr)
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696 else
2697 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2700 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2701 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2702 if (update_str_ptr)
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2704 }
2705 JUMPHERE(jump);
2706 }
2707 #endif
2708
2709 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2710 if (common->utf)
2711 {
2712 if (max >= 0x10000)
2713 {
/* Full decoding of a surrogate pair. STR_PTR already points to the
second code unit, so the low surrogate is read from offset 0. */
2714 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2715 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2716 /* TMP2 contains the high surrogate. */
2717 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2718 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2719 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2722 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723 JUMPHERE(jump);
2724 return;
2725 }
2726
2727 if (max < 0xd800 && !update_str_ptr) return;
2728
2729 /* Skip low surrogate if necessary. */
2730 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2732 if (update_str_ptr)
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* A high surrogate could be mistaken for a character in range when
max >= 0xd800, so it is replaced by a value above every such max. */
2734 if (max >= 0xd800)
2735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2736 JUMPHERE(jump);
2737 }
2738 #endif
2739 }
2740
2741 static SLJIT_INLINE void read_char(compiler_common *common)
2742 {
/* Reads a character into TMP1 through read_char_range with the widest range
(0 .. READ_CHAR_MAX) and with update_str_ptr set, so STR_PTR is moved past
the character. Does not check STR_END. */
2743 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744 }
2745
2746 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2747 {
2748 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The type comes from the ctypes table, which has entries only for values
up to 255; the explicit range checks below yield type 0 for anything
larger. TMP2 is destroyed. When update_str_ptr is not set, STR_PTR is not
guaranteed to be moved past a multi code unit character. */
2749 DEFINE_COMPILER;
2750 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2751 struct sljit_jump *jump;
2752 #endif
2753 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2754 struct sljit_jump *jump2;
2755 #endif
2756
2757 SLJIT_UNUSED_ARG(update_str_ptr);
2758
2759 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761
2762 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2763 if (common->utf)
2764 {
2765 /* This can be an extra read in some situations, but hopefully
2766 it is needed in most cases. */
2767 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Bytes below 0xc0 are not lead bytes: the type loaded above is final. */
2768 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2769 if (!update_str_ptr)
2770 {
/* Inline decoding of the first two bytes. For lead bytes of longer
sequences bit 5 of the lead byte survives the 0x3f mask, so the
combined value is above 255 and the type is left as zero. */
2771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2774 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2775 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2776 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2778 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2780 JUMPHERE(jump2);
2781 }
2782 else
/* The shared helper also moves STR_PTR past the whole character. */
2783 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2784 JUMPHERE(jump);
2785 return;
2786 }
2787 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2788
2789 #if !defined COMPILE_PCRE8
2790 /* The ctypes array contains only 256 values. */
2791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2792 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2793 #endif
2794 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795 #if !defined COMPILE_PCRE8
2796 JUMPHERE(jump);
2797 #endif
2798
2799 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2800 if (common->utf && update_str_ptr)
2801 {
2802 /* Skip low surrogate if necessary. */
/* TMP2 still holds the first code unit; values 0xd800..0xdbff are high
surrogates which are followed by one more code unit. */
2803 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2804 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806 JUMPHERE(jump);
2807 }
2808 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2809 }
2810
2811 static void skip_char_back(compiler_common *common)
2812 {
2813 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Outside UTF mode (and always in the 32 bit library) this is a single
subtraction of one code unit. */
2814 DEFINE_COMPILER;
2815 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2816 #if defined COMPILE_PCRE8
2817 struct sljit_label *label;
2818
2819 if (common->utf)
2820 {
/* Step back one byte at a time while the byte just passed has the form
10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
2821 label = LABEL();
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2823 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2825 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2826 return;
2827 }
2828 #elif defined COMPILE_PCRE16
2829 if (common->utf)
2830 {
2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2832 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2833 /* Skip low surrogate if necessary. */
/* Branch free: TMP1 becomes 1 when the code unit just passed is in
0xdc00..0xdfff, which is then shifted into a two byte step. */
2834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2836 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2837 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2838 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2839 return;
2840 }
2841 #endif /* COMPILE_PCRE[8|16] */
2842 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2843 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 }
2845
2846 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2847 {
2848 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2849 DEFINE_COMPILER;
2850 struct sljit_jump *jump;
2851
2852 if (nltype == NLTYPE_ANY)
2853 {
2854 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2855 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2856 }
2857 else if (nltype == NLTYPE_ANYCRLF)
2858 {
2859 if (jumpifmatch)
2860 {
2861 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2863 }
2864 else
2865 {
2866 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2867 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2868 JUMPHERE(jump);
2869 }
2870 }
2871 else
2872 {
2873 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2874 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2875 }
2876 }
2877
2878 #ifdef SUPPORT_UTF
2879
2880 #if defined COMPILE_PCRE8
2881 static void do_utfreadchar(compiler_common *common)
2882 {
2883 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2884 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emitted as a fast-call helper: the return address is kept in RETURN_ADDR.
On entry STR_PTR points to the second byte; on return it points after the
last byte of the character. The length marker bits of the first byte are
carried along while shifting and are used to detect the sequence length. */
2885 DEFINE_COMPILER;
2886 struct sljit_jump *jump;
2887
2888 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2889 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2891 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2892 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2893 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894
2895 /* Searching for the first zero. */
/* 0x800 is bit 5 of the first byte after the shift by 6; it is clear only
for two byte sequences. */
2896 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2897 jump = JUMP(SLJIT_C_NOT_ZERO);
2898 /* Two byte sequence. */
2899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2901 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2902
2903 JUMPHERE(jump);
/* Remove the marker bit and append the six payload bits of the third byte. */
2904 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2905 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2906 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2907 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2908 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2909
/* 0x10000 is bit 4 of the first byte after two shifts by 6; it is set for
four byte sequences. */
2910 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2911 jump = JUMP(SLJIT_C_NOT_ZERO);
2912 /* Three byte sequence. */
2913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2915 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2916
2917 /* Four byte sequence. */
2918 JUMPHERE(jump);
2919 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2921 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2922 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2923 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2924 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927 }
2928
2929 static void do_utfreadchar16(compiler_common *common)
2930 {
2931 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2932 of the character (>= 0xc0). Return value in TMP1. */
/* Variant of the UTF-8 decoder for callers which need exact values only
for two and three byte sequences. Emitted as a fast-call helper which
keeps the return address in RETURN_ADDR. On entry STR_PTR points to the
second byte. TMP2 is destroyed and, unlike do_utfreadchar, does not
return the length. */
2933 DEFINE_COMPILER;
2934 struct sljit_jump *jump;
2935
2936 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2937 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2941 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942
2943 /* Searching for the first zero. */
2944 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945 jump = JUMP(SLJIT_C_NOT_ZERO);
2946 /* Two byte sequence. */
2947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949
2950 JUMPHERE(jump);
/* 0x400 is bit 4 of the first byte after the shift by 6; it is set for
four byte sequences. In that case STR_PTR is advanced by one extra byte
before the common three byte tail runs. The 0x400 marker is not cleared,
so after the shift below the value has bit 0x10000 set. */
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953 /* This code runs only in 8 bit mode. No need to shift the value. */
2954 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960 /* Three byte sequence. */
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963 }
2964
2965 static void do_utfreadtype8(compiler_common *common)
2966 {
2967 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2968 of the character (>= 0xc0). Return value in TMP1. */
/* Emitted as a fast-call helper which keeps the return address in
RETURN_ADDR. On entry STR_PTR points to the second byte; STR_PTR is moved
past the character on every path. The returned type is zero for every
character above 255. TMP2 is destroyed. */
2969 DEFINE_COMPILER;
2970 struct sljit_jump *jump;
2971 struct sljit_jump *compare;
2972
2973 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2974
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
2975 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2976 jump = JUMP(SLJIT_C_NOT_ZERO);
2977 /* Two byte sequence. */
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981 /* The upper 5 bits are known at this point. */
/* Values above 3 would produce a character above 255 after the shift. */
2982 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2983 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2986 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988
2989 JUMPHERE(compare);
2990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2992
2993 /* We only have types for characters less than 256. */
/* Longer sequences: skip the remaining bytes using the utf8_table4 entry
of the first byte and return type zero. */
2994 JUMPHERE(jump);
2995 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2998 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999 }
3000
3001 #endif /* COMPILE_PCRE8 */
3002
3003 #endif /* SUPPORT_UTF */
3004
3005 #ifdef SUPPORT_UCP
3006
3007 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift which split a character code into a block index and an
offset inside the block: 127 == 128 - 1 and 1 << 7 == 128. */
3008 #define UCD_BLOCK_MASK 127
3009 #define UCD_BLOCK_SHIFT 7
3010
3011 static void do_getucd(compiler_common *common)
3012 {
3013 /* Search the UCD record for the character comes in TMP1.
3014 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emitted as a fast-call helper which keeps the return address in
RETURN_ADDR. The lookup goes through two tables: ucd_stage1 is indexed by
the block number of the character and read as bytes, ucd_stage2 is indexed
by (stage1 value * 128 + offset inside the block) and read as 16 bit
values. The stage2 value selects the record in ucd_records. */
3015 DEFINE_COMPILER;
3016
3017 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3018
3019 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3020 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3021 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3023 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3024 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
/* The index in TMP1 is scaled by 2 (shift of 1) for the 16 bit entries. */
3026 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
/* The record index in TMP2 is scaled by 8 (shift of 3), which matches the
record size checked by the assert above. */
3028 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3030 }
3031 #endif
3032
3033 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3034 {
/* Emits the entry of the main loop and returns the label of the code which
advances STR_PTR by one character. The first entry jumps over that code
(see the start jump below), so nothing is skipped before the first attempt.
When firstline is set, the end of the first line is searched first and
stored in the first_line_end local. Unless hascrorlf or firstline is set, a
two character newline is stepped over as a single unit when the newline
type is ANY, ANYCRLF, or a fixed sequence above 255. A fixed newline above
255 holds two characters: the first in bits 8..15, the second in bits 0..7.
Uses TMP1 and TMP2, and TMP3 in the firstline case. */
3035 DEFINE_COMPILER;
3036 struct sljit_label *mainloop;
3037 struct sljit_label *newlinelabel = NULL;
3038 struct sljit_jump *start;
3039 struct sljit_jump *end = NULL;
3040 struct sljit_jump *nl = NULL;
3041 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3042 struct sljit_jump *singlechar;
3043 #endif
3044 jump_list *newline = NULL;
3045 BOOL newlinecheck = FALSE;
3046 BOOL readuchar = FALSE;
3047
3048 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3049 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3050 newlinecheck = TRUE;
3051
3052 if (firstline)
3053 {
3054 /* Search for the end of the first line. */
3055 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 and restored after the search. */
3056 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3057
3058 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3059 {
/* Two character fixed newline: compare the pair of code units around
STR_PTR until it matches or STR_END is reached. */
3060 mainloop = LABEL();
3061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3062 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3064 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3065 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3067 JUMPHERE(end);
/* NOTE(review): this store is also executed when STR_END was reached
without finding a newline, which records STR_PTR - 1 rather than
STR_PTR; confirm that this is intended. */
3068 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3069 }
3070 else
3071 {
3072 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3073 mainloop = LABEL();
3074 /* Continual stores does not cause data dependency. */
3075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3076 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077 check_newlinechar(common, common->nltype, &newline, TRUE);
3078 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079 JUMPHERE(end);
/* No newline was found: the first line ends where the scan stopped. The
jumps taken on a newline land after this store, so they keep the
position stored at the top of the loop. */
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3081 set_jumps(newline, LABEL());
3082 }
3083
3084 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3085 }
3086
/* The first entry skips the advancing code; the jump lands at the end of
this function. */
3087 start = JUMP(SLJIT_JUMP);
3088
3089 if (newlinecheck)
3090 {
/* Reached from the main loop when the current code unit equals the
first newline character: step over it, and over the next code unit too
if that one equals the second newline character. */
3091 newlinelabel = LABEL();
3092 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3093 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3094 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3096 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3097 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3098 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3099 #endif
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3101 nl = JUMP(SLJIT_JUMP);
3102 }
3103
3104 mainloop = LABEL();
3105
3106 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3107 #ifdef SUPPORT_UTF
3108 if (common->utf) readuchar = TRUE;
3109 #endif
3110 if (newlinecheck) readuchar = TRUE;
3111
/* The current code unit is needed either for the newline test or to find
the length of a multi code unit character. */
3112 if (readuchar)
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3114
3115 if (newlinecheck)
3116 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3117
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3120 #if defined COMPILE_PCRE8
3121 if (common->utf)
3122 {
/* Lead bytes (>= 0xc0): add the utf8_table4 entry of the byte. */
3123 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3124 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3126 JUMPHERE(singlechar);
3127 }
3128 #elif defined COMPILE_PCRE16
3129 if (common->utf)
3130 {
/* High surrogates (0xd800..0xdbff): skip one more 16 bit code unit. */
3131 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3132 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3134 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3135 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3136 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3137 JUMPHERE(singlechar);
3138 }
3139 #endif /* COMPILE_PCRE[8|16] */
3140 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3141 JUMPHERE(start);
3142
3143 if (newlinecheck)
3144 {
3145 JUMPHERE(end);
3146 JUMPHERE(nl);
3147 }
3148
3149 return mainloop;
3150 }
3151
/* Collects what is known about the first characters a match must start
with, by walking the compiled pattern from cc.

For every subject position covered, two consecutive entries of chars are
written: the first is the character value with every uncertain bit set,
the second is the mask of those uncertain bits. A value of NOTACHAR marks
an entry which has not been written yet (on 32 bit builds the mask must
be zero as well). An all-ones value / mask pair means that nothing is
known about the character at that position.

Arguments:
  common     compiler state (utf, fcc and ctypes are read here)
  cc         current position in the compiled pattern
  chars      output array, two entries per subject position
  max_chars  maximum number of positions to record; presumably must be
             greater than zero on entry, because it is only compared
             with zero after being decremented

Returns:     the number of positions recorded by this invocation; the
             counts returned by recursive calls (alternatives) are only
             used to limit max_chars
*/
3152 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3153 {
3154 /* Recursive function, which scans prefix literals. */
3155 int len, repeat, len_save, consumed = 0;
3156 pcre_uint32 chr, mask;
3157 pcre_uchar *alternative, *cc_save, *oc;
3158 BOOL last, any, caseless;
3159 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3160 pcre_uchar othercase[8];
3161 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3162 pcre_uchar othercase[2];
3163 #else
3164 pcre_uchar othercase[1];
3165 #endif
3166
/* repeat is the number of times the next item has to be recorded. It is
set by OP_EXACT / OP_TYPEEXACT and reset to 1 after each recorded item. */
3167 repeat = 1;
3168 while (TRUE)
3169 {
/* last: stop scanning after the current item has been recorded.
any: the item matches a character set, not a single literal. */
3170 last = TRUE;
3171 any = FALSE;
3172 caseless = FALSE;
3173 switch (*cc)
3174 {
3175 case OP_CHARI:
3176 caseless = TRUE;
/* Fall through. */
3177 case OP_CHAR:
3178 last = FALSE;
3179 cc++;
3180 break;
3181
3182 case OP_SOD:
3183 case OP_SOM:
3184 case OP_SET_SOM:
3185 case OP_NOT_WORD_BOUNDARY:
3186 case OP_WORD_BOUNDARY:
3187 case OP_EODN:
3188 case OP_EOD:
3189 case OP_CIRC:
3190 case OP_CIRCM:
3191 case OP_DOLL:
3192 case OP_DOLLM:
3193 /* Zero width assertions. */
3194 cc++;
3195 continue;
3196
/* Assertion brackets are skipped as a whole. bracketend() presumably
returns the position following the closing ket - TODO confirm. */
3197 case OP_ASSERT:
3198 case OP_ASSERT_NOT:
3199 case OP_ASSERTBACK:
3200 case OP_ASSERTBACK_NOT:
3201 cc = bracketend(cc);
3202 continue;
3203
/* last remains TRUE here: the repeated character is recorded once and
the scan ends after it. */
3204 case OP_PLUS:
3205 case OP_MINPLUS:
3206 case OP_POSPLUS:
3207 cc++;
3208 break;
3209
3210 case OP_EXACTI:
3211 caseless = TRUE;
/* Fall through. */
3212 case OP_EXACT:
3213 repeat = GET2(cc, 1);
3214 last = FALSE;
3215 cc += 1 + IMM2_SIZE;
3216 break;
3217
/* Caseless variant of the OP_PLUS group above. */
3218 case OP_PLUSI:
3219 case OP_MINPLUSI:
3220 case OP_POSPLUSI:
3221 caseless = TRUE;
3222 cc++;
3223 break;
3224
3225 case OP_KET:
3226 cc += 1 + LINK_SIZE;
3227 continue;
3228
/* Skips forward by the link stored after OP_ALT (presumably to the
next alternative or to the closing ket). */
3229 case OP_ALT:
3230 cc += GET(cc, 1);
3231 continue;
3232
/* Brackets: the second and further alternatives are scanned by recursive
calls which write into the same chars entries, so the information of all
alternatives is merged. The count returned for each alternative becomes
the new max_chars. The first alternative is scanned by this loop after
the bracket header has been skipped. */
3233 case OP_ONCE:
3234 case OP_ONCE_NC:
3235 case OP_BRA:
3236 case OP_BRAPOS:
3237 case OP_CBRA:
3238 case OP_CBRAPOS:
3239 alternative = cc + GET(cc, 1);
3240 while (*alternative == OP_ALT)
3241 {
3242 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3243 if (max_chars == 0)
3244 return consumed;
3245 alternative += GET(alternative, 1);
3246 }
3247
3248 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3249 cc += IMM2_SIZE;
3250 cc += 1 + LINK_SIZE;
3251 continue;
3252
/* Character classes and character types are recorded as "any" positions.
In UTF mode of the 8 and 16 bit libraries the scan is abandoned for most
of them; for the UTF-8 cases guarded by is_char7_bitset() the scan goes
on only if that helper accepts the bit set (presumably when it contains
7 bit characters only - TODO confirm). */
3253 case OP_CLASS:
3254 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3255 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3256 #endif
3257 any = TRUE;
3258 cc += 1 + 32 / sizeof(pcre_uchar);
3259 break;
3260
3261 case OP_NCLASS:
3262 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3263 if (common->utf) return consumed;
3264 #endif
3265 any = TRUE;
3266 cc += 1 + 32 / sizeof(pcre_uchar);
3267 break;
3268
3269 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3270 case OP_XCLASS:
3271 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3272 if (common->utf) return consumed;
3273 #endif
3274 any = TRUE;
3275 cc += GET(cc, 1);
3276 break;
3277 #endif
3278
3279 case OP_DIGIT:
3280 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3281 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3282 return consumed;
3283 #endif
3284 any = TRUE;
3285 cc++;
3286 break;
3287
3288 case OP_WHITESPACE:
3289 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3290 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3291 return consumed;
3292 #endif
3293 any = TRUE;
3294 cc++;
3295 break;
3296
3297 case OP_WORDCHAR:
3298 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3299 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3300 return consumed;
3301 #endif
3302 any = TRUE;
3303 cc++;
3304 break;
3305
3306 case OP_NOT_DIGIT:
3307 case OP_NOT_WHITESPACE:
3308 case OP_NOT_WORDCHAR:
3309 case OP_ANY:
3310 case OP_ALLANY:
3311 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3312 if (common->utf) return consumed;
3313 #endif
3314 any = TRUE;
3315 cc++;
3316 break;
3317
3318 #ifdef SUPPORT_UCP
3319 case OP_NOTPROP:
3320 case OP_PROP:
3321 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3322 if (common->utf) return consumed;
3323 #endif
3324 any = TRUE;
3325 cc += 1 + 2;
3326 break;
3327 #endif
3328
/* Only the repeat count is taken here; the character type which follows
is processed by the next iteration. */
3329 case OP_TYPEEXACT:
3330 repeat = GET2(cc, 1);
3331 cc += 1 + IMM2_SIZE;
3332 continue;
3333
/* Any other opcode ends the prefix. */
3334 default:
3335 return consumed;
3336 }
3337
3338 if (any)
3339 {
/* The mask covers every bit of a code unit of the current library. */
3340 #if defined COMPILE_PCRE8
3341 mask = 0xff;
3342 #elif defined COMPILE_PCRE16
3343 mask = 0xffff;
3344 #elif defined COMPILE_PCRE32
3345 mask = 0xffffffff;
3346 #else
3347 SLJIT_ASSERT_STOP();
3348 #endif
3349
/* Mark repeat positions as completely unknown. */
3350 do
3351 {
3352 chars[0] = mask;
3353 chars[1] = mask;
3354
3355 consumed++;
3356 if (--max_chars == 0)
3357 return consumed;
3358 chars += 2;
3359 }
3360 while (--repeat > 0);
3361
3362 repeat = 1;
3363 continue;
3364 }
3365
/* A literal character: len is its length in code units. */
3366 len = 1;
3367 #ifdef SUPPORT_UTF
3368 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3369 #endif
3370
/* For a caseless literal the other case form is placed into othercase.
In UTF mode the scan ends here if the encoded length of the other case
differs from len. A character without other case is handled as a
caseful one. */
3371 if (caseless && char_has_othercase(common, cc))
3372 {
3373 #ifdef SUPPORT_UTF
3374 if (common->utf)
3375 {
3376 GETCHAR(chr, cc);
3377 if (PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3378 return consumed;
3379 }
3380 else
3381 #endif
3382 {
3383 chr = *cc;
3384 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3385 }
3386 }
3387 else
3388 caseless = FALSE;
3389
/* The literal is recorded code unit by code unit, repeat times. The
saved values allow restarting from its first code unit. */
3390 len_save = len;
3391 cc_save = cc;
3392 while (TRUE)
3393 {
3394 oc = othercase;
3395 do
3396 {
3397 chr = *cc;
3398 #ifdef COMPILE_PCRE32
3399 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3400 return consumed;
3401 #endif
/* The bits which differ between the two cases are uncertain. */
3402 mask = 0;
3403 if (caseless)
3404 {
3405 mask = *cc ^ *oc;
3406 chr |= mask;
3407 }
3408
3409 #ifdef COMPILE_PCRE32
3410 if (chars[0] == NOTACHAR && chars[1] == 0)
3411 #else
3412 if (chars[0] == NOTACHAR)
3413 #endif
3414 {
/* First write to this position. */
3415 chars[0] = chr;
3416 chars[1] = mask;
3417 }
3418 else
3419 {
/* Merge with the data recorded earlier for this position: every bit
which differs from the stored value becomes uncertain as well. */
3420 mask |= chars[0] ^ chr;
3421 chr |= mask;
3422 chars[0] = chr;
3423 chars[1] |= mask;
3424 }
3425
3426 len--;
3427 consumed++;
3428 if (--max_chars == 0)
3429 return consumed;
3430 chars += 2;
3431 cc++;
3432 oc++;
3433 }
3434 while (len > 0);
3435
3436 if (--repeat == 0)
3437 break;
3438
3439 len = len_save;
3440 cc = cc_save;
3441 }
3442
3443 repeat = 1;
3444 if (last)
3445 return consumed;
3446 }
3447 }
3448
/* Maximum number of prefix positions examined by
fast_forward_first_n_chars(). */
3449 #define MAX_N_CHARS 16
3450
/* Generates a forward search which checks up to three selected positions
of the pattern prefix collected by scan_prefix(), optionally preceded by
a table driven skip based on a run of well known prefix characters.

Arguments:
  common     compiler state
  firstline  when TRUE the search is limited by the position stored in
             the first_line_end local

Returns:     FALSE if no code was generated (the prefix is too short or
             has no suitable positions, or fast_forward_first_char is
             the better choice); TRUE otherwise. TRUE is also returned,
             before any code is generated, when the allocation of the
             skip table fails; common->read_only_data is NULL in that
             case (presumably checked by the caller - TODO confirm).
*/
3451 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3452 {
3453 DEFINE_COMPILER;
3454 struct sljit_label *start;
3455 struct sljit_jump *quit;
3456 pcre_uint32 chars[MAX_N_CHARS * 2];
3457 pcre_uint8 ones[MAX_N_CHARS];
3458 int offsets[3];
3459 pcre_uint32 mask, byte;
3460 int i, max, from;
3461 int range_right = -1, range_len = 4 - 1;
3462 sljit_ub *update_table = NULL;
3463 BOOL in_range;
3464
3465 /* This is even TRUE, if both are NULL. */
3466 SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3467
/* Mark every (value, mask) pair as not written yet. */
3468 for (i = 0; i < MAX_N_CHARS; i++)
3469 {
3470 chars[i << 1] = NOTACHAR;
3471 chars[(i << 1) + 1] = 0;
3472 }
3473
3474 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3475
3476 if (max <= 1)
3477 return FALSE;
3478
/* ones[i] is computed from the mask of position i, four bits at a time,
through ones_in_half_byte (presumably the number of set bits of a
nibble, which makes ones[i] the number of uncertain bits). */
3479 for (i = 0; i < max; i++)
3480 {
3481 mask = chars[(i << 1) + 1];
3482 ones[i] = ones_in_half_byte[mask & 0xf];
3483 mask >>= 4;
3484 while (mask != 0)
3485 {
3486 ones[i] += ones_in_half_byte[mask & 0xf];
3487 mask >>= 4;
3488 }
3489 }
3490
/* Search for the longest run of positions which have at most one
uncertain bit. A run is accepted only if it is longer than the initial
range_len (three positions). range_right is the index of its last
position. The extra iteration with i == max closes a run which reaches
the end of the prefix. from is read only while in_range is TRUE, so it
is always assigned before use. */
3491 in_range = FALSE;
3492 for (i = 0; i <= max; i++)
3493 {
3494 if (i < max && ones[i] <= 1)
3495 {
3496 if (!in_range)
3497 {
3498 in_range = TRUE;
3499 from = i;
3500 }
3501 }
3502 else if (in_range)
3503 {
3504 if ((i - from) > range_len)
3505 {
3506 range_len = i - from;
3507 range_right = i - 1;
3508 }
3509 in_range = FALSE;
3510 }
3511 }
3512
3513 if (range_right >= 0)
3514 {
3515 /* Since no data is consumed (see the assert in the beginning
3516 of this function), this space can be reallocated. */
3517 if (common->read_only_data)
3518 SLJIT_FREE(common->read_only_data);
3519
3520 common->read_only_data_size += 256;
3521 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3522 if (common->read_only_data == NULL)
3523 return TRUE;
3524
/* The first 256 bytes of the read only data form the skip table, which
is indexed by the low 8 bits of a character. Every entry starts as the
byte length of the whole run. */
3525 update_table = (sljit_ub *)common->read_only_data;
3526 common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3527 memset(update_table, IN_UCHARS(range_len), 256);
3528
/* The entry of each character of the run (and of the variant which
differs in the uncertain bit, when that bit is in the low 8 bits) is
lowered to the byte distance of the character from the right end of
the run. */
3529 for (i = 0; i < range_len; i++)
3530 {
3531 byte = chars[(range_right - i) << 1] & 0xff;
3532 if (update_table[byte] > IN_UCHARS(i))
3533 update_table[byte] = IN_UCHARS(i);
3534 mask = chars[((range_right - i) << 1) + 1] & 0xff;
3535 if (mask != 0)
3536 {
3537 byte ^= mask;
3538 if (update_table[byte] > IN_UCHARS(i))
3539 update_table[byte] = IN_UCHARS(i);
3540 }
3541 }
3542 }
3543
/* Select the positions to be compared: offsets[0] is the first and
offsets[1] is the last position which has at most two uncertain bits,
offsets[2] is a position between them. -1 means unused. */
3544 offsets[0] = -1;
3545 /* Scan forward. */
3546 for (i = 0; i < max; i++)
3547 if (ones[i] <= 2) {
3548 offsets[0] = i;
3549 break;
3550 }
3551
3552 if (offsets[0] == -1)
3553 return FALSE;
3554
3555 /* Scan backward. */
3556 offsets[1] = -1;
3557 for (i = max - 1; i > offsets[0]; i--)
3558 if (ones[i] <= 2 && i != range_right)
3559 {
3560 offsets[1] = i;
3561 break;
3562 }
3563
3564 /* This case is handled better by fast_forward_first_char. */
3565 if (offsets[1] == -1 && offsets[0] == 0)
3566 return FALSE;
3567
/* The third position is used only when there is no skip table. */
3568 offsets[2] = -1;
3569 if (offsets[1] >= 0 && range_right == -1)
3570 {
3571 /* Scan from middle. */
3572 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3573 if (ones[i] <= 2)
3574 {
3575 offsets[2] = i;
3576 break;
3577 }
3578
3579 if (offsets[2] == -1)
3580 {
3581 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3582 if (ones[i] <= 2)
3583 {
3584 offsets[2] = i;
3585 break;
3586 }
3587 }
3588 }
3589
3590 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3591 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3592
/* Move the selected pairs to the front of chars: chars[0..1] belongs to
offsets[0], chars[2..3] to offsets[2] and chars[4..5] to offsets[1]. */
3593 chars[0] = chars[offsets[0] << 1];
3594 chars[1] = chars[(offsets[0] << 1) + 1];
3595 if (offsets[2] >= 0)
3596 {
3597 chars[2] = chars[offsets[2] << 1];
3598 chars[3] = chars[(offsets[2] << 1) + 1];
3599 }
3600 if (offsets[1] >= 0)
3601 {
3602 chars[4] = chars[offsets[1] << 1];
3603 chars[5] = chars[(offsets[1] << 1) + 1];
3604 }
3605
/* From here max is the index of the last prefix position. STR_END is
lowered by that many characters while the search runs, so the loads at
the selected offsets stay below the original end. With firstline the
original STR_END is kept in TMP3, and the limit is the lower one of the
reduced end and the first_line_end value. */
3606 max -= 1;
3607 if (firstline)
3608 {
3609 SLJIT_ASSERT(common->first_line_end != 0);
3610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3611 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3612 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3613 quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3614 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3615 JUMPHERE(quit);
3616 }
3617 else
3618 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3619
/* Except on x86-32, the address of the skip table is kept in the
RETURN_ADDR register during the search. */
3620 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3621 if (range_right >= 0)
3622 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3623 #endif
3624
3625 start = LABEL();
3626 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3627
3628 if (range_right >= 0)
3629 {
/* Load one byte of the subject character at index range_right. In the
second form (16 / 32 bit libraries when SLJIT_LITTLE_ENDIAN is not set)
the last byte of the code unit is loaded. */
3630 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3631 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3632 #else
3633 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3634 #endif
3635
/* Advance STR_PTR by the table entry of that byte and restart the loop
if the entry is not zero. */
3636 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3637 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3638 #else
3639 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3640 #endif
3641 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3642 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3643 }
3644
/* Load the characters at offsets[0] and offsets[1], then step STR_PTR
forward by one character, so every mismatch below can simply jump back
to start. */
3645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3646 if (offsets[1] >= 0)
3647 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3648 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3649
/* The uncertain bits are set in the loaded character before it is
compared with the stored value (which has the same bits set). */
3650 if (chars[1] != 0)
3651 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3652 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been incremented, hence the - 1. */
3653 if (offsets[2] >= 0)
3654 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3655
3656 if (offsets[1] >= 0)
3657 {
3658 if (chars[5] != 0)
3659 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3660 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3661 }
3662
3663 if (offsets[2] >= 0)
3664 {
3665 if (chars[3] != 0)
3666 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3667 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3668 }
/* All selected positions matched: undo the increment above. */
3669 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3670
3671 JUMPHERE(quit);
3672
/* Restore STR_END. With firstline and a skip table, STR_PTR is also
limited to the first_line_end value. */
3673 if (firstline)
3674 {
3675 if (range_right >= 0)
3676 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3677 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3678 if (range_right >= 0)
3679 {
3680 quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3681 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3682 JUMPHERE(quit);
3683 }
3684 }
3685 else
3686 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3687 return TRUE;
3688 }
3689
3690 #undef MAX_N_CHARS
3691
/* Generates a loop which advances STR_PTR until the code unit it points
to is first_char (or its other case form), or until STR_END is reached.

Arguments:
  common      compiler state
  first_char  the code unit to search for
  caseless    when TRUE the other case of first_char is accepted as well
  firstline   when TRUE the search ends at the position stored in the
              first_line_end local instead of the end of the subject
*/
3692 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3693 {
3694 DEFINE_COMPILER;
3695 struct sljit_label *start;
3696 struct sljit_jump *quit;
3697 struct sljit_jump *found;
3698 pcre_uchar oc, bit;
3699
/* The original STR_END is kept in TMP3 while the search is limited. */
3700 if (firstline)
3701 {
3702 SLJIT_ASSERT(common->first_line_end != 0);
3703 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3704 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3705 }
3706
3707 start = LABEL();
3708 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3709 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3710
/* oc is the other case of first_char, or first_char itself when the
search is caseful. */
3711 oc = first_char;
3712 if (caseless)
3713 {
3714 oc = TABLE_GET(first_char, common->fcc, first_char);
3715 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3716 if (first_char > 127 && common->utf)
3717 oc = UCD_OTHERCASE(first_char);
3718 #endif
3719 }
3720 if (first_char == oc)
3721 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3722 else
3723 {
3724 bit = first_char ^ oc;
3725 if (is_powerof2(bit))
3726 {
/* The two forms differ in a single bit: set that bit in the subject
character and compare once. */
3727 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3728 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3729 }
3730 else
3731 {
/* Otherwise compare with both forms and combine the two results. */
3732 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3733 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3734 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3735 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3736 found = JUMP(SLJIT_C_NOT_ZERO);
3737 }
3738 }
3739
/* No match at this position: step one code unit forward and retry. */
3740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3741 JUMPTO(SLJIT_JUMP, start);
3742 JUMPHERE(found);
3743 JUMPHERE(quit);
3744
3745 if (firstline)
3746 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3747 }
3748
/* Generates code which moves STR_PTR forward to the position following
the next newline. Nothing is done when STR_PTR is not above the str
member of the arguments, or when the end of the search area is reached.

Arguments:
  common     compiler state (nltype, newline, nlmin and nlmax are read)
  firstline  when TRUE the search ends at the position stored in the
             first_line_end local; the original STR_END is kept in TMP3
             and restored on both exit paths
*/
3749 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3750 {
3751 DEFINE_COMPILER;
3752 struct sljit_label *loop;
3753 struct sljit_jump *lastchar;
3754 struct sljit_jump *firstchar;
3755 struct sljit_jump *quit;
3756 struct sljit_jump *foundcr = NULL;
3757 struct sljit_jump *notfoundnl;
3758 jump_list *newline = NULL;
3759
3760 if (firstline)
3761 {
3762 SLJIT_ASSERT(common->first_line_end != 0);
3763 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3764 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3765 }
3766
/* Fixed, two character long newline. */
3767 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3768 {
3769 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3770 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3771 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3772 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3773 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3774
/* Step one character back if STR_PTR is at least two characters after
begin. The loop below increments STR_PTR before reading the two
characters in front of it, so in that case its first iteration inspects
the two characters before the original position. */
3775 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3776 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3777 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3778 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3779 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3780 #endif
3781 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3782
3783 loop = LABEL();
3784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3785 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3786 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3787 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3788 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3789 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3790
3791 JUMPHERE(quit);
3792 JUMPHERE(firstchar);
3793 JUMPHERE(lastchar);
3794
/* The original STR_END was saved in TMP3 at the top of this function and
TMP3 is not modified by the code above, so it is restored from there,
in the same way as on the other exit path. */
3795 if (firstline)
3796 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3797 return;
3798 }
3799
/* Every other newline type. The search starts with the character before
STR_PTR, except when STR_PTR is not above str. */
3800 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3801 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3802 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3803 skip_char_back(common);
3804
/* The jumps collected in the newline list are bound to the loop label,
so they are taken to continue the search (presumably when the character
is not a newline - TODO confirm against check_newlinechar). */
3805 loop = LABEL();
3806 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3807 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3808 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3809 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3810 check_newlinechar(common, common->nltype, &newline, FALSE);
3811 set_jumps(newline, loop);
3812
3813 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3814 {
/* A CR was found: if it is followed by an NL (and the end has not been
reached), the NL is skipped as well. */
3815 quit = JUMP(SLJIT_JUMP);
3816 JUMPHERE(foundcr);
3817 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3818 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3819 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3820 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3821 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3822 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3823 #endif
3824 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3825 JUMPHERE(notfoundnl);
3826 JUMPHERE(quit);
3827 }
3828 JUMPHERE(lastchar);
3829 JUMPHERE(firstchar);
3830
3831 if (firstline)
3832 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3833 }
3834
/* Forward declaration; used by fast_forward_start_bits() below. */
3835 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3836
/* Generates a loop which advances STR_PTR until it points to a character
whose bit is set in start_bits, or until STR_END is reached.

Arguments:
  common      compiler state
  start_bits  bit set of the possible starting characters; it is indexed
              by character value (byte: value >> 3, bit: value & 7), and
              its address is embedded into the generated code
  firstline   when TRUE the search ends at the position stored in the
              first_line_end local; the original STR_END is kept in
              RETURN_ADDR meanwhile (TMP3 is used for another purpose
              in UTF mode)
*/
3837 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3838 {
3839 DEFINE_COMPILER;
3840 struct sljit_label *start;
3841 struct sljit_jump *quit;
3842 struct sljit_jump *found = NULL;
3843 jump_list *matches = NULL;
3844 #ifndef COMPILE_PCRE8
3845 struct sljit_jump *jump;
3846 #endif
3847
3848 if (firstline)
3849 {
3850 SLJIT_ASSERT(common->first_line_end != 0);
3851 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3852 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3853 }
3854
3855 start = LABEL();
3856 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3857 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The test below modifies TMP1, but in UTF mode the code unit is needed
again to skip the rest of the character. */
3858 #ifdef SUPPORT_UTF
3859 if (common->utf)
3860 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3861 #endif
3862
/* If check_class_ranges() returns TRUE, the jumps it added to matches
are bound to the exit of the loop below. Otherwise an explicit bit test
is generated. */
3863 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3864 {
/* In the 16 and 32 bit libraries every value from 255 upwards is tested
as 255. */
3865 #ifndef COMPILE_PCRE8
3866 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3867 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3868 JUMPHERE(jump);
3869 #endif
3870 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3871 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3872 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3873 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3874 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3875 found = JUMP(SLJIT_C_NOT_ZERO);
3876 }
3877
/* Not a starting character: step over it. */
3878 #ifdef SUPPORT_UTF
3879 if (common->utf)
3880 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3881 #endif
3882 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3883 #ifdef SUPPORT_UTF
3884 #if defined COMPILE_PCRE8
3885 if (common->utf)
3886 {
/* For a first byte of 0xc0 or above, the utf8_table4 entry indexed by
(byte - 0xc0) is added to STR_PTR. */
3887 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3888 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3889 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3890 }
3891 #elif defined COMPILE_PCRE16
3892 if (common->utf)
3893 {
/* One more code unit (two bytes) is skipped when the top six bits of
the code unit equal those of 0xd800. */
3894 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3895 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3896 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3897 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3898 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3900 }
3901 #endif /* COMPILE_PCRE[8|16] */
3902 #endif /* SUPPORT_UTF */
3903 JUMPTO(SLJIT_JUMP, start);
/* Exit of the loop: a starting character was found or STR_END was
reached. */
3904 if (found != NULL)
3905 JUMPHERE(found);
3906 if (matches != NULL)
3907 set_jumps(matches, LABEL());
3908 JUMPHERE(quit);
3909
3910 if (firstline)
3911 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3912 }
3913
/* Generates a forward search for req_char, a character which must be
present in the subject. The position where it was found is stored in the
req_char_ptr local, and the search is skipped when STR_PTR is still
below that stored position, or when STR_PTR + REQ_BYTE_MAX is below
STR_END.

Arguments:
  common         compiler state
  req_char       the code unit to search for
  caseless       when TRUE the other case of req_char is accepted too
  has_firstchar  when TRUE the search starts one character after STR_PTR

Returns:         the jump which is taken when STR_END is reached without
                 finding req_char; its target is not set here
*/
3914 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3915 {
3916 DEFINE_COMPILER;
3917 struct sljit_label *loop;
3918 struct sljit_jump *toolong;
3919 struct sljit_jump *alreadyfound;
3920 struct sljit_jump *found;
3921 struct sljit_jump *foundoc = NULL;
3922 struct sljit_jump *notfound;
3923 pcre_uint32 oc, bit;
3924
3925 SLJIT_ASSERT(common->req_char_ptr != 0);
3926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3927 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3928 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3929 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3930
/* TMP1 is the search pointer; STR_PTR itself is not changed. */
3931 if (has_firstchar)
3932 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3933 else
3934 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3935
3936 loop = LABEL();
3937 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3938
3939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself when the search
is caseful. */
3940 oc = req_char;
3941 if (caseless)
3942 {
3943 oc = TABLE_GET(req_char, common->fcc, req_char);
3944 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3945 if (req_char > 127 && common->utf)
3946 oc = UCD_OTHERCASE(req_char);
3947 #endif
3948 }
3949 if (req_char == oc)
3950 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3951 else
3952 {
3953 bit = req_char ^ oc;
3954 if (is_powerof2(bit))
3955 {
/* The two forms differ in a single bit: set that bit in the subject
character and compare once. */
3956 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3957 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3958 }
3959 else
3960 {
/* Otherwise compare with both forms separately. */
3961 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3962 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3963 }
3964 }
3965 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3966 JUMPTO(SLJIT_JUMP, loop);
3967
/* Found: remember the position, so later searches can be skipped. */
3968 JUMPHERE(found);
3969 if (foundoc)
3970 JUMPHERE(foundoc);
3971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3972 JUMPHERE(alreadyfound);
3973 JUMPHERE(toolong);
3974 return notfound;
3975 }
3976
/* Generates a subroutine (entered with sljit_emit_fast_enter and left
with sljit_emit_fast_return, the return address is kept in RETURN_ADDR)
which processes the frame records starting at STACK_TOP. Each record
begins with a signed word:

  positive  an offset added to the base loaded by GET_LOCAL_BASE
            (presumably the address of the local variables): the next
            two words of the record are copied to that location and to
            the word after it; the record is three words long
  negative  the negated value is used as the offset and only one word
            is copied; the record is two words long
  zero      end of the records, the subroutine returns

STACK_TOP itself is not changed; TMP1 is used as the read pointer. */
3977 static void do_revertframes(compiler_common *common)
3978 {
3979 DEFINE_COMPILER;
3980 struct sljit_jump *jump;
3981 struct sljit_label *mainloop;
3982
3983 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3984 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3985 GET_LOCAL_BASE(TMP3, 0, 0);
3986
3987 /* Drop frames until we reach STACK_TOP. */
3988 mainloop = LABEL();
3989 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3990 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3991 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3992
/* Positive value: restore two words. */
3993 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3994 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3995 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3996 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3997 JUMPTO(SLJIT_JUMP, mainloop);
3998
/* Zero or negative value. The second conditional jump relies on the
flags set by the comparison above. */
3999 JUMPHERE(jump);
4000 jump = JUMP(SLJIT_C_SIG_LESS);
4001 /* End of dropping frames. */
4002 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4003
/* Negative value: restore a single word. */
4004 JUMPHERE(jump);
4005 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4006 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4007 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4008 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4009 JUMPTO(SLJIT_JUMP, mainloop);
4010 }
4011
/* Generates a subroutine (entered with sljit_emit_fast_enter, the return
address is kept in the LOCALS0 local) which decides whether STR_PTR is
at a word boundary.

The "word character" status (0 or 1) of the character before STR_PTR is
stored in LOCALS1, and the status of the character at STR_PTR is
computed in TMP2. A missing character (STR_PTR is not above the begin
member of the arguments, or the check_str_end test fails) counts as 0.
The last instruction XORs the two values with the equal flag requested,
so the result is zero when they are the same; the flag is presumably
tested by the caller after the return - TODO confirm. */
4012 static void check_wordboundary(compiler_common *common)
4013 {
4014 DEFINE_COMPILER;
4015 struct sljit_jump *skipread;
4016 jump_list *skipread_list = NULL;
4017 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4018 struct sljit_jump *jump;
4019 #endif
4020
/* The shift by 4 used below depends on this value. */
4021 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4022
4023 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4024 /* Get type of the previous char, and put it to LOCALS1. */
4025 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4026 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
4028 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and read the previous character again (presumably
read_char leaves STR_PTR at its original position - TODO confirm). */
4029 skip_char_back(common);
4030 check_start_used_ptr(common);
4031 read_char(common);
4032
4033 /* Testing char type. */
4034 #ifdef SUPPORT_UCP
4035 if (common->use_ucp)
4036 {
/* An underscore is a word character. For anything else the getucd
subroutine is called (assumed to leave the character type in TMP1 -
TODO confirm), and the type is accepted if it is in the ucp_Ll..ucp_Lu
or in the ucp_Nd..ucp_No range (unsigned range checks). */
4037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4038 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4039 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4040 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4041 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4042 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4043 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4044 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4045 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4046 JUMPHERE(jump);
4047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
4048 }
4049 else
4050 #endif
4051 {
/* Without UCP the ctype_word bit of the ctypes table entry is used.
Characters above 255 skip the lookup, so LOCALS1 stays zero. */
4052 #ifndef COMPILE_PCRE8
4053 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4054 #elif defined SUPPORT_UTF
4055 /* Here LOCALS1 has already been zeroed. */
4056 jump = NULL;
4057 if (common->utf)
4058 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4059 #endif /* COMPILE_PCRE8 */
4060 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4061 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4062 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4063 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
4064 #ifndef COMPILE_PCRE8
4065 JUMPHERE(jump);
4066 #elif defined SUPPORT_UTF
4067 if (jump != NULL)
4068 JUMPHERE(jump);
4069 #endif /* COMPILE_PCRE8 */
4070 }
4071 JUMPHERE(skipread);
4072
/* Get the type of the current character into TMP2. The jumps collected
in skipread_list are bound after the type test, so TMP2 stays zero when
they are taken. */
4073 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4074 check_str_end(common, &skipread_list);
4075 peek_char(common, READ_CHAR_MAX);
4076
4077 /* Testing char type. This is a code duplication. */
4078 #ifdef SUPPORT_UCP
4079 if (common->use_ucp)
4080 {
4081 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4082 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4083 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4084 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4085 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4086 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4087 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4088 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4089 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4090 JUMPHERE(jump);
4091 }
4092 else
4093 #endif
4094 {
4095 #ifndef COMPILE_PCRE8
4096 /* TMP2 may be destroyed by peek_char. */
4097 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4098 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4099 #elif defined SUPPORT_UTF
4100 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4101 jump = NULL;
4102 if (common->utf)
4103 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4104 #endif
4105 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4106 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4107 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4108 #ifndef COMPILE_PCRE8
4109 JUMPHERE(jump);
4110 #elif defined SUPPORT_UTF
4111 if (jump != NULL)
4112 JUMPHERE(jump);
4113 #endif /* COMPILE_PCRE8 */
4114 }
4115 set_jumps(skipread_list, LABEL());
4116
/* Compare the two types: the result is zero when they are equal. */
4117 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4118 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4119 }
4120
4121 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4122 {
4123 DEFINE_COMPILER;
4124 int ranges[MAX_RANGE_SIZE];
4125 pcre_uint8 bit, cbit, all;
4126 int i, byte, length = 0;
4127
4128 bit = bits[0] & 0x1;
4129 /* All bits will be zero or one (since bit is zero or one). */
4130 all = -bit;
4131
4132 for (i = 0; i < 256; )
4133 {
4134 byte = i >> 3;
4135 if ((i & 0x7) == 0 && bits[byte] == all)
4136 i += 8;
4137 else
4138 {
4139 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4140 if (cbit != bit)
4141 {
4142 if (length >= MAX_RANGE_SIZE)
4143 return FALSE;
4144 ranges[length] = i;
4145 length++;
4146 bit = cbit;
4147 all = -cbit;
4148 }
4149 i++;
4150 }
4151 }
4152
4153 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4154 {
4155 if (length >= MAX_RANGE_SIZE)
4156 return FALSE;
4157 ranges[length] = 256;
4158 length++;
4159 }
4160
4161 if (length < 0 || length > 4)
4162 return FALSE;
4163
4164 bit = bits[0] & 0x1;
4165 if (invert) bit ^= 0x1;
4166
4167 /* No character is accepted. */
4168 if (length == 0 && bit == 0)
4169 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4170
4171 switch(length)
4172 {
4173 case 0:
4174 /* When bit != 0, all characters are accepted. */
4175 return TRUE;
4176
4177 case 1:
4178 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4179 return TRUE;
4180
4181 case 2:
4182 if (ranges[0] + 1 != ranges[1])
4183 {
4184 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4185 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4186 }
4187 else
4188 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4189 return TRUE;
4190
4191 case 3:
4192 if (bit != 0)
4193 {
4194 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4195 if (ranges[0] + 1 != ranges[1])
4196 {
4197 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4198 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4199 }
4200 else
4201 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4202 return TRUE;
4203 }
4204
4205 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4206 if (ranges[1] + 1 != ranges[2])
4207 {
4208 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4209 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4210 }
4211 else
4212 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4213 return TRUE;
4214
4215 case 4:
4216 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4217 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4218 && is_powerof2(ranges[2] - ranges[0]))
4219 {
4220 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4221 if (ranges[2] + 1 != ranges[3])
4222 {
4223 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4224 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4225 }
4226 else
4227 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4228 return TRUE;
4229 }
4230
4231 if (bit != 0)
4232 {
4233 i = 0;
4234 if (ranges[0] + 1 != ranges[1])
4235 {
4236 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4237 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4238 i = ranges[0];
4239 }
4240 else
4241 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4242
4243 if (ranges[2] + 1 != ranges[3])
4244 {
4245 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4246 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4247 }
4248 else
4249 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4250 return TRUE;
4251 }
4252
4253 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4254 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4255 if (ranges[1] + 1 != ranges[2])
4256 {
4257 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4258 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4259 }
4260 else
4261 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4262 return TRUE;
4263
4264 default:
4265 SLJIT_ASSERT_STOP();
4266 return FALSE;
4267 }
4268 }
4269
4270 static void check_anynewline(compiler_common *common)
4271 {
4272 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4273 DEFINE_COMPILER;
4274
4275 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4276
4277 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4278 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4279 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4281 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4282 #ifdef COMPILE_PCRE8
4283 if (common->utf)
4284 {
4285 #endif
4286 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4287 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4288 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4289 #ifdef COMPILE_PCRE8
4290 }
4291 #endif
4292 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4293 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4294 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4295 }
4296
4297 static void check_hspace(compiler_common *common)
4298 {
4299 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4300 DEFINE_COMPILER;
4301
4302 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4303
4304 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4305 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4306 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4307 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4308 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4309 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4310 #ifdef COMPILE_PCRE8
4311 if (common->utf)
4312 {
4313 #endif
4314 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4315 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4316 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4317 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4318 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4319 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4320 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4321 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4322 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4323 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4324 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4325 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4327 #ifdef COMPILE_PCRE8
4328 }
4329 #endif
4330 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4331 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4332
4333 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4334 }
4335
4336 static void check_vspace(compiler_common *common)
4337 {
4338 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4339 DEFINE_COMPILER;
4340
4341 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4342
4343 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4344 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4345 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4347 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4348 #ifdef COMPILE_PCRE8
4349 if (common->utf)
4350 {
4351 #endif
4352 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4353 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4354 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4355 #ifdef COMPILE_PCRE8
4356 }
4357 #endif
4358 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4359 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4360
4361 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4362 }
4363
4364 #define CHAR1 STR_END
4365 #define CHAR2 STACK_TOP
4366
4367 static void do_casefulcmp(compiler_common *common)
4368 {
4369 DEFINE_COMPILER;
4370 struct sljit_jump *jump;
4371 struct sljit_label *label;
4372
4373 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4374 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4375 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4377 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4378 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4379
4380 label = LABEL();
4381 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4382 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4383 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4384 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4385 JUMPTO(SLJIT_C_NOT_ZERO, label);
4386
4387 JUMPHERE(jump);
4388 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4389 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4390 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4391 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4392 }
4393
4394 #define LCC_TABLE STACK_LIMIT
4395
4396 static void do_caselesscmp(compiler_common *common)
4397 {
4398 DEFINE_COMPILER;
4399 struct sljit_jump *jump;
4400 struct sljit_label *label;
4401
4402 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4403 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4404
4405 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4408 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4409 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4410 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4411
4412 label = LABEL();
4413 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4414 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4415 #ifndef COMPILE_PCRE8
4416 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4417 #endif
4418 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4419 #ifndef COMPILE_PCRE8
4420 JUMPHERE(jump);
4421 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4422 #endif
4423 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4424 #ifndef COMPILE_PCRE8
4425 JUMPHERE(jump);
4426 #endif
4427 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4428 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4429 JUMPTO(SLJIT_C_NOT_ZERO, label);
4430
4431 JUMPHERE(jump);
4432 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4433 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4434 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4435 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4436 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4437 }
4438
4439 #undef LCC_TABLE
4440 #undef CHAR1
4441 #undef CHAR2
4442
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode
property tables. It is an ordinary C function, because generating this
logic as JIT code would be inefficient.

  src1, end1   the first sequence
  args         supplies the second sequence: it starts at args->uchar_ptr
               and must not be read at or beyond args->end

Returns NULL if a character pair does not match, (pcre_uchar*)1 if the
second sequence is exhausted before the first one, and otherwise the
position in the second sequence after the last compared character. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
pcre_uint32 ch1, ch2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, src2);
  record = GET_UCD(ch2);

  /* Identical characters, or ch1 is the other case of ch2. */
  if (ch1 == ch2 || ch1 == ch2 + record->other_case)
    continue;

  /* Otherwise ch1 has to be listed in the caseless set of ch2. The loop
  gives up as soon as an entry greater than ch1 is reached. */
  for (set = PRIV(ucd_caseless_sets) + record->caseset; ; set++)
    {
    if (ch1 < *set)
      return NULL;
    if (ch1 == *set)
      break;
    }
  }
return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4475
4476 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4477 compare_context* context, jump_list **backtracks)
4478 {
4479 DEFINE_COMPILER;
4480 unsigned int othercasebit = 0;
4481 pcre_uchar *othercasechar = NULL;
4482 #ifdef SUPPORT_UTF
4483 int utflength;
4484 #endif
4485
4486 if (caseless && char_has_othercase(common, cc))
4487 {
4488 othercasebit = char_get_othercase_bit(common, cc);
4489 SLJIT_ASSERT(othercasebit);
4490 /* Extracting bit difference info. */
4491 #if defined COMPILE_PCRE8
4492 othercasechar = cc + (othercasebit >> 8);
4493 othercasebit &= 0xff;
4494 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4495 /* Note that this code only handles characters in the BMP. If there
4496 ever are characters outside the BMP whose othercase differs in only one
4497 bit from itself (there currently are none), this code will need to be
4498 revised for COMPILE_PCRE32. */
4499 othercasechar = cc + (othercasebit >> 9);
4500 if ((othercasebit & 0x100) != 0)
4501 othercasebit = (othercasebit & 0xff) << 8;
4502 else
4503 othercasebit &= 0xff;
4504 #endif /* COMPILE_PCRE[8|16|32] */
4505 }
4506
4507 if (context->sourcereg == -1)
4508 {
4509 #if defined COMPILE_PCRE8
4510 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4511 if (context->length >= 4)
4512 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4513 else if (context->length >= 2)
4514 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4515 else
4516 #endif
4517 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4518 #elif defined COMPILE_PCRE16
4519 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4520 if (context->length >= 4)
4521 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4522 else
4523 #endif
4524 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4525 #elif defined COMPILE_PCRE32
4526 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4527 #endif /* COMPILE_PCRE[8|16|32] */
4528 context->sourcereg = TMP2;
4529 }
4530
4531 #ifdef SUPPORT_UTF
4532 utflength = 1;
4533 if (common->utf && HAS_EXTRALEN(*cc))
4534 utflength += GET_EXTRALEN(*cc);
4535
4536 do
4537 {
4538 #endif
4539
4540 context->length -= IN_UCHARS(1);
4541 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4542
4543 /* Unaligned read is supported. */
4544 if (othercasebit != 0 && othercasechar == cc)
4545 {
4546 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4547 context->oc.asuchars[context->ucharptr] = othercasebit;
4548 }
4549 else
4550 {
4551 context->c.asuchars[context->ucharptr] = *cc;
4552 context->oc.asuchars[context->ucharptr] = 0;
4553 }
4554 context->ucharptr++;
4555
4556 #if defined COMPILE_PCRE8
4557 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4558 #else
4559 if (context->ucharptr >= 2 || context->length == 0)
4560 #endif
4561 {
4562 if (context->length >= 4)
4563 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4564 else if (context->length >= 2)
4565 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4566 #if defined COMPILE_PCRE8
4567 else if (context->length >= 1)
4568 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4569 #endif /* COMPILE_PCRE8 */
4570 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4571
4572 switch(context->ucharptr)
4573 {
4574 case 4 / sizeof(pcre_uchar):
4575 if (context->oc.asint != 0)
4576 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4577 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4578 break;
4579
4580 case 2 / sizeof(pcre_uchar):
4581 if (context->oc.asushort != 0)
4582 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4584 break;
4585
4586 #ifdef COMPILE_PCRE8
4587 case 1:
4588 if (context->oc.asbyte != 0)
4589 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4590 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4591 break;
4592 #endif
4593
4594 default:
4595 SLJIT_ASSERT_STOP();
4596 break;
4597 }
4598 context->ucharptr = 0;
4599 }
4600
4601 #else
4602
4603 /* Unaligned read is unsupported or in 32 bit mode. */
4604 if (context->length >= 1)
4605 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4606
4607 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4608
4609 if (othercasebit != 0 && othercasechar == cc)
4610 {
4611 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4612 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4613 }
4614 else
4615 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4616
4617 #endif
4618
4619 cc++;
4620 #ifdef SUPPORT_UTF
4621 utflength--;
4622 }
4623 while (utflength > 0);
4624 #endif
4625
4626 return cc;
4627 }
4628
4629 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4630
/* Emits the instruction which re-bases "typereg" from the current
"typeoffset" to "value" (nothing is emitted when they are equal), and then
records "value" as the new typeoffset. The macro relies on "typereg" and
"typeoffset" being in scope at the point of use. It is wrapped in
do { } while (0) so that it expands to exactly one statement and is safe
inside an unbraced if / else. "value" is evaluated more than once, so it
must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* The same for the character held in TMP1 and the "charoffset" variable,
which has to be in scope at the point of use. "value" is evaluated more
than once, so it must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4650
4651 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4652 {
4653 DEFINE_COMPILER;
4654 jump_list *found = NULL;
4655 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4656 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4657 struct sljit_jump *jump = NULL;
4658 pcre_uchar *ccbegin;
4659 int compares, invertcmp, numberofcmps;
4660 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4661 BOOL utf = common->utf;
4662 #endif
4663
4664 #ifdef SUPPORT_UCP
4665 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4666 BOOL charsaved = FALSE;
4667 int typereg = TMP1, scriptreg = TMP1;
4668 const pcre_uint32 *other_cases;
4669 sljit_uw typeoffset;
4670 #endif
4671
4672 /* Scanning the necessary info. */
4673 cc++;
4674 ccbegin = cc;
4675 compares = 0;
4676 if (cc[-1] & XCL_MAP)
4677 {
4678 min = 0;
4679 cc += 32 / sizeof(pcre_uchar);
4680 }
4681
4682 while (*cc != XCL_END)
4683 {
4684 compares++;
4685 if (*cc == XCL_SINGLE)
4686 {
4687 cc ++;
4688 GETCHARINCTEST(c, cc);
4689 if (c > max) max = c;
4690 if (c < min) min = c;
4691 #ifdef SUPPORT_UCP
4692 needschar = TRUE;
4693 #endif
4694 }
4695 else if (*cc == XCL_RANGE)
4696 {
4697 cc ++;
4698 GETCHARINCTEST(c, cc);
4699 if (c < min) min = c;
4700 GETCHARINCTEST(c, cc);
4701 if (c > max) max = c;
4702 #ifdef SUPPORT_UCP
4703 needschar = TRUE;
4704 #endif
4705 }
4706 #ifdef SUPPORT_UCP
4707 else
4708 {
4709 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4710 cc++;
4711 if (*cc == PT_CLIST)
4712 {
4713 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4714 while (*other_cases != NOTACHAR)
4715 {
4716 if (*other_cases > max) max = *other_cases;
4717 if (*other_cases < min) min = *other_cases;
4718 other_cases++;
4719 }
4720 }
4721 else
4722 {
4723 max = READ_CHAR_MAX;
4724 min = 0;
4725 }
4726
4727 switch(*cc)
4728 {
4729 case PT_ANY:
4730 break;
4731
4732 case PT_LAMP:
4733 case PT_GC:
4734 case PT_PC:
4735 case PT_ALNUM:
4736 needstype = TRUE;
4737 break;
4738
4739 case PT_SC:
4740 needsscript = TRUE;
4741 break;
4742
4743 case PT_SPACE:
4744 case PT_PXSPACE:
4745 case PT_WORD:
4746 case PT_PXGRAPH:
4747 case PT_PXPRINT:
4748 case PT_PXPUNCT:
4749 needstype = TRUE;
4750 needschar = TRUE;
4751 break;
4752
4753 case PT_CLIST:
4754 case PT_UCNC:
4755 needschar = TRUE;
4756 break;
4757
4758 default:
4759 SLJIT_ASSERT_STOP();
4760 break;
4761 }
4762 cc += 2;
4763 }
4764 #endif
4765 }
4766
4767 /* We are not necessary in utf mode even in 8 bit mode. */
4768 cc = ccbegin;
4769 detect_partial_match(common, backtracks);
4770 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4771
4772 if ((cc[-1] & XCL_HASPROP) == 0)
4773 {
4774 if ((cc[-1] & XCL_MAP) != 0)
4775 {
4776 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4777 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4778 {
4779 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4780 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4781 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4782 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4783 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4784 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4785 }
4786
4787 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4788 JUMPHERE(jump);
4789
4790 cc += 32 / sizeof(pcre_uchar);
4791 }
4792 else
4793 {
4794 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4795 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4796 }
4797 }
4798 else if ((cc[-1] & XCL_MAP) != 0)
4799 {
4800 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4801 #ifdef SUPPORT_UCP
4802 charsaved = TRUE;
4803 #endif
4804 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4805 {
4806 #ifdef COMPILE_PCRE8
4807 SLJIT_ASSERT(common->utf);
4808 #endif
4809 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4810
4811 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4812 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4813 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4814 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4815 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4816 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4817
4818 JUMPHERE(jump);
4819 }
4820
4821 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4822 cc += 32 / sizeof(pcre_uchar);
4823 }
4824
4825 #ifdef SUPPORT_UCP
4826 /* Simple register allocation. TMP1 is preferred if possible. */
4827 if (needstype || needsscript)
4828 {
4829 if (needschar && !charsaved)
4830 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4831 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4832 if (needschar)
4833 {
4834 if (needstype)
4835 {
4836 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4837 typereg = RETURN_ADDR;
4838 }
4839
4840 if (needsscript)
4841 scriptreg = TMP3;
4842 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4843 }
4844 else if (needstype && needsscript)
4845 scriptreg = TMP3;
4846 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4847
4848 if (needsscript)
4849 {
4850 if (scriptreg == TMP1)
4851 {
4852 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4853 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4854 }
4855 else
4856 {
4857 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4858 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4859 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4860 }
4861 }
4862 }
4863 #endif
4864
4865 /* Generating code. */
4866 charoffset = 0;
4867 numberofcmps = 0;
4868 #ifdef SUPPORT_UCP
4869 typeoffset = 0;
4870 #endif
4871
4872 while (*cc != XCL_END)
4873 {
4874 compares--;
4875 invertcmp = (compares == 0 && list != backtracks);
4876 jump = NULL;
4877
4878 if (*cc == XCL_SINGLE)
4879 {
4880 cc ++;
4881 GETCHARINCTEST(c, cc);
4882
4883 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4884 {
4885 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4886 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4887 numberofcmps++;
4888 }
4889 else if (numberofcmps > 0)
4890 {
4891 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4892 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4893 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4894 numberofcmps = 0;
4895 }
4896 else
4897 {
4898 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4899 numberofcmps = 0;
4900 }
4901 }
4902 else if (*cc == XCL_RANGE)
4903 {
4904 cc ++;
4905 GETCHARINCTEST(c, cc);
4906 SET_CHAR_OFFSET(c);
4907 GETCHARINCTEST(c, cc);
4908
4909 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4910 {
4911 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4912 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4913 numberofcmps++;
4914 }
4915 else if (numberofcmps > 0)
4916 {
4917 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4918 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4919 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4920 numberofcmps = 0;
4921 }
4922 else
4923 {
4924 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4925 numberofcmps = 0;
4926 }
4927 }
4928 #ifdef SUPPORT_UCP
4929 else
4930 {
4931 if (*cc == XCL_NOTPROP)
4932 invertcmp ^= 0x1;
4933 cc++;
4934 switch(*cc)
4935 {
4936 case PT_ANY:
4937 if (list != backtracks)
4938 {
4939 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4940 continue;
4941 }
4942 else if (cc[-1] == XCL_NOTPROP)
4943 continue;
4944 jump = JUMP(SLJIT_JUMP);
4945 break;
4946
4947 case PT_LAMP:
4948 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4949 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4950 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4951 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4952 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4953 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4954 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4955 break;
4956
4957 case PT_GC:
4958 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4959 SET_TYPE_OFFSET(c);
4960 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4961 break;
4962
4963 case PT_PC:
4964 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4965 break;
4966
4967 case PT_SC:
4968 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4969 break;
4970
4971 case PT_SPACE:
4972 case PT_PXSPACE:
4973 SET_CHAR_OFFSET(9);
4974 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4975 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4976
4977 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4978 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4979
4980 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4981 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4982
4983 SET_TYPE_OFFSET(ucp_Zl);
4984 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4985 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4986 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4987 break;
4988
4989 case PT_WORD:
4990 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4991 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4992 /* Fall through. */
4993
4994 case PT_ALNUM:
4995 SET_TYPE_OFFSET(ucp_Ll);
4996 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4997 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4998 SET_TYPE_OFFSET(ucp_Nd);
4999 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5000 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5001 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5002 break;
5003
5004 case PT_CLIST:
5005 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5006
5007 /* At least three characters are required.
5008 Otherwise this case would be handled by the normal code path. */
5009 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5010 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5011
5012 /* Optimizing character pairs, if their difference is power of 2. */
5013 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5014 {
5015 if (charoffset == 0)
5016 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5017 else
5018 {
5019 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5020 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5021 }
5022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5023 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5024 other_cases += 2;
5025 }
5026 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5027 {
5028 if (charoffset == 0)
5029 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5030 else
5031 {
5032 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5033 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5034 }
5035 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5036 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5037
5038 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5039 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5040
5041 other_cases += 3;
5042 }
5043 else
5044 {
5045 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5046 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5047 }
5048
5049 while (*other_cases != NOTACHAR)
5050 {
5051 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5052 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5053 }
5054 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5055 break;
5056
5057 case PT_UCNC:
5058 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5059 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5060 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5061 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5062 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5063 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5064
5065 SET_CHAR_OFFSET(0xa0);
5066 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5067 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5068 SET_CHAR_OFFSET(0);
5069 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5070 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5071 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5072 break;
5073
5074 case PT_PXGRAPH:
5075 /* C and Z groups are the farthest two groups. */
5076 SET_TYPE_OFFSET(ucp_Ll);
5077 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5078 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5079
5080 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5081
5082 /* In case of ucp_Cf, we overwrite the result. */
5083 SET_CHAR_OFFSET(0x2066);
5084 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5085 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5086
5087 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5088 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5089
5090 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5091 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5092
5093 JUMPHERE(jump);
5094 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5095 break;
5096
5097 case PT_PXPRINT:
5098 /* C and Z groups are the farthest two groups. */
5099 SET_TYPE_OFFSET(ucp_Ll);
5100 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5101 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5102
5103 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5104 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5105
5106 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5107
5108 /* In case of ucp_Cf, we overwrite the result. */
5109 SET_CHAR_OFFSET(0x2066);
5110 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5112
5113 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5114 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5115
5116 JUMPHERE(jump);
5117 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5118 break;
5119
5120 case PT_PXPUNCT:
5121 SET_TYPE_OFFSET(ucp_Sc);
5122 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5123 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5124
5125 SET_CHAR_OFFSET(0);
5126 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5127 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5128
5129 SET_TYPE_OFFSET(ucp_Pc);
5130 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5131 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5132 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5133 break;
5134 }
5135 cc += 2;
5136 }
5137 #endif
5138
5139 if (jump != NULL)
5140 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5141 }
5142
5143 if (found != NULL)
5144 set_jumps(found, LABEL());
5145 }
5146
5147 #undef SET_TYPE_OFFSET
5148 #undef SET_CHAR_OFFSET
5149
5150 #endif
5151
/* Emits the matching-path code for one opcode that is handled as a single
   item (anchors, character types, single characters, classes, OP_REVERSE).

   type        the opcode to compile.
   cc          points at the opcode's argument data (the byte after the opcode).
   backtracks  list that receives every jump taken when the item fails.

   Returns the pattern pointer advanced past the argument data consumed by
   the opcode (cc itself for opcodes without arguments). An opcode that is
   not listed in the switch reaches SLJIT_ASSERT_STOP(). */
5152 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5153 {
5154 DEFINE_COMPILER;
5155 int length;
5156 unsigned int c, oc, bit;
5157 compare_context context;
5158 struct sljit_jump *jump[4];
5159 jump_list *end_list;
5160 #ifdef SUPPORT_UTF
5161 struct sljit_label *label;
5162 #ifdef SUPPORT_UCP
5163 pcre_uchar propdata[5];
5164 #endif
5165 #endif /* SUPPORT_UTF */
5166
5167 switch(type)
5168 {
/* Backtrack unless STR_PTR equals the 'begin' field of jit_arguments. */
5169 case OP_SOD:
5170 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5171 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5172 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5173 return cc;
5174
/* Backtrack unless STR_PTR equals the 'str' field of jit_arguments. */
5175 case OP_SOM:
5176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5178 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5179 return cc;
5180
/* The shared wordboundary routine is invoked with a fast call; the flag it
   leaves behind selects whether to backtrack. */
5181 case OP_NOT_WORD_BOUNDARY:
5182 case OP_WORD_BOUNDARY:
5183 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5184 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5185 return cc;
5186
5187 case OP_NOT_DIGIT:
5188 case OP_DIGIT:
5189 /* Digits are usually 0-9, so it is worth to optimize them. */
5190 detect_partial_match(common, backtracks);
5191 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5192 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5193 read_char7_type(common, type == OP_NOT_DIGIT);
5194 else
5195 #endif
5196 read_char8_type(common, type == OP_NOT_DIGIT);
5197 /* Flip the starting bit in the negative case. */
5198 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5199 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5200 return cc;
5201
/* Same scheme as the digit case, testing the ctype_space bit. */
5202 case OP_NOT_WHITESPACE:
5203 case OP_WHITESPACE:
5204 detect_partial_match(common, backtracks);
5205 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5206 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5207 read_char7_type(common, type == OP_NOT_WHITESPACE);
5208 else
5209 #endif
5210 read_char8_type(common, type == OP_NOT_WHITESPACE);
5211 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5212 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5213 return cc;
5214
/* Same scheme as the digit case, testing the ctype_word bit. */
5215 case OP_NOT_WORDCHAR:
5216 case OP_WORDCHAR:
5217 detect_partial_match(common, backtracks);
5218 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5219 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5220 read_char7_type(common, type == OP_NOT_WORDCHAR);
5221 else
5222 #endif
5223 read_char8_type(common, type == OP_NOT_WORDCHAR);
5224 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5225 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5226 return cc;
5227
/* Reads one character and backtracks if it is a newline. For a fixed
   two-unit newline (newline > 255) the first unit is compared with the
   character just read and the second unit is only peeked at STR_PTR. */
5228 case OP_ANY:
5229 detect_partial_match(common, backtracks);
5230 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5231 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5232 {
5233 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5234 end_list = NULL;
5235 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5236 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5237 else
5238 check_str_end(common, &end_list);
5239
5240 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5241 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5242 set_jumps(end_list, LABEL());
5243 JUMPHERE(jump[0]);
5244 }
5245 else
5246 check_newlinechar(common, common->nltype, backtracks, TRUE);
5247 return cc;
5248
/* Skips one character without inspecting its value. In UTF mode the extra
   code units of a multi-unit character are skipped as well: the 8-bit
   build adds the utf8_table4 entry for lead units >= 0xc0, the 16-bit
   build adds one more unit when (unit & 0xfc00) == 0xd800. */
5249 case OP_ALLANY:
5250 detect_partial_match(common, backtracks);
5251 #ifdef SUPPORT_UTF
5252 if (common->utf)
5253 {
5254 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5255 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5256 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5257 #if defined COMPILE_PCRE8
5258 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5259 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5260 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5261 #elif defined COMPILE_PCRE16
5262 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5263 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5264 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5265 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5266 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5267 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5268 #endif
5269 JUMPHERE(jump[0]);
5270 #endif /* COMPILE_PCRE[8|16] */
5271 return cc;
5272 }
5273 #endif
5274 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5275 return cc;
5276
/* Advances STR_PTR by exactly one code unit. */
5277 case OP_ANYBYTE:
5278 detect_partial_match(common, backtracks);
5279 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5280 return cc;
5281
5282 #ifdef SUPPORT_UTF
5283 #ifdef SUPPORT_UCP
/* A property test is wrapped into a temporary one-item extended class
   (propdata) so that the xclass compiler can be reused. */
5284 case OP_NOTPROP:
5285 case OP_PROP:
5286 propdata[0] = XCL_HASPROP;
5287 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5288 propdata[2] = cc[0];
5289 propdata[3] = cc[1];
5290 propdata[4] = XCL_END;
5291 compile_xclass_matchingpath(common, propdata, backtracks);
5292 return cc + 2;
5293 #endif
5294 #endif
5295
/* A CR that is directly followed by NL consumes both; a CR that is not
   followed by NL (or is at the end of the subject) matches on its own.
   Any other character is tested by check_newlinechar(). */
5296 case OP_ANYNL:
5297 detect_partial_match(common, backtracks);
5298 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5299 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5300 /* We don't need to handle soft partial matching case. */
5301 end_list = NULL;
5302 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5303 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5304 else
5305 check_str_end(common, &end_list);
5306 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5307 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5309 jump[2] = JUMP(SLJIT_JUMP);
5310 JUMPHERE(jump[0]);
5311 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5312 set_jumps(end_list, LABEL());
5313 JUMPHERE(jump[1]);
5314 JUMPHERE(jump[2]);
5315 return cc;
5316
/* The shared hspace routine does the classification; its flag result
   decides whether to backtrack. */
5317 case OP_NOT_HSPACE:
5318 case OP_HSPACE:
5319 detect_partial_match(common, backtracks);
5320 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5321 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5322 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5323 return cc;
5324
/* Same scheme as the hspace case, using the shared vspace routine. */
5325 case OP_NOT_VSPACE:
5326 case OP_VSPACE:
5327 detect_partial_match(common, backtracks);
5328 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5329 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5330 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5331 return cc;
5332
5333 #ifdef SUPPORT_UCP
/* Reads the first character, then keeps consuming characters while the bit
   numbered by the next character's gbprop is set in the ucp_gbtable entry
   of the previous character's gbprop. STACK_TOP is borrowed to hold the
   previous gbprop and is saved in / restored from LOCALS0. When the loop
   stops on a character, STR_PTR is reset to the start of that character
   (kept in TMP3). */
5334 case OP_EXTUNI:
5335 detect_partial_match(common, backtracks);
5336 read_char(common);
5337 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5339 /* Optimize register allocation: use a real register. */
5340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5341 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5342
5343 label = LABEL();
5344 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5345 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5346 read_char(common);
5347 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5348 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5349 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5350
5351 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5352 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5353 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5354 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5355 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5356 JUMPTO(SLJIT_C_NOT_ZERO, label);
5357
5358 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5359 JUMPHERE(jump[0]);
5360 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5361
5362 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5363 {
5364 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5365 /* Since we successfully read a char above, partial matching must occur. */
5366 check_partial(common, TRUE);
5367 JUMPHERE(jump[0]);
5368 }
5369 return cc;
5370 #endif
5371
/* Succeeds at STR_END, or when only a newline sequence remains between
   STR_PTR and STR_END. jump[0] skips all checks when already at the end. */
5372 case OP_EODN:
5373 /* Requires rather complex checks. */
5374 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5375 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5376 {
5377 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5378 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5379 if (common->mode == JIT_COMPILE)
5380 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5381 else
5382 {
5383 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5384 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5385 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5386 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5387 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5388 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5389 check_partial(common, TRUE);
5390 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5391 JUMPHERE(jump[1]);
5392 }
5393 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5394 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5395 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5396 }
5397 else if (common->nltype == NLTYPE_FIXED)
5398 {
5399 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5400 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5401 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5402 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5403 }
5404 else
5405 {
5406 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5407 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5408 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5409 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5410 jump[2] = JUMP(SLJIT_C_GREATER);
5411 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5412 /* Equal. */
5413 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5414 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5415 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5416
5417 JUMPHERE(jump[1]);
5418 if (common->nltype == NLTYPE_ANYCRLF)
5419 {
5420 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5421 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5422 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5423 }
5424 else
5425 {
/* STR_PTR is preserved in LOCALS1 because read_char_range() advances it. */
5426 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5427 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5428 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5429 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5430 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5431 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5432 }
5433 JUMPHERE(jump[2]);
5434 JUMPHERE(jump[3]);
5435 }
5436 JUMPHERE(jump[0]);
5437 check_partial(common, FALSE);
5438 return cc;
5439
/* Backtrack while STR_PTR is still before STR_END. */
5440 case OP_EOD:
5441 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5442 check_partial(common, FALSE);
5443 return cc;
5444
/* Backtrack when STR_PTR is past jit_arguments.begin or when the notbol
   field is non-zero. */
5445 case OP_CIRC:
5446 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5448 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5449 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5451 return cc;
5452
/* At jit_arguments.begin this behaves like OP_CIRC. Elsewhere it requires
   that STR_PTR is before STR_END and that a newline immediately precedes
   STR_PTR. */
5453 case OP_CIRCM:
5454 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5456 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5457 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5458 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5459 jump[0] = JUMP(SLJIT_JUMP);
5460 JUMPHERE(jump[1]);
5461
5462 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5463 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5464 {
5465 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5466 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5467 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5468 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5469 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5470 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5471 }
5472 else
5473 {
5474 skip_char_back(common);
5475 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5476 check_newlinechar(common, common->nltype, backtracks, FALSE);
5477 }
5478 JUMPHERE(jump[0]);
5479 return cc;
5480
/* Backtrack when the noteol field is set; otherwise behave like OP_EODN,
   or like OP_EOD when common->endonly is set. */
5481 case OP_DOLL:
5482 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5483 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5484 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5485
5486 if (!common->endonly)
5487 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5488 else
5489 {
5490 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5491 check_partial(common, FALSE);
5492 }
5493 return cc;
5494
/* At STR_END the noteol field decides; before STR_END a newline must
   start at STR_PTR (it is inspected but not consumed). */
5495 case OP_DOLLM:
5496 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5497 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5498 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5499 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5500 check_partial(common, FALSE);
5501 jump[0] = JUMP(SLJIT_JUMP);
5502 JUMPHERE(jump[1]);
5503
5504 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5505 {
5506 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5507 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5508 if (common->mode == JIT_COMPILE)
5509 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5510 else
5511 {
5512 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5513 /* STR_PTR = STR_END - IN_UCHARS(1) */
5514 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5515 check_partial(common, TRUE);
5516 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5517 JUMPHERE(jump[1]);
5518 }
5519
5520 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5521 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5522 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5523 }
5524 else
5525 {
5526 peek_char(common, common->nlmax);
5527 check_newlinechar(common, common->nltype, backtracks, FALSE);
5528 }
5529 JUMPHERE(jump[0]);
5530 return cc;
5531
/* A single literal character. In JIT_COMPILE mode, when the opcode is
   OP_CHAR, or the character has no other case, or
   char_get_othercase_bit() is non-zero, the code units are compared by
   byte_sequence_compare(). Otherwise the character is read and compared
   as a whole. */
5532 case OP_CHAR:
5533 case OP_CHARI:
5534 length = 1;
5535 #ifdef SUPPORT_UTF
5536 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5537 #endif
5538 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5539 {
5540 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5541 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5542
5543 context.length = IN_UCHARS(length);
5544 context.sourcereg = -1;
5545 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5546 context.ucharptr = 0;
5547 #endif
5548 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5549 }
5550
5551 detect_partial_match(common, backtracks);
5552 #ifdef SUPPORT_UTF
5553 if (common->utf)
5554 {
5555 GETCHAR(c, cc);
5556 }
5557 else
5558 #endif
5559 c = *cc;
5560
5561 if (type == OP_CHAR || !char_has_othercase(common, cc))
5562 {
5563 read_char_range(common, c, c, FALSE);
5564 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5565 return cc + length;
5566 }
5567 oc = char_othercase(common, c);
5568 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5569 bit = c ^ oc;
/* When the two cases differ in a single bit, force that bit on and do one
   comparison; otherwise compare against both values. */
5570 if (is_powerof2(bit))
5571 {
5572 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5573 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5574 return cc + length;
5575 }
5576 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5577 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5578 JUMPHERE(jump[0]);
5579 return cc + length;
5580
/* Any single character except the given one (and, for OP_NOTI, except its
   other case). */
5581 case OP_NOT:
5582 case OP_NOTI:
5583 detect_partial_match(common, backtracks);
5584 length = 1;
5585 #ifdef SUPPORT_UTF
5586 if (common->utf)
5587 {
5588 #ifdef COMPILE_PCRE8
5589 c = *cc;
/* When the excluded character is below 128 only the first code unit of
   the subject character needs to be compared; the remaining units of a
   multi-unit character are then skipped. */
5590 if (c < 128)
5591 {
5592 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5593 if (type == OP_NOT || !char_has_othercase(common, cc))
5594 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5595 else
5596 {
5597 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5598 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5599 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5600 }
5601 /* Skip the variable-length character. */
5602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5603 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5604 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5605 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5606 JUMPHERE(jump[0]);
5607 return cc + 1;
5608 }
5609 else
5610 #endif /* COMPILE_PCRE8 */
5611 {
5612 GETCHARLEN(c, cc, length);
5613 }
5614 }
5615 else
5616 #endif /* SUPPORT_UTF */
5617 c = *cc;
5618
5619 if (type == OP_NOT || !char_has_othercase(common, cc))
5620 {
5621 read_char_range(common, c, c, TRUE);
5622 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5623 }
5624 else
5625 {
5626 oc = char_othercase(common, c);
5627 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5628 bit = c ^ oc;
5629 if (is_powerof2(bit))
5630 {
5631 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5632 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5633 }
5634 else
5635 {
5636 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5637 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5638 }
5639 }
5640 return cc + length;
5641
/* Character class given as a 32-byte bitmap at cc. check_class_ranges()
   is tried first; when it returns FALSE the bitmap is tested directly:
   byte (ch >> 3), bit (ch & 7). Characters above the bitmap range fail
   for OP_CLASS and bypass the bitmap test for OP_NCLASS (jump[0]). */
5642 case OP_CLASS:
5643 case OP_NCLASS:
5644 detect_partial_match(common, backtracks);
5645
5646 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5647 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5648 read_char_range(common, 0, bit, type == OP_NCLASS);
5649 #else
5650 read_char_range(common, 0, 255, type == OP_NCLASS);
5651 #endif
5652
5653 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5654 return cc + 32 / sizeof(pcre_uchar);
5655
5656 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5657 jump[0] = NULL;
5658 if (common->utf)
5659 {
5660 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5661 if (type == OP_CLASS)
5662 {
5663 add_jump(compiler, backtracks, jump[0]);
5664 jump[0] = NULL;
5665 }
5666 }
5667 #elif !defined COMPILE_PCRE8
5668 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5669 if (type == OP_CLASS)
5670 {
5671 add_jump(compiler, backtracks, jump[0]);
5672 jump[0] = NULL;
5673 }
5674 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5675
5676 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5677 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5678 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5679 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5680 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5681 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5682
5683 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5684 if (jump[0] != NULL)
5685 JUMPHERE(jump[0]);
5686 #endif
5687
5688 return cc + 32 / sizeof(pcre_uchar);
5689
5690 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The class data follows a LINK_SIZE field; GET(cc, 0) is used to step
   over the whole item. */
5691 case OP_XCLASS:
5692 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5693 return cc + GET(cc, 0) - 1;
5694 #endif
5695
/* Moves STR_PTR back by 'length' (characters in UTF mode, code units
   otherwise) and backtracks if that would go before jit_arguments.begin. */
5696 case OP_REVERSE:
5697 length = GET(cc, 0);
5698 if (length == 0)
5699 return cc + LINK_SIZE;
5700 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5701 #ifdef SUPPORT_UTF
5702 if (common->utf)
5703 {
5704 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5705 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5706 label = LABEL();
5707 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5708 skip_char_back(common);
5709 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5710 JUMPTO(SLJIT_C_NOT_ZERO, label);
5711 }
5712 else
5713 #endif
5714 {
5715 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5716 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5717 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5718 }
5719 check_start_used_ptr(common);
5720 return cc + LINK_SIZE;
5721 }
/* Reached only for an opcode that has no case above. */
5722 SLJIT_ASSERT_STOP();
5723 return cc;
5724 }
5725
5726 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5727 {
5728 /* This function consumes at least one input character. */
5729 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5730 DEFINE_COMPILER;
5731 pcre_uchar *ccbegin = cc;
5732 compare_context context;
5733 int size;
5734
5735 context.length = 0;
5736 do
5737 {
5738 if (cc >= ccend)
5739 break;
5740
5741 if (*cc == OP_CHAR)
5742 {
5743 size = 1;
5744 #ifdef SUPPORT_UTF
5745 if (common->utf && HAS_EXTRALEN(cc[1]))
5746 size += GET_EXTRALEN(cc[1]);
5747 #endif
5748 }
5749 else if (*cc == OP_CHARI)
5750 {
5751 size = 1;
5752 #ifdef SUPPORT_UTF
5753 if (common->utf)
5754 {
5755 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5756 size = 0;
5757 else if (HAS_EXTRALEN(cc[1]))
5758 size += GET_EXTRALEN(cc[1]);
5759 }
5760 else
5761 #endif
5762 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5763 size = 0;
5764 }
5765 else
5766 size = 0;
5767
5768 cc += 1 + size;
5769 context.length += IN_UCHARS(size);
5770 }
5771 while (size > 0 && context.length <= 128);
5772
5773 cc = ccbegin;
5774 if (context.length > 0)
5775 {
5776 /* We have a fixed-length byte sequence. */
5777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5778 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5779
5780 context.sourcereg = -1;
5781 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5782 context.ucharptr = 0;
5783 #endif
5784 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5785 return cc;
5786 }
5787
5788 /* A non-fixed length character will be checked if length == 0. */
5789 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5790 }
5791
5792 /* Forward declarations. */
5793 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5794 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5795
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
   clears it, stores `ccstart` in its cc field and pushes it on top of
   parent's chain (the previous top becomes its prev link).

   If the compiler reports an error after the allocation, the enclosing
   function returns `error`.

   Relies on `compiler`, `backtrack` and `parent` being in scope at the
   expansion site. `size` is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5808
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
   backtrack record of `size` bytes with sljit_alloc_memory(), clears it,
   stores `ccstart` in its cc field and pushes it on top of parent's chain.

   If the compiler reports an error after the allocation, the enclosing
   function returns without a value.

   Relies on `compiler`, `backtrack` and `parent` being in scope at the
   expansion site. `size` is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5821
/* Views the in-scope `backtrack` pointer as a pointer to the given record
   type. */
5822 #define BACKTRACK_AS(type) ((type *)backtrack)
5823
5824 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5825 {
5826 /* The OVECTOR offset goes to TMP2. */
5827 DEFINE_COMPILER;
5828 int count = GET2(cc, 1 + IMM2_SIZE);
5829 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5830 unsigned int offset;
5831 jump_list *found = NULL;
5832
5833 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5834
5835 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5836
5837 count--;
5838 while (count-- > 0)
5839 {
5840 offset = GET2(slot, 0) << 1;
5841 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5842 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5843 slot += common->name_entry_size;
5844 }
5845
5846 offset = GET2(slot, 0) << 1;
5847 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5848 if (backtracks != NULL && !common->jscript_compat)
5849 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5850
5851 set_jumps(found, LABEL());
5852 }
5853
5854 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5855 {
5856 DEFINE_COMPILER;
5857 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5858 int offset = 0;
5859 struct sljit_jump *jump = NULL;
5860 struct sljit_jump *partial;
5861 struct sljit_jump *nopartial;
5862
5863 if (ref)
5864 {
5865 offset = GET2(cc, 1) << 1;
5866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5867 /* OVECTOR(1) contains the "string begin - 1" constant. */
5868 if (withchecks && !common->jscript_compat)
5869 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5870 }
5871 else
5872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5873
5874 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5875 if (common->utf && *cc == OP_REFI)
5876 {
5877 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5878 if (ref)
5879 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5880 else
5881 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5882
5883 if (withchecks)
5884 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5885
5886 /* Needed to save important temporary registers. */
5887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5888 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5889 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5890 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5891 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5892 if (common->mode == JIT_COMPILE)
5893 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5894 else
5895 {
5896 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5897 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5898 check_partial(common, FALSE);
5899 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5900 JUMPHERE(nopartial);
5901 }
5902 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5903 }
5904 else
5905 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5906 {
5907 if (ref)
5908 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5909 else
5910 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5911
5912 if (withchecks)
5913 jump = JUMP(SLJIT_C_ZERO);
5914
5915 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5916 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5917 if (common->mode == JIT_COMPILE)
5918 add_jump(compiler, backtracks, partial);
5919
5920 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5921 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5922
5923 if (common->mode != JIT_COMPILE)
5924 {
5925 nopartial = JUMP(SLJIT_JUMP);
5926 JUMPHERE(partial);
5927 /* TMP2 -= STR_END - STR_PTR */
5928 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5929 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5930 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5931 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5932 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5933 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5934 JUMPHERE(partial);
5935 check_partial(common, FALSE);
5936 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5937 JUMPHERE(nopartial);
5938 }
5939 }
5940
5941 if (jump != NULL)
5942 {
5943 if (emptyfail)
5944 add_jump(compiler, backtracks, jump);
5945 else
5946 JUMPHERE(jump);
5947 }
5948 }
5949
5950 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5951 {
5952 DEFINE_COMPILER;
5953 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5954 backtrack_common *backtrack;
5955 pcre_uchar type;
5956 int offset = 0;
5957 struct sljit_label *label;
5958 struct sljit_jump *zerolength;
5959 struct sljit_jump *jump = NULL;
5960 pcre_uchar *ccbegin = cc;
5961 int min = 0, max = 0;
5962 BOOL minimize;
5963
5964 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5965
5966 if (ref)
5967 offset = GET2(cc, 1) << 1;
5968 else
5969 cc += IMM2_SIZE;
5970 type = cc[1 + IMM2_SIZE];
5971
5972 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5973 minimize = (type & 0x1) != 0;
5974 switch(type)
5975 {
5976 case OP_CRSTAR:
5977 case OP_CRMINSTAR:
5978 min = 0;
5979 max = 0;
5980 cc += 1 + IMM2_SIZE + 1;
5981 break;
5982 case OP_CRPLUS:
5983 case OP_CRMINPLUS:
5984 min = 1;
5985 max = 0;
5986 cc += 1 + IMM2_SIZE + 1;
5987 break;
5988 case OP_CRQUERY:
5989 case OP_CRMINQUERY:
5990 min = 0;
5991 max = 1;
5992 cc += 1 + IMM2_SIZE + 1;
5993 break;
5994 case OP_CRRANGE:
5995 case OP_CRMINRANGE:
5996 min = GET2(cc, 1 + IMM2_SIZE + 1);
5997 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5998 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5999 break;
6000 default:
6001 SLJIT_ASSERT_STOP();
6002 break;
6003 }
6004
6005 if (!minimize)
6006 {
6007 if (min == 0)
6008 {
6009 allocate_stack(common, 2);
6010 if (ref)
6011 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6012 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6013 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6014 /* Temporary release of STR_PTR. */
6015 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6016 /* Handles both invalid and empty cases. Since the minimum repeat,
6017 is zero the invalid case is basically the same as an empty case. */
6018 if (ref)
6019 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6020 else
6021 {
6022 compile_dnref_search(common, ccbegin, NULL);
6023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6025 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6026 }
6027 /* Restore if not zero length. */
6028 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6029 }
6030 else
6031 {
6032 allocate_stack(common, 1);
6033 if (ref)
6034 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6035 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6036 if (ref)
6037 {
6038 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6039 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6040 }
6041 else
6042 {
6043 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6044 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6046 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6047 }
6048 }
6049
6050 if (min > 1 || max > 1)
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6052
6053 label = LABEL();
6054 if (!ref)
6055 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6056 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6057
6058 if (min > 1 || max > 1)
6059 {
6060 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6061 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6062 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6063 if (min > 1)
6064 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
6065 if (max > 1)
6066 {
6067 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6068 allocate_stack(common, 1);
6069 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6070 JUMPTO(SLJIT_JUMP, label);
6071 JUMPHERE(jump);
6072 }
6073 }
6074
6075 if (max == 0)
6076 {
6077 /* Includes min > 1 case as well. */
6078 allocate_stack(common, 1);
6079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6080 JUMPTO(SLJIT_JUMP, label);
6081 }
6082
6083 JUMPHERE(zerolength);
6084 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6085
6086 count_match(common);
6087 return cc;
6088 }
6089
6090 allocate_stack(common, ref ? 2 : 3);
6091 if (ref)
6092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6094 if (type != OP_CRMINSTAR)
6095 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6096
6097 if (min == 0)
6098 {
6099 /* Handles both invalid and empty cases. Since the minimum repeat,
6100 is zero the invalid case is basically the same as an empty case. */
6101 if (ref)
6102 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6103 else
6104 {
6105 compile_dnref_search(common, ccbegin, NULL);
6106 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6107 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6108 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6109 }
6110 /* Length is non-zero, we can match real repeats. */
6111 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6112 jump = JUMP(SLJIT_JUMP);
6113 }
6114 else
6115 {
6116 if (ref)
6117 {
6118 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6119 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6120 }
6121 else
6122 {
6123 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6124 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6125 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6126 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6127 }
6128 }
6129
6130 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6131 if (max > 0)
6132 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6133
6134 if (!ref)
6135 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6136 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6137 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6138
6139 if (min > 1)
6140 {
6141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6142 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6144 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6145 }
6146 else if (max > 0)
6147 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6148
6149 if (jump != NULL)
6150 JUMPHERE(jump);
6151 JUMPHERE(zerolength);
6152
6153 count_match(common);
6154 return cc;
6155 }
6156
6157 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6158 {
6159 DEFINE_COMPILER;
6160 backtrack_common *backtrack;
6161 recurse_entry *entry = common->entries;
6162 recurse_entry *prev = NULL;
6163 sljit_sw start = GET(cc, 1);
6164 pcre_uchar *start_cc;
6165 BOOL needs_control_head;
6166
6167 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6168
6169 /* Inlining simple patterns. */
6170 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6171 {
6172 start_cc = common->start + start;
6173 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6174 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6175 return cc + 1 + LINK_SIZE;
6176 }
6177
6178 while (entry != NULL)
6179 {
6180 if (entry->start == start)
6181 break;
6182 prev = entry;
6183 entry = entry->next;
6184 }
6185
6186 if (entry == NULL)
6187 {
6188 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6189 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6190 return NULL;
6191 entry->next = NULL;
6192 entry->entry = NULL;
6193 entry->calls = NULL;
6194 entry->start = start;
6195
6196 if (prev != NULL)
6197 prev->next = entry;
6198 else
6199 common->entries = entry;
6200 }
6201
6202 if (common->has_set_som && common->mark_ptr != 0)
6203 {
6204 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6205 allocate_stack(common, 2);
6206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6209 }
6210 else if (common->has_set_som || common->mark_ptr != 0)
6211 {
6212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6213 allocate_stack(common, 1);
6214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6215 }
6216
6217 if (entry->entry == NULL)
6218 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6219 else
6220 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6221 /* Leave if the match is failed. */
6222 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6223 return cc + 1 + LINK_SIZE;
6224 }
6225
6226 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6227 {
6228 const pcre_uchar *begin = arguments->begin;
6229 int *offset_vector = arguments->offsets;
6230 int offset_count = arguments->offset_count;
6231 int i;
6232
6233 if (PUBL(callout) == NULL)
6234 return 0;
6235
6236 callout_block->version = 2;
6237 callout_block->callout_data = arguments->callout_data;
6238
6239 /* Offsets in subject. */
6240 callout_block->subject_length = arguments->end - arguments->begin;
6241 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6242 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6243 #if defined COMPILE_PCRE8
6244 callout_block->subject = (PCRE_SPTR)begin;
6245 #elif defined COMPILE_PCRE16
6246 callout_block->subject = (PCRE_SPTR16)begin;
6247 #elif defined COMPILE_PCRE32
6248 callout_block->subject = (PCRE_SPTR32)begin;
6249 #endif
6250
6251 /* Convert and copy the JIT offset vector to the offset_vector array. */
6252 callout_block->capture_top = 0;
6253 callout_block->offset_vector = offset_vector;
6254 for (i = 2; i < offset_count; i += 2)
6255 {
6256 offset_vector[i] = jit_ovector[i] - begin;
6257 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6258 if (jit_ovector[i] >= begin)
6259 callout_block->capture_top = i;
6260 }
6261
6262 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6263 if (offset_count > 0)
6264 offset_vector[0] = -1;
6265 if (offset_count > 1)
6266 offset_vector[1] = -1;
6267 return (*PUBL(callout))(callout_block);
6268 }
6269
/* Size of the callout block in bytes, rounded up to a multiple of 8. */
#define CALLOUT_ARG_SIZE (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block field "arg", measured from the address that
lies CALLOUT_ARG_SIZE bytes after the start of the block (the result is
therefore never positive). */
#define CALLOUT_ARG_OFFSET(arg) (SLJIT_OFFSETOF(PUBL(callout_block), arg) - CALLOUT_ARG_SIZE)
6276
6277 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6278 {
6279 DEFINE_COMPILER;
6280 backtrack_common *backtrack;
6281
6282 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6283
6284 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6285
6286 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6287 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6288 SLJIT_ASSERT(common->capture_last_ptr != 0);
6289 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6290 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6291
6292 /* These pointer sized fields temporarly stores internal variables. */
6293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6296
6297 if (common->mark_ptr != 0)
6298 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6299 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6300 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6302
6303 /* Needed to save important temporary registers. */
6304 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6305 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6306 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6307 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6308 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6309 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6310 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6311
6312 /* Check return value. */
6313 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6314 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6315 if (common->forced_quit_label == NULL)
6316 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6317 else
6318 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6319 return cc + 2 + 2 * LINK_SIZE;
6320 }
6321
/* The callout block layout helpers are not used beyond this point. */
#undef CALLOUT_ARG_OFFSET
#undef CALLOUT_ARG_SIZE
6324
6325 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6326 {
6327 DEFINE_COMPILER;
6328 int framesize;
6329 int extrasize;
6330 BOOL needs_control_head;
6331 int private_data_ptr;
6332 backtrack_common altbacktrack;
6333 pcre_uchar *ccbegin;
6334 pcre_uchar opcode;
6335 pcre_uchar bra = OP_BRA;
6336 jump_list *tmp = NULL;
6337 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6338 jump_list **found;
6339 /* Saving previous accept variables. */
6340 BOOL save_local_exit = common->local_exit;
6341 BOOL save_positive_assert = common->positive_assert;
6342 then_trap_backtrack *save_then_trap = common->then_trap;
6343 struct sljit_label *save_quit_label = common->quit_label;
6344 struct sljit_label *save_accept_label = common->accept_label;
6345 jump_list *save_quit = common->quit;
6346 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6347 jump_list *save_accept = common->accept;
6348 struct sljit_jump *jump;
6349 struct sljit_jump *brajump = NULL;
6350
6351 /* Assert captures then. */
6352 common->then_trap = NULL;
6353
6354 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6355 {
6356 SLJIT_ASSERT(!conditional);
6357 bra = *cc;
6358 cc++;
6359 }
6360 private_data_ptr = PRIVATE_DATA(cc);
6361 SLJIT_ASSERT(private_data_ptr != 0);
6362 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6363 backtrack->framesize = framesize;
6364 backtrack->private_data_ptr = private_data_ptr;
6365 opcode = *cc;
6366 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6367 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6368 ccbegin = cc;
6369 cc += GET(cc, 1);
6370
6371 if (bra == OP_BRAMINZERO)
6372 {
6373 /* This is a braminzero backtrack path. */
6374 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6375 free_stack(common, 1);
6376 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6377 }
6378
6379 if (framesize < 0)
6380 {
6381 extrasize = needs_control_head ? 2 : 1;
6382 if (framesize == no_frame)
6383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6384 allocate_stack(common, extrasize);
6385 if (needs_control_head)
6386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6388 if (needs_control_head)
6389 {
6390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6392 }
6393 }
6394 else
6395 {
6396 extrasize = needs_control_head ? 3 : 2;
6397 allocate_stack(common, framesize + extrasize);
6398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6399 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6401 if (needs_control_head)
6402 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6404 if (needs_control_head)
6405 {
6406 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6407 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6409 }
6410 else
6411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6412 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6413 }
6414
6415 memset(&altbacktrack, 0, sizeof(backtrack_common));
6416 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6417 {
6418 /* Negative assert is stronger than positive assert. */
6419 common->local_exit = TRUE;
6420 common->quit_label = NULL;
6421 common->quit = NULL;
6422 common->positive_assert = FALSE;
6423 }
6424 else
6425 common->positive_assert = TRUE;
6426 common->positive_assert_quit = NULL;
6427
6428 while (1)
6429 {
6430 common->accept_label = NULL;
6431 common->accept = NULL;
6432 altbacktrack.top = NULL;
6433 altbacktrack.topbacktracks = NULL;
6434
6435 if (*ccbegin == OP_ALT)
6436 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6437
6438 altbacktrack.cc = ccbegin;
6439 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6440 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6441 {
6442 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6443 {
6444 common->local_exit = save_local_exit;
6445 common->quit_label = save_quit_label;
6446 common->quit = save_quit;
6447 }
6448 common->positive_assert = save_positive_assert;
6449 common->then_trap = save_then_trap;
6450 common->accept_label = save_accept_label;
6451 common->positive_assert_quit = save_positive_assert_quit;
6452 common->accept = save_accept;
6453 return NULL;
6454 }
6455 common->accept_label = LABEL();
6456 if (common->accept != NULL)
6457 set_jumps(common->accept, common->accept_label);
6458
6459 /* Reset stack. */
6460 if (framesize < 0)
6461 {
6462 if (framesize == no_frame)
6463 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6464 else
6465 free_stack(common, extrasize);
6466 if (needs_control_head)
6467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6468 }
6469 else
6470 {
6471 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6472 {
6473 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6474 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6475 if (needs_control_head)
6476 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6477 }
6478 else
6479 {
6480 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6481 if (needs_control_head)
6482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6483 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6484 }
6485 }
6486
6487 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6488 {
6489 /* We know that STR_PTR was stored on the top of the stack. */
6490 if (conditional)
6491 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6492 else if (bra == OP_BRAZERO)
6493 {
6494 if (framesize < 0)
6495 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6496 else
6497 {
6498 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6499 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6501 }
6502 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6504 }
6505 else if (framesize >= 0)
6506 {
6507 /* For OP_BRA and OP_BRAMINZERO. */
6508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6509 }
6510 }
6511 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6512
6513 compile_backtrackingpath(common, altbacktrack.top);
6514 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6515 {
6516 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6517 {
6518 common->local_exit = save_local_exit;
6519 common->quit_label = save_quit_label;
6520 common->quit = save_quit;
6521 }
6522 common->positive_assert = save_positive_assert;
6523 common->then_trap = save_then_trap;
6524 common->accept_label = save_accept_label;
6525 common->positive_assert_quit = save_positive_assert_quit;
6526 common->accept = save_accept;
6527 return NULL;
6528 }
6529 set_jumps(altbacktrack.topbacktracks, LABEL());
6530
6531 if (*cc != OP_ALT)
6532 break;
6533
6534 ccbegin = cc;
6535 cc += GET(cc, 1);
6536 }
6537
6538 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6539 {
6540 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6541 /* Makes the check less complicated below. */
6542 common->positive_assert_quit = common->quit;
6543 }
6544
6545 /* None of them matched. */
6546 if (common->positive_assert_quit != NULL)
6547 {
6548 jump = JUMP(SLJIT_JUMP);
6549 set_jumps(common->positive_assert_quit, LABEL());
6550 SLJIT_ASSERT(framesize != no_stack);
6551 if (framesize < 0)
6552 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6553 else
6554 {
6555 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6556 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6557 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6558 }
6559 JUMPHERE(jump);
6560 }
6561
6562 if (needs_control_head)
6563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6564
6565 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6566 {
6567 /* Assert is failed. */
6568 if (conditional || bra == OP_BRAZERO)
6569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6570
6571 if (framesize < 0)
6572 {
6573 /* The topmost item should be 0. */
6574 if (bra == OP_BRAZERO)
6575 {
6576 if (extrasize == 2)
6577 free_stack(common, 1);
6578 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6579 }
6580 else
6581 free_stack(common, extrasize);
6582 }
6583 else
6584 {
6585 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6586 /* The topmost item should be 0. */
6587 if (bra == OP_BRAZERO)
6588 {
6589 free_stack(common, framesize + extrasize - 1);
6590 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6591 }
6592 else
6593 free_stack(common, framesize + extrasize);
6594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6595 }
6596 jump = JUMP(SLJIT_JUMP);
6597 if (bra != OP_BRAZERO)
6598 add_jump(compiler, target, jump);
6599
6600 /* Assert is successful. */
6601 set_jumps(tmp, LABEL());
6602 if (framesize < 0)
6603 {
6604 /* We know that STR_PTR was stored on the top of the stack. */
6605 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6606 /* Keep the STR_PTR on the top of the stack. */
6607 if (bra == OP_BRAZERO)
6608 {
6609 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6610 if (extrasize == 2)
6611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6612 }
6613 else if (bra == OP_BRAMINZERO)
6614 {
6615 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6616 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6617 }
6618 }
6619 else
6620 {
6621 if (bra == OP_BRA)
6622 {
6623 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6624 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6625 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6626 }
6627 else
6628 {
6629 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6630 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6631 if (extrasize == 2)
6632 {
6633 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6634 if (bra == OP_BRAMINZERO)
6635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6636 }
6637 else
6638 {
6639 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6640 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6641 }
6642 }
6643 }
6644
6645 if (bra == OP_BRAZERO)
6646 {
6647 backtrack->matchingpath = LABEL();
6648 SET_LABEL(jump, backtrack->matchingpath);
6649 }
6650 else if (bra == OP_BRAMINZERO)
6651 {
6652 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6653 JUMPHERE(brajump);
6654 if (framesize >= 0)
6655 {
6656 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6657 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6658 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6659 }
6660 set_jumps(backtrack->common.topbacktracks, LABEL());
6661 }
6662 }
6663 else
6664 {
6665 /* AssertNot is successful. */
6666 if (framesize < 0)
6667 {
6668 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6669 if (bra != OP_BRA)
6670 {
6671 if (extrasize == 2)
6672 free_stack(common, 1);
6673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6674 }
6675 else
6676 free_stack(common, extrasize);
6677 }
6678 else
6679 {
6680 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6682 /* The topmost item should be 0. */
6683 if (bra != OP_BRA)
6684 {
6685 free_stack(common, framesize + extrasize - 1);
6686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6687 }
6688 else
6689 free_stack(common, framesize + extrasize);
6690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6691 }
6692
6693 if (bra == OP_BRAZERO)
6694 backtrack->matchingpath = LABEL();
6695 else if (bra == OP_BRAMINZERO)
6696 {
6697 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6698 JUMPHERE(brajump);
6699 }
6700
6701 if (bra != OP_BRA)
6702 {
6703 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6704 set_jumps(backtrack->common.topbacktracks, LABEL());
6705 backtrack->common.topbacktracks = NULL;
6706 }
6707 }
6708
6709 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6710 {
6711 common->local_exit = save_local_exit;
6712 common->quit_label = save_quit_label;
6713 common->quit = save_quit;
6714 }
6715 common->positive_assert = save_positive_assert;
6716 common->then_trap = save_then_trap;
6717 common->accept_label = save_accept_label;
6718 common->positive_assert_quit = save_positive_assert_quit;
6719 common->accept = save_accept;
6720 return cc + 1 + LINK_SIZE;
6721 }
6722
6723 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6724 {
6725 DEFINE_COMPILER;
6726 int stacksize;
6727
6728 if (framesize < 0)
6729 {
6730 if (framesize == no_frame)
6731 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6732 else
6733 {
6734 stacksize = needs_control_head ? 1 : 0;
6735 if (ket != OP_KET || has_alternatives)
6736 stacksize++;
6737 free_stack(common, stacksize);
6738 }
6739
6740 if (needs_control_head)
6741 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6742
6743 /* TMP2 which is set here used by OP_KETRMAX below. */
6744 if (ket == OP_KETRMAX)
6745 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6746 else if (ket == OP_KETRMIN)
6747 {
6748 /* Move the STR_PTR to the private_data_ptr. */
6749 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6750 }
6751 }
6752 else
6753 {
6754 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6755 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6756 if (needs_control_head)