/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1419 - (show annotations)
Sun Dec 29 04:42:14 2013 UTC (5 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 318095 byte(s)
Error occurred while calculating annotation data.
Improve fast forward search in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
145
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chain list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Singly linked list of sljit jumps. Each node holds one jump and a pointer
to the next node; the list ends with a NULL next pointer. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
193
/* Singly linked list of stubs. Each node pairs a jump (start) with a label
(quit) and links to the next node.
NOTE(review): presumably start enters out-of-line code and quit is where that
code resumes; confirm against the code that fills this list. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
199
/* Negative sentinel values. The framesize members of the backtrack
structures in this file are documented as "less than 0 if not needed";
NOTE(review): these constants are presumably the values stored there. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
204
/* Tags for two kinds of control entries: a mark and a then-trap.
NOTE(review): presumably these tag the entries of the control verb chain
(see control_head_ptr in compiler_common); confirm against the users. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
209
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to coninue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
309 #define MAX_RANGE_SIZE 4
310
311 typedef struct compiler_common {
312 /* The sljit ceneric compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 int digits[2 + MAX_RANGE_SIZE];
373 /* Named capturing brackets. */
374 pcre_uchar *name_table;
375 sljit_sw name_count;
376 sljit_sw name_entry_size;
377
378 /* Labels and jump lists. */
379 struct sljit_label *partialmatchlabel;
380 struct sljit_label *quit_label;
381 struct sljit_label *forced_quit_label;
382 struct sljit_label *accept_label;
383 stub_list *stubs;
384 recurse_entry *entries;
385 recurse_entry *currententry;
386 jump_list *partialmatch;
387 jump_list *quit;
388 jump_list *positive_assert_quit;
389 jump_list *forced_quit;
390 jump_list *accept;
391 jump_list *calllimit;
392 jump_list *stackalloc;
393 jump_list *revertframes;
394 jump_list *wordboundary;
395 jump_list *anynewline;
396 jump_list *hspace;
397 jump_list *vspace;
398 jump_list *casefulcmp;
399 jump_list *caselesscmp;
400 jump_list *reset_match;
401 BOOL jscript_compat;
402 #ifdef SUPPORT_UTF
403 BOOL utf;
404 #ifdef SUPPORT_UCP
405 BOOL use_ucp;
406 #endif
407 #ifndef COMPILE_PCRE32
408 jump_list *utfreadchar;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
/* For byte_sequence_compare. length and sourcereg are always present. The
remaining members exist only when SLJIT_UNALIGNED is defined and non-zero:
c and oc are two buffers of identical layout that can be viewed as an int,
a short, or an array of character units whose element type and count depend
on the character width being compiled (8, 16 or 32 bit).
NOTE(review): c/oc presumably hold the characters and their other-case
forms; confirm in byte_sequence_compare. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
452
/* Undefine sljit macros. CMP is redefined among the shortcuts of this file. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: element 0 is one machine word below the base, element 1 two words
below, and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458
/* Role names for the sljit registers used by the generated code. Note that
TMP2 is scratch register 3, while scratch register 2 is STACK_TOP. */
#define TMP1 SLJIT_SCRATCH_REG1
#define TMP2 SLJIT_SCRATCH_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_SCRATCH_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define COUNT_MATCH SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469
/* Local space layout. Each slot is one machine word (sizeof(sljit_sw)). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. The table is indexed by
the distance of cc from the first byte code. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
/* Shortcuts. All of them except DEFINE_COMPILER expand to an sljit call on
a variable named "compiler", so they can only be used where such a variable
is in scope. DEFINE_COMPILER declares it from common->compiler and therefore
needs a "common" pointer in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
/* Number of set bits in each 4 bit value: entry v is the population count
of v for v = 0 .. 15. The table is only ever read, so it is const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
540
/* Functions which might need modification for every newly supported opcode:
  next_opcode
  check_opcode_types
  set_private_data_ptrs
  get_framesize
  init_frame
  get_private_data_copy_length
  copy_private_data
  compile_matchingpath
  compile_backtrackingpath
*/
552
553 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
554 {
555 SLJIT_UNUSED_ARG(common);
556 switch(*cc)
557 {
558 case OP_SOD:
559 case OP_SOM:
560 case OP_SET_SOM:
561 case OP_NOT_WORD_BOUNDARY:
562 case OP_WORD_BOUNDARY:
563 case OP_NOT_DIGIT:
564 case OP_DIGIT:
565 case OP_NOT_WHITESPACE:
566 case OP_WHITESPACE:
567 case OP_NOT_WORDCHAR:
568 case OP_WORDCHAR:
569 case OP_ANY:
570 case OP_ALLANY:
571 case OP_NOTPROP:
572 case OP_PROP:
573 case OP_ANYNL:
574 case OP_NOT_HSPACE:
575 case OP_HSPACE:
576 case OP_NOT_VSPACE:
577 case OP_VSPACE:
578 case OP_EXTUNI:
579 case OP_EODN:
580 case OP_EOD:
581 case OP_CIRC:
582 case OP_CIRCM:
583 case OP_DOLL:
584 case OP_DOLLM:
585 case OP_CRSTAR:
586 case OP_CRMINSTAR:
587 case OP_CRPLUS:
588 case OP_CRMINPLUS:
589 case OP_CRQUERY:
590 case OP_CRMINQUERY:
591 case OP_CRRANGE:
592 case OP_CRMINRANGE:
593 case OP_CRPOSSTAR:
594 case OP_CRPOSPLUS:
595 case OP_CRPOSQUERY:
596 case OP_CRPOSRANGE:
597 case OP_CLASS:
598 case OP_NCLASS:
599 case OP_REF:
600 case OP_REFI:
601 case OP_DNREF:
602 case OP_DNREFI:
603 case OP_RECURSE:
604 case OP_CALLOUT:
605 case OP_ALT:
606 case OP_KET:
607 case OP_KETRMAX:
608 case OP_KETRMIN:
609 case OP_KETRPOS:
610 case OP_REVERSE:
611 case OP_ASSERT:
612 case OP_ASSERT_NOT:
613 case OP_ASSERTBACK:
614 case OP_ASSERTBACK_NOT:
615 case OP_ONCE:
616 case OP_ONCE_NC:
617 case OP_BRA:
618 case OP_BRAPOS:
619 case OP_CBRA:
620 case OP_CBRAPOS:
621 case OP_COND:
622 case OP_SBRA:
623 case OP_SBRAPOS:
624 case OP_SCBRA:
625 case OP_SCBRAPOS:
626 case OP_SCOND:
627 case OP_CREF:
628 case OP_DNCREF:
629 case OP_RREF:
630 case OP_DNRREF:
631 case OP_DEF:
632 case OP_BRAZERO:
633 case OP_BRAMINZERO:
634 case OP_BRAPOSZERO:
635 case OP_PRUNE:
636 case OP_SKIP:
637 case OP_THEN:
638 case OP_COMMIT:
639 case OP_FAIL:
640 case OP_ACCEPT:
641 case OP_ASSERT_ACCEPT:
642 case OP_CLOSE:
643 case OP_SKIPZERO:
644 return cc + PRIV(OP_lengths)[*cc];
645
646 case OP_CHAR:
647 case OP_CHARI:
648 case OP_NOT:
649 case OP_NOTI:
650 case OP_STAR:
651 case OP_MINSTAR:
652 case OP_PLUS:
653 case OP_MINPLUS:
654 case OP_QUERY:
655 case OP_MINQUERY:
656 case OP_UPTO:
657 case OP_MINUPTO:
658 case OP_EXACT:
659 case OP_POSSTAR:
660 case OP_POSPLUS:
661 case OP_POSQUERY:
662 case OP_POSUPTO:
663 case OP_STARI:
664 case OP_MINSTARI:
665 case OP_PLUSI:
666 case OP_MINPLUSI:
667 case OP_QUERYI:
668 case OP_MINQUERYI:
669 case OP_UPTOI:
670 case OP_MINUPTOI:
671 case OP_EXACTI:
672 case OP_POSSTARI:
673 case OP_POSPLUSI:
674 case OP_POSQUERYI:
675 case OP_POSUPTOI:
676 case OP_NOTSTAR:
677 case OP_NOTMINSTAR:
678 case OP_NOTPLUS:
679 case OP_NOTMINPLUS:
680 case OP_NOTQUERY:
681 case OP_NOTMINQUERY:
682 case OP_NOTUPTO:
683 case OP_NOTMINUPTO:
684 case OP_NOTEXACT:
685 case OP_NOTPOSSTAR:
686 case OP_NOTPOSPLUS:
687 case OP_NOTPOSQUERY:
688 case OP_NOTPOSUPTO:
689 case OP_NOTSTARI:
690 case OP_NOTMINSTARI:
691 case OP_NOTPLUSI:
692 case OP_NOTMINPLUSI:
693 case OP_NOTQUERYI:
694 case OP_NOTMINQUERYI:
695 case OP_NOTUPTOI:
696 case OP_NOTMINUPTOI:
697 case OP_NOTEXACTI:
698 case OP_NOTPOSSTARI:
699 case OP_NOTPOSPLUSI:
700 case OP_NOTPOSQUERYI:
701 case OP_NOTPOSUPTOI:
702 cc += PRIV(OP_lengths)[*cc];
703 #ifdef SUPPORT_UTF
704 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
705 #endif
706 return cc;
707
708 /* Special cases. */
709 case OP_TYPESTAR:
710 case OP_TYPEMINSTAR:
711 case OP_TYPEPLUS:
712 case OP_TYPEMINPLUS:
713 case OP_TYPEQUERY:
714 case OP_TYPEMINQUERY:
715 case OP_TYPEUPTO:
716 case OP_TYPEMINUPTO:
717 case OP_TYPEEXACT:
718 case OP_TYPEPOSSTAR:
719 case OP_TYPEPOSPLUS:
720 case OP_TYPEPOSQUERY:
721 case OP_TYPEPOSUPTO:
722 return cc + PRIV(OP_lengths)[*cc] - 1;
723
724 case OP_ANYBYTE:
725 #ifdef SUPPORT_UTF
726 if (common->utf) return NULL;
727 #endif
728 return cc + 1;
729
730 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
731 case OP_XCLASS:
732 return cc + GET(cc, 1);
733 #endif
734
735 case OP_MARK:
736 case OP_PRUNE_ARG:
737 case OP_SKIP_ARG:
738 case OP_THEN_ARG:
739 return cc + 1 + 2 + cc[1];
740
741 default:
742 /* All opcodes are supported now! */
743 SLJIT_ASSERT_STOP();
744 return NULL;
745 }
746 }
747
748 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
749 {
750 int count;
751 pcre_uchar *slot;
752
753 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
754 while (cc < ccend)
755 {
756 switch(*cc)
757 {
758 case OP_SET_SOM:
759 common->has_set_som = TRUE;
760 cc += 1;
761 break;
762
763 case OP_REF:
764 case OP_REFI:
765 common->optimized_cbracket[GET2(cc, 1)] = 0;
766 cc += 1 + IMM2_SIZE;
767 break;
768
769 case OP_CBRAPOS:
770 case OP_SCBRAPOS:
771 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
772 cc += 1 + LINK_SIZE + IMM2_SIZE;
773 break;
774
775 case OP_COND:
776 case OP_SCOND:
777 /* Only AUTO_CALLOUT can insert this opcode. We do
778 not intend to support this case. */
779 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
780 return FALSE;
781 cc += 1 + LINK_SIZE;
782 break;
783
784 case OP_CREF:
785 common->optimized_cbracket[GET2(cc, 1)] = 0;
786 cc += 1 + IMM2_SIZE;
787 break;
788
789 case OP_DNREF:
790 case OP_DNREFI:
791 case OP_DNCREF:
792 count = GET2(cc, 1 + IMM2_SIZE);
793 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
794 while (count-- > 0)
795 {
796 common->optimized_cbracket[GET2(slot, 0)] = 0;
797 slot += common->name_entry_size;
798 }
799 cc += 1 + 2 * IMM2_SIZE;
800 break;
801
802 case OP_RECURSE:
803 /* Set its value only once. */
804 if (common->recursive_head_ptr == 0)
805 {
806 common->recursive_head_ptr = common->ovector_start;
807 common->ovector_start += sizeof(sljit_sw);
808 }
809 cc += 1 + LINK_SIZE;
810 break;
811
812 case OP_CALLOUT:
813 if (common->capture_last_ptr == 0)
814 {
815 common->capture_last_ptr = common->ovector_start;
816 common->ovector_start += sizeof(sljit_sw);
817 }
818 cc += 2 + 2 * LINK_SIZE;
819 break;
820
821 case OP_THEN_ARG:
822 common->has_then = TRUE;
823 common->control_head_ptr = 1;
824 /* Fall through. */
825
826 case OP_PRUNE_ARG:
827 common->needs_start_ptr = TRUE;
828 /* Fall through. */
829
830 case OP_MARK:
831 if (common->mark_ptr == 0)
832 {
833 common->mark_ptr = common->ovector_start;
834 common->ovector_start += sizeof(sljit_sw);
835 }
836 cc += 1 + 2 + cc[1];
837 break;
838
839 case OP_THEN:
840 common->has_then = TRUE;
841 common->control_head_ptr = 1;
842 /* Fall through. */
843
844 case OP_PRUNE:
845 case OP_SKIP:
846 common->needs_start_ptr = TRUE;
847 cc += 1;
848 break;
849
850 case OP_SKIP_ARG:
851 common->control_head_ptr = 1;
852 common->has_skip_arg = TRUE;
853 cc += 1 + 2 + cc[1];
854 break;
855
856 default:
857 cc = next_opcode(common, cc);
858 if (cc == NULL)
859 return FALSE;
860 break;
861 }
862 }
863 return TRUE;
864 }
865
866 static int get_class_iterator_size(pcre_uchar *cc)
867 {
868 switch(*cc)
869 {
870 case OP_CRSTAR:
871 case OP_CRPLUS:
872 return 2;
873
874 case OP_CRMINSTAR:
875 case OP_CRMINPLUS:
876 case OP_CRQUERY:
877 case OP_CRMINQUERY:
878 return 1;
879
880 case OP_CRRANGE:
881 case OP_CRMINRANGE:
882 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
883 return 0;
884 return 2;
885
886 default:
887 return 0;
888 }
889 }
890
891 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
892 {
893 pcre_uchar *end = bracketend(begin);
894 pcre_uchar *next;
895 pcre_uchar *next_end;
896 pcre_uchar *max_end;
897 pcre_uchar type;
898 sljit_sw length = end - begin;
899 int min, max, i;
900
901 /* Detect fixed iterations first. */
902 if (end[-(1 + LINK_SIZE)] != OP_KET)
903 return FALSE;
904
905 /* Already detected repeat. */
906 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
907 return TRUE;
908
909 next = end;
910 min = 1;
911 while (1)
912 {
913 if (*next != *begin)
914 break;
915 next_end = bracketend(next);
916 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
917 break;
918 next = next_end;
919 min++;
920 }
921
922 if (min == 2)
923 return FALSE;
924
925 max = 0;
926 max_end = next;
927 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
928 {
929 type = *next;
930 while (1)
931 {
932 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
933 break;
934 next_end = bracketend(next + 2 + LINK_SIZE);
935 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
936 break;
937 next = next_end;
938 max++;
939 }
940
941 if (next[0] == type && next[1] == *begin && max >= 1)
942 {
943 next_end = bracketend(next + 1);
944 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
945 {
946 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
947 if (*next_end != OP_KET)
948 break;
949
950 if (i == max)
951 {
952 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
953 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
954 /* +2 the original and the last. */
955 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
956 if (min == 1)
957 return TRUE;
958 min--;
959 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
960 }
961 }
962 }
963 }
964
965 if (min >= 3)
966 {
967 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
968 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
969 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
970 return TRUE;
971 }
972
973 return FALSE;
974 }
975
/* Groups of case labels for use inside a switch on an opcode. Each macro
expands to a run of "case X:" labels, so the statements written after the
macro are executed for every opcode of the group. In set_private_data_ptrs
the _1 groups get one private data word, and the _2A and _2B groups get two;
the 2B groups are the UPTO forms, which carry a repeat count. */

/* Single character iterators. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1027
1028 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1029 {
1030 pcre_uchar *cc = common->start;
1031 pcre_uchar *alternative;
1032 pcre_uchar *end = NULL;
1033 int private_data_ptr = *private_data_start;
1034 int space, size, bracketlen;
1035
1036 while (cc < ccend)
1037 {
1038 space = 0;
1039 size = 0;
1040 bracketlen = 0;
1041 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1042 return;
1043
1044 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1045 if (detect_repeat(common, cc))
1046 {
1047 /* These brackets are converted to repeats, so no global
1048 based single character repeat is allowed. */
1049 if (cc >= end)
1050 end = bracketend(cc);
1051 }
1052
1053 switch(*cc)
1054 {
1055 case OP_KET:
1056 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1057 {
1058 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1059 private_data_ptr += sizeof(sljit_sw);
1060 cc += common->private_data_ptrs[cc + 1 - common->start];
1061 }
1062 cc += 1 + LINK_SIZE;
1063 break;
1064
1065 case OP_ASSERT:
1066 case OP_ASSERT_NOT:
1067 case OP_ASSERTBACK:
1068 case OP_ASSERTBACK_NOT:
1069 case OP_ONCE:
1070 case OP_ONCE_NC:
1071 case OP_BRAPOS:
1072 case OP_SBRA:
1073 case OP_SBRAPOS:
1074 case OP_SCOND:
1075 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1076 private_data_ptr += sizeof(sljit_sw);
1077 bracketlen = 1 + LINK_SIZE;
1078 break;
1079
1080 case OP_CBRAPOS:
1081 case OP_SCBRAPOS:
1082 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1083 private_data_ptr += sizeof(sljit_sw);
1084 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1085 break;
1086
1087 case OP_COND:
1088 /* Might be a hidden SCOND. */
1089 alternative = cc + GET(cc, 1);
1090 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1091 {
1092 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1093 private_data_ptr += sizeof(sljit_sw);
1094 }
1095 bracketlen = 1 + LINK_SIZE;
1096 break;
1097
1098 case OP_BRA:
1099 bracketlen = 1 + LINK_SIZE;
1100 break;
1101
1102 case OP_CBRA:
1103 case OP_SCBRA:
1104 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1105 break;
1106
1107 CASE_ITERATOR_PRIVATE_DATA_1
1108 space = 1;
1109 size = -2;
1110 break;
1111
1112 CASE_ITERATOR_PRIVATE_DATA_2A
1113 space = 2;
1114 size = -2;
1115 break;
1116
1117 CASE_ITERATOR_PRIVATE_DATA_2B
1118 space = 2;
1119 size = -(2 + IMM2_SIZE);
1120 break;
1121
1122 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1123 space = 1;
1124 size = 1;
1125 break;
1126
1127 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1128 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1129 space = 2;
1130 size = 1;
1131 break;
1132
1133 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1134 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1135 space = 2;
1136 size = 1 + IMM2_SIZE;
1137 break;
1138
1139 case OP_CLASS:
1140 case OP_NCLASS:
1141 size += 1 + 32 / sizeof(pcre_uchar);
1142 space = get_class_iterator_size(cc + size);
1143 break;
1144
1145 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1146 case OP_XCLASS:
1147 size = GET(cc, 1);
1148 space = get_class_iterator_size(cc + size);
1149 break;
1150 #endif
1151
1152 default:
1153 cc = next_opcode(common, cc);
1154 SLJIT_ASSERT(cc != NULL);
1155 break;
1156 }
1157
1158 /* Character iterators, which are not inside a repeated bracket,
1159 gets a private slot instead of allocating it on the stack. */
1160 if (space > 0 && cc >= end)
1161 {
1162 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1163 private_data_ptr += sizeof(sljit_sw) * space;
1164 }
1165
1166 if (size != 0)
1167 {
1168 if (size < 0)
1169 {
1170 cc += -size;
1171 #ifdef SUPPORT_UTF
1172 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1173 #endif
1174 }
1175 else
1176 cc += size;
1177 }
1178
1179 if (bracketlen > 0)
1180 {
1181 if (cc >= end)
1182 {
1183 end = bracketend(cc);
1184 if (end[-1 - LINK_SIZE] == OP_KET)
1185 end = NULL;
1186 }
1187 cc += bracketlen;
1188 }
1189 }
1190 *private_data_start = private_data_ptr;
1191 }
1192
1193 /* Returns with a frame_types (always < 0) if no need for frame. */
1194 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1195 {
1196 int length = 0;
1197 int possessive = 0;
1198 BOOL stack_restore = FALSE;
1199 BOOL setsom_found = recursive;
1200 BOOL setmark_found = recursive;
1201 /* The last capture is a local variable even for recursions. */
1202 BOOL capture_last_found = FALSE;
1203
1204 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1205 SLJIT_ASSERT(common->control_head_ptr != 0);
1206 *needs_control_head = TRUE;
1207 #else
1208 *needs_control_head = FALSE;
1209 #endif
1210
1211 if (ccend == NULL)
1212 {
1213 ccend = bracketend(cc) - (1 + LINK_SIZE);
1214 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1215 {
1216 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1217 /* This is correct regardless of common->capture_last_ptr. */
1218 capture_last_found = TRUE;
1219 }
1220 cc = next_opcode(common, cc);
1221 }
1222
1223 SLJIT_ASSERT(cc != NULL);
1224 while (cc < ccend)
1225 switch(*cc)
1226 {
1227 case OP_SET_SOM:
1228 SLJIT_ASSERT(common->has_set_som);
1229 stack_restore = TRUE;
1230 if (!setsom_found)
1231 {
1232 length += 2;
1233 setsom_found = TRUE;
1234 }
1235 cc += 1;
1236 break;
1237
1238 case OP_MARK:
1239 case OP_PRUNE_ARG:
1240 case OP_THEN_ARG:
1241 SLJIT_ASSERT(common->mark_ptr != 0);
1242 stack_restore = TRUE;
1243 if (!setmark_found)
1244 {
1245 length += 2;
1246 setmark_found = TRUE;
1247 }
1248 if (common->control_head_ptr != 0)
1249 *needs_control_head = TRUE;
1250 cc += 1 + 2 + cc[1];
1251 break;
1252
1253 case OP_RECURSE:
1254 stack_restore = TRUE;
1255 if (common->has_set_som && !setsom_found)
1256 {
1257 length += 2;
1258 setsom_found = TRUE;
1259 }
1260 if (common->mark_ptr != 0 && !setmark_found)
1261 {
1262 length += 2;
1263 setmark_found = TRUE;
1264 }
1265 if (common->capture_last_ptr != 0 && !capture_last_found)
1266 {
1267 length += 2;
1268 capture_last_found = TRUE;
1269 }
1270 cc += 1 + LINK_SIZE;
1271 break;
1272
1273 case OP_CBRA:
1274 case OP_CBRAPOS:
1275 case OP_SCBRA:
1276 case OP_SCBRAPOS:
1277 stack_restore = TRUE;
1278 if (common->capture_last_ptr != 0 && !capture_last_found)
1279 {
1280 length += 2;
1281 capture_last_found = TRUE;
1282 }
1283 length += 3;
1284 cc += 1 + LINK_SIZE + IMM2_SIZE;
1285 break;
1286
1287 default:
1288 stack_restore = TRUE;
1289 /* Fall through. */
1290
1291 case OP_NOT_WORD_BOUNDARY:
1292 case OP_WORD_BOUNDARY:
1293 case OP_NOT_DIGIT:
1294 case OP_DIGIT:
1295 case OP_NOT_WHITESPACE:
1296 case OP_WHITESPACE:
1297 case OP_NOT_WORDCHAR:
1298 case OP_WORDCHAR:
1299 case OP_ANY:
1300 case OP_ALLANY:
1301 case OP_ANYBYTE:
1302 case OP_NOTPROP:
1303 case OP_PROP:
1304 case OP_ANYNL:
1305 case OP_NOT_HSPACE:
1306 case OP_HSPACE:
1307 case OP_NOT_VSPACE:
1308 case OP_VSPACE:
1309 case OP_EXTUNI:
1310 case OP_EODN:
1311 case OP_EOD:
1312 case OP_CIRC:
1313 case OP_CIRCM:
1314 case OP_DOLL:
1315 case OP_DOLLM:
1316 case OP_CHAR:
1317 case OP_CHARI:
1318 case OP_NOT:
1319 case OP_NOTI:
1320
1321 case OP_EXACT:
1322 case OP_POSSTAR:
1323 case OP_POSPLUS:
1324 case OP_POSQUERY:
1325 case OP_POSUPTO:
1326
1327 case OP_EXACTI:
1328 case OP_POSSTARI:
1329 case OP_POSPLUSI:
1330 case OP_POSQUERYI:
1331 case OP_POSUPTOI:
1332
1333 case OP_NOTEXACT:
1334 case OP_NOTPOSSTAR:
1335 case OP_NOTPOSPLUS:
1336 case OP_NOTPOSQUERY:
1337 case OP_NOTPOSUPTO:
1338
1339 case OP_NOTEXACTI:
1340 case OP_NOTPOSSTARI:
1341 case OP_NOTPOSPLUSI:
1342 case OP_NOTPOSQUERYI:
1343 case OP_NOTPOSUPTOI:
1344
1345 case OP_TYPEEXACT:
1346 case OP_TYPEPOSSTAR:
1347 case OP_TYPEPOSPLUS:
1348 case OP_TYPEPOSQUERY:
1349 case OP_TYPEPOSUPTO:
1350
1351 case OP_CLASS:
1352 case OP_NCLASS:
1353 case OP_XCLASS:
1354
1355 cc = next_opcode(common, cc);
1356 SLJIT_ASSERT(cc != NULL);
1357 break;
1358 }
1359
1360 /* Possessive quantifiers can use a special case. */
1361 if (SLJIT_UNLIKELY(possessive == length))
1362 return stack_restore ? no_frame : no_stack;
1363
1364 if (length > 0)
1365 return length + 1;
1366 return stack_restore ? no_frame : no_stack;
1367 }
1368
1369 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1370 {
1371 DEFINE_COMPILER;
1372 BOOL setsom_found = recursive;
1373 BOOL setmark_found = recursive;
1374 /* The last capture is a local variable even for recursions. */
1375 BOOL capture_last_found = FALSE;
1376 int offset;
1377
1378 /* >= 1 + shortest item size (2) */
1379 SLJIT_UNUSED_ARG(stacktop);
1380 SLJIT_ASSERT(stackpos >= stacktop + 2);
1381
1382 stackpos = STACK(stackpos);
1383 if (ccend == NULL)
1384 {
1385 ccend = bracketend(cc) - (1 + LINK_SIZE);
1386 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1387 cc = next_opcode(common, cc);
1388 }
1389
1390 SLJIT_ASSERT(cc != NULL);
1391 while (cc < ccend)
1392 switch(*cc)
1393 {
1394 case OP_SET_SOM:
1395 SLJIT_ASSERT(common->has_set_som);
1396 if (!setsom_found)
1397 {
1398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1400 stackpos += (int)sizeof(sljit_sw);
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1402 stackpos += (int)sizeof(sljit_sw);
1403 setsom_found = TRUE;
1404 }
1405 cc += 1;
1406 break;
1407
1408 case OP_MARK:
1409 case OP_PRUNE_ARG:
1410 case OP_THEN_ARG:
1411 SLJIT_ASSERT(common->mark_ptr != 0);
1412 if (!setmark_found)
1413 {
1414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1416 stackpos += (int)sizeof(sljit_sw);
1417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1418 stackpos += (int)sizeof(sljit_sw);
1419 setmark_found = TRUE;
1420 }
1421 cc += 1 + 2 + cc[1];
1422 break;
1423
1424 case OP_RECURSE:
1425 if (common->has_set_som && !setsom_found)
1426 {
1427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1429 stackpos += (int)sizeof(sljit_sw);
1430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1431 stackpos += (int)sizeof(sljit_sw);
1432 setsom_found = TRUE;
1433 }
1434 if (common->mark_ptr != 0 && !setmark_found)
1435 {
1436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1438 stackpos += (int)sizeof(sljit_sw);
1439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1440 stackpos += (int)sizeof(sljit_sw);
1441 setmark_found = TRUE;
1442 }
1443 if (common->capture_last_ptr != 0 && !capture_last_found)
1444 {
1445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1447 stackpos += (int)sizeof(sljit_sw);
1448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1449 stackpos += (int)sizeof(sljit_sw);
1450 capture_last_found = TRUE;
1451 }
1452 cc += 1 + LINK_SIZE;
1453 break;
1454
1455 case OP_CBRA:
1456 case OP_CBRAPOS:
1457 case OP_SCBRA:
1458 case OP_SCBRAPOS:
1459 if (common->capture_last_ptr != 0 && !capture_last_found)
1460 {
1461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1463 stackpos += (int)sizeof(sljit_sw);
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1465 stackpos += (int)sizeof(sljit_sw);
1466 capture_last_found = TRUE;
1467 }
1468 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1470 stackpos += (int)sizeof(sljit_sw);
1471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1472 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477
1478 cc += 1 + LINK_SIZE + IMM2_SIZE;
1479 break;
1480
1481 default:
1482 cc = next_opcode(common, cc);
1483 SLJIT_ASSERT(cc != NULL);
1484 break;
1485 }
1486
1487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1488 SLJIT_ASSERT(stackpos == STACK(stacktop));
1489 }
1490
1491 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1492 {
1493 int private_data_length = needs_control_head ? 3 : 2;
1494 int size;
1495 pcre_uchar *alternative;
1496 /* Calculate the sum of the private machine words. */
1497 while (cc < ccend)
1498 {
1499 size = 0;
1500 switch(*cc)
1501 {
1502 case OP_KET:
1503 if (PRIVATE_DATA(cc) != 0)
1504 private_data_length++;
1505 cc += 1 + LINK_SIZE;
1506 break;
1507
1508 case OP_ASSERT:
1509 case OP_ASSERT_NOT:
1510 case OP_ASSERTBACK:
1511 case OP_ASSERTBACK_NOT:
1512 case OP_ONCE:
1513 case OP_ONCE_NC:
1514 case OP_BRAPOS:
1515 case OP_SBRA:
1516 case OP_SBRAPOS:
1517 case OP_SCOND:
1518 private_data_length++;
1519 cc += 1 + LINK_SIZE;
1520 break;
1521
1522 case OP_CBRA:
1523 case OP_SCBRA:
1524 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1525 private_data_length++;
1526 cc += 1 + LINK_SIZE + IMM2_SIZE;
1527 break;
1528
1529 case OP_CBRAPOS:
1530 case OP_SCBRAPOS:
1531 private_data_length += 2;
1532 cc += 1 + LINK_SIZE + IMM2_SIZE;
1533 break;
1534
1535 case OP_COND:
1536 /* Might be a hidden SCOND. */
1537 alternative = cc + GET(cc, 1);
1538 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1539 private_data_length++;
1540 cc += 1 + LINK_SIZE;
1541 break;
1542
1543 CASE_ITERATOR_PRIVATE_DATA_1
1544 if (PRIVATE_DATA(cc))
1545 private_data_length++;
1546 cc += 2;
1547 #ifdef SUPPORT_UTF
1548 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1549 #endif
1550 break;
1551
1552 CASE_ITERATOR_PRIVATE_DATA_2A
1553 if (PRIVATE_DATA(cc))
1554 private_data_length += 2;
1555 cc += 2;
1556 #ifdef SUPPORT_UTF
1557 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1558 #endif
1559 break;
1560
1561 CASE_ITERATOR_PRIVATE_DATA_2B
1562 if (PRIVATE_DATA(cc))
1563 private_data_length += 2;
1564 cc += 2 + IMM2_SIZE;
1565 #ifdef SUPPORT_UTF
1566 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1567 #endif
1568 break;
1569
1570 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1571 if (PRIVATE_DATA(cc))
1572 private_data_length++;
1573 cc += 1;
1574 break;
1575
1576 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1577 if (PRIVATE_DATA(cc))
1578 private_data_length += 2;
1579 cc += 1;
1580 break;
1581
1582 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1583 if (PRIVATE_DATA(cc))
1584 private_data_length += 2;
1585 cc += 1 + IMM2_SIZE;
1586 break;
1587
1588 case OP_CLASS:
1589 case OP_NCLASS:
1590 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1591 case OP_XCLASS:
1592 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1593 #else
1594 size = 1 + 32 / (int)sizeof(pcre_uchar);
1595 #endif
1596 if (PRIVATE_DATA(cc))
1597 private_data_length += get_class_iterator_size(cc + size);
1598 cc += size;
1599 break;
1600
1601 default:
1602 cc = next_opcode(common, cc);
1603 SLJIT_ASSERT(cc != NULL);
1604 break;
1605 }
1606 }
1607 SLJIT_ASSERT(cc == ccend);
1608 return private_data_length;
1609 }
1610
/* Emits the code which copies the private data words of the byte code range
[cc, ccend) between the local area (SLJIT_LOCALS_REG based) and the stack
(STACK_TOP based).

Arguments:
  common              compiler state
  cc, ccend           byte code range; cc must reach ccend exactly
  save                TRUE: locals are copied to the stack,
                      FALSE: the stack content is copied back to the locals
  stackptr, stacktop  stack word indexes, converted by STACK() below
  needs_control_head  TRUE if the control head is part of the copied data

TMP1 and TMP2 are used alternately as staging registers (tmp1next selects the
next one), so a load into one register is always separated from its store by
an access which uses the other register. */
1611 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1612 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1613 {
1614 DEFINE_COMPILER;
/* Local offsets of the words produced by the current step (at most two). */
1615 int srcw[2];
1616 int count, size;
1617 BOOL tmp1next = TRUE;
1618 BOOL tmp1empty = TRUE;
1619 BOOL tmp2empty = TRUE;
1620 pcre_uchar *alternative;
/* start: emit the recursive head (and control head) first, save mode only.
loop: process the opcodes one by one. end: nothing is left. */
1621 enum {
1622 start,
1623 loop,
1624 end
1625 } status;
1626
1627 status = save ? start : loop;
1628 stackptr = STACK(stackptr - 2);
1629 stacktop = STACK(stacktop - 1);
1630
1631 if (!save)
1632 {
/* Restoring starts in the loop state, so the one or two words written by
the start state when saving are stepped over here instead of being copied
back. Then both staging registers are preloaded as far as data is available. */
1633 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1634 if (stackptr < stacktop)
1635 {
1636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1637 stackptr += sizeof(sljit_sw);
1638 tmp1empty = FALSE;
1639 }
1640 if (stackptr < stacktop)
1641 {
1642 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1643 stackptr += sizeof(sljit_sw);
1644 tmp2empty = FALSE;
1645 }
1646 /* The tmp1next must be TRUE in either way. */
1647 }
1648
/* Each iteration first selects up to two local offsets (srcw, count), then
the inner loop at the bottom emits the copy for them. */
1649 do
1650 {
1651 count = 0;
1652 switch(status)
1653 {
1654 case start:
1655 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1656 count = 1;
1657 srcw[0] = common->recursive_head_ptr;
1658 if (needs_control_head)
1659 {
1660 SLJIT_ASSERT(common->control_head_ptr != 0);
1661 count = 2;
1662 srcw[1] = common->control_head_ptr;
1663 }
1664 status = loop;
1665 break;
1666
1667 case loop:
1668 if (cc >= ccend)
1669 {
1670 status = end;
1671 break;
1672 }
1673
1674 switch(*cc)
1675 {
1676 case OP_KET:
1677 if (PRIVATE_DATA(cc) != 0)
1678 {
1679 count = 1;
1680 srcw[0] = PRIVATE_DATA(cc);
1681 }
1682 cc += 1 + LINK_SIZE;
1683 break;
1684
1685 case OP_ASSERT:
1686 case OP_ASSERT_NOT:
1687 case OP_ASSERTBACK:
1688 case OP_ASSERTBACK_NOT:
1689 case OP_ONCE:
1690 case OP_ONCE_NC:
1691 case OP_BRAPOS:
1692 case OP_SBRA:
1693 case OP_SBRAPOS:
1694 case OP_SCOND:
1695 count = 1;
1696 srcw[0] = PRIVATE_DATA(cc);
1697 SLJIT_ASSERT(srcw[0] != 0);
1698 cc += 1 + LINK_SIZE;
1699 break;
1700
1701 case OP_CBRA:
1702 case OP_SCBRA:
/* Only brackets whose optimized_cbracket entry is zero have a private
ovector word to copy. */
1703 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1704 {
1705 count = 1;
1706 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1707 }
1708 cc += 1 + LINK_SIZE + IMM2_SIZE;
1709 break;
1710
1711 case OP_CBRAPOS:
1712 case OP_SCBRAPOS:
1713 count = 2;
1714 srcw[0] = PRIVATE_DATA(cc);
1715 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1716 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1717 cc += 1 + LINK_SIZE + IMM2_SIZE;
1718 break;
1719
1720 case OP_COND:
1721 /* Might be a hidden SCOND. */
1722 alternative = cc + GET(cc, 1);
1723 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1724 {
1725 count = 1;
1726 srcw[0] = PRIVATE_DATA(cc);
1727 SLJIT_ASSERT(srcw[0] != 0);
1728 }
1729 cc += 1 + LINK_SIZE;
1730 break;
1731
/* Character iterators: copied only if a private slot was assigned to them. */
1732 CASE_ITERATOR_PRIVATE_DATA_1
1733 if (PRIVATE_DATA(cc))
1734 {
1735 count = 1;
1736 srcw[0] = PRIVATE_DATA(cc);
1737 }
1738 cc += 2;
1739 #ifdef SUPPORT_UTF
1740 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1741 #endif
1742 break;
1743
1744 CASE_ITERATOR_PRIVATE_DATA_2A
1745 if (PRIVATE_DATA(cc))
1746 {
1747 count = 2;
1748 srcw[0] = PRIVATE_DATA(cc);
1749 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1750 }
1751 cc += 2;
1752 #ifdef SUPPORT_UTF
1753 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1754 #endif
1755 break;
1756
1757 CASE_ITERATOR_PRIVATE_DATA_2B
1758 if (PRIVATE_DATA(cc))
1759 {
1760 count = 2;
1761 srcw[0] = PRIVATE_DATA(cc);
1762 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1763 }
1764 cc += 2 + IMM2_SIZE;
1765 #ifdef SUPPORT_UTF
1766 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1767 #endif
1768 break;
1769
1770 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1771 if (PRIVATE_DATA(cc))
1772 {
1773 count = 1;
1774 srcw[0] = PRIVATE_DATA(cc);
1775 }
1776 cc += 1;
1777 break;
1778
1779 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1780 if (PRIVATE_DATA(cc))
1781 {
1782 count = 2;
1783 srcw[0] = PRIVATE_DATA(cc);
1784 srcw[1] = srcw[0] + sizeof(sljit_sw);
1785 }
1786 cc += 1;
1787 break;
1788
1789 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1790 if (PRIVATE_DATA(cc))
1791 {
1792 count = 2;
1793 srcw[0] = PRIVATE_DATA(cc);
1794 srcw[1] = srcw[0] + sizeof(sljit_sw);
1795 }
1796 cc += 1 + IMM2_SIZE;
1797 break;
1798
1799 case OP_CLASS:
1800 case OP_NCLASS:
1801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1802 case OP_XCLASS:
1803 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1804 #else
1805 size = 1 + 32 / (int)sizeof(pcre_uchar);
1806 #endif
/* The number of words depends on the iterator which follows the class. */
1807 if (PRIVATE_DATA(cc))
1808 switch(get_class_iterator_size(cc + size))
1809 {
1810 case 1:
1811 count = 1;
1812 srcw[0] = PRIVATE_DATA(cc);
1813 break;
1814
1815 case 2:
1816 count = 2;
1817 srcw[0] = PRIVATE_DATA(cc);
1818 srcw[1] = srcw[0] + sizeof(sljit_sw);
1819 break;
1820
1821 default:
1822 SLJIT_ASSERT_STOP();
1823 break;
1824 }
1825 cc += size;
1826 break;
1827
1828 default:
1829 cc = next_opcode(common, cc);
1830 SLJIT_ASSERT(cc != NULL);
1831 break;
1832 }
1833 break;
1834
1835 case end:
1836 SLJIT_ASSERT_STOP();
1837 break;
1838 }
1839
/* Emit the copy of the selected words. Since count is decremented before
srcw is indexed, srcw[1] (if present) is processed before srcw[0]. */
1840 while (count > 0)
1841 {
1842 count--;
1843 if (save)
1844 {
/* Saving: the previous content of the chosen register (if any) is written
to the stack first, then the register is loaded with the next local word. */
1845 if (tmp1next)
1846 {
1847 if (!tmp1empty)
1848 {
1849 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1850 stackptr += sizeof(sljit_sw);
1851 }
1852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1853 tmp1empty = FALSE;
1854 tmp1next = FALSE;
1855 }
1856 else
1857 {
1858 if (!tmp2empty)
1859 {
1860 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1861 stackptr += sizeof(sljit_sw);
1862 }
1863 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1864 tmp2empty = FALSE;
1865 tmp1next = TRUE;
1866 }
1867 }
1868 else
1869 {
/* Restoring: the chosen register already holds the next stack word; it is
stored to the local word, then reloaded from the stack if data is left. */
1870 if (tmp1next)
1871 {
1872 SLJIT_ASSERT(!tmp1empty);
1873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1874 tmp1empty = stackptr >= stacktop;
1875 if (!tmp1empty)
1876 {
1877 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1878 stackptr += sizeof(sljit_sw);
1879 }
1880 tmp1next = FALSE;
1881 }
1882 else
1883 {
1884 SLJIT_ASSERT(!tmp2empty);
1885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1886 tmp2empty = stackptr >= stacktop;
1887 if (!tmp2empty)
1888 {
1889 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1890 stackptr += sizeof(sljit_sw);
1891 }
1892 tmp1next = TRUE;
1893 }
1894 }
1895 }
1896 }
1897 while (status != end);
1898
/* Saving: flush the words still held in the staging registers. The register
which would be used next holds the older word, so it is written first. */
1899 if (save)
1900 {
1901 if (tmp1next)
1902 {
1903 if (!tmp1empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 if (!tmp2empty)
1909 {
1910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1911 stackptr += sizeof(sljit_sw);
1912 }
1913 }
1914 else
1915 {
1916 if (!tmp2empty)
1917 {
1918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1919 stackptr += sizeof(sljit_sw);
1920 }
1921 if (!tmp1empty)
1922 {
1923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1924 stackptr += sizeof(sljit_sw);
1925 }
1926 }
1927 }
/* The whole range was processed and exactly the reserved stack area was used. */
1928 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1929 }
1930
1931 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1932 {
1933 pcre_uchar *end = bracketend(cc);
1934 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1935
1936 /* Assert captures then. */
1937 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1938 current_offset = NULL;
1939 /* Conditional block does not. */
1940 if (*cc == OP_COND || *cc == OP_SCOND)
1941 has_alternatives = FALSE;
1942
1943 cc = next_opcode(common, cc);
1944 if (has_alternatives)
1945 current_offset = common->then_offsets + (cc - common->start);
1946
1947 while (cc < end)
1948 {
1949 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1950 cc = set_then_offsets(common, cc, current_offset);
1951 else
1952 {
1953 if (*cc == OP_ALT && has_alternatives)
1954 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1955 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1956 *current_offset = 1;
1957 cc = next_opcode(common, cc);
1958 }
1959 }
1960
1961 return end;
1962 }
1963
1964 #undef CASE_ITERATOR_PRIVATE_DATA_1
1965 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1966 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1967 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1968 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1969 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1970
/* Returns TRUE if at most one bit is set in value. Note that zero is
reported as TRUE as well. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit leaves nothing if only one bit was set. */
unsigned int without_lowest_bit = value & (value - 1);

return without_lowest_bit == 0;
}
1975
1976 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1977 {
1978 while (list)
1979 {
1980 /* sljit_set_label is clever enough to do nothing
1981 if either the jump or the label is NULL. */
1982 SET_LABEL(list->jump, label);
1983 list = list->next;
1984 }
1985 }
1986
1987 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1988 {
1989 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1990 if (list_item)
1991 {
1992 list_item->next = *list;
1993 list_item->jump = jump;
1994 *list = list_item;
1995 }
1996 }
1997
1998 static void add_stub(compiler_common *common, struct sljit_jump *start)
1999 {
2000 DEFINE_COMPILER;
2001 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2002
2003 if (list_item)
2004 {
2005 list_item->start = start;
2006 list_item->quit = LABEL();
2007 list_item->next = common->stubs;
2008 common->stubs = list_item;
2009 }
2010 }
2011
2012 static void flush_stubs(compiler_common *common)
2013 {
2014 DEFINE_COMPILER;
2015 stub_list* list_item = common->stubs;
2016
2017 while (list_item)
2018 {
2019 JUMPHERE(list_item->start);
2020 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2021 JUMPTO(SLJIT_JUMP, list_item->quit);
2022 list_item = list_item->next;
2023 }
2024 common->stubs = NULL;
2025 }
2026
2027 static SLJIT_INLINE void count_match(compiler_common *common)
2028 {
2029 DEFINE_COMPILER;
2030
2031 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2032 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2033 }
2034
2035 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2036 {
2037 /* May destroy all locals and registers except TMP2. */
2038 DEFINE_COMPILER;
2039
2040 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2041 #ifdef DESTROY_REGISTERS
2042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2043 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2044 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2047 #endif
2048 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2049 }
2050
2051 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2052 {
2053 DEFINE_COMPILER;
2054 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2055 }
2056
2057 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2058 {
2059 DEFINE_COMPILER;
2060 struct sljit_label *loop;
2061 int i;
2062
2063 /* At this point we can freely use all temporary registers. */
2064 SLJIT_ASSERT(length > 1);
2065 /* TMP1 returns with begin - 1. */
2066 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2067 if (length < 8)
2068 {
2069 for (i = 1; i < length; i++)
2070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2071 }
2072 else
2073 {
2074 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2075 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2076 loop = LABEL();
2077 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2079 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2080 }
2081 }
2082
2083 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2084 {
2085 DEFINE_COMPILER;
2086 struct sljit_label *loop;
2087 int i;
2088
2089 SLJIT_ASSERT(length > 1);
2090 /* OVECTOR(1) contains the "string begin - 1" constant. */
2091 if (length > 2)
2092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2093 if (length < 8)
2094 {
2095 for (i = 2; i < length; i++)
2096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2097 }
2098 else
2099 {
2100 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2101 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2102 loop = LABEL();
2103 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2104 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2105 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2106 }
2107
2108 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2109 if (common->mark_ptr != 0)
2110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2111 if (common->control_head_ptr != 0)
2112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2113 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2114 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2115 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2116 }
2117
2118 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2119 {
2120 while (current != NULL)
2121 {
2122 switch (current[-2])
2123 {
2124 case type_then_trap:
2125 break;
2126
2127 case type_mark:
2128 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2129 return current[-4];
2130 break;
2131
2132 default:
2133 SLJIT_ASSERT_STOP();
2134 break;
2135 }
2136 current = (sljit_sw*)current[-1];
2137 }
2138 return -1;
2139 }
2140
/* Emits the code which finishes a successful match: STR_PTR is stored into
OVECTOR(1), the mark pointer (if any) is stored into the arguments,
offset_count ovector values are written into the offsets array of the
arguments as differences from the begin pointer, and the return value is
computed from topbracket. */
2141 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2142 {
2143 DEFINE_COMPILER;
2144 struct sljit_label *loop;
2145 struct sljit_jump *early_quit;
2146
2147 /* At this point we can freely use all registers. */
/* The previous OVECTOR(1) value is kept in SLJIT_SAVED_REG3. It is compared
with the ovector values in the return value loop below. */
2148 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2149 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2150
2151 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2152 if (common->mark_ptr != 0)
2153 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2154 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2155 if (common->mark_ptr != 0)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The destination starts one int before the offsets array; presumably the
SLJIT_MOVU_SI store below updates the base by sizeof(int) before writing -
TODO confirm against the sljit documentation. */
2157 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2158 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2159 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2160 /* Unlikely, but possible */
2161 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: one ovector word is converted and stored per iteration,
SLJIT_SCRATCH_REG2 counts the remaining items. */
2162 loop = LABEL();
2163 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2164 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2165 /* Copy the integer value to the output buffer */
2166 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* In the 16 and 32 bit libraries the pointer difference is shifted right
by UCHAR_SHIFT. */
2167 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2168 #endif
2169 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2171 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2172 JUMPHERE(early_quit);
2173
2174 /* Calculate the return value, which is the maximum ovector value. */
2175 if (topbracket > 1)
2176 {
/* The emitted loop walks backwards over the ovector pairs, starting from
topbracket, and decrements the candidate return value while the loaded
value equals the value saved in SLJIT_SAVED_REG3. */
2177 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2178 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2179
2180 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2181 loop = LABEL();
2182 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2183 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2184 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2185 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2186 }
2187 else
2188 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2189 }
2190
/* Emits the code which returns PCRE_ERROR_PARTIAL through the quit label.
When real_offset_count is at least 2, offsets[0] and offsets[1] of the
arguments are filled; offsets[2] is filled as well when the count is at
least 3. All values are stored as differences from the begin pointer. Which
locals are read depends on the partial matching mode (hard or soft). */
2191 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2192 {
2193 DEFINE_COMPILER;
2194 struct sljit_jump *jump;
2195
/* STR_END lives in SLJIT_SAVED_REG2, which is overwritten further below. */
2196 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2197 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2198 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2199
2200 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2201 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2202 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: nothing can be stored, quit immediately. */
2203 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2204
2205 /* Store match begin and end. */
2206 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2207 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2208
/* offsets[2] is written only if at least three slots are available. Its
source is start_ptr in hard mode, and the word after hit_start otherwise. */
2209 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2210 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2211 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2212 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2213 #endif
2214 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2215 JUMPHERE(jump);
2216
/* offsets[1] = STR_END - begin. The source of offsets[0] (start_used_ptr in
hard mode, hit_start otherwise) is loaded first. */
2217 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2218 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2219 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2220 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2221 #endif
2222 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2223
/* offsets[0] = loaded start value - begin. */
2224 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2225 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2226 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2227 #endif
2228 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2229
2230 JUMPTO(SLJIT_JUMP, quit);
2231 }
2232
2233 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2234 {
2235 /* May destroy TMP1. */
2236 DEFINE_COMPILER;
2237 struct sljit_jump *jump;
2238
2239 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2240 {
2241 /* The value of -1 must be kept for start_used_ptr! */
2242 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2243 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2244 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2245 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2247 JUMPHERE(jump);
2248 }
2249 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2250 {
2251 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2252 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2253 JUMPHERE(jump);
2254 }
2255 }
2256
2257 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2258 {
2259 /* Detects if the character has an othercase. */
2260 unsigned int c;
2261
2262 #ifdef SUPPORT_UTF
2263 if (common->utf)
2264 {
2265 GETCHAR(c, cc);
2266 if (c > 127)
2267 {
2268 #ifdef SUPPORT_UCP
2269 return c != UCD_OTHERCASE(c);
2270 #else
2271 return FALSE;
2272 #endif
2273 }
2274 #ifndef COMPILE_PCRE8
2275 return common->fcc[c] != c;
2276 #endif
2277 }
2278 else
2279 #endif
2280 c = *cc;
2281 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2282 }
2283
2284 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2285 {
2286 /* Returns with the othercase. */
2287 #ifdef SUPPORT_UTF
2288 if (common->utf && c > 127)
2289 {
2290 #ifdef SUPPORT_UCP
2291 return UCD_OTHERCASE(c);
2292 #else
2293 return c;
2294 #endif
2295 }
2296 #endif
2297 return TABLE_GET(c, common->fcc, c);
2298 }
2299
2300 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2301 {
2302 /* Detects if the character and its othercase has only 1 bit difference. */
2303 unsigned int c, oc, bit;
2304 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2305 int n;
2306 #endif
2307
2308 #ifdef SUPPORT_UTF
2309 if (common->utf)
2310 {
2311 GETCHAR(c, cc);
2312 if (c <= 127)
2313 oc = common->fcc[c];
2314 else
2315 {
2316 #ifdef SUPPORT_UCP
2317 oc = UCD_OTHERCASE(c);
2318 #else
2319 oc = c;
2320 #endif
2321 }
2322 }
2323 else
2324 {
2325 c = *cc;
2326 oc = TABLE_GET(c, common->fcc, c);
2327 }
2328 #else
2329 c = *cc;
2330 oc = TABLE_GET(c, common->fcc, c);
2331 #endif
2332
2333 SLJIT_ASSERT(c != oc);
2334
2335 bit = c ^ oc;
2336 /* Optimized for English alphabet. */
2337 if (c <= 127 && bit == 0x20)
2338 return (0 << 8) | 0x20;
2339
2340 /* Since c != oc, they must have at least 1 bit difference. */
2341 if (!is_powerof2(bit))
2342 return 0;
2343
2344 #if defined COMPILE_PCRE8
2345
2346 #ifdef SUPPORT_UTF
2347 if (common->utf && c > 127)
2348 {
2349 n = GET_EXTRALEN(*cc);
2350 while ((bit & 0x3f) == 0)
2351 {
2352 n--;
2353 bit >>= 6;
2354 }
2355 return (n << 8) | bit;
2356 }
2357 #endif /* SUPPORT_UTF */
2358 return (0 << 8) | bit;
2359
2360 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2361
2362 #ifdef SUPPORT_UTF
2363 if (common->utf && c > 65535)
2364 {
2365 if (bit >= (1 << 10))
2366 bit >>= 10;
2367 else
2368 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2369 }
2370 #endif /* SUPPORT_UTF */
2371 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2372
2373 #endif /* COMPILE_PCRE[8|16|32] */
2374 }
2375
2376 static void check_partial(compiler_common *common, BOOL force)
2377 {
2378 /* Checks whether a partial matching is occurred. Does not modify registers. */
2379 DEFINE_COMPILER;
2380 struct sljit_jump *jump = NULL;
2381
2382 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2383
2384 if (common->mode == JIT_COMPILE)
2385 return;
2386
2387 if (!force)
2388 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2389 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2390 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2391
2392 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2394 else
2395 {
2396 if (common->partialmatchlabel != NULL)
2397 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2398 else
2399 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2400 }
2401
2402 if (jump != NULL)
2403 JUMPHERE(jump);
2404 }
2405
2406 static void check_str_end(compiler_common *common, jump_list **end_reached)
2407 {
2408 /* Does not affect registers. Usually used in a tight spot. */
2409 DEFINE_COMPILER;
2410 struct sljit_jump *jump;
2411
2412 if (common->mode == JIT_COMPILE)
2413 {
2414 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2415 return;
2416 }
2417
2418 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2419 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2420 {
2421 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2423 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2424 }
2425 else
2426 {
2427 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2428 if (common->partialmatchlabel != NULL)
2429 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2430 else
2431 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2432 }
2433 JUMPHERE(jump);
2434 }
2435
2436 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2437 {
2438 DEFINE_COMPILER;
2439 struct sljit_jump *jump;
2440
2441 if (common->mode == JIT_COMPILE)
2442 {
2443 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2444 return;
2445 }
2446
2447 /* Partial matching mode. */
2448 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2449 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2450 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2451 {
2452 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2453 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2454 }
2455 else
2456 {
2457 if (common->partialmatchlabel != NULL)
2458 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2459 else
2460 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2461 }
2462 JUMPHERE(jump);
2463 }
2464
2465 static void read_char(compiler_common *common)
2466 {
2467 /* Reads the character into TMP1, updates STR_PTR.
2468 Does not check STR_END. TMP2 Destroyed. */
2469 DEFINE_COMPILER;
2470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2471 struct sljit_jump *jump;
2472 #endif
2473
2474 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2475 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2476 if (common->utf)
2477 {
2478 #if defined COMPILE_PCRE8
2479 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2480 #elif defined COMPILE_PCRE16
2481 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2482 #endif /* COMPILE_PCRE[8|16] */
2483 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2484 JUMPHERE(jump);
2485 }
2486 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2488 }
2489
2490 static void peek_char(compiler_common *common)
2491 {
2492 /* Reads the character into TMP1, keeps STR_PTR.
2493 Does not check STR_END. TMP2 Destroyed. */
2494 DEFINE_COMPILER;
2495 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2496 struct sljit_jump *jump;
2497 #endif
2498
2499 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2500 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2501 if (common->utf)
2502 {
2503 #if defined COMPILE_PCRE8
2504 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2505 #elif defined COMPILE_PCRE16
2506 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2507 #endif /* COMPILE_PCRE[8|16] */
2508 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2509 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2510 JUMPHERE(jump);
2511 }
2512 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2513 }
2514
2515 static void read_char8_type(compiler_common *common)
2516 {
2517 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2518 DEFINE_COMPILER;
2519 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2520 struct sljit_jump *jump;
2521 #endif
2522
2523 #ifdef SUPPORT_UTF
2524 if (common->utf)
2525 {
2526 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2528 #if defined COMPILE_PCRE8
2529 /* This can be an extra read in some situations, but hopefully
2530 it is needed in most cases. */
2531 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2532 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2533 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2534 JUMPHERE(jump);
2535 #elif defined COMPILE_PCRE16
2536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2537 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2538 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2539 JUMPHERE(jump);
2540 /* Skip low surrogate if necessary. */
2541 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2542 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2543 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2544 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2545 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546 #elif defined COMPILE_PCRE32
2547 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2548 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2549 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2550 JUMPHERE(jump);
2551 #endif /* COMPILE_PCRE[8|16|32] */
2552 return;
2553 }
2554 #endif /* SUPPORT_UTF */
2555 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2556 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2557 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2558 /* The ctypes array contains only 256 values. */
2559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2560 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2561 #endif
2562 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2563 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2564 JUMPHERE(jump);
2565 #endif
2566 }
2567
2568 static void skip_char_back(compiler_common *common)
2569 {
2570 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2571 DEFINE_COMPILER;
2572 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2573 #if defined COMPILE_PCRE8
2574 struct sljit_label *label;
2575
2576 if (common->utf)
2577 {
2578 label = LABEL();
2579 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2580 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2581 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2582 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2583 return;
2584 }
2585 #elif defined COMPILE_PCRE16
2586 if (common->utf)
2587 {
2588 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2589 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2590 /* Skip low surrogate if necessary. */
2591 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2592 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2593 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2594 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2595 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2596 return;
2597 }
2598 #endif /* COMPILE_PCRE[8|16] */
2599 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2600 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2601 }
2602
2603 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2604 {
2605 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2606 DEFINE_COMPILER;
2607
2608 if (nltype == NLTYPE_ANY)
2609 {
2610 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2611 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2612 }
2613 else if (nltype == NLTYPE_ANYCRLF)
2614 {
2615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2616 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2617 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2618 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2619 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2620 }
2621 else
2622 {
2623 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2624 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2625 }
2626 }
2627
2628 #ifdef SUPPORT_UTF
2629
2630 #if defined COMPILE_PCRE8
2631 static void do_utfreadchar(compiler_common *common)
2632 {
2633 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2634 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2635 DEFINE_COMPILER;
2636 struct sljit_jump *jump;
2637
2638 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2639 /* Searching for the first zero. */
2640 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2641 jump = JUMP(SLJIT_C_NOT_ZERO);
2642 /* Two byte sequence. */
2643 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2644 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2645 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2646 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2647 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2648 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2649 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2650 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2651 JUMPHERE(jump);
2652
2653 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2654 jump = JUMP(SLJIT_C_NOT_ZERO);
2655 /* Three byte sequence. */
2656 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2657 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2658 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2659 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2661 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2662 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2663 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2665 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2666 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2667 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2668 JUMPHERE(jump);
2669
2670 /* Four byte sequence. */
2671 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2673 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2674 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2675 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2676 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2677 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2683 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2684 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2685 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2686 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2687 }
2688
2689 static void do_utfreadtype8(compiler_common *common)
2690 {
2691 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2692 of the character (>= 0xc0). Return value in TMP1. */
2693 DEFINE_COMPILER;
2694 struct sljit_jump *jump;
2695 struct sljit_jump *compare;
2696
2697 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2698
2699 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2700 jump = JUMP(SLJIT_C_NOT_ZERO);
2701 /* Two byte sequence. */
2702 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2704 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2705 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2706 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2707 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2708 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2709 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2710 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2711
2712 JUMPHERE(compare);
2713 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2714 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2715 JUMPHERE(jump);
2716
2717 /* We only have types for characters less than 256. */
2718 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2719 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2721 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2722 }
2723
2724 #elif defined COMPILE_PCRE16
2725
2726 static void do_utfreadchar(compiler_common *common)
2727 {
2728 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2729 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2730 DEFINE_COMPILER;
2731 struct sljit_jump *jump;
2732
2733 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2734 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2735 /* Do nothing, only return. */
2736 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2737
2738 JUMPHERE(jump);
2739 /* Combine two 16 bit characters. */
2740 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2742 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2743 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2744 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2745 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2746 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2747 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2748 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2749 }
2750
2751 #endif /* COMPILE_PCRE[8|16] */
2752
2753 #endif /* SUPPORT_UTF */
2754
2755 #ifdef SUPPORT_UCP
2756
2757 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2758 #define UCD_BLOCK_MASK 127
2759 #define UCD_BLOCK_SHIFT 7
2760
2761 static void do_getucd(compiler_common *common)
2762 {
2763 /* Search the UCD record for the character comes in TMP1.
2764 Returns chartype in TMP1 and UCD offset in TMP2. */
2765 DEFINE_COMPILER;
2766
2767 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2768
2769 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2770 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2771 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2773 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2774 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2775 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2776 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2778 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2779 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2780 }
2781 #endif
2782
2783 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2784 {
2785 DEFINE_COMPILER;
2786 struct sljit_label *mainloop;
2787 struct sljit_label *newlinelabel = NULL;
2788 struct sljit_jump *start;
2789 struct sljit_jump *end = NULL;
2790 struct sljit_jump *nl = NULL;
2791 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2792 struct sljit_jump *singlechar;
2793 #endif
2794 jump_list *newline = NULL;
2795 BOOL newlinecheck = FALSE;
2796 BOOL readuchar = FALSE;
2797
2798 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2799 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2800 newlinecheck = TRUE;
2801
2802 if (firstline)
2803 {
2804 /* Search for the end of the first line. */
2805 SLJIT_ASSERT(common->first_line_end != 0);
2806 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2807
2808 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2809 {
2810 mainloop = LABEL();
2811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2812 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2813 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2814 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2815 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2816 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2817 JUMPHERE(end);
2818 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2819 }
2820 else
2821 {
2822 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2823 mainloop = LABEL();
2824 /* Continual stores does not cause data dependency. */
2825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2826 read_char(common);
2827 check_newlinechar(common, common->nltype, &newline, TRUE);
2828 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2829 JUMPHERE(end);
2830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2831 set_jumps(newline, LABEL());
2832 }
2833
2834 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2835 }
2836
2837 start = JUMP(SLJIT_JUMP);
2838
2839 if (newlinecheck)
2840 {
2841 newlinelabel = LABEL();
2842 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2843 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2844 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2846 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2847 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2848 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2849 #endif
2850 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2851 nl = JUMP(SLJIT_JUMP);
2852 }
2853
2854 mainloop = LABEL();
2855
2856 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2857 #ifdef SUPPORT_UTF
2858 if (common->utf) readuchar = TRUE;
2859 #endif
2860 if (newlinecheck) readuchar = TRUE;
2861
2862 if (readuchar)
2863 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2864
2865 if (newlinecheck)
2866 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2867
2868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2869 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2870 #if defined COMPILE_PCRE8
2871 if (common->utf)
2872 {
2873 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2874 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2875 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2876 JUMPHERE(singlechar);
2877 }
2878 #elif defined COMPILE_PCRE16
2879 if (common->utf)
2880 {
2881 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2882 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2883 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2884 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2885 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2886 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2887 JUMPHERE(singlechar);
2888 }
2889 #endif /* COMPILE_PCRE[8|16] */
2890 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2891 JUMPHERE(start);
2892
2893 if (newlinecheck)
2894 {
2895 JUMPHERE(end);
2896 JUMPHERE(nl);
2897 }
2898
2899 return mainloop;
2900 }
2901
2902 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
2903 {
2904 /* Recursive function, which scans prefix literals. */
2905 int len, repeat, len_save, consumed = 0;
2906 pcre_int32 caseless, chr, mask;
2907 pcre_uchar *alternative, *cc_save;
2908 BOOL last, any;
2909
2910 repeat = 1;
2911 while (TRUE)
2912 {
2913 last = TRUE;
2914 any = FALSE;
2915 caseless = 0;
2916 switch (*cc)
2917 {
2918 case OP_CHARI:
2919 caseless = 1;
2920 case OP_CHAR:
2921 last = FALSE;
2922 cc++;
2923 break;
2924
2925 case OP_SOD:
2926 case OP_SOM:
2927 case OP_SET_SOM:
2928 case OP_NOT_WORD_BOUNDARY:
2929 case OP_WORD_BOUNDARY:
2930 case OP_EODN:
2931 case OP_EOD:
2932 case OP_CIRC:
2933 case OP_CIRCM:
2934 case OP_DOLL:
2935 case OP_DOLLM:
2936 /* Zero width assertions. */
2937 cc++;
2938 continue;
2939
2940 case OP_PLUS:
2941 case OP_MINPLUS:
2942 case OP_POSPLUS:
2943 cc++;
2944 break;
2945
2946 case OP_EXACTI:
2947 caseless = 1;
2948 case OP_EXACT:
2949 repeat = GET2(cc, 1);
2950 last = FALSE;
2951 cc += 1 + IMM2_SIZE;
2952 break;
2953
2954 case OP_PLUSI:
2955 case OP_MINPLUSI:
2956 case OP_POSPLUSI:
2957 caseless = 1;
2958 cc++;
2959 break;
2960
2961 case OP_KET:
2962 cc += 1 + LINK_SIZE;
2963 continue;
2964
2965 case OP_ALT:
2966 cc += GET(cc, 1);
2967 continue;
2968
2969 case OP_ONCE:
2970 case OP_ONCE_NC:
2971 case OP_BRA:
2972 case OP_BRAPOS:
2973 case OP_CBRA:
2974 case OP_CBRAPOS:
2975 alternative = cc + GET(cc, 1);
2976 while (*alternative == OP_ALT)
2977 {
2978 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
2979 if (max_chars == 0)
2980 return consumed;
2981 alternative += GET(alternative, 1);
2982 }
2983
2984 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
2985 cc += IMM2_SIZE;
2986 cc += 1 + LINK_SIZE;
2987 continue;
2988
2989 case OP_CLASS:
2990 case OP_NCLASS:
2991 any = TRUE;
2992 cc += 1 + 32 / sizeof(pcre_uchar);
2993 break;
2994
2995 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2996 case OP_XCLASS:
2997 any = TRUE;
2998 cc += GET(cc, 1);
2999 break;
3000 #endif
3001
3002 case OP_NOT_DIGIT:
3003 case OP_DIGIT:
3004 case OP_NOT_WHITESPACE:
3005 case OP_WHITESPACE:
3006 case OP_NOT_WORDCHAR:
3007 case OP_WORDCHAR:
3008 case OP_ANY:
3009 case OP_ALLANY:
3010 any = TRUE;
3011 cc++;
3012 break;
3013
3014 #ifdef SUPPORT_UCP
3015 case OP_NOTPROP:
3016 case OP_PROP:
3017 any = TRUE;
3018 cc += 1 + 2;
3019 break;
3020 #endif
3021
3022 case OP_TYPEEXACT:
3023 repeat = GET2(cc, 1);
3024 cc += 1 + IMM2_SIZE;
3025 continue;
3026
3027 default:
3028 return consumed;
3029 }
3030
3031 if (any)
3032 {
3033 #ifdef SUPPORT_UTF
3034 if (common->utf) return consumed;
3035 #endif
3036 #if defined COMPILE_PCRE8
3037 mask = 0xff;
3038 #elif defined COMPILE_PCRE16
3039 mask = 0xffff;
3040 #elif defined COMPILE_PCRE32
3041 mask = 0xffffffff;
3042 #else
3043 SLJIT_ASSERT_STOP();
3044 #endif
3045
3046 do
3047 {
3048 chars[0] = mask;
3049 chars[1] = mask;
3050
3051 if (--max_chars == 0)
3052 return consumed;
3053 consumed++;
3054 chars += 2;
3055 }
3056 while (--repeat > 0);
3057
3058 repeat = 1;
3059 continue;
3060 }
3061
3062 len = 1;
3063 #ifdef SUPPORT_UTF
3064 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3065 #endif
3066
3067 if (caseless != 0 && char_has_othercase(common, cc))
3068 {
3069 caseless = char_get_othercase_bit(common, cc);
3070 if (caseless == 0)
3071 return consumed;
3072 #ifdef COMPILE_PCRE8
3073 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3074 #else
3075 if ((caseless & 0x100) != 0)
3076 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3077 else
3078 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3079 #endif
3080 }
3081 else
3082 caseless = 0;
3083
3084 len_save = len;
3085 cc_save = cc;
3086 while (TRUE)
3087 {
3088 do
3089 {
3090 chr = *cc;
3091 #ifdef COMPILE_PCRE32
3092 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3093 return consumed;
3094 #endif
3095 mask = 0;
3096 if (len == (caseless & 0xff))
3097 {
3098 mask = caseless >> 8;
3099 chr |= mask;
3100 }
3101
3102 if (chars[0] == NOTACHAR)
3103 {
3104 chars[0] = chr;
3105 chars[1] = mask;
3106 }
3107 else
3108 {
3109 mask |= chars[0] ^ chr;
3110 chr |= mask;
3111 chars[0] = chr;
3112 chars[1] |= mask;
3113 }
3114
3115 len--;
3116 if (--max_chars == 0)
3117 return consumed;
3118 consumed++;
3119 chars += 2;
3120 cc++;
3121 }
3122 while (len > 0);
3123
3124 if (--repeat == 0)
3125 break;
3126
3127 len = len_save;
3128 cc = cc_save;
3129 }
3130
3131 repeat = 1;
3132 if (last)
3133 return consumed;
3134 }
3135 }
3136
3137 #define MAX_N_CHARS 16
3138
/* Emits a fast-forward loop that advances STR_PTR until the subject matches
   the pattern prefix at up to three selected character positions.

   scan_prefix() fills chars[] with pairs: chars[2 * i] is the expected value
   and chars[2 * i + 1] is a mask that the emitted code ORs into the subject
   character before comparing it with the value. Only positions whose ones[]
   count is at most 2 are used (ones[] is summed from ones_in_half_byte,
   presumably a per-nibble bit-count table, so this is the number of set mask
   bits).

   Returns FALSE without emitting any code when scan_prefix() returns at most 1
   or when no position qualifies; returns TRUE after emitting the loop.
   When firstline is set, the scan limit comes from the first_line_end local
   and the original STR_END is kept in TMP3 until the end of the emitted code. */
3139 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3140 {
3141 DEFINE_COMPILER;
3142 struct sljit_label *start;
3143 struct sljit_jump *quit;
3144 pcre_uint32 chars[MAX_N_CHARS * 2];
3145 pcre_uint8 ones[MAX_N_CHARS];
3146 pcre_uint32 mask;
3147 int i, max;
/* offsets[0] = first usable position, offsets[1] = last usable position,
   offsets[2] = a usable position between them; -1 means "not available". */
3148 int offsets[3];
3149
/* Default every slot to "no character" with an empty mask. */
3150 for (i = 0; i < MAX_N_CHARS; i++)
3151 {
3152 chars[i << 1] = NOTACHAR;
3153 chars[(i << 1) + 1] = 0;
3154 }
3155
3156 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3157
3158 if (max <= 1)
3159 return FALSE;
3160
/* Sum the per-nibble table entries of every mask into ones[]. */
3161 for (i = 0; i < max; i++)
3162 {
3163 mask = chars[(i << 1) + 1];
3164 ones[i] = ones_in_half_byte[mask & 0xf];
3165 mask >>= 4;
3166 while (mask != 0)
3167 {
3168 ones[i] += ones_in_half_byte[mask & 0xf];
3169 mask >>= 4;
3170 }
3171 }
3172
3173 offsets[0] = -1;
3174 /* Scan forward. */
3175 for (i = 0; i < max; i++)
3176 if (ones[i] <= 2) {
3177 offsets[0] = i;
3178 break;
3179 }
3180
3181 if (offsets[0] == -1)
3182 return FALSE;
3183
3184 /* Scan backward. */
3185 offsets[1] = -1;
3186 for (i = max - 1; i > offsets[0]; i--)
3187 if (ones[i] <= 2) {
3188 offsets[1] = i;
3189 break;
3190 }
3191
3192 offsets[2] = -1;
3193 if (offsets[1] >= 0)
3194 {
3195 /* Scan from middle. */
3196 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3197 if (ones[i] <= 2)
3198 {
3199 offsets[2] = i;
3200 break;
3201 }
3202
/* Nothing usable in the upper half: search the lower half downwards. */
3203 if (offsets[2] == -1)
3204 {
3205 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3206 if (ones[i] <= 2)
3207 {
3208 offsets[2] = i;
3209 break;
3210 }
3211 }
3212 }
3213
3214 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3215 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3216
/* Compact the selected pairs to the front of chars[]:
   chars[0..1] = first, chars[2..3] = middle, chars[4..5] = last position. */
3217 chars[0] = chars[offsets[0] << 1];
3218 chars[1] = chars[(offsets[0] << 1) + 1];
3219 if (offsets[2] >= 0)
3220 {
3221 chars[2] = chars[offsets[2] << 1];
3222 chars[3] = chars[(offsets[2] << 1) + 1];
3223 }
3224 if (offsets[1] >= 0)
3225 {
3226 chars[4] = chars[offsets[1] << 1];
3227 chars[5] = chars[(offsets[1] << 1) + 1];
3228 }
3229
/* Lower the scan limit by (max - 1) characters for the duration of the loop;
   it is restored after the quit label below. */
3230 max -= 1;
3231 if (firstline)
3232 {
3233 SLJIT_ASSERT(common->first_line_end != 0);
3234 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3235 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3236 }
3237 else
3238 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3239
3240 start = LABEL();
3241 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3242
/* Load the first (TMP1) and last (TMP2) characters, then step STR_PTR forward
   so that a failed comparison can jump straight back to start. */
3243 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3244 if (offsets[1] >= 0)
3245 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3246 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3247
3248 if (chars[1] != 0)
3249 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3250 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* STR_PTR has already been advanced by one, hence the "- 1". */
3251 if (offsets[2] >= 0)
3252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3253
3254 if (offsets[1] >= 0)
3255 {
3256 if (chars[5] != 0)
3257 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3258 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3259 }
3260
3261 if (offsets[2] >= 0)
3262 {
3263 if (chars[3] != 0)
3264 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3265 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3266 }
/* All selected positions matched: undo the early increment of STR_PTR. */
3267 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3268
3269 JUMPHERE(quit);
3270
/* Restore the original STR_END. */
3271 if (firstline)
3272 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3273 else
3274 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3275 return TRUE;
3276 }
3277
3278 #undef MAX_N_CHARS
3279
/* Emits a loop that advances STR_PTR one code unit at a time until the unit
   at STR_PTR equals first_char (or, when caseless is set, its other-case
   form), or until STR_PTR reaches STR_END.

   When firstline is set, STR_END is replaced by the value of the
   first_line_end local while the loop runs; the original value is kept in
   TMP3 and restored at the end. TMP1 and TMP2 are overwritten by the emitted
   code. */
3280 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3281 {
3282 DEFINE_COMPILER;
3283 struct sljit_label *start;
3284 struct sljit_jump *quit;
3285 struct sljit_jump *found;
3286 pcre_uchar oc, bit;
3287
3288 if (firstline)
3289 {
3290 SLJIT_ASSERT(common->first_line_end != 0);
3291 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3292 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3293 }
3294
3295 start = LABEL();
3296 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3297 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3298
/* oc is the other-case form of first_char; it stays equal to first_char when
   the match is caseful. */
3299 oc = first_char;
3300 if (caseless)
3301 {
3302 oc = TABLE_GET(first_char, common->fcc, first_char);
3303 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3304 if (first_char > 127 && common->utf)
3305 oc = UCD_OTHERCASE(first_char);
3306 #endif
3307 }
3308 if (first_char == oc)
3309 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3310 else
3311 {
3312 bit = first_char ^ oc;
/* The two forms differ in exactly one bit: force that bit on in the subject
   character and compare once. */
3313 if (is_powerof2(bit))
3314 {
3315 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3316 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3317 }
3318 else
3319 {
/* Otherwise compare against both forms and OR the two equality results. */
3320 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3321 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3322 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3323 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3324 found = JUMP(SLJIT_C_NOT_ZERO);
3325 }
3326 }
3327
/* No match at this position: advance by one code unit and retry. */
3328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3329 JUMPTO(SLJIT_JUMP, start);
3330 JUMPHERE(found);
3331 JUMPHERE(quit);
3332
3333 if (firstline)
3334 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3335 }
3336
3337 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3338 {
3339 DEFINE_COMPILER;
3340 struct sljit_label *loop;
3341 struct sljit_jump *lastchar;
3342 struct sljit_jump *firstchar;
3343 struct sljit_jump *quit;
3344 struct sljit_jump *foundcr = NULL;
3345 struct sljit_jump *notfoundnl;
3346 jump_list *newline = NULL;
3347
3348 if (firstline)
3349 {
3350 SLJIT_ASSERT(common->first_line_end != 0);
3351 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3352 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3353 }
3354
3355 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3356 {
3357 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3358 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3359 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3360 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3361 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3362
3363 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3364 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3365 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3366 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3367 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3368 #endif
3369 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3370
3371 loop = LABEL();
3372 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3373 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3374 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3375 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3376 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3377 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3378
3379 JUMPHERE(quit);
3380 JUMPHERE(firstchar);
3381 JUMPHERE(lastchar);
3382
3383 if (firstline)
3384 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3385 return;
3386 }
3387
3388 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3389 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3390 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3391 skip_char_back(common);
3392
3393 loop = LABEL();
3394 read_char(common);
3395 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3396 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3397 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3398 check_newlinechar(common, common->nltype, &newline, FALSE);
3399 set_jumps(newline, loop);
3400
3401 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3402 {
3403 quit = JUMP(SLJIT_JUMP);
3404 JUMPHERE(foundcr);
3405 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3406 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3407 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3408 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3409 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3410 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3411 #endif
3412 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3413 JUMPHERE(notfoundnl);
3414 JUMPHERE(quit);
3415 }
3416 JUMPHERE(lastchar);
3417 JUMPHERE(firstchar);
3418
3419 if (firstline)
3420 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3421 }
3422
3423 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3424
/* Emits a loop that advances STR_PTR until the code unit at STR_PTR is
   accepted by the start_bits bitmap, or until STR_PTR reaches STR_END.

   The membership test is generated by check_class_ranges() when it can
   express the bitmap (it returns TRUE); otherwise a direct bitmap lookup is
   emitted. In UTF mode the loaded unit is kept in TMP3 across the test, so
   the firstline variant saves the original STR_END in RETURN_ADDR rather than
   in TMP3. TMP1 and TMP2 are overwritten. */
3425 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3426 {
3427 DEFINE_COMPILER;
3428 struct sljit_label *start;
3429 struct sljit_jump *quit;
3430 struct sljit_jump *found = NULL;
3431 jump_list *matches = NULL;
3432 #ifndef COMPILE_PCRE8
3433 struct sljit_jump *jump;
3434 #endif
3435
3436 if (firstline)
3437 {
3438 SLJIT_ASSERT(common->first_line_end != 0);
3439 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3440 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3441 }
3442
3443 start = LABEL();
3444 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3445 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3446 #ifdef SUPPORT_UTF
/* Keep a copy of the unit: the membership test below may modify TMP1. */
3447 if (common->utf)
3448 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3449 #endif
3450
/* invert is TRUE here, so the jumps collected in "matches" are bound to the
   exit label at the end of this function. */
3451 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3452 {
3453 #ifndef COMPILE_PCRE8
/* Values of 255 and above are all tested as 255. */
3454 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3456 JUMPHERE(jump);
3457 #endif
/* Bitmap lookup: byte index = value >> 3, bit index = value & 7. */
3458 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3459 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3460 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3461 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3462 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3463 found = JUMP(SLJIT_C_NOT_ZERO);
3464 }
3465
3466 #ifdef SUPPORT_UTF
3467 if (common->utf)
3468 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3469 #endif
/* Not a start character: step over it. */
3470 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3471 #ifdef SUPPORT_UTF
3472 #if defined COMPILE_PCRE8
3473 if (common->utf)
3474 {
/* Lead bytes >= 0xc0 add the utf8_table4 entry for that byte to STR_PTR
   (presumably the number of trailing bytes of the sequence). */
3475 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3476 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3478 }
3479 #elif defined COMPILE_PCRE16
3480 if (common->utf)
3481 {
/* Units with (unit & 0xfc00) == 0xd800 skip one more 16-bit unit (2 bytes). */
3482 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3483 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3484 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3485 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3486 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3488 }
3489 #endif /* COMPILE_PCRE[8|16] */
3490 #endif /* SUPPORT_UTF */
3491 JUMPTO(SLJIT_JUMP, start);
3492 if (found != NULL)
3493 JUMPHERE(found);
3494 if (matches != NULL)
3495 set_jumps(matches, LABEL());
3496 JUMPHERE(quit);
3497
3498 if (firstline)
3499 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3500 }
3501
/* Emits a forward search for req_char (or, when caseless is set, its
   other-case form) in the subject.

   The search starts at STR_PTR, or one code unit later when has_firstchar is
   set. It is skipped entirely when STR_PTR + REQ_BYTE_MAX is still below
   STR_END, or when STR_PTR is below the position stored in the req_char_ptr
   local. When the character is found, its position is stored in that local.

   Returns the jump that is taken when the scan reaches STR_END without
   finding the character; the caller has to bind it. TMP1 and TMP2 are
   overwritten. */
3502 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3503 {
3504 DEFINE_COMPILER;
3505 struct sljit_label *loop;
3506 struct sljit_jump *toolong;
3507 struct sljit_jump *alreadyfound;
3508 struct sljit_jump *found;
3509 struct sljit_jump *foundoc = NULL;
3510 struct sljit_jump *notfound;
3511 pcre_uint32 oc, bit;
3512
3513 SLJIT_ASSERT(common->req_char_ptr != 0);
3514 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3515 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3516 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3517 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3518
/* TMP1 is the scan pointer from here on. */
3519 if (has_firstchar)
3520 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3521 else
3522 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3523
3524 loop = LABEL();
3525 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3526
3527 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3528 oc = req_char;
3529 if (caseless)
3530 {
3531 oc = TABLE_GET(req_char, common->fcc, req_char);
3532 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3533 if (req_char > 127 && common->utf)
3534 oc = UCD_OTHERCASE(req_char);
3535 #endif
3536 }
3537 if (req_char == oc)
3538 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3539 else
3540 {
3541 bit = req_char ^ oc;
/* One differing bit: force it on and compare once; otherwise compare with
   both forms separately. */
3542 if (is_powerof2(bit))
3543 {
3544 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3545 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3546 }
3547 else
3548 {
3549 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3550 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3551 }
3552 }
3553 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3554 JUMPTO(SLJIT_JUMP, loop);
3555
3556 JUMPHERE(found);
3557 if (foundoc)
3558 JUMPHERE(foundoc);
/* Remember where the character was found. */
3559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3560 JUMPHERE(alreadyfound);
3561 JUMPHERE(toolong);
3562 return notfound;
3563 }
3564
/* Emits a fast-call subroutine that walks the entries stored from STACK_TOP
   upwards and copies saved values back into locals.

   Each entry starts with a word that is read as a signed value:
   - positive: added to the base obtained by GET_LOCAL_BASE (TMP3) to form a
     destination address; the next two words are copied there and the entry
     is three words long;
   - zero: end of the list, the subroutine returns;
   - negative: negated, then used as above, but only one word is copied and
     the entry is two words long.
   TMP1 is the read cursor; TMP2 and TMP3 are overwritten; STACK_TOP itself is
   not changed. */
3565 static void do_revertframes(compiler_common *common)
3566 {
3567 DEFINE_COMPILER;
3568 struct sljit_jump *jump;
3569 struct sljit_label *mainloop;
3570
3571 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3572 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3573 GET_LOCAL_BASE(TMP3, 0, 0);
3574
3575 /* Drop frames until we reach STACK_TOP. */
3576 mainloop = LABEL();
3577 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the entry word with zero (signed); the flags are used twice below. */
3578 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3579 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3580
/* Positive entry: restore two words. */
3581 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3582 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3583 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3584 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3585 JUMPTO(SLJIT_JUMP, mainloop);
3586
3587 JUMPHERE(jump);
/* Still using the flags of the comparison above: negative goes on, zero returns. */
3588 jump = JUMP(SLJIT_C_SIG_LESS);
3589 /* End of dropping frames. */
3590 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3591
3592 JUMPHERE(jump);
/* Negative entry: restore a single word. */
3593 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3594 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3595 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3596 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3597 JUMPTO(SLJIT_JUMP, mainloop);
3598 }
3599
/* Emits a fast-call subroutine that tests for a word boundary at STR_PTR.

   The "is a word character" flag (0 or 1) of the character before STR_PTR is
   stored in LOCALS1 (0 when STR_PTR is at or before the "begin" field of the
   JIT arguments), the flag of the character at STR_PTR is computed in TMP2
   (0 when check_str_end() reports the end), and the final XOR of the two sets
   the flags for the caller. The return address is kept in LOCALS0.

   With use_ucp, a character is a word character when it is an underscore or
   when the value left in TMP1 by the getucd helper lies in ucp_Ll..ucp_Lu or
   ucp_Nd..ucp_No; otherwise bit 4 (ctype_word) of the ctypes table entry is
   used, and characters above 255 are treated as non-word characters. */
3600 static void check_wordboundary(compiler_common *common)
3601 {
3602 DEFINE_COMPILER;
3603 struct sljit_jump *skipread;
3604 jump_list *skipread_list = NULL;
3605 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3606 struct sljit_jump *jump;
3607 #endif
3608
3609 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3610
3611 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3612 /* Get type of the previous char, and put it to LOCALS1. */
3613 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3614 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3616 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back over the previous character, then read it again; this leaves
   STR_PTR where it was on entry. */
3617 skip_char_back(common);
3618 check_start_used_ptr(common);
3619 read_char(common);
3620
3621 /* Testing char type. */
3622 #ifdef SUPPORT_UCP
3623 if (common->use_ucp)
3624 {
/* TMP2 is preset to 1 so that an underscore can skip the property lookup. */
3625 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3626 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3627 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3628 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3629 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3630 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3631 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3632 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3633 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3634 JUMPHERE(jump);
3635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3636 }
3637 else
3638 #endif
3639 {
3640 #ifndef COMPILE_PCRE8
3641 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3642 #elif defined SUPPORT_UTF
3643 /* Here LOCALS1 has already been zeroed. */
3644 jump = NULL;
3645 if (common->utf)
3646 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3647 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (0x10) of the ctypes entry as 0 or 1. */
3648 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3649 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3650 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3651 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3652 #ifndef COMPILE_PCRE8
3653 JUMPHERE(jump);
3654 #elif defined SUPPORT_UTF
3655 if (jump != NULL)
3656 JUMPHERE(jump);
3657 #endif /* COMPILE_PCRE8 */
3658 }
3659 JUMPHERE(skipread);
3660
/* Now the character at STR_PTR; TMP2 stays 0 when the end check jumps out. */
3661 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3662 check_str_end(common, &skipread_list);
3663 peek_char(common);
3664
3665 /* Testing char type. This is a code duplication. */
3666 #ifdef SUPPORT_UCP
3667 if (common->use_ucp)
3668 {
3669 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3670 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3671 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3672 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3673 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3674 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3675 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3676 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3677 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3678 JUMPHERE(jump);
3679 }
3680 else
3681 #endif
3682 {
3683 #ifndef COMPILE_PCRE8
3684 /* TMP2 may be destroyed by peek_char. */
3685 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3686 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3687 #elif defined SUPPORT_UTF
3688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3689 jump = NULL;
3690 if (common->utf)
3691 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3692 #endif
3693 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3694 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3695 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3696 #ifndef COMPILE_PCRE8
3697 JUMPHERE(jump);
3698 #elif defined SUPPORT_UTF
3699 if (jump != NULL)
3700 JUMPHERE(jump);
3701 #endif /* COMPILE_PCRE8 */
3702 }
3703 set_jumps(skipread_list, LABEL());
3704
/* The flags are set from (current flag XOR previous flag). */
3705 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3706 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3707 }
3708
3709 /*
3710 range format:
3711
3712 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3713 ranges[1] = first bit (0 or 1)
3714 ranges[2] .. ranges[length + 1] = positions of the bit changes (where the current bit is not equal to the previous one)
3715 */
3716
/* Emits comparisons on the character in TMP1 for a set given in the range
   format: ranges[0] is the number of change positions, ranges[1] the first
   bit, and ranges[2] onwards the change positions.

   Jumps are appended to *backtracks for characters whose bit in that
   description is 0 (this reading assumes SLJIT_C_LESS and
   SLJIT_C_GREATER_EQUAL compare unsigned values). When readch is set,
   read_char() is emitted before the comparisons.

   Returns FALSE, emitting nothing, unless 0 <= ranges[0] <= 4; returns TRUE
   otherwise. The emitted code may modify TMP1, and in the four-position case
   ranges[4] and ranges[5] may be rewritten in place. */
3717 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3718 {
3719 DEFINE_COMPILER;
3720
3721 if (ranges[0] < 0 || ranges[0] > 4)
3722 return FALSE;
3723
3724 /* No character is accepted. */
3725 if (ranges[0] == 0 && ranges[1] == 0)
3726 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3727
3728 if (readch)
3729 read_char(common);
3730
3731 switch(ranges[0])
3732 {
3733 case 0:
3734 /* When ranges[1] != 0, all characters are accepted. */
3735 return TRUE;
3736
3737 case 1:
/* A single threshold at ranges[2]. */
3738 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3739 return TRUE;
3740
3741 case 2:
/* One interval [ranges[2], ranges[3]); a single character when it has length 1. */
3742 if (ranges[2] + 1 != ranges[3])
3743 {
3744 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3745 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3746 }
3747 else
3748 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3749 return TRUE;
3750
3751 case 3:
/* One threshold plus one interval; which comes first depends on the first bit. */
3752 if (ranges[1] != 0)
3753 {
3754 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3755 if (ranges[2] + 1 != ranges[3])
3756 {
3757 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3758 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3759 }
3760 else
3761 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3762 return TRUE;
3763 }
3764
3765 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2]));
3766 if (ranges[3] + 1 != ranges[4])
3767 {
3768 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]);
3769 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3770 }
3771 else
3772 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3]));
3773 return TRUE;
3774
3775 case 4:
/* Two intervals of equal length whose start positions differ in exactly one
   bit: OR that bit into TMP1 and test a single interval. */
3776 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])
3777 && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]
3778 && is_powerof2(ranges[4] - ranges[2]))
3779 {
3780 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3781 if (ranges[4] + 1 != ranges[5])
3782 {
3783 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3784 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3785 }
3786 else
3787 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3788 return TRUE;
3789 }
3790
3791 if (ranges[1] != 0)
3792 {
3793 if (ranges[2] + 1 != ranges[3])
3794 {
3795 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3796 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
/* TMP1 is now biased by ranges[2]; rebase the second interval to match. */
3797 ranges[4] -= ranges[2];
3798 ranges[5] -= ranges[2];
3799 }
3800 else
3801 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3802
3803 if (ranges[4] + 1 != ranges[5])
3804 {
3805 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3806 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3807 }
3808 else
3809 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3810 return TRUE;
3811 }
3812
/* First bit is 0: test the outer span [ranges[2], ranges[5]) first, then the
   gap [ranges[3], ranges[4]) inside it. */
3813 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3814 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[2]));
3815 if (ranges[3] + 1 != ranges[4])
3816 {
3817 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]);
3818 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3819 }
3820 else
3821 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3822 return TRUE;
3823
3824 default:
3825 SLJIT_ASSERT_STOP();
3826 return FALSE;
3827 }
3828 }
3829
3830 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3831 {
3832 int i, bit, length;
3833 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3834
3835 bit = ctypes[0] & flag;
3836 ranges[0] = -1;
3837 ranges[1] = bit != 0 ? 1 : 0;
3838 length = 0;
3839
3840 for (i = 1; i < 256; i++)
3841 if ((ctypes[i] & flag) != bit)
3842 {
3843 if (length >= MAX_RANGE_SIZE)
3844 return;
3845 ranges[2 + length] = i;
3846 length++;
3847 bit ^= flag;
3848 }
3849
3850 if (bit != 0)
3851 {
3852 if (length >= MAX_RANGE_SIZE)
3853 return;
3854 ranges[2 + length] = 256;
3855 length++;
3856 }
3857 ranges[0] = length;
3858 }
3859
3860 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3861 {
3862 int ranges[2 + MAX_RANGE_SIZE];
3863 pcre_uint8 bit, cbit, all;
3864 int i, byte, length = 0;
3865
3866 bit = bits[0] & 0x1;
3867 ranges[1] = !invert ? bit : (bit ^ 0x1);
3868 /* All bits will be zero or one (since bit is zero or one). */
3869 all = -bit;
3870
3871 for (i = 0; i < 256; )
3872 {
3873 byte = i >> 3;
3874 if ((i & 0x7) == 0 && bits[byte] == all)
3875 i += 8;
3876 else
3877 {
3878 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3879 if (cbit != bit)
3880 {
3881 if (length >= MAX_RANGE_SIZE)
3882 return FALSE;
3883 ranges[2 + length] = i;
3884 length++;
3885 bit = cbit;
3886 all = -cbit;
3887 }
3888 i++;
3889 }
3890 }
3891
3892 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3893 {
3894 if (length >= MAX_RANGE_SIZE)
3895 return FALSE;
3896 ranges[2 + length] = 256;
3897 length++;
3898 }
3899 ranges[0] = length;
3900
3901 return check_ranges(common, ranges, backtracks, FALSE);
3902 }
3903
/* Emits a fast-call subroutine that tests the character in TMP1 against
   0x0a..0x0d and 0x85, and additionally against 0x2028 and 0x2029 in the
   16/32-bit builds or when common->utf is set in the 8-bit build.

   TMP2 accumulates the comparison results and the last OP_FLAGS sets the
   flags from it. TMP1 is modified (0x0a is subtracted, and bit 0 may be set). */
3904 static void check_anynewline(compiler_common *common)
3905 {
3906 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3907 DEFINE_COMPILER;
3908
3909 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3910
/* After subtracting 0x0a, the range 0x0a..0x0d becomes 0..3. */
3911 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3912 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3913 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3914 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3915 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3916 #ifdef COMPILE_PCRE8
3917 if (common->utf)
3918 {
3919 #endif
3920 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 folds 0x2028 onto 0x2029, so one comparison covers both. */
3921 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3922 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3923 #ifdef COMPILE_PCRE8
3924 }
3925 #endif
3926 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the flags for the caller. */
3927 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3928 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3929 }
3930
/* Emits a fast-call subroutine that tests the character in TMP1 against the
   horizontal space code points 0x09, 0x20 and 0xa0, and additionally against
   0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 in the 16/32-bit
   builds or when common->utf is set in the 8-bit build.

   TMP2 accumulates the comparison results and the last OP_FLAGS sets the
   flags from it. In the wide path TMP1 is modified (0x2000 is subtracted). */
3931 static void check_hspace(compiler_common *common)
3932 {
3933 /* Check whether TMP1 contains a horizontal space character. TMP2 receives the result. */
3934 DEFINE_COMPILER;
3935
3936 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3937
3938 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3939 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3940 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3941 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3942 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3943 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3944 #ifdef COMPILE_PCRE8
3945 if (common->utf)
3946 {
3947 #endif
3948 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3949 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3950 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3951 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3952 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is biased by 0x2000, as are the constants compared with it. */
3953 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3954 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3955 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3956 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3957 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3958 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3959 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3960 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3961 #ifdef COMPILE_PCRE8
3962 }
3963 #endif
3964 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the flags for the caller. */
3965 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3966
3967 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3968 }
3969
/* Emits a fast-call subroutine that tests the character in TMP1 against the
   vertical space code points 0x0a..0x0d and 0x85, and additionally against
   0x2028 and 0x2029 in the 16/32-bit builds or when common->utf is set in
   the 8-bit build.

   TMP2 accumulates the comparison results and the last OP_FLAGS sets the
   flags from it. TMP1 is modified (0x0a is subtracted, and bit 0 may be set). */
3970 static void check_vspace(compiler_common *common)
3971 {
3972 /* Check whether TMP1 contains a vertical space character. TMP2 receives the result. */
3973 DEFINE_COMPILER;
3974
3975 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3976
/* After subtracting 0x0a, the range 0x0a..0x0d becomes 0..3. */
3977 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3978 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3979 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3980 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3981 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3982 #ifdef COMPILE_PCRE8
3983 if (common->utf)
3984 {
3985 #endif
3986 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 folds 0x2028 onto 0x2029, so one comparison covers both. */
3987 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3988 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3989 #ifdef COMPILE_PCRE8
3990 }
3991 #endif
3992 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the flags for the caller. */
3993 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3994
3995 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3996 }
3997
3998 #define CHAR1 STR_END
3999 #define CHAR2 STACK_TOP
4000
/* Emits a fast-call subroutine that compares two character sequences unit by
   unit for exact equality.

   As used by the emitted code: TMP1 points at the first sequence, TMP2 holds
   the length in bytes, and STR_PTR is first moved back by TMP2 to reach the
   start of the second sequence. TMP2 is decremented by one code unit after
   every equal pair, so it is zero when the loop ran to completion and
   non-zero when a mismatch ended it.

   CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP; their values are
   saved in TMP3 and LOCALS0 and restored before returning. */
4001 static void do_casefulcmp(compiler_common *common)
4002 {
4003 DEFINE_COMPILER;
4004 struct sljit_jump *jump;
4005 struct sljit_label *label;
4006
4007 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4008 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4009 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4010 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are biased back by one unit because the loads below address
   the unit at offset +1 (MOVU_UCHAR presumably also advances the base
   register -- confirm against the macro definition). */
4011 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4012 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4013
4014 label = LABEL();
4015 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4016 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4017 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4018 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4019 JUMPTO(SLJIT_C_NOT_ZERO, label);
4020
4021 JUMPHERE(jump);
/* Undo the one-unit bias applied to STR_PTR before the loop. */
4022 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4023 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4024 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4025 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4026 }
4027
4028 #define LCC_TABLE STACK_LIMIT
4029
/* Emits a fast-call subroutine that compares two character sequences unit by
   unit after mapping each unit through the common->lcc table.

   Register usage matches do_casefulcmp: TMP1 points at the first sequence,
   TMP2 holds the length in bytes, and STR_PTR is first moved back by TMP2.
   TMP2 is zero when the loop ran to completion and non-zero after a mismatch.
   In the 16/32-bit builds, units above 255 are compared without the table
   lookup.

   LCC_TABLE is an alias of STACK_LIMIT, CHAR1 of STR_END and CHAR2 of
   STACK_TOP; their values are saved in TMP3, LOCALS0 and LOCALS1 and restored
   before returning. */
4030 static void do_caselesscmp(compiler_common *common)
4031 {
4032 DEFINE_COMPILER;
4033 struct sljit_jump *jump;
4034 struct sljit_label *label;
4035
4036 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4037 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4038
4039 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4042 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased back by one unit because the loads below address
   the unit at offset +1. */
4043 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4044 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4045
4046 label = LABEL();
4047 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4048 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4049 #ifndef COMPILE_PCRE8
4050 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4051 #endif
4052 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4053 #ifndef COMPILE_PCRE8
4054 JUMPHERE(jump);
4055 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4056 #endif
4057 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4058 #ifndef COMPILE_PCRE8
4059 JUMPHERE(jump);
4060 #endif
4061 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4062 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4063 JUMPTO(SLJIT_C_NOT_ZERO, label);
4064
4065 JUMPHERE(jump);
/* Undo the one-unit bias applied to STR_PTR and restore the borrowed registers. */
4066 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4067 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4068 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4069 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4070 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4071 }
4072
4073 #undef LCC_TABLE
4074 #undef CHAR1
4075 #undef CHAR2
4076
4077 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4078
4079 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4080 {
4081 /* This function would be ineffective to do in JIT level. */
4082 pcre_uint32 c1, c2;
4083 const pcre_uchar *src2 = args->uchar_ptr;
4084 const pcre_uchar *end2 = args->end;
4085 const ucd_record *ur;
4086 const pcre_uint32 *pp;
4087
4088 while (src1 < end1)
4089 {
4090 if (src2 >= end2)
4091 return (pcre_uchar*)1;
4092 GETCHARINC(c1, src1);
4093 GETCHARINC(c2, src2);
4094 ur = GET_UCD(c2);
4095 if (c1 != c2 && c1 != c2 + ur->other_case)
4096 {
4097 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4098 for (;;)
4099 {
4100 if (c1 < *pp) return NULL;
4101 if (c1 == *pp++) break;
4102 }
4103 }
4104 }
4105 return src2;
4106 }
4107
4108 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4109
4110 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4111 compare_context* context, jump_list **backtracks)
4112 {
4113 DEFINE_COMPILER;
4114 unsigned int othercasebit = 0;
4115 pcre_uchar *othercasechar = NULL;
4116 #ifdef SUPPORT_UTF
4117 int utflength;
4118 #endif
4119
4120 if (caseless && char_has_othercase(common, cc))
4121 {
4122 othercasebit = char_get_othercase_bit(common, cc);
4123 SLJIT_ASSERT(othercasebit);
4124 /* Extracting bit difference info. */
4125 #if defined COMPILE_PCRE8
4126 othercasechar = cc + (othercasebit >> 8);
4127 othercasebit &= 0xff;
4128 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4129 /* Note that this code only handles characters in the BMP. If there
4130 ever are characters outside the BMP whose othercase differs in only one
4131 bit from itself (there currently are none), this code will need to be
4132 revised for COMPILE_PCRE32. */
4133 othercasechar = cc + (othercasebit >> 9);
4134 if ((othercasebit & 0x100) != 0)
4135 othercasebit = (othercasebit & 0xff) << 8;
4136 else
4137 othercasebit &= 0xff;
4138 #endif /* COMPILE_PCRE[8|16|32] */
4139 }
4140
4141 if (context->sourcereg == -1)
4142 {
4143 #if defined COMPILE_PCRE8
4144 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4145 if (context->length >= 4)
4146 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4147 else if (context->length >= 2)
4148 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4149 else
4150 #endif
4151 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4152 #elif defined COMPILE_PCRE16
4153 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4154 if (context->length >= 4)
4155 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4156 else
4157 #endif
4158 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4159 #elif defined COMPILE_PCRE32
4160 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4161 #endif /* COMPILE_PCRE[8|16|32] */
4162 context->sourcereg = TMP2;
4163 }
4164
4165 #ifdef SUPPORT_UTF
4166 utflength = 1;
4167 if (common->utf && HAS_EXTRALEN(*cc))
4168 utflength += GET_EXTRALEN(*cc);
4169
4170 do
4171 {
4172 #endif
4173
4174 context->length -= IN_UCHARS(1);
4175 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4176
4177 /* Unaligned read is supported. */
4178 if (othercasebit != 0 && othercasechar == cc)
4179 {
4180 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4181 context->oc.asuchars[context->ucharptr] = othercasebit;
4182 }
4183 else
4184 {
4185 context->c.asuchars[context->ucharptr] = *cc;
4186 context->oc.asuchars[context->ucharptr] = 0;
4187 }
4188 context->ucharptr++;
4189
4190 #if defined COMPILE_PCRE8
4191 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4192 #else
4193 if (context->ucharptr >= 2 || context->length == 0)
4194 #endif
4195 {
4196 if (context->length >= 4)
4197 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4198 else if (context->length >= 2)
4199 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4200 #if defined COMPILE_PCRE8
4201 else if (context->length >= 1)
4202 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4203 #endif /* COMPILE_PCRE8 */
4204 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4205
4206 switch(context->ucharptr)
4207 {
4208 case 4 / sizeof(pcre_uchar):
4209 if (context->oc.asint != 0)
4210 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4211 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4212 break;
4213
4214 case 2 / sizeof(pcre_uchar):
4215 if (context->oc.asushort != 0)
4216 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4217 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4218 break;
4219
4220 #ifdef COMPILE_PCRE8
4221 case 1:
4222 if (context->oc.asbyte != 0)
4223 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4224 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4225 break;
4226 #endif
4227
4228 default:
4229 SLJIT_ASSERT_STOP();
4230 break;
4231 }
4232 context->ucharptr = 0;
4233 }
4234
4235 #else
4236
4237 /* Unaligned read is unsupported or in 32 bit mode. */
4238 if (context->length >= 1)
4239 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4240
4241 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4242
4243 if (othercasebit != 0 && othercasechar == cc)
4244 {
4245 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4246 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4247 }
4248 else
4249 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4250
4251 #endif
4252
4253 cc++;
4254 #ifdef SUPPORT_UTF
4255 utflength--;
4256 }
4257 while (utflength > 0);
4258 #endif
4259
4260 return cc;
4261 }
4262
4263 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4264
/* Emits code which rebases typereg so that it holds the original value minus
(value), and records (value) in typeoffset. typereg and typeoffset are locals
of the function which expands the macro; no code is emitted when the offset
is already (value).

The body is wrapped in do { } while (0) so the macro is exactly one
statement: the final assignment cannot escape from an unbraced if/else or
loop body. Note that (value) is evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)
4274
/* Emits code which rebases TMP1 so that it holds the original value minus
(value), and records (value) in charoffset. charoffset is a local of the
function which expands the macro; no code is emitted when the offset is
already (value).

The body is wrapped in do { } while (0) so the macro is exactly one
statement: the final assignment cannot escape from an unbraced if/else or
loop body. Note that (value) is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4284
4285 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4286 {
4287 DEFINE_COMPILER;
4288 jump_list *found = NULL;
4289 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4290 pcre_int32 c, charoffset;
4291 struct sljit_jump *jump = NULL;
4292 pcre_uchar *ccbegin;
4293 int compares, invertcmp, numberofcmps;
4294
4295 #ifdef SUPPORT_UCP
4296 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4297 BOOL charsaved = FALSE;
4298 int typereg = TMP1, scriptreg = TMP1;
4299 const pcre_uint32 *other_cases;
4300 pcre_int32 typeoffset;
4301 #endif
4302
4303 /* Although SUPPORT_UTF must be defined, we are
4304 not necessary in utf mode even in 8 bit mode. */
4305 detect_partial_match(common, backtracks);
4306 read_char(common);
4307
4308 cc++;
4309 if ((cc[-1] & XCL_HASPROP) == 0)
4310 {
4311 if ((cc[-1] & XCL_MAP) != 0)
4312 {
4313 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4314 #ifdef SUPPORT_UCP
4315 charsaved = TRUE;
4316 #endif
4317 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4318 {
4319 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4320
4321 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4322 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4323 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4324 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4325 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4326 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4327 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4328
4329 JUMPHERE(jump);
4330 }
4331 else
4332 add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4333
4334 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4335 cc += 32 / sizeof(pcre_uchar);
4336 }
4337 else
4338 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4339 }
4340 else if ((cc[-1] & XCL_MAP) != 0)
4341 {
4342 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4343 #ifdef SUPPORT_UCP
4344 charsaved = TRUE;
4345 #endif
4346 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4347 {
4348 #ifdef COMPILE_PCRE8
4349 SLJIT_ASSERT(common->utf);
4350 #endif
4351 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4352
4353 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4354 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4355 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4356 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4357 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4358 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4359
4360 JUMPHERE(jump);
4361 }
4362
4363 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4364 cc += 32 / sizeof(pcre_uchar);
4365 }
4366
4367 /* Scanning the necessary info. */
4368 ccbegin = cc;
4369 compares = 0;
4370 while (*cc != XCL_END)
4371 {
4372 compares++;
4373 if (*cc == XCL_SINGLE)
4374 {
4375 cc += 2;
4376 #ifdef SUPPORT_UTF
4377 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4378 #endif
4379 #ifdef SUPPORT_UCP
4380 needschar = TRUE;
4381 #endif
4382 }
4383 else if (*cc == XCL_RANGE)
4384 {
4385 cc += 2;
4386 #ifdef SUPPORT_UTF
4387 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4388 #endif
4389 cc++;
4390 #ifdef SUPPORT_UTF
4391 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4392 #endif
4393 #ifdef SUPPORT_UCP
4394 needschar = TRUE;
4395 #endif
4396 }
4397 #ifdef SUPPORT_UCP
4398 else
4399 {
4400 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4401 cc++;
4402 switch(*cc)
4403 {
4404 case PT_ANY:
4405 break;
4406
4407 case PT_LAMP:
4408 case PT_GC:
4409 case PT_PC:
4410 case PT_ALNUM:
4411 needstype = TRUE;
4412 break;
4413
4414 case PT_SC:
4415 needsscript = TRUE;
4416 break;
4417
4418 case PT_SPACE:
4419 case PT_PXSPACE:
4420 case PT_WORD:
4421 case PT_PXGRAPH:
4422 case PT_PXPRINT:
4423 case PT_PXPUNCT:
4424 needstype = TRUE;
4425 needschar = TRUE;
4426 break;
4427
4428 case PT_CLIST:
4429 case PT_UCNC:
4430 needschar = TRUE;
4431 break;
4432
4433 default:
4434 SLJIT_ASSERT_STOP();
4435 break;
4436 }
4437 cc += 2;
4438 }
4439 #endif
4440 }
4441
4442 #ifdef SUPPORT_UCP
4443 /* Simple register allocation. TMP1 is preferred if possible. */
4444 if (needstype || needsscript)
4445 {
4446 if (needschar && !charsaved)
4447 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4448 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4449 if (needschar)
4450 {
4451 if (needstype)
4452 {
4453 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4454 typereg = RETURN_ADDR;
4455 }
4456
4457 if (needsscript)
4458 scriptreg = TMP3;
4459 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4460 }
4461 else if (needstype && needsscript)
4462 scriptreg = TMP3;
4463 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4464
4465 if (needsscript)
4466 {
4467 if (scriptreg == TMP1)
4468 {
4469 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4470 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4471 }
4472 else
4473 {
4474 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4475 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4476 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4477 }
4478 }
4479 }
4480 #endif
4481
4482 /* Generating code. */
4483 cc = ccbegin;
4484 charoffset = 0;
4485 numberofcmps = 0;
4486 #ifdef SUPPORT_UCP
4487 typeoffset = 0;
4488 #endif
4489
4490 while (*cc != XCL_END)
4491 {
4492 compares--;
4493 invertcmp = (compares == 0 && list != backtracks);
4494 jump = NULL;
4495
4496 if (*cc == XCL_SINGLE)
4497 {
4498 cc ++;
4499 #ifdef SUPPORT_UTF
4500 if (common->utf)
4501 {
4502 GETCHARINC(c, cc);
4503 }
4504 else
4505 #endif
4506 c = *cc++;
4507
4508 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4509 {
4510 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4511 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4512 numberofcmps++;
4513 }
4514 else if (numberofcmps > 0)
4515 {
4516 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4517 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4518 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4519 numberofcmps = 0;
4520 }
4521 else
4522 {
4523 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4524 numberofcmps = 0;
4525 }
4526 }
4527 else if (*cc == XCL_RANGE)
4528 {
4529 cc ++;
4530 #ifdef SUPPORT_UTF
4531 if (common->utf)
4532 {
4533 GETCHARINC(c, cc);
4534 }
4535 else
4536 #endif
4537 c = *cc++;
4538 SET_CHAR_OFFSET(c);
4539 #ifdef SUPPORT_UTF
4540 if (common->utf)
4541 {
4542 GETCHARINC(c, cc);
4543 }
4544 else
4545 #endif
4546 c = *cc++;
4547 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4548 {
4549 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4550 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4551 numberofcmps++;
4552 }
4553 else if (numberofcmps > 0)
4554 {
4555 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4556 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4557 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4558 numberofcmps = 0;
4559 }
4560 else
4561 {
4562 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4563 numberofcmps = 0;
4564 }
4565 }
4566 #ifdef SUPPORT_UCP
4567 else
4568 {
4569 if (*cc == XCL_NOTPROP)
4570 invertcmp ^= 0x1;
4571 cc++;
4572 switch(*cc)
4573 {
4574 case PT_ANY:
4575 if (list != backtracks)
4576 {
4577 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4578 continue;
4579 }
4580 else if (cc[-1] == XCL_NOTPROP)
4581 continue;
4582 jump = JUMP(SLJIT_JUMP);
4583 break;
4584
4585 case PT_LAMP:
4586 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4587 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4588 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4589 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4590 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4591 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4592 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4593 break;
4594
4595 case PT_GC:
4596 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4597 SET_TYPE_OFFSET(c);
4598 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4599 break;
4600
4601 case PT_PC:
4602 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4603 break;
4604
4605 case PT_SC:
4606 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4607 break;
4608
4609 case PT_SPACE:
4610 case PT_PXSPACE:
4611 SET_CHAR_OFFSET(9);
4612 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4613 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4614
4615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4616 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4617
4618 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4619 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4620
4621 SET_TYPE_OFFSET(ucp_Zl);
4622 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4623 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4624 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4625 break;
4626
4627 case PT_WORD:
4628 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4629 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4630 /* Fall through. */
4631
4632 case PT_ALNUM:
4633 SET_TYPE_OFFSET(ucp_Ll);
4634 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4635 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4636 SET_TYPE_OFFSET(ucp_Nd);
4637 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4638 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4639 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4640 break;
4641
4642 case PT_CLIST:
4643 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4644
4645 /* At least three characters are required.
4646 Otherwise this case would be handled by the normal code path. */
4647 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4648 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4649
4650 /* Optimizing character pairs, if their difference is power of 2. */
4651 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4652 {
4653 if (charoffset == 0)
4654 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4655 else
4656 {
4657 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4658 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4659 }
4660 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4661 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4662 other_cases += 2;
4663 }
4664 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4665 {
4666 if (charoffset == 0)
4667 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4668 else
4669 {
4670 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4671 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4672 }
4673 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4674 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4675
4676 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4677 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4678
4679 other_cases += 3;
4680 }
4681 else
4682 {
4683 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4684 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4685 }
4686
4687 while (*other_cases != NOTACHAR)
4688 {
4689 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4690 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4691 }
4692 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4693 break;
4694
4695 case PT_UCNC:
4696 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4697 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4698 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4699 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4700 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4701 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4702
4703 SET_CHAR_OFFSET(0xa0);
4704 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4705 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4706 SET_CHAR_OFFSET(0);
4707 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4708 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4709 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4710 break;
4711
4712 case PT_PXGRAPH:
4713 /* C and Z groups are the farthest two groups. */
4714 SET_TYPE_OFFSET(ucp_Ll);
4715 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4716 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4717
4718 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4719
4720 /* In case of ucp_Cf, we overwrite the result. */
4721 SET_CHAR_OFFSET(0x2066);
4722 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4723 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4724
4725 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4726 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4727
4728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4729 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4730
4731 JUMPHERE(jump);
4732 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4733 break;
4734
4735 case PT_PXPRINT:
4736 /* C and Z groups are the farthest two groups. */
4737 SET_TYPE_OFFSET(ucp_Ll);
4738 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4739 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4740
4741 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4742 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4743
4744 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4745
4746 /* In case of ucp_Cf, we overwrite the result. */
4747 SET_CHAR_OFFSET(0x2066);
4748 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4749 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4750
4751 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4752 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4753
4754 JUMPHERE(jump);
4755 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4756 break;
4757
4758 case PT_PXPUNCT:
4759 SET_TYPE_OFFSET(ucp_Sc);
4760 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4761 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4762
4763 SET_CHAR_OFFSET(0);
4764 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4765 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4766
4767 SET_TYPE_OFFSET(ucp_Pc);
4768 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4769 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4770 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4771 break;
4772 }
4773 cc += 2;
4774 }
4775 #endif
4776
4777 if (jump != NULL)
4778 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4779 }
4780
4781 if (found != NULL)
4782 set_jumps(found, LABEL());
4783 }
4784
4785 #undef SET_TYPE_OFFSET
4786 #undef SET_CHAR_OFFSET
4787
4788 #endif
4789
4790 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4791 {
4792 DEFINE_COMPILER;
4793 int length;
4794 unsigned int c, oc, bit;
4795 compare_context context;
4796 struct sljit_jump *jump[4];
4797 jump_list *end_list;
4798 #ifdef SUPPORT_UTF
4799 struct sljit_label *label;
4800 #ifdef SUPPORT_UCP
4801 pcre_uchar propdata[5];
4802 #endif
4803 #endif
4804
4805 switch(type)
4806 {
4807 case OP_SOD:
4808 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4809 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4810 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4811 return cc;
4812
4813 case OP_SOM:
4814 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4815 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4816 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4817 return cc;
4818
4819 case OP_NOT_WORD_BOUNDARY:
4820 case OP_WORD_BOUNDARY:
4821 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4822 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4823 return cc;
4824
4825 case OP_NOT_DIGIT:
4826 case OP_DIGIT:
4827 /* Digits are usually 0-9, so it is worth to optimize them. */
4828 if (common->digits[0] == -2)
4829 get_ctype_ranges(common, ctype_digit, common->digits);
4830 detect_partial_match(common, backtracks);
4831 /* Flip the starting bit in the negative case. */
4832 if (type == OP_NOT_DIGIT)
4833 common->digits[1] ^= 1;
4834 if (!check_ranges(common, common->digits, backtracks, TRUE))
4835 {
4836 read_char8_type(common);
4837 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4838 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4839 }
4840 if (type == OP_NOT_DIGIT)
4841 common->digits[1] ^= 1;
4842 return cc;
4843
4844 case OP_NOT_WHITESPACE:
4845 case OP_WHITESPACE:
4846 detect_partial_match(common, backtracks);
4847 read_char8_type(common);
4848 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4849 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4850 return cc;
4851
4852 case OP_NOT_WORDCHAR:
4853 case OP_WORDCHAR:
4854 detect_partial_match(common, backtracks);
4855 read_char8_type(common);
4856 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4857 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4858 return cc;
4859
4860 case OP_ANY:
4861 detect_partial_match(common, backtracks);
4862 read_char(common);
4863 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4864 {
4865 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4866 end_list = NULL;
4867 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4868 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4869 else
4870 check_str_end(common, &end_list);
4871
4872 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4873 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4874 set_jumps(end_list, LABEL());
4875 JUMPHERE(jump[0]);
4876 }
4877 else
4878 check_newlinechar(common, common->nltype, backtracks, TRUE);
4879 return cc;
4880
4881 case OP_ALLANY:
4882 detect_partial_match(common, backtracks);
4883 #ifdef SUPPORT_UTF
4884 if (common->utf)
4885 {
4886 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4887 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4888 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4889 #if defined COMPILE_PCRE8
4890 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4891 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4893 #elif defined COMPILE_PCRE16
4894 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4895 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4896 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4897 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4898 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4900 #endif
4901 JUMPHERE(jump[0]);
4902 #endif /* COMPILE_PCRE[8|16] */
4903 return cc;
4904 }
4905 #endif
4906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4907 return cc;
4908
4909 case OP_ANYBYTE:
4910 detect_partial_match(common, backtracks);
4911 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4912 return cc;
4913
4914 #ifdef SUPPORT_UTF
4915 #ifdef SUPPORT_UCP
4916 case OP_NOTPROP:
4917 case OP_PROP:
4918 propdata[0] = XCL_HASPROP;
4919 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4920 propdata[2] = cc[0];
4921 propdata[3] = cc[1];
4922 propdata[4] = XCL_END;
4923 compile_xclass_matchingpath(common, propdata, backtracks);
4924 return cc + 2;
4925 #endif
4926 #endif
4927
4928 case OP_ANYNL:
4929 detect_partial_match(common, backtracks);
4930 read_char(common);
4931 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4932 /* We don't need to handle soft partial matching case. */
4933 end_list = NULL;
4934 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4935 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4936 else
4937 check_str_end(common, &end_list);
4938 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4939 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4941 jump[2] = JUMP(SLJIT_JUMP);
4942 JUMPHERE(jump[0]);
4943 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4944 set_jumps(end_list, LABEL());
4945 JUMPHERE(jump[1]);
4946 JUMPHERE(jump[2]);
4947 return cc;
4948
4949 case OP_NOT_HSPACE:
4950 case OP_HSPACE:
4951 detect_partial_match(common, backtracks);
4952 read_char(common);
4953 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4954 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4955 return cc;
4956
4957 case OP_NOT_VSPACE:
4958 case OP_VSPACE:
4959 detect_partial_match(common, backtracks);
4960 read_char(common);
4961 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4962 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4963 return cc;
4964
4965 #ifdef SUPPORT_UCP
4966 case OP_EXTUNI:
4967 detect_partial_match(common, backtracks);
4968 read_char(common);
4969 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4970 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4971 /* Optimize register allocation: use a real register. */
4972 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4973 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4974
4975 label = LABEL();
4976 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4977 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4978 read_char(common);
4979 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4980 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4981 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4982
4983 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4984 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4985 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4986 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4987 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4988 JUMPTO(SLJIT_C_NOT_ZERO, label);
4989
4990 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4991 JUMPHERE(jump[0]);
4992 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4993
4994 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4995 {
4996 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4997 /* Since we successfully read a char above, partial matching must occur. */
4998 check_partial(common, TRUE);
4999 JUMPHERE(jump[0]);
5000 }
5001 return cc;
5002 #endif
5003
5004 case OP_EODN:
5005 /* Requires rather complex checks. */
5006 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5007 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5008 {
5009 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5010 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5011 if (common->mode == JIT_COMPILE)
5012 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5013 else
5014 {
5015 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5016 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5017 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5019 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5020 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5021 check_partial(common, TRUE);
5022 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5023 JUMPHERE(jump[1]);
5024 }
5025 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5026 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5027 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5028 }
5029 else if (common->nltype == NLTYPE_FIXED)
5030 {
5031 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5032 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5033 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5034 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5035 }
5036 else
5037 {
5038 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5039 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5040 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5041 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5042 jump[2] = JUMP(SLJIT_C_GREATER);
5043 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5044 /* Equal. */
5045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5046 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5047 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5048
5049 JUMPHERE(jump[1]);
5050 if (common->nltype == NLTYPE_ANYCRLF)
5051 {
5052 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5053 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5054 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5055 }
5056 else
5057 {
5058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5059 read_char(common);
5060 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5061 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5062 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5063 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5064 }
5065 JUMPHERE(jump[2]);
5066 JUMPHERE(jump[3]);
5067 }
5068 JUMPHERE(jump[0]);
5069 check_partial(common, FALSE);
5070 return cc;
5071
5072 case OP_EOD:
5073 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5074 check_partial(common, FALSE);
5075 return cc;
5076
5077 case OP_CIRC:
5078 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5079 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5080 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5081 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5082 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5083 return cc;
5084
5085 case OP_CIRCM:
5086 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5087 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5088 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5089 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5090 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5091 jump[0] = JUMP(SLJIT_JUMP);
5092 JUMPHERE(jump[1]);
5093
5094 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5095 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5096 {
5097 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5098 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5099 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5100 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5101 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5102 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5103 }
5104 else
5105 {
5106 skip_char_back(common);
5107 read_char(common);
5108 check_newlinechar(common, common->nltype, backtracks, FALSE);
5109 }
5110 JUMPHERE(jump[0]);
5111 return cc;
5112
5113 case OP_DOLL:
5114 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5115 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5116 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5117
5118 if (!common->endonly)
5119 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5120 else
5121 {
5122 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5123 check_partial(common, FALSE);
5124 }
5125 return cc;
5126
5127 case OP_DOLLM:
5128 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5129 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5130 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5131 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5132 check_partial(common, FALSE);
5133 jump[0] = JUMP(SLJIT_JUMP);
5134 JUMPHERE(jump[1]);
5135
5136 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5137 {
5138 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5139 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5140 if (common->mode == JIT_COMPILE)
5141 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5142 else
5143 {
5144 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5145 /* STR_PTR = STR_END - IN_UCHARS(1) */
5146 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5147 check_partial(common, TRUE);
5148 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5149 JUMPHERE(jump[1]);
5150 }
5151
5152 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5153 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5154 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5155 }
5156 else
5157 {
5158 peek_char(common);
5159 check_newlinechar(common, common->nltype, backtracks, FALSE);
5160 }
5161 JUMPHERE(jump[0]);
5162 return cc;
5163
5164 case OP_CHAR:
5165 case OP_CHARI:
5166 length = 1;
5167 #ifdef SUPPORT_UTF
5168 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5169 #endif
5170 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5171 {
5172 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5173 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5174
5175 context.length = IN_UCHARS(length);
5176 context.sourcereg = -1;
5177 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5178 context.ucharptr = 0;
5179 #endif
5180 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5181 }
5182 detect_partial_match(common, backtracks);
5183 read_char(common);
5184 #ifdef SUPPORT_UTF
5185 if (common->utf)
5186 {
5187 GETCHAR(c, cc);
5188 }
5189 else
5190 #endif
5191 c = *cc;
5192 if (type == OP_CHAR || !char_has_othercase(common, cc))
5193 {
5194 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5195 return cc + length;
5196 }
5197 oc = char_othercase(common, c);
5198 bit = c ^ oc;
5199 if (is_powerof2(bit))
5200 {
5201 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5202 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5203 return cc + length;
5204 }
5205 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
5206 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5207 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
5208 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5209 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5210 return cc + length;
5211
5212 case OP_NOT:
5213 case OP_NOTI:
5214 detect_partial_match(common, backtracks);
5215 length = 1;
5216 #ifdef SUPPORT_UTF
5217 if (common->utf)
5218 {
5219 #ifdef COMPILE_PCRE8
5220 c = *cc;
5221 if (c < 128)
5222 {
5223 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5224 if (type == OP_NOT || !char_has_othercase(common, cc))
5225 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5226 else
5227 {
5228 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5229 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5230 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5231 }
5232 /* Skip the variable-length character. */
5233 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5234 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5235 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5236 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5237 JUMPHERE(jump[0]);
5238 return cc + 1;
5239 }
5240 else
5241 #endif /* COMPILE_PCRE8 */
5242 {
5243 GETCHARLEN(c, cc, length);
5244 read_char(common);
5245 }
5246 }
5247 else
5248 #endif /* SUPPORT_UTF */
5249 {
5250 read_char(common);
5251 c = *cc;
5252 }
5253
5254 if (type == OP_NOT || !char_has_othercase(common, cc))
5255 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5256 else
5257 {
5258 oc = char_othercase(common, c);
5259 bit = c ^ oc;
5260 if (is_powerof2(bit))
5261 {
5262 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5263 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5264 }
5265 else
5266 {
5267 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5268 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5269 }
5270 }
5271 return cc + length;
5272
5273 case OP_CLASS:
5274 case OP_NCLASS:
5275 detect_partial_match(common, backtracks);
5276 read_char(common);
5277 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5278 return cc + 32 / sizeof(pcre_uchar);
5279
5280 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5281 jump[0] = NULL;
5282 #ifdef COMPILE_PCRE8
5283 /* This check only affects 8 bit mode. In other modes, we
5284 always need to compare the value with 255. */
5285 if (common->utf)
5286 #endif /* COMPILE_PCRE8 */
5287 {
5288 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5289 if (type == OP_CLASS)
5290 {
5291 add_jump(compiler, backtracks, jump[0]);
5292 jump[0] = NULL;
5293 }
5294 }
5295 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5296 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5297 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5298 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5299 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5300 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5301 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5302 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5303 if (jump[0] != NULL)
5304 JUMPHERE(jump[0]);
5305 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5306 return cc + 32 / sizeof(pcre_uchar);
5307
5308 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5309 case OP_XCLASS:
5310 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5311 return cc + GET(cc, 0) - 1;
5312 #endif
5313
5314 case OP_REVERSE:
5315 length = GET(cc, 0);
5316 if (length == 0)
5317 return cc + LINK_SIZE;
5318 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5319 #ifdef SUPPORT_UTF
5320 if (common->utf)
5321 {
5322 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5323 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5324 label = LABEL();
5325 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5326 skip_char_back(common);
5327 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5328 JUMPTO(SLJIT_C_NOT_ZERO, label);
5329 }
5330 else
5331 #endif
5332 {
5333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5334 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5335 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5336 }
5337 check_start_used_ptr(common);
5338 return cc + LINK_SIZE;
5339 }
5340 SLJIT_ASSERT_STOP();
5341 return cc;
5342 }
5343
5344 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5345 {
5346 /* This function consumes at least one input character. */
5347 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5348 DEFINE_COMPILER;
5349 pcre_uchar *ccbegin = cc;
5350 compare_context context;
5351 int size;
5352
5353 context.length = 0;
5354 do
5355 {
5356 if (cc >= ccend)
5357 break;
5358
5359 if (*cc == OP_CHAR)
5360 {
5361 size = 1;
5362 #ifdef SUPPORT_UTF
5363 if (common->utf && HAS_EXTRALEN(cc[1]))
5364 size += GET_EXTRALEN(cc[1]);
5365 #endif
5366 }
5367 else if (*cc == OP_CHARI)
5368 {
5369 size = 1;
5370 #ifdef SUPPORT_UTF
5371 if (common->utf)
5372 {
5373 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5374 size = 0;
5375 else if (HAS_EXTRALEN(cc[1]))
5376 size += GET_EXTRALEN(cc[1]);
5377 }
5378 else
5379 #endif
5380 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5381 size = 0;
5382 }
5383 else
5384 size = 0;
5385
5386 cc += 1 + size;
5387 context.length += IN_UCHARS(size);
5388 }
5389 while (size > 0 && context.length <= 128);
5390
5391 cc = ccbegin;
5392 if (context.length > 0)
5393 {
5394 /* We have a fixed-length byte sequence. */
5395 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5396 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5397
5398 context.sourcereg = -1;
5399 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5400 context.ucharptr = 0;
5401 #endif
5402 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5403 return cc;
5404 }
5405
5406 /* A non-fixed length character will be checked if length == 0. */
5407 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5408 }
5409
5410 /* Forward definitions. */
5411 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5412 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5413
/* Allocates a backtrack record of 'size' bytes with sljit_alloc_memory(),
clears it, and pushes it on top of the backtrack list of 'parent' with
'ccstart' as its pattern position. If the compiler reports an error after the
allocation, the enclosing function returns 'error'.

The macro relies on the local names 'compiler', 'backtrack' and 'parent'
being in scope where it is used. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5426
/* Variant for functions returning void: allocates a backtrack record of
'size' bytes with sljit_alloc_memory(), clears it, and pushes it on top of the
backtrack list of 'parent' with 'ccstart' as its pattern position. If the
compiler reports an error after the allocation, the enclosing function
returns.

The macro relies on the local names 'compiler', 'backtrack' and 'parent'
being in scope where it is used. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)

/* Accesses the local 'backtrack' pointer as a pointer to 'type'. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5441
5442 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5443 {
5444 /* The OVECTOR offset goes to TMP2. */
5445 DEFINE_COMPILER;
5446 int count = GET2(cc, 1 + IMM2_SIZE);
5447 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5448 unsigned int offset;
5449 jump_list *found = NULL;
5450
5451 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5452
5453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5454
5455 count--;
5456 while (count-- > 0)
5457 {
5458 offset = GET2(slot, 0) << 1;
5459 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5460 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5461 slot += common->name_entry_size;
5462 }
5463
5464 offset = GET2(slot, 0) << 1;
5465 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5466 if (backtracks != NULL && !common->jscript_compat)
5467 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5468
5469 set_jumps(found, LABEL());
5470 }
5471
5472 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5473 {
5474 DEFINE_COMPILER;
5475 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5476 int offset = 0;
5477 struct sljit_jump *jump = NULL;
5478 struct sljit_jump *partial;
5479 struct sljit_jump *nopartial;
5480
5481 if (ref)
5482 {
5483 offset = GET2(cc, 1) << 1;
5484 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5485 /* OVECTOR(1) contains the "string begin - 1" constant. */
5486 if (withchecks && !common->jscript_compat)
5487 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5488 }
5489 else
5490 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5491
5492 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5493 if (common->utf && *cc == OP_REFI)
5494 {
5495 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5496 if (ref)
5497 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5498 else
5499 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5500
5501 if (withchecks)
5502 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5503
5504 /* Needed to save important temporary registers. */
5505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5506 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5508 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5509 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5510 if (common->mode == JIT_COMPILE)
5511 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5512 else
5513 {
5514 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5515 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5516 check_partial(common, FALSE);
5517 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5518 JUMPHERE(nopartial);
5519 }
5520 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5521 }
5522 else
5523 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5524 {
5525 if (ref)
5526 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5527 else
5528 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5529
5530 if (withchecks)
5531 jump = JUMP(SLJIT_C_ZERO);
5532
5533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5534 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5535 if (common->mode == JIT_COMPILE)
5536 add_jump(compiler, backtracks, partial);
5537
5538 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5539 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5540
5541 if (common->mode != JIT_COMPILE)
5542 {
5543 nopartial = JUMP(SLJIT_JUMP);
5544 JUMPHERE(partial);
5545 /* TMP2 -= STR_END - STR_PTR */
5546 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5547 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5548 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5549 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5550 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5551 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5552 JUMPHERE(partial);
5553 check_partial(common, FALSE);
5554 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5555 JUMPHERE(nopartial);
5556 }
5557 }
5558
5559 if (jump != NULL)
5560 {
5561 if (emptyfail)
5562 add_jump(compiler, backtracks, jump);
5563 else
5564 JUMPHERE(jump);
5565 }
5566 }
5567
5568 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5569 {
5570 DEFINE_COMPILER;
5571 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5572 backtrack_common *backtrack;
5573 pcre_uchar type;
5574 int offset = 0;
5575 struct sljit_label *label;
5576 struct sljit_jump *zerolength;
5577 struct sljit_jump *jump = NULL;
5578 pcre_uchar *ccbegin = cc;
5579 int min = 0, max = 0;
5580 BOOL minimize;
5581
5582 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5583
5584 if (ref)
5585 offset = GET2(cc, 1) << 1;
5586 else
5587 cc += IMM2_SIZE;
5588 type = cc[1 + IMM2_SIZE];
5589
5590 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5591 minimize = (type & 0x1) != 0;
5592 switch(type)
5593 {
5594 case OP_CRSTAR:
5595 case OP_CRMINSTAR:
5596 min = 0;
5597 max = 0;
5598 cc += 1 + IMM2_SIZE + 1;
5599 break;
5600 case OP_CRPLUS:
5601 case OP_CRMINPLUS:
5602 min = 1;
5603 max = 0;
5604 cc += 1 + IMM2_SIZE + 1;
5605 break;
5606 case OP_CRQUERY:
5607 case OP_CRMINQUERY:
5608 min = 0;
5609 max = 1;
5610 cc += 1 + IMM2_SIZE + 1;
5611 break;
5612 case OP_CRRANGE:
5613 case OP_CRMINRANGE:
5614 min = GET2(cc, 1 + IMM2_SIZE + 1);
5615 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5616 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5617 break;
5618 default:
5619 SLJIT_ASSERT_STOP();
5620 break;
5621 }
5622
5623 if (!minimize)
5624 {
5625 if (min == 0)
5626 {
5627 allocate_stack(common, 2);
5628 if (ref)
5629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5632 /* Temporary release of STR_PTR. */
5633 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5634 /* Handles both invalid and empty cases. Since the minimum repeat,
5635 is zero the invalid case is basically the same as an empty case. */
5636 if (ref)
5637 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5638 else
5639 {
5640 compile_dnref_search(common, ccbegin, NULL);
5641 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5643 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5644 }
5645 /* Restore if not zero length. */
5646 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5647 }
5648 else
5649 {
5650 allocate_stack(common, 1);
5651 if (ref)
5652 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5654 if (ref)
5655 {
5656 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5657 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5658 }
5659 else
5660 {
5661 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5664 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5665 }
5666 }
5667
5668 if (min > 1 || max > 1)
5669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5670
5671 label = LABEL();
5672 if (!ref)
5673 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5674 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5675
5676 if (min > 1 || max > 1)
5677 {
5678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5679 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5681 if (min > 1)
5682 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5683 if (max > 1)
5684 {
5685 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5686 allocate_stack(common, 1);
5687 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5688 JUMPTO(SLJIT_JUMP, label);
5689 JUMPHERE(jump);
5690 }
5691 }
5692
5693 if (max == 0)
5694 {
5695 /* Includes min > 1 case as well. */
5696 allocate_stack(common, 1);
5697 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5698 JUMPTO(SLJIT_JUMP, label);
5699 }
5700
5701 JUMPHERE(zerolength);
5702 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5703
5704 count_match(common);
5705 return cc;
5706 }
5707
5708 allocate_stack(common, ref ? 2 : 3);
5709 if (ref)
5710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5712 if (type != OP_CRMINSTAR)
5713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5714
5715 if (min == 0)
5716 {
5717 /* Handles both invalid and empty cases. Since the minimum repeat,
5718 is zero the invalid case is basically the same as an empty case. */
5719 if (ref)
5720 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5721 else
5722 {
5723 compile_dnref_search(common, ccbegin, NULL);
5724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5726 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5727 }
5728 /* Length is non-zero, we can match real repeats. */
5729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5730 jump = JUMP(SLJIT_JUMP);
5731 }
5732 else
5733 {
5734 if (ref)
5735 {
5736 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5737 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5738 }
5739 else
5740 {
5741 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5742 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5744 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5745 }
5746 }
5747
5748 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5749 if (max > 0)
5750 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5751
5752 if (!ref)
5753 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5754 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5756
5757 if (min > 1)
5758 {
5759 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5760 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5762 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5763 }
5764 else if (max > 0)
5765 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5766
5767 if (jump != NULL)
5768 JUMPHERE(jump);
5769 JUMPHERE(zerolength);
5770
5771 count_match(common);
5772 return cc;
5773 }
5774
5775 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5776 {
5777 DEFINE_COMPILER;
5778 backtrack_common *backtrack;
5779 recurse_entry *entry = common->entries;
5780 recurse_entry *prev = NULL;
5781 sljit_sw start = GET(cc, 1);
5782 pcre_uchar *start_cc;
5783 BOOL needs_control_head;
5784
5785 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5786
5787 /* Inlining simple patterns. */
5788 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5789 {
5790 start_cc = common->start + start;
5791 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5792 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5793 return cc + 1 + LINK_SIZE;
5794 }
5795
5796 while (entry != NULL)
5797 {
5798 if (entry->start == start)
5799 break;
5800 prev = entry;
5801 entry = entry->next;
5802 }
5803
5804 if (entry == NULL)
5805 {
5806 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5807 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5808 return NULL;
5809 entry->next = NULL;
5810 entry->entry = NULL;
5811 entry->calls = NULL;
5812 entry->start = start;
5813
5814 if (prev != NULL)
5815 prev->next = entry;
5816 else
5817 common->entries = entry;
5818 }
5819
5820 if (common->has_set_som && common->mark_ptr != 0)
5821 {
5822 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5823 allocate_stack(common, 2);
5824 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5827 }
5828 else if (common->has_set_som || common->mark_ptr != 0)
5829 {
5830 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5831 allocate_stack(common, 1);
5832 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5833 }
5834
5835 if (entry->entry == NULL)
5836 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5837 else
5838 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5839 /* Leave if the match is failed. */
5840 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5841 return cc + 1 + LINK_SIZE;
5842 }
5843
5844 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5845 {
5846 const pcre_uchar *begin = arguments->begin;
5847 int *offset_vector = arguments->offsets;
5848 int offset_count = arguments->offset_count;
5849 int i;
5850
5851 if (PUBL(callout) == NULL)
5852 return 0;
5853
5854 callout_block->version = 2;
5855 callout_block->callout_data = arguments->callout_data;
5856
5857 /* Offsets in subject. */
5858 callout_block->subject_length = arguments->end - arguments->begin;
5859 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5860 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5861 #if defined COMPILE_PCRE8
5862 callout_block->subject = (PCRE_SPTR)begin;
5863 #elif defined COMPILE_PCRE16
5864 callout_block->subject = (PCRE_SPTR16)begin;
5865 #elif defined COMPILE_PCRE32
5866 callout_block->subject = (PCRE_SPTR32)begin;
5867 #endif
5868
5869 /* Convert and copy the JIT offset vector to the offset_vector array. */
5870 callout_block->capture_top = 0;
5871 callout_block->offset_vector = offset_vector;
5872 for (i = 2; i < offset_count; i += 2)
5873 {
5874 offset_vector[i] = jit_ovector[i] - begin;
5875 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5876 if (jit_ovector[i] >= begin)
5877 callout_block->capture_top = i;
5878 }
5879
5880 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5881 if (offset_count > 0)
5882 offset_vector[0] = -1;
5883 if (offset_count > 1)
5884 offset_vector[1] = -1;
5885 return (*PUBL(callout))(callout_block);
5886 }
5887
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the 'arg' member measured from the end of the rounded up
callout block (a negative value). */
#define CALLOUT_ARG_OFFSET(arg) (SLJIT_OFFSETOF(PUBL(callout_block), arg) - CALLOUT_ARG_SIZE)
5894
/* Emits the matching-path code for the callout item at `cc`.

   The generated code reserves CALLOUT_ARG_SIZE bytes on the JIT stack, fills
   in a PUBL(callout_block) there, calls do_callout() and then acts on the
   (sign-extended 32-bit) return value:
     > 0  jump to this item's backtrack list (backtrack->topbacktracks),
     < 0  jump to the forced-quit target,
     = 0  fall through and continue matching.

   common  compiler state.
   cc      points at the callout opcode; cc[1] is the callout number, followed
           by two LINK_SIZE values (pattern position, next item length).
   parent  enclosing backtrack; not referenced by name in this body
           (NOTE(review): presumably consumed by the PUSH_BACKTRACK macro -
           confirm against its definition).

   Returns the address of the next opcode (cc + 2 + 2 * LINK_SIZE). */
5895 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5896 {
5897 DEFINE_COMPILER;
5898 backtrack_common *backtrack;
5899
5900 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5901
     /* Reserve room for one callout block, counted in sljit_sw sized words. */
5902 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5903
     /* TMP2 = last captured group value, TMP1 = the ARGUMENTS register. */
5904 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5905 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5906 SLJIT_ASSERT(common->capture_last_ptr != 0);
5907 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5908 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5909
5910 /* These pointer-sized fields temporarily store internal variables: the
        current STR_PTR goes into offset_vector and OVECTOR(0) into subject.
        The visible tail of do_callout() overwrites both fields with their
        real values before invoking the user callout. */
5911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5913 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5914
     /* The mark field receives jit_arguments.mark_ptr when the pattern tracks
        marks (common->mark_ptr != 0), otherwise an immediate 0. */
5915 if (common->mark_ptr != 0)
5916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5917 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5918 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5920
5921 /* Needed to save important temporary registers. */
     /* STACK_TOP is parked in LOCALS0 across the call. The second call argument
        is the address of the callout block, the third the address of the
        ovector locals. NOTE(review): the first argument register is not set
        explicitly here; presumably TMP1 (holding ARGUMENTS) aliases it -
        confirm against the register #defines. */
5922 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5923 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5924 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5925 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
     /* Sign-extend the int return value, then restore and release the stack. */
5926 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5927 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5928 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5929
5930 /* Check return value: positive backtracks, negative forces a quit, zero
        falls through to the code emitted after this item. */
5931 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5932 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5933 if (common->forced_quit_label == NULL)
5934 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5935 else
5936 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5937 return cc + 2 + 2 * LINK_SIZE;
5938 }
5939
     /* The CALLOUT_ARG_* helper macros are not needed past this point. */
5940 #undef CALLOUT_ARG_SIZE
5941 #undef CALLOUT_ARG_OFFSET
5942
/* Emits the matching-path code for an assertion group.

   `cc` points either at the assertion opcode itself (asserted below to lie in
   the range OP_ASSERT .. OP_ASSERTBACK_NOT) or at an OP_BRAZERO /
   OP_BRAMINZERO prefix in front of it. Every alternative of the assertion is
   compiled in turn, followed by the code that unwinds the JIT stack for the
   "assert matched" and "assert failed" outcomes.

   common       compiler state. The fields local_exit, positive_assert,
                then_trap, quit_label, accept_label, quit,
                positive_assert_quit and accept are saved on entry and
                restored on every return path.
   cc           start of the assertion (or of its BRAZERO/BRAMINZERO prefix).
   backtrack    assertion backtrack record; framesize, private_data_ptr and
                (for BRAZERO/BRAMINZERO) matchingpath are filled in here.
   conditional  when TRUE the failure jumps are collected in
                backtrack->condfailed instead of
                backtrack->common.topbacktracks. A BRAZERO/BRAMINZERO prefix
                is not allowed in that case (asserted below).

   Returns the address following the closing opcode of the assertion
   (cc + 1 + LINK_SIZE after the last alternative), or NULL when the sljit
   compiler reports an error. */
5943 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5944 {
5945 DEFINE_COMPILER;
5946 int framesize;
5947 int extrasize;
5948 BOOL needs_control_head;
5949 int private_data_ptr;
5950 backtrack_common altbacktrack;
5951 pcre_uchar *ccbegin;
5952 pcre_uchar opcode;
5953 pcre_uchar bra = OP_BRA;
5954 jump_list *tmp = NULL;
5955 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5956 jump_list **found;
5957 /* Saving previous accept variables. */
5958 BOOL save_local_exit = common->local_exit;
5959 BOOL save_positive_assert = common->positive_assert;
5960 then_trap_backtrack *save_then_trap = common->then_trap;
5961 struct sljit_label *save_quit_label = common->quit_label;
5962 struct sljit_label *save_accept_label = common->accept_label;
5963 jump_list *save_quit = common->quit;
5964 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5965 jump_list *save_accept = common->accept;
5966 struct sljit_jump *jump;
5967 struct sljit_jump *brajump = NULL;
5968
5969 /* Assert captures then. */
5970 common->then_trap = NULL;
5971
5972 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5973 {
5974 SLJIT_ASSERT(!conditional);
5975 bra = *cc;
5976 cc++;
5977 }
5978 private_data_ptr = PRIVATE_DATA(cc);
5979 SLJIT_ASSERT(private_data_ptr != 0);
5980 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5981 backtrack->framesize = framesize;
5982 backtrack->private_data_ptr = private_data_ptr;
5983 opcode = *cc;
5984 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
     /* `found` receives the jump taken when an alternative matched: positive
        asserts collect it in the local list `tmp` (resolved further down at
        "Assert is successful"), negative asserts send it straight to `target`. */
5985 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
     /* ccbegin .. cc delimits the current alternative. */
5986 ccbegin = cc;
5987 cc += GET(cc, 1);
5988
5989 if (bra == OP_BRAMINZERO)
5990 {
5991 /* This is a braminzero backtrack path. */
5992 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5993 free_stack(common, 1);
5994 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5995 }
5996
     /* Push the state needed to undo the assertion. Without a frame only
        STR_PTR (plus the control head when required) is pushed; with a frame
        the previous private_data_ptr value is pushed as well and init_frame()
        fills in the rest. */
5997 if (framesize < 0)
5998 {
5999 extrasize = needs_control_head ? 2 : 1;
6000 if (framesize == no_frame)
6001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6002 allocate_stack(common, extrasize);
6003 if (needs_control_head)
6004 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6005 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6006 if (needs_control_head)
6007 {
6008 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6009 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6010 }
6011 }
6012 else
6013 {
6014 extrasize = needs_control_head ? 3 : 2;
6015 allocate_stack(common, framesize + extrasize);
6016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6017 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6018 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6019 if (needs_control_head)
6020 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6021 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6022 if (needs_control_head)
6023 {
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6025 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6027 }
6028 else
6029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6030 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6031 }
6032
6033 memset(&altbacktrack, 0, sizeof(backtrack_common));
6034 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6035 {
6036 /* Negative assert is stronger than positive assert. */
6037 common->local_exit = TRUE;
6038 common->quit_label = NULL;
6039 common->quit = NULL;
6040 common->positive_assert = FALSE;
6041 }
6042 else
6043 common->positive_assert = TRUE;
6044 common->positive_assert_quit = NULL;
6045
     /* One iteration per alternative of the assertion. */
6046 while (1)
6047 {
6048 common->accept_label = NULL;
6049 common->accept = NULL;
6050 altbacktrack.top = NULL;
6051 altbacktrack.topbacktracks = NULL;
6052
     /* Later alternatives restart from the STR_PTR saved on the stack. */
6053 if (*ccbegin == OP_ALT)
6054 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6055
6056 altbacktrack.cc = ccbegin;
6057 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6058 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6059 {
     /* Compiler error: restore the saved state before giving up. */
6060 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6061 {
6062 common->local_exit = save_local_exit;
6063 common->quit_label = save_quit_label;
6064 common->quit = save_quit;
6065 }
6066 common->positive_assert = save_positive_assert;
6067 common->then_trap = save_then_trap;
6068 common->accept_label = save_accept_label;
6069 common->positive_assert_quit = save_positive_assert_quit;
6070 common->accept = save_accept;
6071 return NULL;
6072 }
     /* Accept jumps recorded inside this alternative land here. */
6073 common->accept_label = LABEL();
6074 if (common->accept != NULL)
6075 set_jumps(common->accept, common->accept_label);
6076
6077 /* Reset stack. */
6078 if (framesize < 0)
6079 {
6080 if (framesize == no_frame)
6081 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6082 else
6083 free_stack(common, extrasize);
6084 if (needs_control_head)
6085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6086 }
6087 else
6088 {
6089 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6090 {
6091 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6092 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6093 if (needs_control_head)
6094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6095 }
6096 else
6097 {
6098 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6099 if (needs_control_head)
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6101 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6102 }
6103 }
6104
     /* For a negative assert a matching alternative means the assert failed:
        restore STR_PTR / private_data_ptr as required before jumping away. */
6105 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6106 {
6107 /* We know that STR_PTR was stored on the top of the stack. */
6108 if (conditional)
6109 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6110 else if (bra == OP_BRAZERO)
6111 {
6112 if (framesize < 0)
6113 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6114 else
6115 {
6116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6117 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6119 }
6120 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6122 }
6123 else if (framesize >= 0)
6124 {
6125 /* For OP_BRA and OP_BRAMINZERO. */
6126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6127 }
6128 }
     /* The alternative matched: leave through the `found` list. */
6129 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6130
     /* Backtracking code of this alternative; its unresolved backtrack jumps
        continue right after it (next alternative or the failure code). */
6131 compile_backtrackingpath(common, altbacktrack.top);
6132 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6133 {
     /* Compiler error: restore the saved state before giving up. */
6134 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6135 {
6136 common->local_exit = save_local_exit;
6137 common->quit_label = save_quit_label;
6138 common->quit = save_quit;
6139 }
6140 common->positive_assert = save_positive_assert;
6141 common->then_trap = save_then_trap;
6142 common->accept_label = save_accept_label;
6143 common->positive_assert_quit = save_positive_assert_quit;
6144 common->accept = save_accept;
6145 return NULL;
6146 }
6147 set_jumps(altbacktrack.topbacktracks, LABEL());
6148
6149 if (*cc != OP_ALT)
6150 break;
6151
6152 ccbegin = cc;
6153 cc += GET(cc, 1);
6154 }
6155
6156 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6157 {
6158 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6159 /* Makes the check less complicated below. */
6160 common->positive_assert_quit = common->quit;
6161 }
6162
6163 /* None of them matched. */
6164 if (common->positive_assert_quit != NULL)
6165 {
     /* Quit jumps recorded inside the assertion land here and rebuild
        STACK_TOP; the normal fall-through path skips this fix-up. */
6166 jump = JUMP(SLJIT_JUMP);
6167 set_jumps(common->positive_assert_quit, LABEL());
6168 SLJIT_ASSERT(framesize != no_stack);
6169 if (framesize < 0)
6170 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6171 else
6172 {
6173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6174 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6175 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6176 }
6177 JUMPHERE(jump);
6178 }
6179
     /* Restore the control head that was saved at STACK(1) on entry. */
6180 if (needs_control_head)
6181 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6182
6183 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6184 {
6185 /* Assert is failed. */
6186 if (conditional || bra == OP_BRAZERO)
6187 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6188
6189 if (framesize < 0)
6190 {
6191 /* The topmost item should be 0. */
6192 if (bra == OP_BRAZERO)
6193 {
6194 if (extrasize == 2)
6195 free_stack(common, 1);
6196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6197 }
6198 else
6199 free_stack(common, extrasize);
6200 }
6201 else
6202 {
6203 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6204 /* The topmost item should be 0. */
6205 if (bra == OP_BRAZERO)
6206 {
6207 free_stack(common, framesize + extrasize - 1);
6208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6209 }
6210 else
6211 free_stack(common, framesize + extrasize);
6212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6213 }
     /* Failure exit: goes to `target`, except for BRAZERO where it is bound to
        the matching path label further down (a failed optional assert still
        lets matching continue). */
6214 jump = JUMP(SLJIT_JUMP);
6215 if (bra != OP_BRAZERO)
6216 add_jump(compiler, target, jump);
6217
6218 /* Assert is successful. */
6219 set_jumps(tmp, LABEL());
6220 if (framesize < 0)
6221 {
6222 /* We know that STR_PTR was stored on the top of the stack. */
6223 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6224 /* Keep the STR_PTR on the top of the stack. */
6225 if (bra == OP_BRAZERO)
6226 {
6227 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6228 if (extrasize == 2)
6229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6230 }
6231 else if (bra == OP_BRAMINZERO)
6232 {
6233 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6235 }
6236 }
6237 else
6238 {
6239 if (bra == OP_BRA)
6240 {
6241 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6242 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6243 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6244 }
6245 else
6246 {
6247 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6248 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6249 if (extrasize == 2)
6250 {
6251 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6252 if (bra == OP_BRAMINZERO)
6253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6254 }
6255 else
6256 {
6257 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6259 }
6260 }
6261 }
6262
6263 if (bra == OP_BRAZERO)
6264 {
6265 backtrack->matchingpath = LABEL();
6266 SET_LABEL(jump, backtrack->matchingpath);
6267 }
6268 else if (bra == OP_BRAMINZERO)
6269 {
6270 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6271 JUMPHERE(brajump);
6272 if (framesize >= 0)
6273 {
6274 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6275 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6277 }
6278 set_jumps(backtrack->common.topbacktracks, LABEL());
6279 }
6280 }
6281 else
6282 {
6283 /* AssertNot is successful. */
6284 if (framesize < 0)
6285 {
6286 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6287 if (bra != OP_BRA)
6288 {
6289 if (extrasize == 2)
6290 free_stack(common, 1);
6291 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6292 }
6293 else
6294 free_stack(common, extrasize);
6295 }
6296 else
6297 {
6298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6299 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6300 /* The topmost item should be 0. */
6301 if (bra != OP_BRA)
6302 {
6303 free_stack(common, framesize + extrasize - 1);
6304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6305 }
6306 else
6307 free_stack(common, framesize + extrasize);
6308 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6309 }
6310
6311 if (bra == OP_BRAZERO)
6312 backtrack->matchingpath = LABEL();
6313 else if (bra == OP_BRAMINZERO)
6314 {
6315 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6316 JUMPHERE(brajump);
6317 }
6318
     /* With a BRAZERO/BRAMINZERO prefix the "assert failed" jumps are bound
        right here and the list is emptied. */
6319 if (bra != OP_BRA)
6320 {
6321 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6322 set_jumps(backtrack->common.topbacktracks, LABEL());
6323 backtrack->common.topbacktracks = NULL;
6324 }
6325 }
6326
     /* Normal exit: restore the compiler state saved on entry. */
6327 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6328 {
6329 common->local_exit = save_local_exit;
6330 common->quit_label = save_quit_label;
6331 common->quit = save_quit;
6332 }
6333 common->positive_assert = save_positive_assert;
6334 common->then_trap = save_then_trap;
6335 common->accept_label = save_accept_label;
6336 common->positive_assert_quit = save_positive_assert_quit;
6337 common->accept = save_accept;
6338 return cc + 1 + LINK_SIZE;
6339 }
6340
/* Emits the stack clean-up code shared by "once"-style groups (as the name
   suggests; the callers are outside this block).

   The emitted code repositions STACK_TOP according to `framesize`, reloads
   the saved control head from the stack into common->control_head_ptr when
   `needs_control_head` is set, and for OP_KETRMAX loads the value at the new
   stack top into TMP2.

   common              compiler state.
   ket                 closing opcode of the group (OP_KET, OP_KETRMAX or
                       OP_KETRMIN are the values tested here).
   framesize           negative when no frame was stored (no_frame is handled
                       specially), otherwise the size of the stored frame.
   private_data_ptr    offset of the group's private local slot.
   has_alternatives    together with `ket` decides whether one extra stack
                       slot is accounted for.
   needs_control_head  whether a control head value was saved on the stack. */
6341 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6342 {
6343 DEFINE_COMPILER;
6344 int stacksize;
6345
6346 if (framesize < 0)
6347 {
     /* no_frame: the private slot holds the stack position to return to.
        Otherwise pop the known number of slots. */
6348 if (framesize == no_frame)
6349 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6350 else
6351 {
6352 stacksize = needs_control_head ? 1 : 0;
6353 if (ket != OP_KET || has_alternatives)
6354 stacksize++;
6355 free_stack(common, stacksize);
6356 }
6357
     /* The saved control head sits one word above the stack top when the extra
        slot is present, otherwise at the stack top itself. */
6358 if (needs_control_head)
6359 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6360
6361 /* TMP2 which is set here used by OP_KETRMAX below. */
6362 if (ket == OP_KETRMAX)
6363 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6364 else if (ket == OP_KETRMIN)
6365 {
6366 /* Move the STR_PTR to the private_data_ptr. */
6367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6368 }
6369 }
6370 else
6371 {
     /* A frame was stored: the private slot holds the frame base, so STACK_TOP
        is recomputed from it. */
6372 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6373 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6374 if (needs_control_head)
6375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6376
6377 if (ket == OP_KETRMAX)
6378 {
6379 /* TMP2 which is set here used by OP_KETRMAX below. */
6380 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6381 }
6382 }
     /* TMP1 was loaded in whichever branch ran above. */
6383 if (needs_control_head)
6384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6385 }
6386
/* Emits the code that records a finished capture and saves the values it
   overwrites on the JIT stack.

   common            compiler state.
   stacksize         index of the first stack slot this helper may write.
   offset            ovector index of the capture start (the capture number is
                     offset >> 1).
   private_data_ptr  local slot whose value becomes the new OVECTOR(offset).

   Returns `stacksize` advanced by the number of slots written (0 to 3).
   No stack is allocated here. NOTE(review): the caller is presumably expected
   to have reserved the slots already - confirm at the call sites. */
6387 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6388 {
6389 DEFINE_COMPILER;
6390
     /* When the last-capture value is tracked: save the old value on the stack
        and replace it with this capture's number. */
6391 if (common->capture_last_ptr != 0)
6392 {
6393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6394 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6396 stacksize++;
6397 }
     /* Non-optimized bracket: push the previous OVECTOR pair, then store the
        new pair (start taken from the private slot, end = current STR_PTR). */
6398 if (common->optimized_cbracket[offset >> 1] == 0)
6399 {
6400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6401 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6402 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6404 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6407 stacksize += 2;
6408 }
6409 return stacksize;
6410 }
6411
6412 /*
6413 Handling bracketed expressions is probably the most complex part.
6414
6415 Stack layout naming characters:
6416 S - Push the current STR_PTR
6417 0 - Push a 0 (NULL)
6418 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6419 before the next alternative. Not pushed if there are no alternatives.
6420 M - Any values pushed by the current alternative. Can be empty, or anything.
6421 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6422 L - Push the previous local (pointed by localptr) to the stack
6423 () - optional values stored on the stack
6424 ()* - optional, can be stored multiple times
6425
6426 The following list shows the regular expression templates, their PCRE byte codes
6427 and stack layout supported by pcre-sljit.
6428
6429 (?:) OP_BRA | OP_KET A M
6430 () OP_CBRA | OP_KET C M
6431 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6432 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6433 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6434 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6435 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6436 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6437 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6438 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6439 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6440 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6441 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6442 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6443 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6444 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6445 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6446 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6447 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6448 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6449 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6450 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6451
6452
6453 Stack layout naming characters:
6454 A - Push the alternative index (starting from 0) on the stack.
6455 Not pushed if there are no alternatives.
6456 M - Any values pushed by the current alternative. Can be empty, or anything.
6457
6458 The next list shows the possible content of a bracket:
6459 (|) OP_*BRA | OP_ALT ... M A
6460 (?()|) OP_*COND | OP_ALT M A
6461 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6462 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6463 Or nothing, if trace is unnecessary
6464 */
6465
6466 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6467 {
6468 DEFINE_COMPILER;
6469 backtrack_common *backtrack;
6470 pcre_uchar opcode;
6471 int private_data_ptr = 0;
6472 int offset = 0;
6473 int i, stacksize;
6474 int repeat_ptr = 0, repeat_length = 0;
6475 int repeat_type = 0, repeat_count = 0;
6476 pcre_uchar *ccbegin;
6477 pcre_uchar *matchingpath;
6478 pcre_uchar *slot;
6479 pcre_uchar bra = OP_BRA;
6480 pcre_uchar ket;
6481 assert_backtrack *assert;
6482 BOOL has_alternatives;
6483 BOOL needs_control_head = FALSE;
6484 struct sljit_jump *jump;
6485 struct sljit_jump *skip;
6486 struct sljit_label *rmax_label = NULL;
6487 struct sljit_jump *braminzero = NULL;
6488
6489 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6490
6491 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6492 {
6493 bra = *cc;
6494 cc++;
6495 opcode = *cc;
6496 }
6497
6498 opcode = *cc;
6499 ccbegin = cc;
6500 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6501 ket = *matchingpath;
6502 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6503 {
6504 repeat_ptr = PRIVATE_DATA(matchingpath);
6505 repeat_length = PRIVATE_DATA(matchingpath + 1);
6506 repeat_type = PRIVATE_DATA(matchingpath + 2);
6507 repeat_count = PRIVATE_DATA(matchingpath + 3);
6508 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6509 if (repeat_type == OP_UPTO)
6510 ket = OP_KETRMAX;
6511 if (repeat_type == OP_MINUPTO)
6512 ket = OP_KETRMIN;
6513 }
6514
6515 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6516 {
6517 /* Drop this bracket_backtrack. */
6518 parent->top = backtrack->prev;
6519 return matchingpath + 1 + LINK_SIZE + repeat_length;
6520 }
6521
6522 matchingpath = ccbegin + 1 + LINK_SIZE;
6523 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6524 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6525 cc += GET(cc, 1);
6526
6527 has_alternatives = *cc == OP_ALT;
6528 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6529 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6530
6531 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6532 opcode = OP_SCOND;
6533 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6534 opcode = OP_ONCE;
6535
6536 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6537 {
6538 /* Capturing brackets has a pre-allocated space. */
6539 offset = GET2(ccbegin, 1 + LINK_SIZE);
6540 if (common->optimized_cbracket[offset] == 0)
6541 {
6542 private_data_ptr = OVECTOR_PRIV(offset);
6543 offset <<= 1;
6544 }
6545 else
6546 {
6547 offset <<= 1;
6548 private_data_ptr = OVECTOR(offset);
6549 }
6550 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6551 matchingpath += IMM2_SIZE;
6552 }
6553 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6554 {
6555 /* Other brackets simply allocate the next entry. */
6556 private_data_ptr = PRIVATE_DATA(ccbegin);
6557 SLJIT_ASSERT(private_data_ptr != 0);
6558 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6559 if (opcode == OP_ONCE)
6560 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6561 }
6562
6563 /* Instructions before the first alternative. */
6564 stacksize = 0;
6565 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6566 stacksize++;
6567 if (bra == OP_BRAZERO)
6568 stacksize++;
6569
6570 if (stacksize > 0)
6571 allocate_stack(common, stacksize);
6572
6573 stacksize = 0;
6574 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6575 {
6576 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6577 stacksize++;
6578 }
6579
6580 if (bra == OP_BRAZERO)
6581 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6582
6583 if (bra == OP_BRAMINZERO)
6584 {
6585 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6586 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6587 if (ket != OP_KETRMIN)
6588 {
6589 free_stack(common, 1);
6590 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6591 }
6592 else
6593 {
6594 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6595 {
6596 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6597 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6598 /* Nothing stored during the first run. */
6599 skip = JUMP(SLJIT_JUMP);
6600 JUMPHERE(jump);
6601 /* Checking zero-length iteration. */
6602 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6603 {
6604 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6605 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6606 }
6607 else
6608 {
6609 /* Except when the whole stack frame must be saved. */
6610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6611 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6612 }
6613 JUMPHERE(skip);
6614 }
6615 else
6616 {
6617 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6619 JUMPHERE(jump);
6620 }
6621 }
6622 }
6623
6624 if (repeat_type != 0)
6625 {
6626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6627 if (repeat_type == OP_EXACT)
6628 rmax_label = LABEL();
6629 }
6630
6631 if (ket == OP_KETRMIN)
6632 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6633
6634 if (ket == OP_KETRMAX)
6635 {
6636 rmax_label = LABEL();
6637 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6638 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6639 }
6640
6641 /* Handling capturing brackets and alternatives. */
6642 if (opcode == OP_ONCE)
6643 {
6644 stacksize = 0;
6645 if (needs_control_head)
6646 {
6647 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6648 stacksize++;
6649 }
6650
6651 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6652 {
6653 /* Neither capturing brackets nor recursions are found in the block. */
6654 if (ket == OP_KETRMIN)
6655 {
6656 stacksize += 2;
6657 if (!needs_control_head)
6658 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6659 }
6660 else
6661 {
6662 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6664 if (ket == OP_KETRMAX || has_alternatives)
6665 stacksize++;
6666 }
6667
6668 if (stacksize > 0)
6669 allocate_stack(common, stacksize);
6670
6671 stacksize = 0;
6672 if (needs_control_head)
6673 {
6674 stacksize++;
6675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6676 }
6677
6678 if (ket == OP_KETRMIN)
6679 {
6680 if (needs_control_head)
6681 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6682 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6683 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6684 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6685 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6686 }
6687 else if (ket == OP_KETRMAX || has_alternatives)
6688 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6689 }
6690 else
6691 {
6692 if (ket != OP_KET || has_alternatives)
6693 stacksize++;
6694
6695 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6696 allocate_stack(common, stacksize);
6697
6698 if (needs_control_head)
6699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6700
6701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6702 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6703
6704 stacksize = needs_control_head ? 1 : 0;
6705 if (ket != OP_KET || has_alternatives)
6706 {
6707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6709 stacksize++;
6710 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6711 }
6712 else
6713 {
6714 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6716 }
6717 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6718 }
6719 }
6720 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6721 {
6722 /* Saving the previous values. */
6723 if (common->optimized_cbracket[offset >> 1] != 0)
6724 {
6725 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6726 allocate_stack(common, 2);
6727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6728 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6731 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6732 }
6733 else
6734 {
6735 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6736 allocate_stack(common, 1);
6737 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6739 }
6740 }
6741 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6742 {
6743 /* Saving the previous value. */
6744 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6745 allocate_stack(common, 1);
6746 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6747