/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1052 - (show annotations)
Wed Oct 3 11:36:18 2012 UTC (7 years ago) by zherczeg
File MIME type: text/plain
File size: 258497 byte(s)
Optimizing clists in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory for the regex stack on the real machine stack.
69 Fast, but limited size. */
70 #define MACHINE_STACK_SIZE 32768
71
72 /* Growth rate for stack allocated by the OS. Should be a multiple
73 of the page size. */
74 #define STACK_GROWTH_RATE 8192
75
76 /* Enable to check that the allocation could destroy temporaries. */
77 #if defined SLJIT_DEBUG && SLJIT_DEBUG
78 #define DESTROY_REGISTERS 1
79 #endif
80
81 /*
82 Short summary about the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows how the code is generated for a capturing bracket
107 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
108 we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174 } executable_functions;
175
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
180
/* Values stored in the type member of stub_list. */
enum stub_types {
  stack_alloc
};
182
183 typedef struct stub_list {
184 enum stub_types type;
185 int data;
186 struct sljit_jump *start;
187 struct sljit_label *quit;
188 struct stub_list *next;
189 } stub_list;
190
191 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192
193 /* The following structure is the key data type for the recursive
194 code generator. It is allocated by compile_matchingpath, and contains
195 the aguments for compile_backtrackingpath. Must be the first member
196 of its descendants. */
197 typedef struct backtrack_common {
198 /* Concatenation stack. */
199 struct backtrack_common *prev;
200 jump_list *nextbacktracks;
201 /* Internal stack (for component operators). */
202 struct backtrack_common *top;
203 jump_list *topbacktracks;
204 /* Opcode pointer. */
205 pcre_uchar *cc;
206 } backtrack_common;
207
208 typedef struct assert_backtrack {
209 backtrack_common common;
210 jump_list *condfailed;
211 /* Less than 0 (-1) if a frame is not needed. */
212 int framesize;
213 /* Points to our private memory word on the stack. */
214 int private_data_ptr;
215 /* For iterators. */
216 struct sljit_label *matchingpath;
217 } assert_backtrack;
218
219 typedef struct bracket_backtrack {
220 backtrack_common common;
221 /* Where to coninue if an alternative is successfully matched. */
222 struct sljit_label *alternative_matchingpath;
223 /* For rmin and rmax iterators. */
224 struct sljit_label *recursive_matchingpath;
225 /* For greedy ? operator. */
226 struct sljit_label *zero_matchingpath;
227 /* Contains the branches of a failed condition. */
228 union {
229 /* Both for OP_COND, OP_SCOND. */
230 jump_list *condfailed;
231 assert_backtrack *assert;
232 /* For OP_ONCE. -1 if not needed. */
233 int framesize;
234 } u;
235 /* Points to our private memory word on the stack. */
236 int private_data_ptr;
237 } bracket_backtrack;
238
239 typedef struct bracketpos_backtrack {
240 backtrack_common common;
241 /* Points to our private memory word on the stack. */
242 int private_data_ptr;
243 /* Reverting stack is needed. */
244 int framesize;
245 /* Allocated stack size. */
246 int stacksize;
247 } bracketpos_backtrack;
248
249 typedef struct braminzero_backtrack {
250 backtrack_common common;
251 struct sljit_label *matchingpath;
252 } braminzero_backtrack;
253
254 typedef struct iterator_backtrack {
255 backtrack_common common;
256 /* Next iteration. */
257 struct sljit_label *matchingpath;
258 } iterator_backtrack;
259
260 typedef struct recurse_entry {
261 struct recurse_entry *next;
262 /* Contains the function entry. */
263 struct sljit_label *entry;
264 /* Collects the calls until the function is not created. */
265 jump_list *calls;
266 /* Points to the starting opcode. */
267 int start;
268 } recurse_entry;
269
270 typedef struct recurse_backtrack {
271 backtrack_common common;
272 } recurse_backtrack;
273
274 #define MAX_RANGE_SIZE 6
275
276 typedef struct compiler_common {
277 struct sljit_compiler *compiler;
278 pcre_uchar *start;
279
280 /* Maps private data offset to each opcode. */
281 int *private_data_ptrs;
282 /* Tells whether the capturing bracket is optimized. */
283 pcre_uint8 *optimized_cbracket;
284 /* Starting offset of private data for capturing brackets. */
285 int cbraptr;
286 /* OVector starting point. Must be divisible by 2. */
287 int ovector_start;
288 /* Last known position of the requested byte. */
289 int req_char_ptr;
290 /* Head of the last recursion. */
291 int recursive_head;
292 /* First inspected character for partial matching. */
293 int start_used_ptr;
294 /* Starting pointer for partial soft matches. */
295 int hit_start;
296 /* End pointer of the first line. */
297 int first_line_end;
298 /* Points to the marked string. */
299 int mark_ptr;
300
301 /* Flipped and lower case tables. */
302 const pcre_uint8 *fcc;
303 sljit_w lcc;
304 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305 int mode;
306 /* Newline control. */
307 int nltype;
308 int newline;
309 int bsr_nltype;
310 /* Dollar endonly. */
311 int endonly;
312 BOOL has_set_som;
313 /* Tables. */
314 sljit_w ctypes;
315 int digits[2 + MAX_RANGE_SIZE];
316 /* Named capturing brackets. */
317 sljit_uw name_table;
318 sljit_w name_count;
319 sljit_w name_entry_size;
320
321 /* Labels and jump lists. */
322 struct sljit_label *partialmatchlabel;
323 struct sljit_label *quitlabel;
324 struct sljit_label *acceptlabel;
325 stub_list *stubs;
326 recurse_entry *entries;
327 recurse_entry *currententry;
328 jump_list *partialmatch;
329 jump_list *quit;
330 jump_list *accept;
331 jump_list *calllimit;
332 jump_list *stackalloc;
333 jump_list *revertframes;
334 jump_list *wordboundary;
335 jump_list *anynewline;
336 jump_list *hspace;
337 jump_list *vspace;
338 jump_list *casefulcmp;
339 jump_list *caselesscmp;
340 BOOL jscript_compat;
341 #ifdef SUPPORT_UTF
342 BOOL utf;
343 #ifdef SUPPORT_UCP
344 BOOL use_ucp;
345 #endif
346 jump_list *utfreadchar;
347 #ifdef COMPILE_PCRE8
348 jump_list *utfreadtype8;
349 #endif
350 #endif /* SUPPORT_UTF */
351 #ifdef SUPPORT_UCP
352 jump_list *getucd;
353 #endif
354 } compiler_common;
355
/* Context used by byte_sequence_compare. The two unions (c and oc) exist
only when SLJIT_UNALIGNED is enabled; each one overlays an int, an unsigned
short and an array of character units sized for the compiled character
width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
389
/* Special (non-positive) markers that init_frame writes into saved stack
frames: frame_setstrbegin precedes a saved OVECTOR(0) value and
frame_setmark precedes a saved mark pointer. */
enum {
  frame_setmark = -2,
  frame_setstrbegin = -1,
  frame_end = 0
};
395
/* Remove the CMP macro coming from sljit; this file defines its own CMP
among the shortcut macros. */
#undef CMP

/* Byte offset of the i-th element of the stack: element 0 is at
-sizeof(sljit_w) and larger indices are at lower offsets. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))

/* Names for the sljit registers used by the generated code. */
/* Temporary registers. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define TMP2          SLJIT_TEMPORARY_REG3
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
#define TMP3          SLJIT_TEMPORARY_EREG2
/* Saved registers. */
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
412
413 /* Local space layout. */
414 /* These two locals can be used by the current opcode. */
415 #define LOCALS0 (0 * sizeof(sljit_w))
416 #define LOCALS1 (1 * sizeof(sljit_w))
417 /* Two local variables for possessive quantifiers (char1 cannot use them). */
418 #define POSSESSIVE0 (2 * sizeof(sljit_w))
419 #define POSSESSIVE1 (3 * sizeof(sljit_w))
420 /* Max limit of recursions. */
421 #define CALL_LIMIT (4 * sizeof(sljit_w))
422 /* The output vector is stored on the stack, and contains pointers
423 to characters. The vector data is divided into two groups: the first
424 group contains the start / end character pointers, and the second is
425 the start pointers when the end of the capturing group has not yet reached. */
426 #define OVECTOR_START (common->ovector_start)
427 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
428 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
429 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
430
431 #ifdef COMPILE_PCRE8
432 #define MOV_UCHAR SLJIT_MOV_UB
433 #define MOVU_UCHAR SLJIT_MOVU_UB
434 #else
435 #ifdef COMPILE_PCRE16
436 #define MOV_UCHAR SLJIT_MOV_UH
437 #define MOVU_UCHAR SLJIT_MOVU_UH
438 #else
439 #error Unsupported compiling mode
440 #endif
441 #endif
442
/* Shortcuts for the sljit emitter calls. All of them (except
DEFINE_COMPILER itself) expect a local variable named 'compiler', which
DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Emit a one / two operand instruction. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Emit a label at the current position. */
#define LABEL() sljit_emit_label(compiler)

/* Emit a jump; JUMPTO also binds it to an existing label, while JUMPHERE
binds an earlier jump to a label emitted now. */
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))

/* Emit a compare-and-jump; CMPTO also binds it to an existing label. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define COND_VALUE(op, dst, dstw, type) sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))
466
467 static pcre_uchar* bracketend(pcre_uchar* cc)
468 {
469 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
470 do cc += GET(cc, 1); while (*cc == OP_ALT);
471 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
472 cc += 1 + LINK_SIZE;
473 return cc;
474 }
475
476 /* Functions which might need modification for all new supported opcodes:
477 next_opcode
478 get_private_data_length
479 set_private_data_ptrs
480 get_framesize
481 init_frame
482 get_private_data_length_for_copy
483 copy_private_data
484 compile_matchingpath
485 compile_backtrackingpath
486 */
487
488 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
489 {
490 SLJIT_UNUSED_ARG(common);
491 switch(*cc)
492 {
493 case OP_SOD:
494 case OP_SOM:
495 case OP_SET_SOM:
496 case OP_NOT_WORD_BOUNDARY:
497 case OP_WORD_BOUNDARY:
498 case OP_NOT_DIGIT:
499 case OP_DIGIT:
500 case OP_NOT_WHITESPACE:
501 case OP_WHITESPACE:
502 case OP_NOT_WORDCHAR:
503 case OP_WORDCHAR:
504 case OP_ANY:
505 case OP_ALLANY:
506 case OP_ANYNL:
507 case OP_NOT_HSPACE:
508 case OP_HSPACE:
509 case OP_NOT_VSPACE:
510 case OP_VSPACE:
511 case OP_EXTUNI:
512 case OP_EODN:
513 case OP_EOD:
514 case OP_CIRC:
515 case OP_CIRCM:
516 case OP_DOLL:
517 case OP_DOLLM:
518 case OP_TYPESTAR:
519 case OP_TYPEMINSTAR:
520 case OP_TYPEPLUS:
521 case OP_TYPEMINPLUS:
522 case OP_TYPEQUERY:
523 case OP_TYPEMINQUERY:
524 case OP_TYPEPOSSTAR:
525 case OP_TYPEPOSPLUS:
526 case OP_TYPEPOSQUERY:
527 case OP_CRSTAR:
528 case OP_CRMINSTAR:
529 case OP_CRPLUS:
530 case OP_CRMINPLUS:
531 case OP_CRQUERY:
532 case OP_CRMINQUERY:
533 case OP_DEF:
534 case OP_BRAZERO:
535 case OP_BRAMINZERO:
536 case OP_BRAPOSZERO:
537 case OP_COMMIT:
538 case OP_FAIL:
539 case OP_ACCEPT:
540 case OP_ASSERT_ACCEPT:
541 case OP_SKIPZERO:
542 return cc + 1;
543
544 case OP_ANYBYTE:
545 #ifdef SUPPORT_UTF
546 if (common->utf) return NULL;
547 #endif
548 return cc + 1;
549
550 case OP_CHAR:
551 case OP_CHARI:
552 case OP_NOT:
553 case OP_NOTI:
554 case OP_STAR:
555 case OP_MINSTAR:
556 case OP_PLUS:
557 case OP_MINPLUS:
558 case OP_QUERY:
559 case OP_MINQUERY:
560 case OP_POSSTAR:
561 case OP_POSPLUS:
562 case OP_POSQUERY:
563 case OP_STARI:
564 case OP_MINSTARI:
565 case OP_PLUSI:
566 case OP_MINPLUSI:
567 case OP_QUERYI:
568 case OP_MINQUERYI:
569 case OP_POSSTARI:
570 case OP_POSPLUSI:
571 case OP_POSQUERYI:
572 case OP_NOTSTAR:
573 case OP_NOTMINSTAR:
574 case OP_NOTPLUS:
575 case OP_NOTMINPLUS:
576 case OP_NOTQUERY:
577 case OP_NOTMINQUERY:
578 case OP_NOTPOSSTAR:
579 case OP_NOTPOSPLUS:
580 case OP_NOTPOSQUERY:
581 case OP_NOTSTARI:
582 case OP_NOTMINSTARI:
583 case OP_NOTPLUSI:
584 case OP_NOTMINPLUSI:
585 case OP_NOTQUERYI:
586 case OP_NOTMINQUERYI:
587 case OP_NOTPOSSTARI:
588 case OP_NOTPOSPLUSI:
589 case OP_NOTPOSQUERYI:
590 cc += 2;
591 #ifdef SUPPORT_UTF
592 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
593 #endif
594 return cc;
595
596 case OP_UPTO:
597 case OP_MINUPTO:
598 case OP_EXACT:
599 case OP_POSUPTO:
600 case OP_UPTOI:
601 case OP_MINUPTOI:
602 case OP_EXACTI:
603 case OP_POSUPTOI:
604 case OP_NOTUPTO:
605 case OP_NOTMINUPTO:
606 case OP_NOTEXACT:
607 case OP_NOTPOSUPTO:
608 case OP_NOTUPTOI:
609 case OP_NOTMINUPTOI:
610 case OP_NOTEXACTI:
611 case OP_NOTPOSUPTOI:
612 cc += 2 + IMM2_SIZE;
613 #ifdef SUPPORT_UTF
614 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
615 #endif
616 return cc;
617
618 case OP_NOTPROP:
619 case OP_PROP:
620 return cc + 1 + 2;
621
622 case OP_TYPEUPTO:
623 case OP_TYPEMINUPTO:
624 case OP_TYPEEXACT:
625 case OP_TYPEPOSUPTO:
626 case OP_REF:
627 case OP_REFI:
628 case OP_CREF:
629 case OP_NCREF:
630 case OP_RREF:
631 case OP_NRREF:
632 case OP_CLOSE:
633 cc += 1 + IMM2_SIZE;
634 return cc;
635
636 case OP_CRRANGE:
637 case OP_CRMINRANGE:
638 return cc + 1 + 2 * IMM2_SIZE;
639
640 case OP_CLASS:
641 case OP_NCLASS:
642 return cc + 1 + 32 / sizeof(pcre_uchar);
643
644 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
645 case OP_XCLASS:
646 return cc + GET(cc, 1);
647 #endif
648
649 case OP_RECURSE:
650 case OP_ASSERT:
651 case OP_ASSERT_NOT:
652 case OP_ASSERTBACK:
653 case OP_ASSERTBACK_NOT:
654 case OP_REVERSE:
655 case OP_ONCE:
656 case OP_ONCE_NC:
657 case OP_BRA:
658 case OP_BRAPOS:
659 case OP_COND:
660 case OP_SBRA:
661 case OP_SBRAPOS:
662 case OP_SCOND:
663 case OP_ALT:
664 case OP_KET:
665 case OP_KETRMAX:
666 case OP_KETRMIN:
667 case OP_KETRPOS:
668 return cc + 1 + LINK_SIZE;
669
670 case OP_CBRA:
671 case OP_CBRAPOS:
672 case OP_SCBRA:
673 case OP_SCBRAPOS:
674 return cc + 1 + LINK_SIZE + IMM2_SIZE;
675
676 case OP_MARK:
677 return cc + 1 + 2 + cc[1];
678
679 default:
680 return NULL;
681 }
682 }
683
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. The _1 groups are given one private data word there,
the _2A and _2B groups two; opcodes of the _2B groups are followed by an
additional IMM2 value. */

/* Character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: case OP_MINPLUS: \
  case OP_QUERY: case OP_MINQUERY: \
  case OP_MINSTARI: case OP_MINPLUSI: \
  case OP_QUERYI: case OP_MINQUERYI: \
  case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
  case OP_NOTQUERY: case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: case OP_PLUS: \
  case OP_STARI: case OP_PLUSI: \
  case OP_NOTSTAR: case OP_NOTPLUS: \
  case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators with an IMM2 value, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: case OP_MINUPTO: \
  case OP_UPTOI: case OP_MINUPTOI: \
  case OP_NOTUPTO: case OP_NOTMINUPTO: \
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators with an IMM2 value, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
735
736 static int get_class_iterator_size(pcre_uchar *cc)
737 {
738 switch(*cc)
739 {
740 case OP_CRSTAR:
741 case OP_CRPLUS:
742 return 2;
743
744 case OP_CRMINSTAR:
745 case OP_CRMINPLUS:
746 case OP_CRQUERY:
747 case OP_CRMINQUERY:
748 return 1;
749
750 case OP_CRRANGE:
751 case OP_CRMINRANGE:
752 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
753 return 0;
754 return 2;
755
756 default:
757 return 0;
758 }
759 }
760
761 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
762 {
763 int private_data_length = 0;
764 pcre_uchar *alternative;
765 pcre_uchar *name;
766 pcre_uchar *end = NULL;
767 int space, size, bracketlen, i;
768
769 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
770 while (cc < ccend)
771 {
772 space = 0;
773 size = 0;
774 bracketlen = 0;
775 switch(*cc)
776 {
777 case OP_SET_SOM:
778 common->has_set_som = TRUE;
779 cc += 1;
780 break;
781
782 case OP_REF:
783 case OP_REFI:
784 common->optimized_cbracket[GET2(cc, 1)] = 0;
785 cc += 1 + IMM2_SIZE;
786 break;
787
788 case OP_ASSERT:
789 case OP_ASSERT_NOT:
790 case OP_ASSERTBACK:
791 case OP_ASSERTBACK_NOT:
792 case OP_ONCE:
793 case OP_ONCE_NC:
794 case OP_BRAPOS:
795 case OP_SBRA:
796 case OP_SBRAPOS:
797 private_data_length += sizeof(sljit_w);
798 bracketlen = 1 + LINK_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 private_data_length += sizeof(sljit_w);
804 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
805 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
806 break;
807
808 case OP_COND:
809 case OP_SCOND:
810 bracketlen = cc[1 + LINK_SIZE];
811 if (bracketlen == OP_CREF)
812 {
813 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
814 common->optimized_cbracket[bracketlen] = 0;
815 }
816 else if (bracketlen == OP_NCREF)
817 {
818 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819 name = (pcre_uchar *)common->name_table;
820 alternative = name;
821 for (i = 0; i < common->name_count; i++)
822 {
823 if (GET2(name, 0) == bracketlen) break;
824 name += common->name_entry_size;
825 }
826 SLJIT_ASSERT(i != common->name_count);
827
828 for (i = 0; i < common->name_count; i++)
829 {
830 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
831 common->optimized_cbracket[GET2(alternative, 0)] = 0;
832 alternative += common->name_entry_size;
833 }
834 }
835
836 if (*cc == OP_COND)
837 {
838 /* Might be a hidden SCOND. */
839 alternative = cc + GET(cc, 1);
840 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
841 private_data_length += sizeof(sljit_w);
842 }
843 else
844 private_data_length += sizeof(sljit_w);
845 bracketlen = 1 + LINK_SIZE;
846 break;
847
848 case OP_BRA:
849 bracketlen = 1 + LINK_SIZE;
850 break;
851
852 case OP_CBRA:
853 case OP_SCBRA:
854 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
855 break;
856
857 CASE_ITERATOR_PRIVATE_DATA_1
858 space = 1;
859 size = -2;
860 break;
861
862 CASE_ITERATOR_PRIVATE_DATA_2A
863 space = 2;
864 size = -2;
865 break;
866
867 CASE_ITERATOR_PRIVATE_DATA_2B
868 space = 2;
869 size = -(2 + IMM2_SIZE);
870 break;
871
872 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
873 space = 1;
874 size = 1;
875 break;
876
877 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
878 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
879 space = 2;
880 size = 1;
881 break;
882
883 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
884 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
885 space = 2;
886 size = 1 + IMM2_SIZE;
887 break;
888
889 case OP_CLASS:
890 case OP_NCLASS:
891 size += 1 + 32 / sizeof(pcre_uchar);
892 space = get_class_iterator_size(cc + size);
893 break;
894
895 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
896 case OP_XCLASS:
897 size = GET(cc, 1);
898 space = get_class_iterator_size(cc + size);
899 break;
900 #endif
901
902 case OP_RECURSE:
903 /* Set its value only once. */
904 if (common->recursive_head == 0)
905 {
906 common->recursive_head = common->ovector_start;
907 common->ovector_start += sizeof(sljit_w);
908 }
909 cc += 1 + LINK_SIZE;
910 break;
911
912 case OP_MARK:
913 if (common->mark_ptr == 0)
914 {
915 common->mark_ptr = common->ovector_start;
916 common->ovector_start += sizeof(sljit_w);
917 }
918 cc += 1 + 2 + cc[1];
919 break;
920
921 default:
922 cc = next_opcode(common, cc);
923 if (cc == NULL)
924 return -1;
925 break;
926 }
927
928 if (space > 0 && cc >= end)
929 private_data_length += sizeof(sljit_w) * space;
930
931 if (size != 0)
932 {
933 if (size < 0)
934 {
935 cc += -size;
936 #ifdef SUPPORT_UTF
937 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
938 #endif
939 }
940 else
941 cc += size;
942 }
943
944 if (bracketlen > 0)
945 {
946 if (cc >= end)
947 {
948 end = bracketend(cc);
949 if (end[-1 - LINK_SIZE] == OP_KET)
950 end = NULL;
951 }
952 cc += bracketlen;
953 }
954 }
955 return private_data_length;
956 }
957
958 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
959 {
960 pcre_uchar *cc = common->start;
961 pcre_uchar *alternative;
962 pcre_uchar *end = NULL;
963 int space, size, bracketlen;
964
965 while (cc < ccend)
966 {
967 space = 0;
968 size = 0;
969 bracketlen = 0;
970 switch(*cc)
971 {
972 case OP_ASSERT:
973 case OP_ASSERT_NOT:
974 case OP_ASSERTBACK:
975 case OP_ASSERTBACK_NOT:
976 case OP_ONCE:
977 case OP_ONCE_NC:
978 case OP_BRAPOS:
979 case OP_SBRA:
980 case OP_SBRAPOS:
981 case OP_SCOND:
982 common->private_data_ptrs[cc - common->start] = private_data_ptr;
983 private_data_ptr += sizeof(sljit_w);
984 bracketlen = 1 + LINK_SIZE;
985 break;
986
987 case OP_CBRAPOS:
988 case OP_SCBRAPOS:
989 common->private_data_ptrs[cc - common->start] = private_data_ptr;
990 private_data_ptr += sizeof(sljit_w);
991 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
992 break;
993
994 case OP_COND:
995 /* Might be a hidden SCOND. */
996 alternative = cc + GET(cc, 1);
997 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
998 {
999 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1000 private_data_ptr += sizeof(sljit_w);
1001 }
1002 bracketlen = 1 + LINK_SIZE;
1003 break;
1004
1005 case OP_BRA:
1006 bracketlen = 1 + LINK_SIZE;
1007 break;
1008
1009 case OP_CBRA:
1010 case OP_SCBRA:
1011 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1012 break;
1013
1014 CASE_ITERATOR_PRIVATE_DATA_1
1015 space = 1;
1016 size = -2;
1017 break;
1018
1019 CASE_ITERATOR_PRIVATE_DATA_2A
1020 space = 2;
1021 size = -2;
1022 break;
1023
1024 CASE_ITERATOR_PRIVATE_DATA_2B
1025 space = 2;
1026 size = -(2 + IMM2_SIZE);
1027 break;
1028
1029 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1030 space = 1;
1031 size = 1;
1032 break;
1033
1034 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1035 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1036 space = 2;
1037 size = 1;
1038 break;
1039
1040 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1041 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1042 space = 2;
1043 size = 1 + IMM2_SIZE;
1044 break;
1045
1046 case OP_CLASS:
1047 case OP_NCLASS:
1048 size += 1 + 32 / sizeof(pcre_uchar);
1049 space = get_class_iterator_size(cc + size);
1050 break;
1051
1052 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1053 case OP_XCLASS:
1054 size = GET(cc, 1);
1055 space = get_class_iterator_size(cc + size);
1056 break;
1057 #endif
1058
1059 default:
1060 cc = next_opcode(common, cc);
1061 SLJIT_ASSERT(cc != NULL);
1062 break;
1063 }
1064
1065 if (space > 0 && cc >= end)
1066 {
1067 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1068 private_data_ptr += sizeof(sljit_w) * space;
1069 }
1070
1071 if (size != 0)
1072 {
1073 if (size < 0)
1074 {
1075 cc += -size;
1076 #ifdef SUPPORT_UTF
1077 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1078 #endif
1079 }
1080 else
1081 cc += size;
1082 }
1083
1084 if (bracketlen > 0)
1085 {
1086 if (cc >= end)
1087 {
1088 end = bracketend(cc);
1089 if (end[-1 - LINK_SIZE] == OP_KET)
1090 end = NULL;
1091 }
1092 cc += bracketlen;
1093 }
1094 }
1095 }
1096
1097 /* Returns with -1 if no need for frame. */
1098 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1099 {
1100 pcre_uchar *ccend = bracketend(cc);
1101 int length = 0;
1102 BOOL possessive = FALSE;
1103 BOOL setsom_found = recursive;
1104 BOOL setmark_found = recursive;
1105
1106 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1107 {
1108 length = 3;
1109 possessive = TRUE;
1110 }
1111
1112 cc = next_opcode(common, cc);
1113 SLJIT_ASSERT(cc != NULL);
1114 while (cc < ccend)
1115 switch(*cc)
1116 {
1117 case OP_SET_SOM:
1118 SLJIT_ASSERT(common->has_set_som);
1119 if (!setsom_found)
1120 {
1121 length += 2;
1122 setsom_found = TRUE;
1123 }
1124 cc += 1;
1125 break;
1126
1127 case OP_MARK:
1128 SLJIT_ASSERT(common->mark_ptr != 0);
1129 if (!setmark_found)
1130 {
1131 length += 2;
1132 setmark_found = TRUE;
1133 }
1134 cc += 1 + 2 + cc[1];
1135 break;
1136
1137 case OP_RECURSE:
1138 if (common->has_set_som && !setsom_found)
1139 {
1140 length += 2;
1141 setsom_found = TRUE;
1142 }
1143 if (common->mark_ptr != 0 && !setmark_found)
1144 {
1145 length += 2;
1146 setmark_found = TRUE;
1147 }
1148 cc += 1 + LINK_SIZE;
1149 break;
1150
1151 case OP_CBRA:
1152 case OP_CBRAPOS:
1153 case OP_SCBRA:
1154 case OP_SCBRAPOS:
1155 length += 3;
1156 cc += 1 + LINK_SIZE + IMM2_SIZE;
1157 break;
1158
1159 default:
1160 cc = next_opcode(common, cc);
1161 SLJIT_ASSERT(cc != NULL);
1162 break;
1163 }
1164
1165 /* Possessive quantifiers can use a special case. */
1166 if (SLJIT_UNLIKELY(possessive) && length == 3)
1167 return -1;
1168
1169 if (length > 0)
1170 return length + 1;
1171 return -1;
1172 }
1173
1174 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1175 {
1176 DEFINE_COMPILER;
1177 pcre_uchar *ccend = bracketend(cc);
1178 BOOL setsom_found = recursive;
1179 BOOL setmark_found = recursive;
1180 int offset;
1181
1182 /* >= 1 + shortest item size (2) */
1183 SLJIT_UNUSED_ARG(stacktop);
1184 SLJIT_ASSERT(stackpos >= stacktop + 2);
1185
1186 stackpos = STACK(stackpos);
1187 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1188 cc = next_opcode(common, cc);
1189 SLJIT_ASSERT(cc != NULL);
1190 while (cc < ccend)
1191 switch(*cc)
1192 {
1193 case OP_SET_SOM:
1194 SLJIT_ASSERT(common->has_set_som);
1195 if (!setsom_found)
1196 {
1197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1199 stackpos += (int)sizeof(sljit_w);
1200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1201 stackpos += (int)sizeof(sljit_w);
1202 setsom_found = TRUE;
1203 }
1204 cc += 1;
1205 break;
1206
1207 case OP_MARK:
1208 SLJIT_ASSERT(common->mark_ptr != 0);
1209 if (!setmark_found)
1210 {
1211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1213 stackpos += (int)sizeof(sljit_w);
1214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1215 stackpos += (int)sizeof(sljit_w);
1216 setmark_found = TRUE;
1217 }
1218 cc += 1 + 2 + cc[1];
1219 break;
1220
1221 case OP_RECURSE:
1222 if (common->has_set_som && !setsom_found)
1223 {
1224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1226 stackpos += (int)sizeof(sljit_w);
1227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1228 stackpos += (int)sizeof(sljit_w);
1229 setsom_found = TRUE;
1230 }
1231 if (common->mark_ptr != 0 && !setmark_found)
1232 {
1233 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1235 stackpos += (int)sizeof(sljit_w);
1236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1237 stackpos += (int)sizeof(sljit_w);
1238 setmark_found = TRUE;
1239 }
1240 cc += 1 + LINK_SIZE;
1241 break;
1242
1243 case OP_CBRA:
1244 case OP_CBRAPOS:
1245 case OP_SCBRA:
1246 case OP_SCBRAPOS:
1247 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1249 stackpos += (int)sizeof(sljit_w);
1250 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1251 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1253 stackpos += (int)sizeof(sljit_w);
1254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1255 stackpos += (int)sizeof(sljit_w);
1256
1257 cc += 1 + LINK_SIZE + IMM2_SIZE;
1258 break;
1259
1260 default:
1261 cc = next_opcode(common, cc);
1262 SLJIT_ASSERT(cc != NULL);
1263 break;
1264 }
1265
1266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1267 SLJIT_ASSERT(stackpos == STACK(stacktop));
1268 }
1269
1270 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1271 {
1272 int private_data_length = 2;
1273 int size;
1274 pcre_uchar *alternative;
1275 /* Calculate the sum of the private machine words. */
1276 while (cc < ccend)
1277 {
1278 size = 0;
1279 switch(*cc)
1280 {
1281 case OP_ASSERT:
1282 case OP_ASSERT_NOT:
1283 case OP_ASSERTBACK:
1284 case OP_ASSERTBACK_NOT:
1285 case OP_ONCE:
1286 case OP_ONCE_NC:
1287 case OP_BRAPOS:
1288 case OP_SBRA:
1289 case OP_SBRAPOS:
1290 case OP_SCOND:
1291 private_data_length++;
1292 cc += 1 + LINK_SIZE;
1293 break;
1294
1295 case OP_CBRA:
1296 case OP_SCBRA:
1297 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1298 private_data_length++;
1299 cc += 1 + LINK_SIZE + IMM2_SIZE;
1300 break;
1301
1302 case OP_CBRAPOS:
1303 case OP_SCBRAPOS:
1304 private_data_length += 2;
1305 cc += 1 + LINK_SIZE + IMM2_SIZE;
1306 break;
1307
1308 case OP_COND:
1309 /* Might be a hidden SCOND. */
1310 alternative = cc + GET(cc, 1);
1311 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1312 private_data_length++;
1313 cc += 1 + LINK_SIZE;
1314 break;
1315
1316 CASE_ITERATOR_PRIVATE_DATA_1
1317 if (PRIVATE_DATA(cc))
1318 private_data_length++;
1319 cc += 2;
1320 #ifdef SUPPORT_UTF
1321 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1322 #endif
1323 break;
1324
1325 CASE_ITERATOR_PRIVATE_DATA_2A
1326 if (PRIVATE_DATA(cc))
1327 private_data_length += 2;
1328 cc += 2;
1329 #ifdef SUPPORT_UTF
1330 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1331 #endif
1332 break;
1333
1334 CASE_ITERATOR_PRIVATE_DATA_2B
1335 if (PRIVATE_DATA(cc))
1336 private_data_length += 2;
1337 cc += 2 + IMM2_SIZE;
1338 #ifdef SUPPORT_UTF
1339 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1340 #endif
1341 break;
1342
1343 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1344 if (PRIVATE_DATA(cc))
1345 private_data_length++;
1346 cc += 1;
1347 break;
1348
1349 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1350 if (PRIVATE_DATA(cc))
1351 private_data_length += 2;
1352 cc += 1;
1353 break;
1354
1355 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1356 if (PRIVATE_DATA(cc))
1357 private_data_length += 2;
1358 cc += 1 + IMM2_SIZE;
1359 break;
1360
1361 case OP_CLASS:
1362 case OP_NCLASS:
1363 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1364 case OP_XCLASS:
1365 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1366 #else
1367 size = 1 + 32 / (int)sizeof(pcre_uchar);
1368 #endif
1369 if (PRIVATE_DATA(cc))
1370 private_data_length += get_class_iterator_size(cc + size);
1371 cc += size;
1372 break;
1373
1374 default:
1375 cc = next_opcode(common, cc);
1376 SLJIT_ASSERT(cc != NULL);
1377 break;
1378 }
1379 }
1380 SLJIT_ASSERT(cc == ccend);
1381 return private_data_length;
1382 }
1383
1384 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1385 BOOL save, int stackptr, int stacktop)
1386 {
1387 DEFINE_COMPILER;
1388 int srcw[2];
1389 int count, size;
1390 BOOL tmp1next = TRUE;
1391 BOOL tmp1empty = TRUE;
1392 BOOL tmp2empty = TRUE;
1393 pcre_uchar *alternative;
1394 enum {
1395 start,
1396 loop,
1397 end
1398 } status;
1399
1400 status = save ? start : loop;
1401 stackptr = STACK(stackptr - 2);
1402 stacktop = STACK(stacktop - 1);
1403
1404 if (!save)
1405 {
1406 stackptr += sizeof(sljit_w);
1407 if (stackptr < stacktop)
1408 {
1409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1410 stackptr += sizeof(sljit_w);
1411 tmp1empty = FALSE;
1412 }
1413 if (stackptr < stacktop)
1414 {
1415 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1416 stackptr += sizeof(sljit_w);
1417 tmp2empty = FALSE;
1418 }
1419 /* The tmp1next must be TRUE in either way. */
1420 }
1421
1422 while (status != end)
1423 {
1424 count = 0;
1425 switch(status)
1426 {
1427 case start:
1428 SLJIT_ASSERT(save && common->recursive_head != 0);
1429 count = 1;
1430 srcw[0] = common->recursive_head;
1431 status = loop;
1432 break;
1433
1434 case loop:
1435 if (cc >= ccend)
1436 {
1437 status = end;
1438 break;
1439 }
1440
1441 switch(*cc)
1442 {
1443 case OP_ASSERT:
1444 case OP_ASSERT_NOT:
1445 case OP_ASSERTBACK:
1446 case OP_ASSERTBACK_NOT:
1447 case OP_ONCE:
1448 case OP_ONCE_NC:
1449 case OP_BRAPOS:
1450 case OP_SBRA:
1451 case OP_SBRAPOS:
1452 case OP_SCOND:
1453 count = 1;
1454 srcw[0] = PRIVATE_DATA(cc);
1455 SLJIT_ASSERT(srcw[0] != 0);
1456 cc += 1 + LINK_SIZE;
1457 break;
1458
1459 case OP_CBRA:
1460 case OP_SCBRA:
1461 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1462 {
1463 count = 1;
1464 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1465 }
1466 cc += 1 + LINK_SIZE + IMM2_SIZE;
1467 break;
1468
1469 case OP_CBRAPOS:
1470 case OP_SCBRAPOS:
1471 count = 2;
1472 srcw[0] = PRIVATE_DATA(cc);
1473 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1474 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1475 cc += 1 + LINK_SIZE + IMM2_SIZE;
1476 break;
1477
1478 case OP_COND:
1479 /* Might be a hidden SCOND. */
1480 alternative = cc + GET(cc, 1);
1481 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1482 {
1483 count = 1;
1484 srcw[0] = PRIVATE_DATA(cc);
1485 SLJIT_ASSERT(srcw[0] != 0);
1486 }
1487 cc += 1 + LINK_SIZE;
1488 break;
1489
1490 CASE_ITERATOR_PRIVATE_DATA_1
1491 if (PRIVATE_DATA(cc))
1492 {
1493 count = 1;
1494 srcw[0] = PRIVATE_DATA(cc);
1495 }
1496 cc += 2;
1497 #ifdef SUPPORT_UTF
1498 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1499 #endif
1500 break;
1501
1502 CASE_ITERATOR_PRIVATE_DATA_2A
1503 if (PRIVATE_DATA(cc))
1504 {
1505 count = 2;
1506 srcw[0] = PRIVATE_DATA(cc);
1507 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1508 }
1509 cc += 2;
1510 #ifdef SUPPORT_UTF
1511 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1512 #endif
1513 break;
1514
1515 CASE_ITERATOR_PRIVATE_DATA_2B
1516 if (PRIVATE_DATA(cc))
1517 {
1518 count = 2;
1519 srcw[0] = PRIVATE_DATA(cc);
1520 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1521 }
1522 cc += 2 + IMM2_SIZE;
1523 #ifdef SUPPORT_UTF
1524 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1525 #endif
1526 break;
1527
1528 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1529 if (PRIVATE_DATA(cc))
1530 {
1531 count = 1;
1532 srcw[0] = PRIVATE_DATA(cc);
1533 }
1534 cc += 1;
1535 break;
1536
1537 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1538 if (PRIVATE_DATA(cc))
1539 {
1540 count = 2;
1541 srcw[0] = PRIVATE_DATA(cc);
1542 srcw[1] = srcw[0] + sizeof(sljit_w);
1543 }
1544 cc += 1;
1545 break;
1546
1547 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1548 if (PRIVATE_DATA(cc))
1549 {
1550 count = 2;
1551 srcw[0] = PRIVATE_DATA(cc);
1552 srcw[1] = srcw[0] + sizeof(sljit_w);
1553 }
1554 cc += 1 + IMM2_SIZE;
1555 break;
1556
1557 case OP_CLASS:
1558 case OP_NCLASS:
1559 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1560 case OP_XCLASS:
1561 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1562 #else
1563 size = 1 + 32 / (int)sizeof(pcre_uchar);
1564 #endif
1565 if (PRIVATE_DATA(cc))
1566 switch(get_class_iterator_size(cc + size))
1567 {
1568 case 1:
1569 count = 1;
1570 srcw[0] = PRIVATE_DATA(cc);
1571 break;
1572
1573 case 2:
1574 count = 2;
1575 srcw[0] = PRIVATE_DATA(cc);
1576 srcw[1] = srcw[0] + sizeof(sljit_w);
1577 break;
1578
1579 default:
1580 SLJIT_ASSERT_STOP();
1581 break;
1582 }
1583 cc += size;
1584 break;
1585
1586 default:
1587 cc = next_opcode(common, cc);
1588 SLJIT_ASSERT(cc != NULL);
1589 break;
1590 }
1591 break;
1592
1593 case end:
1594 SLJIT_ASSERT_STOP();
1595 break;
1596 }
1597
1598 while (count > 0)
1599 {
1600 count--;
1601 if (save)
1602 {
1603 if (tmp1next)
1604 {
1605 if (!tmp1empty)
1606 {
1607 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1608 stackptr += sizeof(sljit_w);
1609 }
1610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1611 tmp1empty = FALSE;
1612 tmp1next = FALSE;
1613 }
1614 else
1615 {
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1622 tmp2empty = FALSE;
1623 tmp1next = TRUE;
1624 }
1625 }
1626 else
1627 {
1628 if (tmp1next)
1629 {
1630 SLJIT_ASSERT(!tmp1empty);
1631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1632 tmp1empty = stackptr >= stacktop;
1633 if (!tmp1empty)
1634 {
1635 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1636 stackptr += sizeof(sljit_w);
1637 }
1638 tmp1next = FALSE;
1639 }
1640 else
1641 {
1642 SLJIT_ASSERT(!tmp2empty);
1643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1644 tmp2empty = stackptr >= stacktop;
1645 if (!tmp2empty)
1646 {
1647 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1648 stackptr += sizeof(sljit_w);
1649 }
1650 tmp1next = TRUE;
1651 }
1652 }
1653 }
1654 }
1655
1656 if (save)
1657 {
1658 if (tmp1next)
1659 {
1660 if (!tmp1empty)
1661 {
1662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1663 stackptr += sizeof(sljit_w);
1664 }
1665 if (!tmp2empty)
1666 {
1667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1668 stackptr += sizeof(sljit_w);
1669 }
1670 }
1671 else
1672 {
1673 if (!tmp2empty)
1674 {
1675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1676 stackptr += sizeof(sljit_w);
1677 }
1678 if (!tmp1empty)
1679 {
1680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1681 stackptr += sizeof(sljit_w);
1682 }
1683 }
1684 }
1685 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1686 }
1687
1688 #undef CASE_ITERATOR_PRIVATE_DATA_1
1689 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1690 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1691 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1692 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1693 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1694
1695 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1696 {
1697 return (value & (value - 1)) == 0;
1698 }
1699
1700 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1701 {
1702 while (list)
1703 {
1704 /* sljit_set_label is clever enough to do nothing
1705 if either the jump or the label is NULL. */
1706 sljit_set_label(list->jump, label);
1707 list = list->next;
1708 }
1709 }
1710
1711 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1712 {
1713 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1714 if (list_item)
1715 {
1716 list_item->next = *list;
1717 list_item->jump = jump;
1718 *list = list_item;
1719 }
1720 }
1721
1722 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1723 {
1724 DEFINE_COMPILER;
1725 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1726
1727 if (list_item)
1728 {
1729 list_item->type = type;
1730 list_item->data = data;
1731 list_item->start = start;
1732 list_item->quit = LABEL();
1733 list_item->next = common->stubs;
1734 common->stubs = list_item;
1735 }
1736 }
1737
1738 static void flush_stubs(compiler_common *common)
1739 {
1740 DEFINE_COMPILER;
1741 stub_list* list_item = common->stubs;
1742
1743 while (list_item)
1744 {
1745 JUMPHERE(list_item->start);
1746 switch(list_item->type)
1747 {
1748 case stack_alloc:
1749 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1750 break;
1751 }
1752 JUMPTO(SLJIT_JUMP, list_item->quit);
1753 list_item = list_item->next;
1754 }
1755 common->stubs = NULL;
1756 }
1757
1758 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1759 {
1760 DEFINE_COMPILER;
1761
1762 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1763 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1764 }
1765
1766 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1767 {
1768 /* May destroy all locals and registers except TMP2. */
1769 DEFINE_COMPILER;
1770
1771 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1772 #ifdef DESTROY_REGISTERS
1773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1774 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1775 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1776 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1778 #endif
1779 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1780 }
1781
1782 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1783 {
1784 DEFINE_COMPILER;
1785 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1786 }
1787
1788 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1789 {
1790 DEFINE_COMPILER;
1791 struct sljit_label *loop;
1792 int i;
1793 /* At this point we can freely use all temporary registers. */
1794 /* TMP1 returns with begin - 1. */
1795 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1796 if (length < 8)
1797 {
1798 for (i = 0; i < length; i++)
1799 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1800 }
1801 else
1802 {
1803 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1804 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1805 loop = LABEL();
1806 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1807 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1808 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1809 }
1810 }
1811
1812 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1813 {
1814 DEFINE_COMPILER;
1815 struct sljit_label *loop;
1816 struct sljit_jump *earlyexit;
1817
1818 /* At this point we can freely use all registers. */
1819 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1821
1822 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1823 if (common->mark_ptr != 0)
1824 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1825 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1826 if (common->mark_ptr != 0)
1827 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1828 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1829 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1830 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1831 /* Unlikely, but possible */
1832 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1833 loop = LABEL();
1834 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1835 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1836 /* Copy the integer value to the output buffer */
1837 #ifdef COMPILE_PCRE16
1838 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1839 #endif
1840 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1841 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1842 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1843 JUMPHERE(earlyexit);
1844
1845 /* Calculate the return value, which is the maximum ovector value. */
1846 if (topbracket > 1)
1847 {
1848 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1849 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1850
1851 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1852 loop = LABEL();
1853 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1854 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1855 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1856 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1857 }
1858 else
1859 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1860 }
1861
1862 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1863 {
1864 DEFINE_COMPILER;
1865
1866 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1867 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1868
1869 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1870 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1871 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1872 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1873
1874 /* Store match begin and end. */
1875 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1876 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1877 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1878 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1879 #ifdef COMPILE_PCRE16
1880 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1881 #endif
1882 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1883
1884 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1885 #ifdef COMPILE_PCRE16
1886 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1887 #endif
1888 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1889
1890 JUMPTO(SLJIT_JUMP, quit);
1891 }
1892
1893 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1894 {
1895 /* May destroy TMP1. */
1896 DEFINE_COMPILER;
1897 struct sljit_jump *jump;
1898
1899 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1900 {
1901 /* The value of -1 must be kept for start_used_ptr! */
1902 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1903 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1904 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1905 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1906 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1907 JUMPHERE(jump);
1908 }
1909 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1910 {
1911 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1913 JUMPHERE(jump);
1914 }
1915 }
1916
1917 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1918 {
1919 /* Detects if the character has an othercase. */
1920 unsigned int c;
1921
1922 #ifdef SUPPORT_UTF
1923 if (common->utf)
1924 {
1925 GETCHAR(c, cc);
1926 if (c > 127)
1927 {
1928 #ifdef SUPPORT_UCP
1929 return c != UCD_OTHERCASE(c);
1930 #else
1931 return FALSE;
1932 #endif
1933 }
1934 #ifndef COMPILE_PCRE8
1935 return common->fcc[c] != c;
1936 #endif
1937 }
1938 else
1939 #endif
1940 c = *cc;
1941 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1942 }
1943
1944 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1945 {
1946 /* Returns with the othercase. */
1947 #ifdef SUPPORT_UTF
1948 if (common->utf && c > 127)
1949 {
1950 #ifdef SUPPORT_UCP
1951 return UCD_OTHERCASE(c);
1952 #else
1953 return c;
1954 #endif
1955 }
1956 #endif
1957 return TABLE_GET(c, common->fcc, c);
1958 }
1959
1960 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1961 {
1962 /* Detects if the character and its othercase has only 1 bit difference. */
1963 unsigned int c, oc, bit;
1964 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1965 int n;
1966 #endif
1967
1968 #ifdef SUPPORT_UTF
1969 if (common->utf)
1970 {
1971 GETCHAR(c, cc);
1972 if (c <= 127)
1973 oc = common->fcc[c];
1974 else
1975 {
1976 #ifdef SUPPORT_UCP
1977 oc = UCD_OTHERCASE(c);
1978 #else
1979 oc = c;
1980 #endif
1981 }
1982 }
1983 else
1984 {
1985 c = *cc;
1986 oc = TABLE_GET(c, common->fcc, c);
1987 }
1988 #else
1989 c = *cc;
1990 oc = TABLE_GET(c, common->fcc, c);
1991 #endif
1992
1993 SLJIT_ASSERT(c != oc);
1994
1995 bit = c ^ oc;
1996 /* Optimized for English alphabet. */
1997 if (c <= 127 && bit == 0x20)
1998 return (0 << 8) | 0x20;
1999
2000 /* Since c != oc, they must have at least 1 bit difference. */
2001 if (!is_powerof2(bit))
2002 return 0;
2003
2004 #ifdef COMPILE_PCRE8
2005
2006 #ifdef SUPPORT_UTF
2007 if (common->utf && c > 127)
2008 {
2009 n = GET_EXTRALEN(*cc);
2010 while ((bit & 0x3f) == 0)
2011 {
2012 n--;
2013 bit >>= 6;
2014 }
2015 return (n << 8) | bit;
2016 }
2017 #endif /* SUPPORT_UTF */
2018 return (0 << 8) | bit;
2019
2020 #else /* COMPILE_PCRE8 */
2021
2022 #ifdef COMPILE_PCRE16
2023 #ifdef SUPPORT_UTF
2024 if (common->utf && c > 65535)
2025 {
2026 if (bit >= (1 << 10))
2027 bit >>= 10;
2028 else
2029 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2030 }
2031 #endif /* SUPPORT_UTF */
2032 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2033 #endif /* COMPILE_PCRE16 */
2034
2035 #endif /* COMPILE_PCRE8 */
2036 }
2037
2038 static void check_partial(compiler_common *common, BOOL force)
2039 {
2040 /* Checks whether a partial matching is occured. Does not modify registers. */
2041 DEFINE_COMPILER;
2042 struct sljit_jump *jump = NULL;
2043
2044 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2045
2046 if (common->mode == JIT_COMPILE)
2047 return;
2048
2049 if (!force)
2050 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2051 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2052 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2053
2054 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2056 else
2057 {
2058 if (common->partialmatchlabel != NULL)
2059 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2060 else
2061 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2062 }
2063
2064 if (jump != NULL)
2065 JUMPHERE(jump);
2066 }
2067
2068 static struct sljit_jump *check_str_end(compiler_common *common)
2069 {
2070 /* Does not affect registers. Usually used in a tight spot. */
2071 DEFINE_COMPILER;
2072 struct sljit_jump *jump;
2073 struct sljit_jump *nohit;
2074 struct sljit_jump *return_value;
2075
2076 if (common->mode == JIT_COMPILE)
2077 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2078
2079 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2080 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2081 {
2082 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2084 JUMPHERE(nohit);
2085 return_value = JUMP(SLJIT_JUMP);
2086 }
2087 else
2088 {
2089 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2090 if (common->partialmatchlabel != NULL)
2091 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2092 else
2093 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2094 }
2095 JUMPHERE(jump);
2096 return return_value;
2097 }
2098
2099 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2100 {
2101 DEFINE_COMPILER;
2102 struct sljit_jump *jump;
2103
2104 if (common->mode == JIT_COMPILE)
2105 {
2106 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2107 return;
2108 }
2109
2110 /* Partial matching mode. */
2111 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2112 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2113 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2114 {
2115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2116 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2117 }
2118 else
2119 {
2120 if (common->partialmatchlabel != NULL)
2121 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2122 else
2123 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2124 }
2125 JUMPHERE(jump);
2126 }
2127
2128 static void read_char(compiler_common *common)
2129 {
2130 /* Reads the character into TMP1, updates STR_PTR.
2131 Does not check STR_END. TMP2 Destroyed. */
2132 DEFINE_COMPILER;
2133 #ifdef SUPPORT_UTF
2134 struct sljit_jump *jump;
2135 #endif
2136
2137 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2138 #ifdef SUPPORT_UTF
2139 if (common->utf)
2140 {
2141 #ifdef COMPILE_PCRE8
2142 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2143 #else
2144 #ifdef COMPILE_PCRE16
2145 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2146 #endif
2147 #endif /* COMPILE_PCRE8 */
2148 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2149 JUMPHERE(jump);
2150 }
2151 #endif
2152 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2153 }
2154
2155 static void peek_char(compiler_common *common)
2156 {
2157 /* Reads the character into TMP1, keeps STR_PTR.
2158 Does not check STR_END. TMP2 Destroyed. */
2159 DEFINE_COMPILER;
2160 #ifdef SUPPORT_UTF
2161 struct sljit_jump *jump;
2162 #endif
2163
2164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2165 #ifdef SUPPORT_UTF
2166 if (common->utf)
2167 {
2168 #ifdef COMPILE_PCRE8
2169 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2170 #else
2171 #ifdef COMPILE_PCRE16
2172 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2173 #endif
2174 #endif /* COMPILE_PCRE8 */
2175 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2176 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2177 JUMPHERE(jump);
2178 }
2179 #endif
2180 }
2181
2182 static void read_char8_type(compiler_common *common)
2183 {
2184 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2185 DEFINE_COMPILER;
2186 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2187 struct sljit_jump *jump;
2188 #endif
2189
2190 #ifdef SUPPORT_UTF
2191 if (common->utf)
2192 {
2193 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195 #ifdef COMPILE_PCRE8
2196 /* This can be an extra read in some situations, but hopefully
2197 it is needed in most cases. */
2198 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2199 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2200 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2201 JUMPHERE(jump);
2202 #else
2203 #ifdef COMPILE_PCRE16
2204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2205 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2206 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2207 JUMPHERE(jump);
2208 /* Skip low surrogate if necessary. */
2209 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2211 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2212 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2214 #endif
2215 #endif /* COMPILE_PCRE8 */
2216 return;
2217 }
2218 #endif
2219 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2221 #ifdef COMPILE_PCRE16
2222 /* The ctypes array contains only 256 values. */
2223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2224 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2225 #endif
2226 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2227 #ifdef COMPILE_PCRE16
2228 JUMPHERE(jump);
2229 #endif
2230 }
2231
2232 static void skip_char_back(compiler_common *common)
2233 {
2234 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2235 DEFINE_COMPILER;
2236 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2237 struct sljit_label *label;
2238
2239 if (common->utf)
2240 {
2241 label = LABEL();
2242 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2243 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2244 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2245 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2246 return;
2247 }
2248 #endif
2249 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2250 if (common->utf)
2251 {
2252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2253 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2254 /* Skip low surrogate if necessary. */
2255 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2256 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2257 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2258 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2259 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2260 return;
2261 }
2262 #endif
2263 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2264 }
2265
2266 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2267 {
2268 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2269 DEFINE_COMPILER;
2270
2271 if (nltype == NLTYPE_ANY)
2272 {
2273 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2274 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2275 }
2276 else if (nltype == NLTYPE_ANYCRLF)
2277 {
2278 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2279 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2281 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2282 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2283 }
2284 else
2285 {
2286 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2287 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2288 }
2289 }
2290
2291 #ifdef SUPPORT_UTF
2292
2293 #ifdef COMPILE_PCRE8
2294 static void do_utfreadchar(compiler_common *common)
2295 {
2296 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2297 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2298 DEFINE_COMPILER;
2299 struct sljit_jump *jump;
2300
2301 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2302 /* Searching for the first zero. */
2303 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2304 jump = JUMP(SLJIT_C_NOT_ZERO);
2305 /* Two byte sequence. */
2306 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2307 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2308 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2309 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2310 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2311 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2312 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2313 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2314 JUMPHERE(jump);
2315
2316 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2317 jump = JUMP(SLJIT_C_NOT_ZERO);
2318 /* Three byte sequence. */
2319 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2320 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2321 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2322 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2323 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2324 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2325 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2327 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2328 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2329 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2330 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2331 JUMPHERE(jump);
2332
2333 /* Four byte sequence. */
2334 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2335 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2336 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2337 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2338 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2339 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2340 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2341 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2342 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2343 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2344 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2345 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2346 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2347 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2348 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2349 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2350 }
2351
2352 static void do_utfreadtype8(compiler_common *common)
2353 {
2354 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2355 of the character (>= 0xc0). Return value in TMP1. */
2356 DEFINE_COMPILER;
2357 struct sljit_jump *jump;
2358 struct sljit_jump *compare;
2359
2360 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2361
2362 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2363 jump = JUMP(SLJIT_C_NOT_ZERO);
2364 /* Two byte sequence. */
2365 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2366 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2367 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2368 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2369 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2370 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2371 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2372 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2373 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2374
2375 JUMPHERE(compare);
2376 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2377 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2378 JUMPHERE(jump);
2379
2380 /* We only have types for characters less than 256. */
2381 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2382 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2383 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2384 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2385 }
2386
2387 #else /* COMPILE_PCRE8 */
2388
2389 #ifdef COMPILE_PCRE16
2390 static void do_utfreadchar(compiler_common *common)
2391 {
2392 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2393 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2394 DEFINE_COMPILER;
2395 struct sljit_jump *jump;
2396
2397 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2398 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2399 /* Do nothing, only return. */
2400 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2401
2402 JUMPHERE(jump);
2403 /* Combine two 16 bit characters. */
2404 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2405 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2406 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2407 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2408 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2409 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2410 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2411 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2412 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2413 }
2414 #endif /* COMPILE_PCRE16 */
2415
2416 #endif /* COMPILE_PCRE8 */
2417
2418 #endif /* SUPPORT_UTF */
2419
2420 #ifdef SUPPORT_UCP
2421
2422 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2423 #define UCD_BLOCK_MASK 127
2424 #define UCD_BLOCK_SHIFT 7
2425
2426 static void do_getucd(compiler_common *common)
2427 {
2428 /* Search the UCD record for the character comes in TMP1.
2429 Returns chartype in TMP1 and UCD offset in TMP2. */
2430 DEFINE_COMPILER;
2431
2432 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2433
2434 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2435 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2436 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2437 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2438 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2439 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2440 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2441 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2443 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2444 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2445 }
2446 #endif
2447
2448 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2449 {
2450 DEFINE_COMPILER;
2451 struct sljit_label *mainloop;
2452 struct sljit_label *newlinelabel = NULL;
2453 struct sljit_jump *start;
2454 struct sljit_jump *end = NULL;
2455 struct sljit_jump *nl = NULL;
2456 #ifdef SUPPORT_UTF
2457 struct sljit_jump *singlechar;
2458 #endif
2459 jump_list *newline = NULL;
2460 BOOL newlinecheck = FALSE;
2461 BOOL readuchar = FALSE;
2462
2463 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2464 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2465 newlinecheck = TRUE;
2466
2467 if (firstline)
2468 {
2469 /* Search for the end of the first line. */
2470 SLJIT_ASSERT(common->first_line_end != 0);
2471 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2472
2473 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2474 {
2475 mainloop = LABEL();
2476 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2478 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2479 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2480 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2481 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2482 JUMPHERE(end);
2483 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2484 }
2485 else
2486 {
2487 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2488 mainloop = LABEL();
2489 /* Continual stores does not cause data dependency. */
2490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2491 read_char(common);
2492 check_newlinechar(common, common->nltype, &newline, TRUE);
2493 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2494 JUMPHERE(end);
2495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2496 set_jumps(newline, LABEL());
2497 }
2498
2499 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2500 }
2501
2502 start = JUMP(SLJIT_JUMP);
2503
2504 if (newlinecheck)
2505 {
2506 newlinelabel = LABEL();
2507 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2508 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2510 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2511 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2512 #ifdef COMPILE_PCRE16
2513 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2514 #endif
2515 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2516 nl = JUMP(SLJIT_JUMP);
2517 }
2518
2519 mainloop = LABEL();
2520
2521 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2522 #ifdef SUPPORT_UTF
2523 if (common->utf) readuchar = TRUE;
2524 #endif
2525 if (newlinecheck) readuchar = TRUE;
2526
2527 if (readuchar)
2528 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2529
2530 if (newlinecheck)
2531 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2532
2533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2534 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2535 if (common->utf)
2536 {
2537 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2538 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2540 JUMPHERE(singlechar);
2541 }
2542 #endif
2543 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2544 if (common->utf)
2545 {
2546 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2547 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2548 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2549 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2550 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2551 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2552 JUMPHERE(singlechar);
2553 }
2554 #endif
2555 JUMPHERE(start);
2556
2557 if (newlinecheck)
2558 {
2559 JUMPHERE(end);
2560 JUMPHERE(nl);
2561 }
2562
2563 return mainloop;
2564 }
2565
2566 #define MAX_N_CHARS 3
2567
2568 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2569 {
2570 DEFINE_COMPILER;
2571 struct sljit_label *start;
2572 struct sljit_jump *quit;
2573 pcre_int32 chars[MAX_N_CHARS * 2];
2574 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2575 int location = 0;
2576 pcre_int32 len, c, bit, caseless;
2577 int must_stop;
2578
2579 /* We do not support alternatives now. */
2580 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2581 return FALSE;
2582
2583 while (TRUE)
2584 {
2585 caseless = 0;
2586 must_stop = 1;
2587 switch(*cc)
2588 {
2589 case OP_CHAR:
2590 must_stop = 0;
2591 cc++;
2592 break;
2593
2594 case OP_CHARI:
2595 caseless = 1;
2596 must_stop = 0;
2597 cc++;
2598 break;
2599
2600 case OP_SOD:
2601 case OP_SOM:
2602 case OP_SET_SOM:
2603 case OP_NOT_WORD_BOUNDARY:
2604 case OP_WORD_BOUNDARY:
2605 case OP_EODN:
2606 case OP_EOD:
2607 case OP_CIRC:
2608 case OP_CIRCM:
2609 case OP_DOLL:
2610 case OP_DOLLM:
2611 /* Zero width assertions. */
2612 cc++;
2613 continue;
2614
2615 case OP_PLUS:
2616 case OP_MINPLUS:
2617 case OP_POSPLUS:
2618 cc++;
2619 break;
2620
2621 case OP_EXACT:
2622 cc += 1 + IMM2_SIZE;
2623 break;
2624
2625 case OP_PLUSI:
2626 case OP_MINPLUSI:
2627 case OP_POSPLUSI:
2628 caseless = 1;
2629 cc++;
2630 break;
2631
2632 case OP_EXACTI:
2633 caseless = 1;
2634 cc += 1 + IMM2_SIZE;
2635 break;
2636
2637 default:
2638 must_stop = 2;
2639 break;
2640 }
2641
2642 if (must_stop == 2)
2643 break;
2644
2645 len = 1;
2646 #ifdef SUPPORT_UTF
2647 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2648 #endif
2649
2650 if (caseless && char_has_othercase(common, cc))
2651 {
2652 caseless = char_get_othercase_bit(common, cc);
2653 if (caseless == 0)
2654 return FALSE;
2655 #ifdef COMPILE_PCRE8
2656 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2657 #else
2658 if ((caseless & 0x100) != 0)
2659 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2660 else
2661 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2662 #endif
2663 }
2664 else
2665 caseless = 0;
2666
2667 while (len > 0 && location < MAX_N_CHARS * 2)
2668 {
2669 c = *cc;
2670 bit = 0;
2671 if (len == (caseless & 0xff))
2672 {
2673 bit = caseless >> 8;
2674 c |= bit;
2675 }
2676
2677 chars[location] = c;
2678 chars[location + 1] = bit;
2679
2680 len--;
2681 location += 2;
2682 cc++;
2683 }
2684
2685 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2686 break;
2687 }
2688
2689 /* At least two characters are required. */
2690 if (location < 2 * 2)
2691 return FALSE;
2692
2693 if (firstline)
2694 {
2695 SLJIT_ASSERT(common->first_line_end != 0);
2696 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2697 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, (location >> 1) - 1);
2698 }
2699 else
2700 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
2701
2702 start = LABEL();
2703 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2704
2705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2706 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2708 if (chars[1] != 0)
2709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2710 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2711 if (location > 2 * 2)
2712 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2713 if (chars[3] != 0)
2714 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2715 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2716 if (location > 2 * 2)
2717 {
2718 if (chars[5] != 0)
2719 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2720 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2721 }
2722 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2723
2724 JUMPHERE(quit);
2725
2726 if (firstline)
2727 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2728 else
2729 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
2730 return TRUE;
2731 }
2732
2733 #undef MAX_N_CHARS
2734
2735 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2736 {
2737 DEFINE_COMPILER;
2738 struct sljit_label *start;
2739 struct sljit_jump *quit;
2740 struct sljit_jump *found;
2741 pcre_uchar oc, bit;
2742
2743 if (firstline)
2744 {
2745 SLJIT_ASSERT(common->first_line_end != 0);
2746 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2747 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2748 }
2749
2750 start = LABEL();
2751 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2752 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2753
2754 oc = first_char;
2755 if (caseless)
2756 {
2757 oc = TABLE_GET(first_char, common->fcc, first_char);
2758 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2759 if (first_char > 127 && common->utf)
2760 oc = UCD_OTHERCASE(first_char);
2761 #endif
2762 }
2763 if (first_char == oc)
2764 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2765 else
2766 {
2767 bit = first_char ^ oc;
2768 if (is_powerof2(bit))
2769 {
2770 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2771 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2772 }
2773 else
2774 {
2775 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2776 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2777 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2778 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2779 found = JUMP(SLJIT_C_NOT_ZERO);
2780 }
2781 }
2782
2783 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2784 JUMPTO(SLJIT_JUMP, start);
2785 JUMPHERE(found);
2786 JUMPHERE(quit);
2787
2788 if (firstline)
2789 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2790 }
2791
2792 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2793 {
2794 DEFINE_COMPILER;
2795 struct sljit_label *loop;
2796 struct sljit_jump *lastchar;
2797 struct sljit_jump *firstchar;
2798 struct sljit_jump *quit;
2799 struct sljit_jump *foundcr = NULL;
2800 struct sljit_jump *notfoundnl;
2801 jump_list *newline = NULL;
2802
2803 if (firstline)
2804 {
2805 SLJIT_ASSERT(common->first_line_end != 0);
2806 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2807 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2808 }
2809
2810 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2811 {
2812 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2813 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2814 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2815 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2816 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2817
2818 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2819 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2820 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2821 #ifdef COMPILE_PCRE16
2822 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2823 #endif
2824 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2825
2826 loop = LABEL();
2827 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2830 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2831 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2832 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2833
2834 JUMPHERE(quit);
2835 JUMPHERE(firstchar);
2836 JUMPHERE(lastchar);
2837
2838 if (firstline)
2839 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2840 return;
2841 }
2842
2843 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2845 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2846 skip_char_back(common);
2847
2848 loop = LABEL();
2849 read_char(common);
2850 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2851 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2852 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2853 check_newlinechar(common, common->nltype, &newline, FALSE);
2854 set_jumps(newline, loop);
2855
2856 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2857 {
2858 quit = JUMP(SLJIT_JUMP);
2859 JUMPHERE(foundcr);
2860 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2861 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2863 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2864 #ifdef COMPILE_PCRE16
2865 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2866 #endif
2867 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2868 JUMPHERE(notfoundnl);
2869 JUMPHERE(quit);
2870 }
2871 JUMPHERE(lastchar);
2872 JUMPHERE(firstchar);
2873
2874 if (firstline)
2875 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2876 }
2877
2878 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2879 {
2880 DEFINE_COMPILER;
2881 struct sljit_label *start;
2882 struct sljit_jump *quit;
2883 struct sljit_jump *found;
2884 #ifndef COMPILE_PCRE8
2885 struct sljit_jump *jump;
2886 #endif
2887
2888 if (firstline)
2889 {
2890 SLJIT_ASSERT(common->first_line_end != 0);
2891 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2892 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2893 }
2894
2895 start = LABEL();
2896 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2897 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2898 #ifdef SUPPORT_UTF
2899 if (common->utf)
2900 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2901 #endif
2902 #ifndef COMPILE_PCRE8
2903 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2905 JUMPHERE(jump);
2906 #endif
2907 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2908 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2909 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2910 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2912 found = JUMP(SLJIT_C_NOT_ZERO);
2913
2914 #ifdef SUPPORT_UTF
2915 if (common->utf)
2916 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2917 #endif
2918 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2919 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2920 if (common->utf)
2921 {
2922 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2923 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2924 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2925 }
2926 #endif
2927 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2928 if (common->utf)
2929 {
2930 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2931 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2932 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2933 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2934 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2935 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2936 }
2937 #endif
2938 JUMPTO(SLJIT_JUMP, start);
2939 JUMPHERE(found);
2940 JUMPHERE(quit);
2941
2942 if (firstline)
2943 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2944 }
2945
2946 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2947 {
2948 DEFINE_COMPILER;
2949 struct sljit_label *loop;
2950 struct sljit_jump *toolong;
2951 struct sljit_jump *alreadyfound;
2952 struct sljit_jump *found;
2953 struct sljit_jump *foundoc = NULL;
2954 struct sljit_jump *notfound;
2955 pcre_uchar oc, bit;
2956
2957 SLJIT_ASSERT(common->req_char_ptr != 0);
2958 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2959 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2960 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2961 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2962
2963 if (has_firstchar)
2964 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2965 else
2966 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2967
2968 loop = LABEL();
2969 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2970
2971 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2972 oc = req_char;
2973 if (caseless)
2974 {
2975 oc = TABLE_GET(req_char, common->fcc, req_char);
2976 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2977 if (req_char > 127 && common->utf)
2978 oc = UCD_OTHERCASE(req_char);
2979 #endif
2980 }
2981 if (req_char == oc)
2982 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2983 else
2984 {
2985 bit = req_char ^ oc;
2986 if (is_powerof2(bit))
2987 {
2988 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2989 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2990 }
2991 else
2992 {
2993 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2994 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2995 }
2996 }
2997 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2998 JUMPTO(SLJIT_JUMP, loop);
2999
3000 JUMPHERE(found);
3001 if (foundoc)
3002 JUMPHERE(foundoc);
3003 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3004 JUMPHERE(alreadyfound);
3005 JUMPHERE(toolong);
3006 return notfound;
3007 }
3008
3009 static void do_revertframes(compiler_common *common)
3010 {
3011 DEFINE_COMPILER;
3012 struct sljit_jump *jump;
3013 struct sljit_label *mainloop;
3014
3015 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3016 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3017 GET_LOCAL_BASE(TMP3, 0, 0);
3018
3019 /* Drop frames until we reach STACK_TOP. */
3020 mainloop = LABEL();
3021 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3022 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3023 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3024 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3025 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3026 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3027 JUMPTO(SLJIT_JUMP, mainloop);
3028
3029 JUMPHERE(jump);
3030 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3031 /* End of dropping frames. */
3032 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3033
3034 JUMPHERE(jump);
3035 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3036 /* Set string begin. */
3037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3038 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3040 JUMPTO(SLJIT_JUMP, mainloop);
3041
3042 JUMPHERE(jump);
3043 if (common->mark_ptr != 0)
3044 {
3045 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3046 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3047 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3049 JUMPTO(SLJIT_JUMP, mainloop);
3050
3051 JUMPHERE(jump);
3052 }
3053
3054 /* Unknown command. */
3055 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3056 JUMPTO(SLJIT_JUMP, mainloop);
3057 }
3058
3059 static void check_wordboundary(compiler_common *common)
3060 {
3061 DEFINE_COMPILER;
3062 struct sljit_jump *skipread;
3063 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3064 struct sljit_jump *jump;
3065 #endif
3066
3067 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3068
3069 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3070 /* Get type of the previous char, and put it to LOCALS1. */
3071 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3073 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3074 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3075 skip_char_back(common);
3076 check_start_used_ptr(common);
3077 read_char(common);
3078
3079 /* Testing char type. */
3080 #ifdef SUPPORT_UCP
3081 if (common->use_ucp)
3082 {
3083 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3084 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3085 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3086 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3087 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3088 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3089 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3090 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3091 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3092 JUMPHERE(jump);
3093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3094 }
3095 else
3096 #endif
3097 {
3098 #ifndef COMPILE_PCRE8
3099 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3100 #elif defined SUPPORT_UTF
3101 /* Here LOCALS1 has already been zeroed. */
3102 jump = NULL;
3103 if (common->utf)
3104 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3105 #endif /* COMPILE_PCRE8 */
3106 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3107 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3108 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3110 #ifndef COMPILE_PCRE8
3111 JUMPHERE(jump);
3112 #elif defined SUPPORT_UTF
3113 if (jump != NULL)
3114 JUMPHERE(jump);
3115 #endif /* COMPILE_PCRE8 */
3116 }
3117 JUMPHERE(skipread);
3118
3119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3120 skipread = check_str_end(common);
3121 peek_char(common);
3122
3123 /* Testing char type. This is a code duplication. */
3124 #ifdef SUPPORT_UCP
3125 if (common->use_ucp)
3126 {
3127 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3128 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3129 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3130 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3131 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3132 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3133 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3134 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3135 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3136 JUMPHERE(jump);
3137 }
3138 else
3139 #endif
3140 {
3141 #ifndef COMPILE_PCRE8
3142 /* TMP2 may be destroyed by peek_char. */
3143 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3144 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3145 #elif defined SUPPORT_UTF
3146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3147 jump = NULL;
3148 if (common->utf)
3149 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3150 #endif
3151 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3152 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3153 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3154 #ifndef COMPILE_PCRE8
3155 JUMPHERE(jump);
3156 #elif defined SUPPORT_UTF
3157 if (jump != NULL)
3158 JUMPHERE(jump);
3159 #endif /* COMPILE_PCRE8 */
3160 }
3161 JUMPHERE(skipread);
3162
3163 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3164 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3165 }
3166
3167 /*
3168 range format:
3169
3170 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3171 ranges[1] = first bit (0 or 1)
ranges[2 .. length + 1] = positions of the bit changes (each entry is the first character whose bit differs from the previous one)
3173 */
3174
3175 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3176 {
3177 DEFINE_COMPILER;
3178 struct sljit_jump *jump;
3179
3180 if (ranges[0] < 0)
3181 return FALSE;
3182
3183 switch(ranges[0])
3184 {
3185 case 1:
3186 if (readch)
3187 read_char(common);
3188 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3189 return TRUE;
3190
3191 case 2:
3192 if (readch)
3193 read_char(common);
3194 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3195 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3196 return TRUE;
3197
3198 case 4:
3199 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3200 {
3201 if (readch)
3202 read_char(common);
3203 if (ranges[1] != 0)
3204 {
3205 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3206 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3207 }
3208 else
3209 {
3210 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3211 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3212 JUMPHERE(jump);
3213 }
3214 return TRUE;
3215 }
3216 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3217 {
3218 if (readch)
3219 read_char(common);
3220 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3221 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3222 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3223 return TRUE;
3224 }
3225 return FALSE;
3226
3227 default:
3228 return FALSE;
3229 }
3230 }
3231
3232 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3233 {
3234 int i, bit, length;
3235 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3236
3237 bit = ctypes[0] & flag;
3238 ranges[0] = -1;
3239 ranges[1] = bit != 0 ? 1 : 0;
3240 length = 0;
3241
3242 for (i = 1; i < 256; i++)
3243 if ((ctypes[i] & flag) != bit)
3244 {
3245 if (length >= MAX_RANGE_SIZE)
3246 return;
3247 ranges[2 + length] = i;
3248 length++;
3249 bit ^= flag;
3250 }
3251
3252 if (bit != 0)
3253 {
3254 if (length >= MAX_RANGE_SIZE)
3255 return;
3256 ranges[2 + length] = 256;
3257 length++;
3258 }
3259 ranges[0] = length;
3260 }
3261
3262 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3263 {
3264 int ranges[2 + MAX_RANGE_SIZE];
3265 pcre_uint8 bit, cbit, all;
3266 int i, byte, length = 0;
3267
3268 bit = bits[0] & 0x1;
3269 ranges[1] = bit;
3270 /* Can be 0 or 255. */
3271 all = -bit;
3272
3273 for (i = 0; i < 256; )
3274 {
3275 byte = i >> 3;
3276 if ((i & 0x7) == 0 && bits[byte] == all)
3277 i += 8;
3278 else
3279 {
3280 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3281 if (cbit != bit)
3282 {
3283 if (length >= MAX_RANGE_SIZE)
3284 return FALSE;
3285 ranges[2 + length] = i;
3286 length++;
3287 bit = cbit;
3288 all = -cbit;
3289 }
3290 i++;
3291 }
3292 }
3293
3294 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3295 {
3296 if (length >= MAX_RANGE_SIZE)
3297 return FALSE;
3298 ranges[2 + length] = 256;
3299 length++;
3300 }
3301 ranges[0] = length;
3302
3303 return check_ranges(common, ranges, backtracks, FALSE);
3304 }
3305
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed.
The emitted routine tests for 0x0a-0x0d and 0x85 and, in the wide-character
configurations selected below, also for 0x2028-0x2029. The partial results are
collected in TMP2; the final OR is emitted with SLJIT_SET_E so the caller can
branch on the flags. TMP1 is modified (rebased) by the emitted code. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase so that 0x0a becomes zero; 0x0a-0x0d is then one unsigned compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
  /* 0x2028 - 0x0a and 0x2029 - 0x0a differ only in the lowest bit, so setting
  that bit lets a single equality test cover both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Folds in the result of the last comparison emitted above, whichever it was. */
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3332
static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
The emitted routine tests for 0x09, 0x20 and 0xa0 and, in the wide-character
configurations selected below, also for 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f and 0x3000. The partial results are collected in TMP2; the final OR is
emitted with SLJIT_SET_E so the caller can branch on the flags. TMP1 is
modified by the emitted code only in the wide-character part. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
  /* From here on TMP1 is rebased to 0x2000, hence the "- 0x2000" constants. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Folds in the result of the last comparison emitted above, whichever it was. */
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3371
static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The emitted routine tests for 0x0a-0x0d and 0x85 and, in the wide-character
configurations selected below, also for 0x2028-0x2029. The partial results are
collected in TMP2; the final OR is emitted with SLJIT_SET_E so the caller can
branch on the flags. TMP1 is modified (rebased) by the emitted code. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase so that 0x0a becomes zero; 0x0a-0x0d is then one unsigned compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
  /* 0x2028 - 0x0a and 0x2029 - 0x0a differ only in the lowest bit, so setting
  that bit lets a single equality test cover both characters. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Folds in the result of the last comparison emitted above, whichever it was. */
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3399
/* The comparison routines below borrow STR_END and STACK_TOP as scratch
registers for the two characters being compared; their values are saved on
entry and restored before returning. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP

static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call routine that compares two character sequences code unit
by code unit: one addressed through TMP1, the other ending at STR_PTR. TMP2
holds the length in bytes and is counted down by IN_UCHARS(1) per step.
NOTE(review): the exact register contract (who loads TMP1/TMP2, and which flag
the caller tests after the return) is defined by the callers, which are not
visible here - confirm before relying on it. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Step STR_PTR back by the length so that it addresses the start of the
subject text to compare. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers: CHAR1 in TMP3, CHAR2 in the LOCALS0 slot. */
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because the loads in the loop
use an offset of one character; presumably MOVU_UCHAR is the updating load
form that also advances the base register - confirm against sljit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first difference... */
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* ...or when the remaining length reaches zero. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one-character bias applied to STR_PTR before the loop, then
restore the borrowed registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3429
/* do_caselesscmp borrows STACK_LIMIT to hold the address of the lower-casing
table (common->lcc); the register is saved on entry and restored on exit. */
#define LCC_TABLE STACK_LIMIT

static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call routine with the same structure as do_casefulcmp, except
that each code unit is first mapped through the common->lcc table before the
two sides are compared. In the non-8-bit builds, code units above 255 skip the
table lookup and are compared as they are.
NOTE(review): the exact register contract (who loads TMP1/TMP2, and which flag
the caller tests after the return) is defined by the callers, which are not
visible here - confirm before relying on it. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Step STR_PTR back by the length so that it addresses the start of the
subject text to compare. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Save the three borrowed registers, then load the table address. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one character back because the loads in the loop
use an offset of one character; presumably MOVU_UCHAR is the updating load
form that also advances the base register - confirm against sljit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#ifndef COMPILE_PCRE8
/* Code units above 255 are not looked up in the table. */
jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
/* Leave the loop at the first difference, or when the length reaches zero. */
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the one-character bias applied to STR_PTR before the loop, then
restore the borrowed registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3474
3475 #undef LCC_TABLE
3476 #undef CHAR1
3477 #undef CHAR2
3478
3479 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3480
3481 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3482 {
3483 /* This function would be ineffective to do in JIT level. */
3484 pcre_uint32 c1, c2;
3485 const pcre_uchar *src2 = args->uchar_ptr;
3486 const pcre_uchar *end2 = args->end;
3487 const ucd_record *ur;
3488 const pcre_uint32 *pp;
3489
3490 while (src1 < end1)
3491 {
3492 if (src2 >= end2)
3493 return (pcre_uchar*)1;
3494 GETCHARINC(c1, src1);
3495 GETCHARINC(c2, src2);
3496 ur = GET_UCD(c2);
3497 if (c1 != c2 && c1 != c2 + ur->other_case)
3498 {
3499 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3500 for (;;)
3501 {
3502 if (c1 < *pp) return NULL;
3503 if (c1 == *pp++) break;
3504 }
3505 }
3506 }
3507 return src2;
3508 }
3509
3510 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3511
/* Emits the comparison of one pattern character (all of its code units when
common->utf is set and the character has extra units) against the subject.

Arguments:
  common      the compiler state
  caseless    TRUE for a caseless comparison
  cc          points to the pattern character
  context     state shared between consecutive calls; the loads below address
                the subject at STR_PTR - context->length, so length appears to
                be the number of bytes still to be compared (set up by the
                caller - confirm there)
  backtracks  receives the jumps taken on mismatch

Returns:      cc advanced past the code units that were processed
*/

static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context* context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  /* The value returned here packs two things: the bit that differs between
  the two cases, and the offset of the code unit that contains it. They are
  separated below. */
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#ifdef COMPILE_PCRE8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#else
#ifdef COMPILE_PCRE16
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif
#endif
  }

/* First call for this context: load the first chunk of the subject into TMP1
and make TMP2 the register that receives the next chunk. */
if (context->sourcereg == -1)
  {
#ifdef COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
#ifdef COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
#endif /* COMPILE_PCRE8 */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED

  /* Unaligned read is supported. */
  /* Code units are collected in context->c (expected value) and context->oc
  (case bit mask) until a chunk of 4, 2 or 1 bytes is complete; the chunk is
  then compared with a single instruction. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

#ifdef COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the NEXT chunk into the idle register first, then switch
    registers, so that the comparison below works on the chunk that was
    loaded earlier. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#ifdef COMPILE_PCRE8
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are expressed in code units, hence the division by
    sizeof(pcre_uchar). When a case bit is present it is forced on in the
    subject chunk and in the expected value before they are compared. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported. */
  /* One code unit per comparison: load the next unit into the idle register,
  switch registers, and compare the unit that was loaded earlier. */
#ifdef COMPILE_PCRE8
  if (context->length > 0)
    OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
  if (context->length > 0)
    OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
3669
3670 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3671
/* Helper macros for compile_xclass_matchingpath(). Each one keeps a
compile-time record (typeoffset / charoffset) of the constant that the emitted
code has already subtracted from the register (typereg / TMP1), and emits a
single SUB or ADD when the requested offset differs from the recorded one.
They are wrapped in do { } while (0) so that each use is exactly one statement
and stays correct inside an unbraced if/else. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3691
3692 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3693 {
3694 DEFINE_COMPILER;
3695 jump_list *found = NULL;
3696 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3697 pcre_int32 c, charoffset;
3698 const pcre_uint32 *other_cases;
3699 struct sljit_jump *jump = NULL;
3700 pcre_uchar *ccbegin;
3701 int compares, invertcmp, numberofcmps;
3702 #ifdef SUPPORT_UCP
3703 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3704 BOOL charsaved = FALSE;
3705 int typereg = TMP1, scriptreg = TMP1;
3706 pcre_int32 typeoffset;
3707 #endif
3708
3709 /* Although SUPPORT_UTF must be defined, we are
3710 not necessary in utf mode even in 8 bit mode. */
3711 detect_partial_match(common, backtracks);
3712 read_char(common);
3713
3714 if ((*cc++ & XCL_MAP) != 0)
3715 {
3716 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3717 #ifndef COMPILE_PCRE8
3718 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3719 #elif defined SUPPORT_UTF
3720 if (common->utf)
3721 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3722 #endif
3723
3724 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3725 {
3726 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3727 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3728 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3729 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3730 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3731 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3732 }
3733
3734 #ifndef COMPILE_PCRE8
3735 JUMPHERE(jump);
3736 #elif defined SUPPORT_UTF
3737 if (common->utf)
3738 JUMPHERE(jump);
3739 #endif
3740 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3741 #ifdef SUPPORT_UCP
3742 charsaved = TRUE;
3743 #endif
3744 cc += 32 / sizeof(pcre_uchar);
3745 }
3746
3747 /* Scanning the necessary info. */
3748 ccbegin = cc;
3749 compares = 0;
3750 while (*cc != XCL_END)
3751 {
3752 compares++;
3753 if (*cc == XCL_SINGLE)
3754 {
3755 cc += 2;
3756 #ifdef SUPPORT_UTF
3757 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3758 #endif
3759 #ifdef SUPPORT_UCP
3760 needschar = TRUE;
3761 #endif
3762 }
3763 else if (*cc == XCL_RANGE)
3764 {
3765 cc += 2;
3766 #ifdef SUPPORT_UTF
3767 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3768 #endif
3769 cc++;
3770 #ifdef SUPPORT_UTF
3771 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3772 #endif
3773 #ifdef SUPPORT_UCP
3774 needschar = TRUE;
3775 #endif
3776 }
3777 #ifdef SUPPORT_UCP
3778 else
3779 {
3780 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3781 cc++;
3782 switch(*cc)
3783 {
3784 case PT_ANY:
3785 break;
3786
3787 case PT_LAMP:
3788 case PT_GC:
3789 case PT_PC:
3790 case PT_ALNUM:
3791 needstype = TRUE;
3792 break;
3793
3794 case PT_SC:
3795 needsscript = TRUE;
3796 break;
3797
3798 case PT_SPACE:
3799 case PT_PXSPACE:
3800 case PT_WORD:
3801 needstype = TRUE;
3802 needschar = TRUE;
3803 break;
3804
3805 case PT_CLIST:
3806 needschar = TRUE;
3807 break;
3808
3809 default:
3810 SLJIT_ASSERT_STOP();
3811 break;
3812 }
3813 cc += 2;
3814 }
3815 #endif
3816 }
3817
3818 #ifdef SUPPORT_UCP
3819 /* Simple register allocation. TMP1 is preferred if possible. */
3820 if (needstype || needsscript)
3821 {
3822 if (needschar && !charsaved)
3823 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3824 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3825 if (needschar)
3826 {
3827 if (needstype)
3828 {
3829 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3830 typereg = RETURN_ADDR;
3831 }
3832
3833 if (needsscript)
3834 scriptreg = TMP3;
3835 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3836 }
3837 else if (needstype && needsscript)
3838 scriptreg = TMP3;
3839 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3840
3841 if (needsscript)
3842 {
3843 if (scriptreg == TMP1)
3844 {
3845 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3846 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3847 }
3848 else
3849 {
3850 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3851 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3852 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3853 }
3854 }
3855 }
3856 #endif
3857
3858 /* Generating code. */
3859 cc = ccbegin;
3860 charoffset = 0;
3861 numberofcmps = 0;
3862 #ifdef SUPPORT_UCP
3863 typeoffset = 0;
3864 #endif
3865
3866 while (*cc != XCL_END)
3867 {
3868 compares--;
3869 invertcmp = (compares == 0 && list != backtracks);
3870 jump = NULL;
3871
3872 if (*cc == XCL_SINGLE)
3873 {
3874 cc ++;
3875 #ifdef SUPPORT_UTF
3876 if (common->utf)
3877 {
3878 GETCHARINC(c, cc);
3879 }
3880 else
3881 #endif
3882 c = *cc++;
3883
3884 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3885 {
3886 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3887 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3888 numberofcmps++;
3889 }
3890 else if (numberofcmps > 0)
3891 {
3892 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3893 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3894 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3895 numberofcmps = 0;
3896 }
3897 else
3898 {
3899 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3900 numberofcmps = 0;
3901 }
3902 }
3903 else if (*cc == XCL_RANGE)
3904 {
3905 cc ++;
3906 #ifdef SUPPORT_UTF
3907 if (common->utf)
3908 {
3909 GETCHARINC(c, cc);
3910 }
3911 else
3912 #endif
3913 c = *cc++;
3914 SET_CHAR_OFFSET(c);
3915 #ifdef SUPPORT_UTF
3916 if (common->utf)
3917 {
3918 GETCHARINC(c, cc);
3919 }
3920 else
3921 #endif
3922 c = *cc++;
3923 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3924 {
3925 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3926 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3927 numberofcmps++;
3928 }
3929 else if (numberofcmps > 0)
3930 {
3931 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3932 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3933 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3934 numberofcmps = 0;
3935 }
3936 else
3937 {
3938 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3939 numberofcmps = 0;
3940 }
3941 }
3942 #ifdef SUPPORT_UCP
3943 else
3944 {
3945 if (*cc == XCL_NOTPROP)
3946 invertcmp ^= 0x1;
3947 cc++;
3948 switch(*cc)
3949 {
3950 case PT_ANY:
3951 if (list != backtracks)
3952 {
3953 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3954 continue;
3955 }
3956 else if (cc[-1] == XCL_NOTPROP)
3957 continue;
3958 jump = JUMP(SLJIT_JUMP);
3959 break;
3960
3961 case PT_LAMP:
3962 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3963 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3964 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3965 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3966 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3967 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3968 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3969 break;
3970
3971 case PT_GC:
3972 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3973 SET_TYPE_OFFSET(c);
3974 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3975 break;
3976
3977 case PT_PC:
3978 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3979 break;
3980
3981 case PT_SC:
3982 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3983 break;
3984
3985 case PT_SPACE:
3986 case PT_PXSPACE:
3987 if (*cc == PT_SPACE)
3988 {
3989 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3990 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3991 }
3992 SET_CHAR_OFFSET(9);
3993 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3994 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3995 if (*cc == PT_SPACE)
3996 JUMPHERE(jump);
3997
3998 SET_TYPE_OFFSET(ucp_Zl);
3999 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4000 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4001 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4002 break;
4003
4004 case PT_WORD:
4005 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4006 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4007 /* ... fall through */
4008
4009 case PT_ALNUM:
4010 SET_TYPE_OFFSET(ucp_Ll);
4011 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4012 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
4013 SET_TYPE_OFFSET(ucp_Nd);
4014 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4015 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4016 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4017 break;
4018
4019 case PT_CLIST:
4020 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4021
4022 /* At least three characters are required.
4023 Otherwise this case would be handled by the normal code path. */
4024 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4025 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4026
4027 /* Optimizing character pairs, if their difference is power of 2. */
4028 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4029 {
4030 if (charoffset == 0)
4031 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4032 else
4033 {
4034 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_w)charoffset);
4035 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4036 }
4037 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4038 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4039 other_cases += 2;
4040 }
4041 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4042 {
4043 if (charoffset == 0)
4044 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4045 else
4046 {
4047 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_w)charoffset);
4048 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4049 }
4050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4051 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4052
4053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4054 COND_VALUE(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, SLJIT_C_EQUAL);
4055
4056 other_cases += 3;
4057 }
4058 else
4059 {
4060 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4061 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4062 }
4063
4064 while (*other_cases != NOTACHAR)
4065 {
4066 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4067 COND_VALUE(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, SLJIT_C_EQUAL);
4068 }
4069 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4070 break;
4071 }
4072 cc += 2;
4073 }
4074 #endif
4075
4076 if (jump != NULL)
4077 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4078 }
4079
4080 if (found != NULL)
4081 set_jumps(found, LABEL());
4082 }
4083
4084 #undef SET_TYPE_OFFSET
4085 #undef SET_CHAR_OFFSET
4086
4087 #endif
4088
4089 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4090 {
4091 DEFINE_COMPILER;
4092 int length;
4093 unsigned int c, oc, bit;
4094 compare_context context;
4095 struct sljit_jump *jump[4];
4096 #ifdef SUPPORT_UTF
4097 struct sljit_label *label;
4098 #ifdef SUPPORT_UCP
4099 pcre_uchar propdata[5];
4100 #endif
4101 #endif
4102
4103 switch(type)
4104 {
4105 case OP_SOD:
4106 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4108 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4109 return cc;
4110
4111 case OP_SOM:
4112 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4114 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4115 return cc;
4116
4117 case OP_NOT_WORD_BOUNDARY:
4118 case OP_WORD_BOUNDARY:
4119 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4120 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4121 return cc;
4122
4123 case OP_NOT_DIGIT:
4124 case OP_DIGIT:
4125 /* Digits are usually 0-9, so it is worth to optimize them. */
4126 if (common->digits[0] == -2)
4127 get_ctype_ranges(common, ctype_digit, common->digits);
4128 detect_partial_match(common, backtracks);
4129 /* Flip the starting bit in the negative case. */
4130 if (type == OP_NOT_DIGIT)
4131 common->digits[1] ^= 1;
4132 if (!check_ranges(common, common->digits, backtracks, TRUE))
4133 {
4134 read_char8_type(common);
4135 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4136 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4137 }
4138 if (type == OP_NOT_DIGIT)
4139 common->digits[1] ^= 1;
4140 return cc;
4141
4142 case OP_NOT_WHITESPACE:
4143 case OP_WHITESPACE:
4144 detect_partial_match(common, backtracks);
4145 read_char8_type(common);
4146 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4147 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4148 return cc;
4149
4150 case OP_NOT_WORDCHAR:
4151 case OP_WORDCHAR:
4152 detect_partial_match(common, backtracks);
4153 read_char8_type(common);
4154 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4155 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4156 return cc;
4157
4158 case OP_ANY:
4159 detect_partial_match(common, backtracks);
4160 read_char(common);
4161 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4162 {
4163 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4164 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4165 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4166 else
4167 jump[1] = check_str_end(common);
4168
4169 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4170 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4171 if (jump[1] != NULL)
4172 JUMPHERE(jump[1]);
4173 JUMPHERE(jump[0]);
4174 }
4175 else
4176 check_newlinechar(common, common->nltype, backtracks, TRUE);
4177 return cc;
4178
4179 case OP_ALLANY:
4180 detect_partial_match(common, backtracks);
4181 #ifdef SUPPORT_UTF
4182 if (common->utf)
4183 {
4184 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4186 #ifdef COMPILE_PCRE8
4187 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4188 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4189 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4190 #else /* COMPILE_PCRE8 */
4191 #ifdef COMPILE_PCRE16
4192 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4193 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4194 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4195 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4196 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4197 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4198 #endif /* COMPILE_PCRE16 */
4199 #endif /* COMPILE_PCRE8 */
4200 JUMPHERE(jump[0]);
4201 return cc;
4202 }
4203 #endif
4204 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4205 return cc;
4206
4207 case OP_ANYBYTE:
4208 detect_partial_match(common, backtracks);
4209 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4210 return cc;
4211
4212 #ifdef SUPPORT_UTF
4213 #ifdef SUPPORT_UCP
4214 case OP_NOTPROP:
4215 case OP_PROP:
4216 propdata[0] = 0;
4217 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4218 propdata[2] = cc[0];
4219 propdata[3] = cc[1];
4220 propdata[4] = XCL_END;
4221 compile_xclass_matchingpath(common, propdata, backtracks);
4222 return cc + 2;
4223 #endif
4224 #endif
4225
4226 case OP_ANYNL:
4227 detect_partial_match(common, backtracks);
4228 read_char(common);
4229 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4230 /* We don't need to handle soft partial matching case. */
4231 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4232 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4233 else
4234 jump[1] = check_str_end(common);
4235 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4236 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4237 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4238 jump[3] = JUMP(SLJIT_JUMP);
4239 JUMPHERE(jump[0]);
4240 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4241 JUMPHERE(jump[1]);
4242 JUMPHERE(jump[2]);
4243 JUMPHERE(jump[3]);
4244 return cc;
4245
4246 case OP_NOT_HSPACE:
4247 case OP_HSPACE:
4248 detect_partial_match(common, backtracks);
4249 read_char(common);
4250 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4251 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4252 return cc;
4253
4254 case OP_NOT_VSPACE:
4255 case OP_VSPACE:
4256 detect_partial_match(common, backtracks);
4257 read_char(common);
4258 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4259 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4260 return cc;
4261
4262 #ifdef SUPPORT_UCP
4263 case OP_EXTUNI:
4264 detect_partial_match(common, backtracks);
4265 read_char(common);
4266 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4268 /* Optimize register allocation: use a real register. */
4269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4270 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4271
4272 label = LABEL();
4273 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4274 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4275 read_char(common);
4276 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4277 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4278 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4279
4280 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4281 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_w)PRIV(ucp_gbtable));
4282 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4283 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4284 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4285 JUMPTO(SLJIT_C_NOT_ZERO, label);
4286
4287 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4288 JUMPHERE(jump[0]);
4289 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4290
4291 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4292 {
4293 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4294 /* Since we successfully read a char above, partial matching must occur. */
4295 check_partial(common, TRUE);
4296 JUMPHERE(jump[0]);
4297 }
4298 return cc;
4299 #endif
4300
4301 case OP_EODN:
4302 /* Requires rather complex checks. */
4303 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4304 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4305 {
4306 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4307 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4308 if (common->mode == JIT_COMPILE)
4309 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4310 else
4311 {
4312 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4313 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4314 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4315 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4316 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4317 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4318 check_partial(common, TRUE);
4319 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4320 JUMPHERE(jump[1]);
4321 }
4322 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4323 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4324 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4325 }
4326 else if (common->nltype == NLTYPE_FIXED)
4327 {
4328 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4329 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4330 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4331 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4332 }
4333 else
4334 {
4335 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4336 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4337 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4338 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4339 jump[2] = JUMP(SLJIT_C_GREATER);
4340 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4341 /* Equal. */
4342 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4343 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4344 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4345
4346 JUMPHERE(jump[1]);
4347 if (common->nltype == NLTYPE_ANYCRLF)
4348 {
4349 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4350 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4351 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4352 }
4353 else
4354 {
4355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4356 read_char(common);
4357 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4358 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4359 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4360 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4361 }
4362 JUMPHERE(jump[2]);
4363 JUMPHERE(jump[3]);
4364 }
4365 JUMPHERE(jump[0]);
4366 check_partial(common, FALSE);
4367 return cc;
4368
4369 case OP_EOD:
4370 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4371 check_partial(common, FALSE);
4372 return cc;
4373
4374 case OP_CIRC:
4375 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4376 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4377 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4378 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4379 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4380 return cc;
4381
4382 case OP_CIRCM:
4383 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4385 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4386 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4387 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4388 jump[0] = JUMP(SLJIT_JUMP);
4389 JUMPHERE(jump[1]);
4390
4391 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4392 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4393 {
4394 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4395 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4396 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4397 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4398 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4399 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4400 }
4401 else
4402 {
4403 skip_char_back(common);
4404 read_char(common);
4405 check_newlinechar(common, common->nltype, backtracks, FALSE);
4406 }
4407 JUMPHERE(jump[0]);
4408 return cc;
4409
4410 case OP_DOLL:
4411 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4412 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4413 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4414
4415 if (!common->endonly)
4416 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4417 else
4418 {
4419 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4420 check_partial(common, FALSE);
4421 }
4422 return cc;
4423
4424 case OP_DOLLM:
4425 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4426 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4427 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4428 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4429 check_partial(common, FALSE);
4430 jump[0] = JUMP(SLJIT_JUMP);
4431 JUMPHERE(jump[1]);
4432
4433 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4434 {
4435 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4436 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4437 if (common->mode == JIT_COMPILE)
4438 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4439 else
4440 {
4441 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4442 /* STR_PTR = STR_END - IN_UCHARS(1) */
4443 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4444 check_partial(common, TRUE);
4445 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4446 JUMPHERE(jump[1]);
4447 }
4448
4449 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4451 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4452 }
4453 else
4454 {
4455 peek_char(common);
4456 check_newlinechar(common, common->nltype, backtracks, FALSE);
4457 }
4458 JUMPHERE(jump[0]);
4459 return cc;
4460
4461 case OP_CHAR:
4462 case OP_CHARI:
4463 length = 1;
4464 #ifdef SUPPORT_UTF
4465 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4466 #endif
4467 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4468 {
4469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4470 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4471
4472 context.length = IN_UCHARS(length);
4473 context.sourcereg = -1;
4474 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4475 context.ucharptr = 0;
4476 #endif
4477 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4478 }
4479 detect_partial_match(common, backtracks);
4480 read_char(common);
4481 #ifdef SUPPORT_UTF
4482 if (common->utf)
4483 {
4484 GETCHAR(c, cc);
4485 }
4486 else
4487 #endif
4488 c = *cc;
4489 if (type == OP_CHAR || !char_has_othercase(common, cc))
4490 {
4491 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4492 return cc + length;
4493 }
4494 oc = char_othercase(common, c);
4495 bit = c ^ oc;
4496 if (is_powerof2(bit))
4497 {
4498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4499 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4500 return cc + length;
4501 }
4502 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4503 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4504 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4505 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4506 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4507 return cc + length;
4508
4509 case OP_NOT:
4510 case OP_NOTI:
4511 detect_partial_match(common, backtracks);
4512 length = 1;
4513 #ifdef SUPPORT_UTF
4514 if (common->utf)
4515 {
4516 #ifdef COMPILE_PCRE8
4517 c = *cc;
4518 if (c < 128)
4519 {
4520 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4521 if (type == OP_NOT || !char_has_othercase(common, cc))
4522 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4523 else
4524 {
4525 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4526 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4527 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4528 }
4529 /* Skip the variable-length character. */
4530 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4531 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4532 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4534 JUMPHERE(jump[0]);
4535 return cc + 1;
4536 }
4537 else
4538 #endif /* COMPILE_PCRE8 */
4539 {
4540 GETCHARLEN(c, cc, length);
4541 read_char(common);
4542 }
4543 }
4544 else
4545 #endif /* SUPPORT_UTF */
4546 {
4547 read_char(common);
4548 c = *cc;
4549 }
4550
4551 if (type == OP_NOT || !char_has_othercase(common, cc))
4552 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4553 else
4554 {
4555 oc = char_othercase(common, c);
4556 bit = c ^ oc;
4557 if (is_powerof2(bit))
4558 {
4559 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4560 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4561 }
4562 else
4563 {
4564 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4565 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4566 }
4567 }
4568 return cc + length;
4569
4570 case OP_CLASS:
4571 case OP_NCLASS:
4572 detect_partial_match(common, backtracks);
4573 read_char(common);
4574 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4575 return cc + 32 / sizeof(pcre_uchar);
4576
4577 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4578 jump[0] = NULL;
4579 #ifdef COMPILE_PCRE8
4580 /* This check only affects 8 bit mode. In other modes, we
4581 always need to compare the value with 255. */
4582 if (common->utf)
4583 #endif /* COMPILE_PCRE8 */
4584 {
4585 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4586 if (type == OP_CLASS)
4587 {
4588 add_jump(compiler, backtracks, jump[0]);
4589 jump[0] = NULL;
4590 }
4591 }
4592 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4593 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4594 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4595 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4596 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4597 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4598 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4599 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4600 if (jump[0] != NULL)
4601 JUMPHERE(jump[0]);
4602 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4603 return cc + 32 / sizeof(pcre_uchar);
4604
4605 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4606 case OP_XCLASS:
4607 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4608 return cc + GET(cc, 0) - 1;
4609 #endif
4610
4611 case OP_REVERSE:
4612 length = GET(cc, 0);
4613 if (length == 0)
4614 return cc + LINK_SIZE;
4615 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4616 #ifdef SUPPORT_UTF
4617 if (common->utf)
4618 {
4619 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4620 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4621 label = LABEL();
4622 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4623 skip_char_back(common);
4624 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4625 JUMPTO(SLJIT_C_NOT_ZERO, label);
4626 }
4627 else
4628 #endif
4629 {
4630 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4631 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4632 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4633 }
4634 check_start_used_ptr(common);
4635 return cc + LINK_SIZE;
4636 }
4637 SLJIT_ASSERT_STOP();
4638 return cc;
4639 }
4640
4641 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4642 {
4643 /* This function consumes at least one input character. */
4644 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4645 DEFINE_COMPILER;
4646 pcre_uchar *ccbegin = cc;
4647 compare_context context;
4648 int size;
4649
4650 context.length = 0;
4651 do
4652 {
4653 if (cc >= ccend)
4654 break;
4655
4656 if (*cc == OP_CHAR)
4657 {
4658 size = 1;
4659 #ifdef SUPPORT_UTF
4660 if (common->utf && HAS_EXTRALEN(cc[1]))
4661 size += GET_EXTRALEN(cc[1]);
4662 #endif
4663 }
4664 else if (*cc == OP_CHARI)
4665 {
4666 size = 1;
4667 #ifdef SUPPORT_UTF
4668 if (common->utf)
4669 {
4670 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4671 size = 0;
4672 else if (HAS_EXTRALEN(cc[1]))
4673 size += GET_EXTRALEN(cc[1]);
4674 }
4675 else
4676 #endif
4677 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4678 size = 0;
4679 }
4680 else
4681 size = 0;
4682
4683 cc += 1 + size;
4684 context.length += IN_UCHARS(size);
4685 }
4686 while (size > 0 && context.length <= 128);
4687
4688 cc = ccbegin;
4689 if (context.length > 0)
4690 {
4691 /* We have a fixed-length byte sequence. */
4692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4693 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4694
4695 context.sourcereg = -1;
4696 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4697 context.ucharptr = 0;
4698 #endif
4699 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4700 return cc;
4701 }
4702
4703 /* A non-fixed length character will be checked if length == 0. */
4704 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4705 }
4706
4707 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4708 {
4709 DEFINE_COMPILER;
4710 int offset = GET2(cc, 1) << 1;
4711
4712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4713 if (!common->jscript_compat)
4714 {
4715 if (backtracks == NULL)
4716 {
4717 /* OVECTOR(1) contains the "string begin - 1" constant. */
4718 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4719 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4720 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4721 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4722 return JUMP(SLJIT_C_NOT_ZERO);
4723 }
4724 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4725 }
4726 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4727 }
4728
4729 /* Forward declarations. */
4730 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4731 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4732
/* Allocates a new backtrack record of 'size' bytes from the compiler's memory,
clears it, stores 'ccstart' in its cc field and pushes it on the top of
parent's backtrack list (the previous top is kept in the prev field).

The macro relies on three names being in scope where it is expanded:
'compiler', 'parent' and 'backtrack' (the latter receives the new record).
If the compiler is in error state after the allocation, the enclosing
function returns 'error'. */
4733 #define PUSH_BACKTRACK(size, ccstart, error) \
4734 do \
4735 { \
4736 backtrack = sljit_alloc_memory(compiler, (size)); \
4737 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4738 return error; \
4739 memset(backtrack, 0, size); \
4740 backtrack->prev = parent->top; \
4741 backtrack->cc = (ccstart); \
4742 parent->top = backtrack; \
4743 } \
4744 while (0)
4745
/* Same steps as PUSH_BACKTRACK (allocate 'size' bytes, clear them, set the cc
field to 'ccstart', push on parent's backtrack list), but for use in functions
without a return value: if the compiler is in error state after the
allocation, the enclosing function simply returns.

Requires 'compiler', 'parent' and 'backtrack' to be in scope. */
4746 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4747 do \
4748 { \
4749 backtrack = sljit_alloc_memory(compiler, (size)); \
4750 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4751 return; \
4752 memset(backtrack, 0, size); \
4753 backtrack->prev = parent->top; \
4754 backtrack->cc = (ccstart); \
4755 parent->top = backtrack; \
4756 } \
4757 while (0)
4758
/* Casts the 'backtrack' variable of the enclosing function to a pointer to
the given (more specific) backtrack structure type. */
4759 #define BACKTRACK_AS(type) ((type *)backtrack)
4760
/* Emits the code which matches one occurrence of the back reference at cc
(OP_REF, or the caseless OP_REFI) against the subject at STR_PTR.

Arguments:
  common      the compiler state
  cc          points to the OP_REF / OP_REFI opcode
  backtracks  list receiving the jumps taken when the match fails
  withchecks  when TRUE, the unset-group check (skipped in jscript_compat
              mode) and the empty-capture check are emitted as well
  emptyfail   only meaningful with withchecks: when TRUE an empty captured
              string is added to backtracks, otherwise it jumps to the end
              of the emitted code

Returns the pattern position after the reference (cc + 1 + IMM2_SIZE). */
4761 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4762 {
4763 DEFINE_COMPILER;
/* Twice the number stored after the opcode: OVECTOR(offset) and
OVECTOR(offset + 1) are the two values belonging to the referenced group. */
4764 int offset = GET2(cc, 1) << 1;
/* Jump taken for an empty captured string; stays NULL without withchecks. */
4765 struct sljit_jump *jump = NULL;
4766 struct sljit_jump *partial;
4767 struct sljit_jump *nopartial;
4768
4769 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4770 /* OVECTOR(1) contains the "string begin - 1" constant. */
4771 if (withchecks && !common->jscript_compat)
4772 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4773
4774 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4775 if (common->utf && *cc == OP_REFI)
4776 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp.
The assert documents the register assignment this call sequence relies on. */
4777 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4778 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4779 if (withchecks)
4780 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4781
4782 /* Needed to save important temporary registers. */
/* STACK_TOP is SLJIT_TEMPORARY_REG2, which is overwritten with ARGUMENTS
below, so it is parked in LOCALS0 and reloaded after the call. The current
STR_PTR is handed over through the uchar_ptr field of jit_arguments. */
4783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4784 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4786 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4787 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failure codes, anything else becomes the new
STR_PTR. NOTE(review): presumably 0 means mismatch and 1 means the subject
ended during the comparison - confirm against do_utf_caselesscmp. */
4788 if (common->mode == JIT_COMPILE)
4789 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4790 else
4791 {
/* Partial matching modes: 0 backtracks directly, 1 runs check_partial
before backtracking. */
4792 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4793 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4794 check_partial(common, FALSE);
4795 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4796 JUMPHERE(nopartial);
4797 }
4798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4799 }
4800 else
4801 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4802 {
/* TMP2 = OVECTOR(offset + 1) - TMP1, the size of the captured string.
The flags of this subtraction are reused for the empty check below. */
4803 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4804 if (withchecks)
4805 jump = JUMP(SLJIT_C_ZERO);
4806
/* Advance STR_PTR over the whole reference first. Running past STR_END is
a plain failure in JIT_COMPILE mode; in the partial modes the jump is kept
and resolved further below. */
4807 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4808 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4809 if (common->mode == JIT_COMPILE)
4810 add_jump(compiler, backtracks, partial);
4811
/* The comparison itself is done by a shared helper. NOTE(review): the test
after the call suggests the helper leaves zero in TMP2 on success - confirm
against the casefulcmp / caselesscmp helpers. */
4812 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4813 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4814
4815 if (common->mode != JIT_COMPILE)
4816 {
/* The full-length comparison succeeded: skip the partial handling. */
4817 nopartial = JUMP(SLJIT_JUMP);
4818 JUMPHERE(partial);
/* The reference is longer than the rest of the subject. Reduce TMP2 by the
overshoot, so only the available part is compared. */
4819 /* TMP2 -= STR_PTR - STR_END */
4820 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4821 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: go straight to check_partial. */
4822 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4823 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4824 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4825 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4826 JUMPHERE(partial);
/* This path always ends in the backtrack list after check_partial. */
4827 check_partial(common, FALSE);
4828 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4829 JUMPHERE(nopartial);
4830 }
4831 }
4832
/* Resolve the empty-capture jump created under withchecks. */
4833 if (jump != NULL)
4834 {
4835 if (emptyfail)
4836 add_jump(compiler, backtracks, jump);
4837 else
4838 JUMPHERE(jump);
4839 }
4840 return cc + 1 + IMM2_SIZE;
4841 }
4842
/* Emits the matching path of a repeated back reference: an OP_REF / OP_REFI
item at cc which is followed by one of the OP_CR* repeat opcodes.

A new iterator_backtrack record is pushed on parent's backtrack list. The
repeat opcode is decoded into a minimum and maximum count, where max == 0 is
treated as "no upper limit", and then either the greedy or the minimizing
variant of the loop is emitted.

Returns the pattern position after the repeat opcode (and its counts), or
NULL if the backtrack record cannot be allocated. */
4843 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4844 {
4845 DEFINE_COMPILER;
4846 backtrack_common *backtrack;
4847 pcre_uchar type;
4848 struct sljit_label *label;
4849 struct sljit_jump *zerolength;
4850 struct sljit_jump *jump = NULL;
4851 pcre_uchar *ccbegin = cc;
4852 int min = 0, max = 0;
4853 BOOL minimize;
4854
4855 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4856
/* The repeat opcode follows the reference opcode and its number. Its lowest
bit selects the minimizing variant (the OP_CRMIN* cases below). */
4857 type = cc[1 + IMM2_SIZE];
4858 minimize = (type & 0x1) != 0;
4859 switch(type)
4860 {
4861 case OP_CRSTAR:
4862 case OP_CRMINSTAR:
4863 min = 0;
4864 max = 0;
4865 cc += 1 + IMM2_SIZE + 1;
4866 break;
4867 case OP_CRPLUS:
4868 case OP_CRMINPLUS:
4869 min = 1;
4870 max = 0;
4871 cc += 1 + IMM2_SIZE + 1;
4872 break;
4873 case OP_CRQUERY:
4874 case OP_CRMINQUERY:
4875 min = 0;
4876 max = 1;
4877 cc += 1 + IMM2_SIZE + 1;
4878 break;
4879 case OP_CRRANGE:
4880 case OP_CRMINRANGE:
/* The two counts are stored after the repeat opcode. */
4881 min = GET2(cc, 1 + IMM2_SIZE + 1);
4882 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4883 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4884 break;
4885 default:
4886 SLJIT_ASSERT_STOP();
4887 break;
4888 }
4889
/* Greedy variant: match as many times as allowed, pushing STR_PTR on the
stack after each optional repetition. */
4890 if (!minimize)
4891 {
4892 if (min == 0)
4893 {
/* Zero repetitions are acceptable: the current STR_PTR and a zero value are
stored on the stack first, and compile_ref_checks gets no backtrack list. */
4894 allocate_stack(common, 2);
4895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4896 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4897 /* Temporary release of STR_PTR. */
4898 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4899 zerolength = compile_ref_checks(common, ccbegin, NULL);
4900 /* Restore if not zero length. */
4901 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4902 }
4903 else
4904 {
/* At least one repetition is required: only a zero value is stored, and the
checks may add jumps to the backtrack list of this item. */
4905 allocate_stack(common, 1);
4906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4907 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4908 }
4909
/* A repetition counter is only needed for counts above one; it is kept in
the POSSESSIVE0 local. */
4910 if (min > 1 || max > 1)
4911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4912
/* Start of the loop: one occurrence of the reference, without the checks
already emitted by compile_ref_checks. */
4913 label = LABEL();
4914 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4915
4916 if (min > 1 || max > 1)
4917 {
4918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4919 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without pushing a backtrack position. */
4921 if (min > 1)
4922 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4923 if (max > 1)
4924 {
/* Below the maximum: push STR_PTR and try another repetition. */
4925 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4926 allocate_stack(common, 1);
4927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4928 JUMPTO(SLJIT_JUMP, label);
4929 JUMPHERE(jump);
4930 }
4931 }
4932
/* No upper limit: push STR_PTR and loop unconditionally. The loop is left
through the jumps emitted by compile_ref_matchingpath. */
4933 if (max == 0)
4934 {
4935 /* Includes min > 1 case as well. */
4936 allocate_stack(common, 1);
4937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4938 JUMPTO(SLJIT_JUMP, label);
4939 }
4940
/* The jump returned by compile_ref_checks lands here, and this label is
recorded as the matching path of the backtrack record. */
4941 JUMPHERE(zerolength);
4942 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4943
4944 decrease_call_count(common);
4945 return cc;
4946 }
4947
/* Minimizing variant. Two stack slots are used: STACK(0) receives STR_PTR
and STACK(1) is the repetition counter. The counter is not initialized for
OP_CRMINSTAR, which never reads it (min is 0 and max is 0 in that case). */
4948 allocate_stack(common, 2);
4949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4950 if (type != OP_CRMINSTAR)
4951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4952
4953 if (min == 0)
4954 {
/* Zero repetitions are tried first: store STR_PTR and jump over the loop
body emitted below. */
4955 zerolength = compile_ref_checks(common, ccbegin, NULL);
4956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4957 jump = JUMP(SLJIT_JUMP);
4958 }
4959 else
4960 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4961
/* Loop body, entered through the matchingpath label recorded here. */
4962 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Fail when the counter has already reached the maximum. */
4963 if (max > 0)
4964 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4965
4966 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4968
4969 if (min > 1)
4970 {
/* Increase the counter and repeat immediately while it is below min. */
4971 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4972 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4974 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4975 }
4976 else if (max > 0)
/* Only the upper limit needs the counter: increase it in place. */
4977 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4978
/* Landing point of the "zero repetitions" jump and of the jump returned by
compile_ref_checks. */
4979 if (jump != NULL)
4980 JUMPHERE(jump);
4981 JUMPHERE(zerolength);
4982
4983 decrease_call_count(common);
4984 return cc;
4985 }
4986
4987 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4988 {
4989 DEFINE_COMPILER;
4990 backtrack_common *backtrack;
4991 recurse_entry *entry = common->entries;
4992 recurse_entry *prev = NULL;
4993 int start = GET(cc, 1);
4994
4995 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4996 while (entry != NULL)
4997 {
4998 if (entry->start == start)
4999 break;
5000 prev = entry;
5001 entry = entry->next;
5002 }
5003
5004 if (entry == NULL)
5005 {
5006 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5007 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5008 return NULL;
5009 entry->next = NULL;
5010 entry->entry = NULL;
5011 entry->calls = NULL;
5012 entry->start = start;
5013
5014 if (prev != NULL)
5015 prev->next = entry;
5016 else
5017 common->entries = entry;
5018 }
5019
5020 if (common->has_set_som && common->mark_ptr != 0)
5021 {
5022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5023 allocate_stack(common, 2);
5024 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5025 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5027 }
5028 else if (common->has_set_som || common->mark_ptr != 0)
5029 {
5030 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5031 allocate_stack(common, 1);
5032 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5033 }
5034
5035 if (entry->entry == NULL)
5036 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5037 else
5038 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5039 /* Leave if the match is failed. */
5040 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5041 return cc + 1 + LINK_SIZE;
5042 }
5043
/* Emits the matching-path code for an assertion group.

   cc points at an assertion opcode in the range OP_ASSERT..OP_ASSERTBACK_NOT
   (checked by an assertion below), optionally preceded by OP_BRAZERO or
   OP_BRAMINZERO. Every alternative of the assertion is compiled in turn,
   together with its backtracking path.

   common      - compiler state; its quit/accept lists and labels are cleared
                 for the duration of the assertion and restored before every
                 return.
   cc          - current position in the compiled pattern.
   backtrack   - assertion backtrack record; framesize and private_data_ptr
                 are stored into it, and for OP_BRAZERO / OP_BRAMINZERO its
                 matchingpath label and topbacktracks list are used.
   conditional - when TRUE, failure jumps are collected in
                 backtrack->condfailed instead of
                 backtrack->common.topbacktracks.

   Returns the code pointer 1 + LINK_SIZE units after the opcode that ends the
   last alternative, or NULL when the compiler reports an error. */
5044 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5045 {
5046 DEFINE_COMPILER;
5047 int framesize;
5048 int private_data_ptr;
5049 backtrack_common altbacktrack;
5050 pcre_uchar *ccbegin;
5051 pcre_uchar opcode;
5052 pcre_uchar bra = OP_BRA;
5053 jump_list *tmp = NULL;
     /* List that receives the "assertion failed" jumps. */
5054 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5055 jump_list **found;
5056 /* Saving previous accept variables. */
5057 struct sljit_label *save_quitlabel = common->quitlabel;
5058 struct sljit_label *save_acceptlabel = common->acceptlabel;
5059 jump_list *save_quit = common->quit;
5060 jump_list *save_accept = common->accept;
5061 struct sljit_jump *jump;
5062 struct sljit_jump *brajump = NULL;
5063
     /* Remember and step over an optional OP_BRAZERO / OP_BRAMINZERO prefix. */
5064 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5065 {
5066 SLJIT_ASSERT(!conditional);
5067 bra = *cc;
5068 cc++;
5069 }
5070 private_data_ptr = PRIVATE_DATA(cc);
5071 SLJIT_ASSERT(private_data_ptr != 0);
5072 framesize = get_framesize(common, cc, FALSE);
5073 backtrack->framesize = framesize;
5074 backtrack->private_data_ptr = private_data_ptr;
5075 opcode = *cc;
5076 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
     /* Where to jump when an alternative matched: positive assertions collect
        these jumps in the local list tmp (bound at "Assert is successful"
        below); for negative assertions a matching alternative means the
        assertion failed, so the jump goes straight to target. */
5077 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
     /* ccbegin stays on the start of the current alternative, cc moves to its end. */
5078 ccbegin = cc;
5079 cc += GET(cc, 1);
5080
5081 if (bra == OP_BRAMINZERO)
5082 {
5083 /* This is a braminzero backtrack path. */
5084 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5085 free_stack(common, 1);
5086 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5087 }
5088
     /* Without a frame (framesize < 0) only STACK_TOP is remembered in the
        private data slot and STR_PTR is pushed. Otherwise framesize + 2 slots
        are allocated: STR_PTR, the previous private data value, and the frame
        written by init_frame. */
5089 if (framesize < 0)
5090 {
5091 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5092 allocate_stack(common, 1);
5093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5094 }
5095 else
5096 {
5097 allocate_stack(common, framesize + 2);
5098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5099 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5101 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5103 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5104 }
5105
5106 memset(&altbacktrack, 0, sizeof(backtrack_common));
5107 common->quitlabel = NULL;
5108 common->quit = NULL;
     /* One iteration per alternative of the assertion. */
5109 while (1)
5110 {
5111 common->acceptlabel = NULL;
5112 common->accept = NULL;
5113 altbacktrack.top = NULL;
5114 altbacktrack.topbacktracks = NULL;
5115
     /* Second and later alternatives restart from the STR_PTR saved on the stack. */
5116 if (*ccbegin == OP_ALT)
5117 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5118
5119 altbacktrack.cc = ccbegin;
5120 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5121 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5122 {
     /* Restore the caller's quit/accept state before giving up. */
5123 common->quitlabel = save_quitlabel;
5124 common->acceptlabel = save_acceptlabel;
5125 common->quit = save_quit;
5126 common->accept = save_accept;
5127 return NULL;
5128 }
     /* Accept jumps recorded while compiling this alternative land here. */
5129 common->acceptlabel = LABEL();
5130 if (common->accept != NULL)
5131 set_jumps(common->accept, common->acceptlabel);
5132
5133 /* Reset stack. */
5134 if (framesize < 0)
5135 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5136 else {
5137 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5138 {
5139 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5140 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5141 }
5142 else
5143 {
5144 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5145 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5146 }
5147 }
5148
5149 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5150 {
5151 /* We know that STR_PTR was stored on the top of the stack. */
5152 if (conditional)
5153 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5154 else if (bra == OP_BRAZERO)
5155 {
5156 if (framesize < 0)
5157 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5158 else
5159 {
5160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5161 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5163 }
5164 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5166 }
5167 else if (framesize >= 0)
5168 {
5169 /* For OP_BRA and OP_BRAMINZERO. */
5170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5171 }
5172 }
     /* The alternative matched: leave through the list selected above. */
5173 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5174
5175 compile_backtrackingpath(common, altbacktrack.top);
5176 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5177 {
5178 common->quitlabel = save_quitlabel;
5179 common->acceptlabel = save_acceptlabel;
5180 common->quit = save_quit;
5181 common->accept = save_accept;
5182 return NULL;
5183 }
     /* Failures of this alternative continue here, i.e. with the next
        alternative or with the code after the loop. */
5184 set_jumps(altbacktrack.topbacktracks, LABEL());
5185
5186 if (*cc != OP_ALT)
5187 break;
5188
     /* Advance to the next alternative. */
5189 ccbegin = cc;
5190 cc += GET(cc, 1);
5191 }
5192 /* None of them matched. */
5193 if (common->quit != NULL)
5194 set_jumps(common->quit, LABEL());
5195
5196 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5197 {
5198 /* Assert is failed. */
5199 if (conditional || bra == OP_BRAZERO)
5200 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5201
5202 if (framesize < 0)
5203 {
5204 /* The topmost item should be 0. */
5205 if (bra == OP_BRAZERO)
5206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5207 else
5208 free_stack(common, 1);
5209 }
5210 else
5211 {
5212 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5213 /* The topmost item should be 0. */
5214 if (bra == OP_BRAZERO)
5215 {
5216 free_stack(common, framesize + 1);
5217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5218 }
5219 else
5220 free_stack(common, framesize + 2);
5221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5222 }
     /* Jump over the success code. For OP_BRAZERO this jump is bound to the
        matchingpath label further below instead of being added to target. */
5223 jump = JUMP(SLJIT_JUMP);
5224 if (bra != OP_BRAZERO)
5225 add_jump(compiler, target, jump);
5226
5227 /* Assert is successful. */
5228 set_jumps(tmp, LABEL());
5229 if (framesize < 0)
5230 {
5231 /* We know that STR_PTR was stored on the top of the stack. */
5232 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5233 /* Keep the STR_PTR on the top of the stack. */
5234 if (bra == OP_BRAZERO)
5235 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5236 else if (bra == OP_BRAMINZERO)
5237 {
5238 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5240 }
5241 }
5242 else
5243 {
5244 if (bra == OP_BRA)
5245 {
5246 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5247 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5248 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5249 }
5250 else
5251 {
5252 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5253 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5254 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     /* The top slot keeps STR_PTR for OP_BRAZERO and becomes 0 for OP_BRAMINZERO. */
5255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5256 }
5257 }
5258
5259 if (bra == OP_BRAZERO)
5260 {
     /* Both the failed and the successful assertion continue from here. */
5261 backtrack->matchingpath = LABEL();
5262 sljit_set_label(jump, backtrack->matchingpath);
5263 }
5264 else if (bra == OP_BRAMINZERO)
5265 {
5266 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5267 JUMPHERE(brajump);
5268 if (framesize >= 0)
5269 {
5270 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5271 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5273 }
5274 set_jumps(backtrack->common.topbacktracks, LABEL());
5275 }
5276 }
5277 else
5278 {
5279 /* AssertNot is successful. */
5280 if (framesize < 0)
5281 {
5282 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5283 if (bra != OP_BRA)
5284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5285 else
5286 free_stack(common, 1);
5287 }
5288 else
5289 {
5290 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5291 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5292 /* The topmost item should be 0. */
5293 if (bra != OP_BRA)
5294 {
5295 free_stack(common, framesize + 1);
5296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5297 }
5298 else
5299 free_stack(common, framesize + 2);
5300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5301 }
5302
5303 if (bra == OP_BRAZERO)
5304 backtrack->matchingpath = LABEL();
5305 else if (bra == OP_BRAMINZERO)
5306 {
5307 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5308 JUMPHERE(brajump);
5309 }
5310
     /* With a BRAZERO / BRAMINZERO prefix the failure jumps collected so far
        are bound right here and the list is emptied. */
5311 if (bra != OP_BRA)
5312 {
5313 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5314 set_jumps(backtrack->common.topbacktracks, LABEL());
5315 backtrack->common.topbacktracks = NULL;
5316 }
5317 }
5318
     /* Restore the caller's quit/accept state. */
5319 common->quitlabel = save_quitlabel;
5320 common->acceptlabel = save_acceptlabel;
5321 common->quit = save_quit;
5322 common->accept = save_accept;
5323 return cc + 1 + LINK_SIZE;
5324 }
5325
5326 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5327 {
5328 int condition = FALSE;
5329 pcre_uchar *slotA = name_table;
5330 pcre_uchar *slotB;
5331 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5332 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5333 sljit_w no_capture;
5334 int i;
5335
5336 locals += refno & 0xff;
5337 refno >>= 8;
5338 no_capture = locals[1];
5339
5340 for (i = 0; i < name_count; i++)
5341 {
5342 if (GET2(slotA, 0) == refno) break;
5343 slotA += name_entry_size;
5344 }
5345
5346 if (i < name_count)
5347 {
5348 /* Found a name for the number - there can be only one; duplicate names
5349 for different numbers are allowed, but not vice versa. First scan down
5350 for duplicates. */
5351
5352 slotB = slotA;
5353 while (slotB > name_table)
5354 {
5355 slotB -= name_entry_size;
5356 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5357 {
5358 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5359 if (condition) break;
5360 }
5361 else break;
5362 }
5363
5364 /* Scan up for duplicates */
5365 if (!condition)
5366 {
5367 slotB = slotA;
5368 for (i++; i < name_count; i++)
5369 {
5370 slotB += name_entry_size;
5371 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5372 {
5373 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5374 if (condition) break;
5375 }
5376 else break;
5377 }
5378 }
5379 }
5380 return condition;
5381 }
5382
5383 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5384 {
5385 int condition = FALSE;
5386 pcre_uchar *slotA = name_table;
5387 pcre_uchar *slotB;
5388 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5389 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5390 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5391 int i;
5392
5393 for (i = 0; i < name_count; i++)
5394 {
5395 if (GET2(slotA, 0) == recno) break;
5396 slotA += name_entry_size;
5397 }
5398
5399 if (i < name_count)
5400 {
5401 /* Found a name for the number - there can be only one; duplicate
5402 names for different numbers are allowed, but not vice versa. First
5403 scan down for duplicates. */
5404
5405 slotB = slotA;
5406 while (slotB > name_table)
5407 {
5408 slotB -= name_entry_size;
5409 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5410 {
5411 condition = GET2(slotB, 0) == group_num;
5412 if (condition) break;
5413 }
5414 else break;
5415 }
5416
5417 /* Scan up for duplicates */
5418 if (!condition)
5419 {
5420 slotB = slotA;
5421 for (i++; i < name_count; i++)
5422 {
5423 slotB += name_entry_size;
5424 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5425 {
5426 condition = GET2(slotB, 0) == group_num;
5427 if (condition) break;
5428 }
5429 else break;
5430 }
5431 }
5432 }
5433 return condition;
5434 }
5435
5436 /*
5437 Handling bracketed expressions is probably the most complex part.
5438
5439 Stack layout naming characters:
5440 S - Push the current STR_PTR
5441 0 - Push a 0 (NULL)
5442 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5443 before the next alternative. Not pushed if there are no alternatives.
5444 M - Any values pushed by the current alternative. Can be empty, or anything.
5445 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5446 L - Push the previous local (pointed by localptr) to the stack
5447 () - optional values stored on the stack
5448 ()* - optional, can be stored multiple times
5449
5450 The following list shows the regular expression templates, their PCRE byte codes
5451 and stack layout supported by pcre-sljit.
5452
5453 (?:) OP_BRA | OP_KET A M
5454 () OP_CBRA | OP_KET C M
5455 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5456 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5457 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5458 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5459 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5460 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5461 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5462 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5463 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5464 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5465 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5466 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5467 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5468 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5469 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5470 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5471 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5472 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5473 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5474 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5475
5476
5477 Stack layout naming characters:
5478 A - Push the alternative index (starting from 0) on the stack.
5479 Not pushed if there are no alternatives.
5480 M - Any values pushed by the current alternative. Can be empty, or anything.
5481
5482 The next list shows the possible content of a bracket:
5483 (|) OP_*BRA | OP_ALT ... M A
5484 (?()|) OP_*COND | OP_ALT M A
5485 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5486 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5487 Or nothing, if trace is unnecessary
5488 */
5489
/* Emits the matching-path code for a bracketed group.

   cc points at the group opcode, optionally preceded by OP_BRAZERO or
   OP_BRAMINZERO. The body distinguishes capturing groups (OP_CBRA, OP_SCBRA),
   OP_SBRA, atomic groups (OP_ONCE; OP_ONCE_NC is handled as OP_ONCE),
   conditional groups (OP_COND, OP_SCOND) and the remaining plain brackets.
   The closing ket must be OP_KET, OP_KETRMAX or OP_KETRMIN (asserted below).

   Only the first alternative is compiled here; the remaining OP_ALT branches
   are skipped at the end of the function. The stack layouts produced are
   described in the large comment preceding this function.

   common - compiler state.
   cc     - current position in the compiled pattern.
   parent - backtrack record of the enclosing construct; with OP_BRAMINZERO
            it is accessed as a braminzero_backtrack.

   Returns the code pointer 1 + LINK_SIZE units after the end of the last
   alternative, or NULL when the compiler reports an error. A conditional
   group whose condition is OP_DEF emits no code: its backtrack record is
   dropped and bracketend(cc) is returned. */
5490 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5491 {
5492 DEFINE_COMPILER;
5493 backtrack_common *backtrack;
5494 pcre_uchar opcode;
5495 int private_data_ptr = 0;
5496 int offset = 0;
5497 int stacksize;
5498 pcre_uchar *ccbegin;
5499 pcre_uchar *matchingpath;
5500 pcre_uchar bra = OP_BRA;
5501 pcre_uchar ket;
5502 assert_backtrack *assert;
5503 BOOL has_alternatives;
5504 struct sljit_jump *jump;
5505 struct sljit_jump *skip;
5506 struct sljit_label *rmaxlabel = NULL;
5507 struct sljit_jump *braminzerojump = NULL;
5508
5509 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5510
     /* Remember and step over an optional OP_BRAZERO / OP_BRAMINZERO prefix. */
5511 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5512 {
5513 bra = *cc;
5514 cc++;
     /* NOTE(review): redundant store; opcode is assigned again unconditionally
        right after this block. */
5515 opcode = *cc;
5516 }
5517
5518 opcode = *cc;
5519 ccbegin = cc;
5520 matchingpath = ccbegin + 1 + LINK_SIZE;
5521
5522 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5523 {
5524 /* Drop this bracket_backtrack. */
5525 parent->top = backtrack->prev;
5526 return bracketend(cc);
5527 }
5528
     /* Opcode that closes the whole group. */
5529 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5530 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5531 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
     /* cc now points at the end of the first alternative. */
5532 cc += GET(cc, 1);
5533
5534 has_alternatives = *cc == OP_ALT;
     /* Conditional groups decide differently: OP_RREF never has a second
        case, and for OP_NRREF it depends on the recursion entry that is
        currently being compiled (common->currententry). */
5535 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5536 {
5537 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5538 if (*matchingpath == OP_NRREF)
5539 {
     /* stacksize is used as a scratch variable for the group number here. */
5540 stacksize = GET2(matchingpath, 1);
5541 if (common->currententry == NULL || stacksize == RREF_ANY)
5542 has_alternatives = FALSE;
5543 else if (common->currententry->start == 0)
5544 has_alternatives = stacksize != 0;
5545 else
5546 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5547 }
5548 }
5549
     /* From here on a repeated OP_COND is treated as OP_SCOND and OP_ONCE_NC
        as OP_ONCE. */
5550 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5551 opcode = OP_SCOND;
5552 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5553 opcode = OP_ONCE;
5554
5555 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5556 {
5557 /* Capturing brackets has a pre-allocated space. */
5558 offset = GET2(ccbegin, 1 + LINK_SIZE);
     /* Non-optimized brackets use the separate OVECTOR_PRIV slot; optimized
        ones use the OVECTOR slot itself as private data. In both cases offset
        ends up as twice the group number. */
5559 if (common->optimized_cbracket[offset] == 0)
5560 {
5561 private_data_ptr = OVECTOR_PRIV(offset);
5562 offset <<= 1;
5563 }
5564 else
5565 {
5566 offset <<= 1;
5567 private_data_ptr = OVECTOR(offset);
5568 }
5569 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5570 matchingpath += IMM2_SIZE;
5571 }
5572 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5573 {
5574 /* Other brackets simply allocate the next entry. */
5575 private_data_ptr = PRIVATE_DATA(ccbegin);
5576 SLJIT_ASSERT(private_data_ptr != 0);
5577 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5578 if (opcode == OP_ONCE)
5579 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5580 }
5581
5582 /* Instructions before the first alternative. */
     /* First pass: count the stack slots needed so they can be allocated at once. */
5583 stacksize = 0;
5584 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5585 stacksize++;
5586 if (bra == OP_BRAZERO)
5587 stacksize++;
5588
5589 if (stacksize > 0)
5590 allocate_stack(common, stacksize);
5591
     /* Second pass: fill the slots (a 0 for repeating kets, STR_PTR for OP_BRAZERO). */
5592 stacksize = 0;
5593 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5594 {
5595 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5596 stacksize++;
5597 }
5598
5599 if (bra == OP_BRAZERO)
5600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5601
5602 if (bra == OP_BRAMINZERO)
5603 {
5604 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5605 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5606 if (ket != OP_KETRMIN)
5607 {
5608 free_stack(common, 1);
5609 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5610 }
5611 else
5612 {
5613 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5614 {
5615 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5616 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5617 /* Nothing stored during the first run. */
5618 skip = JUMP(SLJIT_JUMP);
5619 JUMPHERE(jump);
5620 /* Checking zero-length iteration. */
5621 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5622 {
5623 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5624 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5625 }
5626 else
5627 {
5628 /* Except when the whole stack frame must be saved. */
5629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5630 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5631 }
5632 JUMPHERE(skip);
5633 }
5634 else
5635 {
5636 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5637 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5638 JUMPHERE(jump);
5639 }
5640 }
5641 }
5642
     /* Labels that later iterations of a repeated group jump back to. */
5643 if (ket == OP_KETRMIN)
5644 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5645
5646 if (ket == OP_KETRMAX)
5647 {
5648 rmaxlabel = LABEL();
5649 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5650 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5651 }
5652
5653 /* Handling capturing brackets and alternatives. */
5654 if (opcode == OP_ONCE)
5655 {
5656 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5657 {
5658 /* Neither capturing brackets nor recursions are found in the block. */
5659 if (ket == OP_KETRMIN)
5660 {
5661 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5662 allocate_stack(common, 2);
5663 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5665 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5666 }
5667 else if (ket == OP_KETRMAX || has_alternatives)
5668 {
5669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5670 allocate_stack(common, 1);
5671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5672 }
5673 else
5674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5675 }
5676 else
5677 {
     /* A frame is required. STR_PTR is additionally saved when the group
        repeats or has alternatives. */
5678 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5679 {
5680 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5682 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5685 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5686 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5687 }
5688 else
5689 {
5690 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5692 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5695 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5696 }
5697 }
5698 }
5699 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5700 {
5701 /* Saving the previous values. */
5702 if (common->optimized_cbracket[offset >> 1] == 0)
5703 {
     /* Three slots: both OVECTOR values and the private data value. */
5704 allocate_stack(common, 3);
5705 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5706 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5708 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5710 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5712 }
5713 else
5714 {
     /* Optimized bracket: the private data slot is the OVECTOR slot itself,
        so only two values are saved. */
5715 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5716 allocate_stack(common, 2);
5717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5718 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_w));
5719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5722 }
5723 }
5724 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5725 {
5726 /* Saving the previous value. */
5727 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5728 allocate_stack(common, 1);
5729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5731 }
5732 else if (has_alternatives)
5733 {
5734 /* Pushing the starting string pointer. */
5735 allocate_stack(common, 1);
5736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5737 }
5738
5739 /* Generating code for the first alternative. */
5740 if (opcode == OP_COND || opcode == OP_SCOND)
5741 {
5742 if (*matchingpath == OP_CREF)
5743 {
5744 SLJIT_ASSERT(has_alternatives);
     /* The condition fails when the group's start value equals OVECTOR(1). */
5745 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5746 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5747 matchingpath += 1 + IMM2_SIZE;
5748 }
5749 else if (*matchingpath == OP_NCREF)
5750 {
5751 SLJIT_ASSERT(has_alternatives);
5752 stacksize = GET2(matchingpath, 1);
     /* Fast path: the referenced group itself differs from OVECTOR(1), so
        the helper call is skipped. */
5753 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5754
     /* Otherwise call do_searchovector(). STACK_TOP is parked in the
        POSSESSIVE1 slot around the call, and the first argument packs the
        group number above a word offset derived from ovector_start. */
5755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5757 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5758 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5759 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5760 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5761 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5762 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5763 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5764
5765 JUMPHERE(jump);
5766 matchingpath += 1 + IMM2_SIZE;
5767 }
5768 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5769 {
5770 /* Never has other case. */
5771 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5772
     /* stacksize becomes a compile-time truth value for the condition. */
5773 stacksize = GET2(matchingpath, 1);
5774 if (common->currententry == NULL)
5775 stacksize = 0;
5776 else if (stacksize == RREF_ANY)
5777 stacksize = 1;
5778 else if (common->currententry->start == 0)
5779 stacksize = stacksize == 0;
5780 else
5781 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5782
5783 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5784 {
     /* Decided at compile time: compile either the first branch or, when
        the condition is false, the second branch (nothing if there is none). */
5785 SLJIT_ASSERT(!has_alternatives);
5786 if (stacksize != 0)
5787 matchingpath += 1 + IMM2_SIZE;
5788 else
5789 {
5790 if (*cc == OP_ALT)
5791 {
5792 matchingpath = cc + 1 + LINK_SIZE;
5793 cc += GET(cc, 1);
5794 }
5795 else
5796 matchingpath = cc;
5797 }
5798 }
5799 else
5800 {
5801 SLJIT_ASSERT(has_alternatives);
5802
     /* Decide at run time by calling do_searchgroups(); the group number of
        the current recursion entry is passed in the POSSESSIVE0 slot. */
5803 stacksize = GET2(matchingpath, 1);
5804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5807 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5808 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5809 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5810 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5811 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5812 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5813 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5814 matchingpath += 1 + IMM2_SIZE;
5815 }
5816 }
5817 else
5818 {
     /* The condition is an assertion: compile it in conditional mode. */
5819 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5820 /* Similar code as PUSH_BACKTRACK macro. */
5821 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5822 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5823 return NULL;
5824 memset(assert, 0, sizeof(assert_backtrack));
5825 assert->common.cc = matchingpath;
5826 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
     /* NOTE(review): the result is not checked for NULL before the
        compile_matchingpath call below; the compiler error is only tested
        after that call. Confirm this is intended. */
5827 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5828 }
5829 }
5830
     /* Body of the selected alternative: everything from matchingpath up to cc. */
5831 compile_matchingpath(common, matchingpath, cc, backtrack);
5832 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5833 return NULL;
5834
     /* An atomic group resets STACK_TOP from its private data slot once its
        body has matched. */
5835 if (opcode == OP_ONCE)
5836 {
5837 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5838 {
5839 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5840 /* TMP2 which is set here used by OP_KETRMAX below. */
5841 if (ket == OP_KETRMAX)
5842 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5843 else if (ket == OP_KETRMIN)
5844 {
5845 /* Move the STR_PTR to the private_data_ptr. */
5846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5847 }
5848 }
5849 else
5850 {
5851 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5852 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5853 if (ket == OP_KETRMAX)
5854 {
5855 /* TMP2 which is set here used by OP_KETRMAX below. */
5856 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5857 }
5858 }
5859 }
5860
     /* Count the slots pushed after the alternative: one for STR_PTR or 0,
        and one for the alternative index (except for OP_ONCE). */
5861 stacksize = 0;
5862 if (ket != OP_KET || bra != OP_BRA)
5863 stacksize++;
5864 if (has_alternatives && opcode != OP_ONCE)
5865 stacksize++;
5866
5867 if (stacksize > 0)
5868 allocate_stack(common, stacksize);
5869
5870 stacksize = 0;
5871 if (ket != OP_KET)
5872 {
5873 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5874 stacksize++;
5875 }
5876 else if (bra != OP_BRA)
5877 {
5878 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5879 stacksize++;
5880 }
5881
5882 if (has_alternatives)
5883 {
     /* The first alternative has index 0. */
5884 if (opcode != OP_ONCE)
5885 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5886 if (ket != OP_KETRMAX)
5887 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5888 }
5889
5890 /* Must be after the matchingpath label. */
     /* offset is non-zero only for capturing brackets: store the captured
        range (start taken from the private data slot, end = STR_PTR). */
5891 if (offset != 0)
5892 {
5893 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5894 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5895 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5896 }
5897
5898 if (ket == OP_KETRMAX)
5899 {
5900 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5901 {
5902 if (has_alternatives)
5903 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5904 /* Checking zero-length iteration. */
5905 if (opcode != OP_ONCE)
5906 {
5907 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5908 /* Drop STR_PTR for greedy plus quantifier. */
5909 if (bra != OP_BRAZERO)
5910 free_stack(common, 1);
5911 }
5912 else
5913 /* TMP2 must contain the starting STR_PTR. */
5914 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5915 }
5916 else
     /* No zero-length check for the remaining bracket types: always loop. */
5917 JUMPTO(SLJIT_JUMP, rmaxlabel);
5918 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5919 }
5920
5921 if (bra == OP_BRAZERO)
5922 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5923
5924 if (bra == OP_BRAMINZERO)
5925 {
5926 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5927 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5928 if (braminzerojump != NULL)
5929 {
5930 JUMPHERE(braminzerojump);
5931 /* We need to release the end pointer to perform the
5932 backtrack for the zero-length iteration. When
5933 framesize is < 0, OP_ONCE will do the release itself. */
5934 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5935 {
5936 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5937 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5938 }
5939 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5940 free_stack(common, 1);
5941 }
5942 /* Continue to the normal backtrack. */
5943 }
5944
5945 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5946 decrease_call_count(common);
5947
5948 /* Skip the other alternatives. */
5949 while (*cc == OP_ALT)
5950 cc += GET(cc, 1);
5951 cc += 1 + LINK_SIZE;
5952 return cc;
5953 }
5954
/* Emits the matching path for a bracket of the OP_BRAPOS family (OP_BRAPOS,
   OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
   OP_BRAPOSZERO and terminated by OP_KETRPOS.

   common - shared JIT compiler state.
   cc     - points at OP_BRAPOSZERO or at the bracket opcode itself.
   parent - backtrack node the new bracketpos_backtrack is pushed under.

   Returns the address following the closing OP_KETRPOS (cc + 1 + LINK_SIZE),
   or NULL when the sljit compiler reports an error while an alternative is
   being compiled. */
5955 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5956 {
5957 DEFINE_COMPILER;
5958 backtrack_common *backtrack;
5959 pcre_uchar opcode;
5960 int private_data_ptr;
5961 int cbraprivptr = 0;
5962 int framesize;
5963 int stacksize;
5964 int offset = 0;
5965 BOOL zero = FALSE;
5966 pcre_uchar *ccbegin = NULL;
5967 int stack;
5968 struct sljit_label *loop = NULL;
5969 struct jump_list *emptymatch = NULL;
5970
5971 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* An OP_BRAPOSZERO prefix is only remembered in 'zero'; the bracket opcode
   that follows it is the one processed below. */
5972 if (*cc == OP_BRAPOSZERO)
5973 {
5974 zero = TRUE;
5975 cc++;
5976 }
5977
5978 opcode = *cc;
5979 private_data_ptr = PRIVATE_DATA(cc);
5980 SLJIT_ASSERT(private_data_ptr != 0);
5981 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first alternative (ccbegin). For capturing brackets 'offset'
   becomes the ovector index (capture number * 2) and cbraprivptr the
   private slot returned by OVECTOR_PRIV. */
5982 switch(opcode)
5983 {
5984 case OP_BRAPOS:
5985 case OP_SBRAPOS:
5986 ccbegin = cc + 1 + LINK_SIZE;
5987 break;
5988
5989 case OP_CBRAPOS:
5990 case OP_SCBRAPOS:
5991 offset = GET2(cc, 1 + LINK_SIZE);
5992 /* This case cannot be optimized in the same way as
5993 normal capturing brackets. */
5994 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
5995 cbraprivptr = OVECTOR_PRIV(offset);
5996 offset <<= 1;
5997 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5998 break;
5999
6000 default:
6001 SLJIT_ASSERT_STOP();
6002 break;
6003 }
6004
6005 framesize = get_framesize(common, cc, FALSE);
6006 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6007 if (framesize < 0)
6008 {
/* No frame is created. Stack slots allocated here:
     capturing brackets: STACK(0), STACK(1) = saved OVECTOR(offset) and
                         OVECTOR(offset + 1);
     other brackets:     STACK(0) = STR_PTR;
     when !zero:         one more slot, STACK(stacksize - 1), set to 1 here,
                         cleared after a completed iteration and tested
                         after the loop.
   The local at private_data_ptr receives the resulting STACK_TOP. */
6009 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
6010 if (!zero)
6011 stacksize++;
6012 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6013 allocate_stack(common, stacksize);
6014 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6015
6016 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6017 {
6018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6019 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6020 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6021 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6022 }
6023 else
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6025
6026 if (!zero)
6027 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6028 }
6029 else
6030 {
/* A frame of 'framesize' slots is created by init_frame. In front of it:
   the optional !zero flag (set to 1), the STR_PTR slot for non-capturing
   brackets, and the previous value of the private_data_ptr local (TMP1).
   The private_data_ptr local itself is replaced by the address computed
   into TMP2 below. */
6031 stacksize = framesize + 1;
6032 if (!zero)
6033 stacksize++;
6034 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6035 stacksize++;
6036 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6037 allocate_stack(common, stacksize);
6038
6039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6040 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6042 stack = 0;
6043 if (!zero)
6044 {
6045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6046 stack++;
6047 }
6048 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6049 {
6050 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6051 stack++;
6052 }
6053 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6054 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6055 }
6056
/* Capturing brackets keep the start position of the current iteration in
   the cbraprivptr local. */
6057 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6059
/* 'loop' is the re-entry point after every completed iteration. Each pass
   of the while loop below compiles one alternative: cc is advanced to the
   next OP_ALT / OP_KETRPOS and [ccbegin, cc) is compiled. */
6060 loop = LABEL();
6061 while (*cc != OP_KETRPOS)
6062 {
6063 backtrack->top = NULL;
6064 backtrack->topbacktracks = NULL;
6065 cc += GET(cc, 1);
6066
6067 compile_matchingpath(common, ccbegin, cc, backtrack);
6068 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6069 return NULL;
6070
/* Code emitted from here runs when the alternative has matched: STACK_TOP
   is reset, the end position of the iteration is recorded (ovector pair for
   capturing brackets, the STR_PTR slot otherwise), and control returns to
   'loop'. */
6071 if (framesize < 0)
6072 {
6073 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6074
6075 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6076 {
6077 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6081 }
6082 else
6083 {
6084 if (opcode == OP_SBRAPOS)
6085 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6086 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6087 }
6088
/* For the S variants TMP1 holds the position at which this iteration
   started; an iteration that did not advance STR_PTR leaves the loop. */
6089 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6090 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6091
/* An iteration has completed: clear the !zero flag slot. */
6092 if (!zero)
6093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6094 }
6095 else
6096 {
6097 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6098 {
6099 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
6100 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6103 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6104 }
6105 else
6106 {
6107 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6108 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
6109 if (opcode == OP_SBRAPOS)
6110 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6111 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
6112 }
6113
6114 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6115 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6116
6117 if (!zero)
6118 {
/* NOTE(review): this code sits in the framesize >= 0 arm of the enclosing
   if, so the framesize < 0 case tested below cannot be selected here; only
   the STACK(0) store is ever emitted from this spot. */
6119 if (framesize < 0)
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6121 else
6122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6123 }
6124 }
6125 JUMPTO(SLJIT_JUMP, loop);
6126 flush_stubs(common);
6127
/* Backtracking code of this alternative. Its failure jumps land on the
   label set below, where STR_PTR is reloaded from the position recorded by
   the last completed iteration. */
6128 compile_backtrackingpath(common, backtrack->top);
6129 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6130 return NULL;
6131 set_jumps(backtrack->topbacktracks, LABEL());
6132
6133 if (framesize < 0)
6134 {
6135 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6136 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6137 else
6138 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6139 }
6140 else
6141 {
6142 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6143 {
6144 /* Last alternative. */
/* TMP2 is loaded here because the !zero test emitted after the loop
   addresses the flag slot through it. */
6145 if (*cc == OP_KETRPOS)
6146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6147 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6148 }
6149 else
6150 {
6151 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6152 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6153 }
6154 }
6155
6156 if (*cc == OP_KETRPOS)
6157 break;
/* More alternatives follow: the next one starts right after this OP_ALT. */
6158 ccbegin = cc + 1 + LINK_SIZE;
6159 }
6160
/* Without OP_BRAPOSZERO the flag slot must have been cleared by a completed
   iteration; if it is still non-zero the emitted code jumps to the
   backtrack list. */
6161 backtrack->topbacktracks = NULL;
6162 if (!zero)
6163 {
6164 if (framesize < 0)
6165 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6166 else /* TMP2 is set to [private_data_ptr] above. */
6167 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6168 }
6169
6170 /* None of them matched. */
6171 set_jumps(emptymatch, LABEL());
6172 decrease_call_count(common);
6173 return cc + 1 + LINK_SIZE;
6174 }
6175
/* Decodes the iterator that starts at cc.

   common - shared JIT compiler state (used for next_opcode and the utf flag).
   cc     - points at the iterator opcode, or at a class opcode that is
            followed by an OP_CR* repeat.
   opcode - out: the repeat kind, rebased onto the OP_STAR..OP_POSUPTO range.
            OP_CRRANGE / OP_CRMINRANGE are kept as they are, unless arg2 is 0
            (rewritten to OP_UPTO / OP_MINUPTO) or arg1 equals arg2
            (rewritten to OP_EXACT).
   type   - out: what is repeated: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the
            type opcode found after an OP_TYPE* iterator, or the class opcode.
   arg1   - out: the count operand of the UPTO/MINUPTO/EXACT/POSUPTO forms;
            for class ranges the second 2-unit value after the class.
   arg2   - out: only written for class ranges: the first 2-unit value after
            the class (the iterator compiler in this file uses it as the
            lower bound and arg1 as the upper bound).
   end    - out, may be NULL: address following the whole item.

   Returns cc advanced past the opcode (and past the count or type operand
   where one is present); for classes this is the address right after the
   class opcode. */
6176 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6177 {
6178 int class_len;
6179
6180 *opcode = *cc;
6181 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6182 {
6183 cc++;
6184 *type = OP_CHAR;
6185 }
6186 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6187 {
6188 cc++;
6189 *type = OP_CHARI;
6190 *opcode -= OP_STARI - OP_STAR;
6191 }
6192 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6193 {
6194 cc++;
6195 *type = OP_NOT;
6196 *opcode -= OP_NOTSTAR - OP_STAR;
6197 }
6198 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6199 {
6200 cc++;
6201 *type = OP_NOTI;
6202 *opcode -= OP_NOTSTARI - OP_STAR;
6203 }
6204 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6205 {
6206 cc++;
6207 *opcode -= OP_TYPESTAR - OP_STAR;
/* 0 marks "type still to be read"; it is filled in from *cc further down. */
6208 *type = 0;
6209 }
6210 else
6211 {
/* Only class opcodes may reach this branch. The check used to join the two
   comparisons with ||, which holds for every opcode value and therefore
   verified nothing. */
6212 SLJIT_ASSERT(*opcode >= OP_CLASS && *opcode <= OP_XCLASS);
6213 *type = *opcode;
6214 cc++;
/* Length of the class item measured from the class opcode: a fixed 32 byte
   bitmap plus the opcode when the type is below OP_XCLASS, otherwise the
   value stored in the link field at cc. cc[class_len - 1] is then the
   repeat opcode that follows the class. */
6215 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6216 *opcode = cc[class_len - 1];
6217 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6218 {
6219 *opcode -= OP_CRSTAR - OP_STAR;
6220 if (end != NULL)
6221 *end = cc + class_len;
6222 }
6223 else
6224 {
6225 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6226 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6227 *arg2 = GET2(cc, class_len);
6228
6229 if (*arg2 == 0)
6230 {
6231 SLJIT_ASSERT(*arg1 != 0);
6232 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6233 }
6234 if (*arg1 == *arg2)
6235 *opcode = OP_EXACT;
6236
6237 if (end != NULL)
6238 *end = cc + class_len + 2 * IMM2_SIZE;
6239 }
6240 return cc;
6241 }
6242
/* The counted forms carry a 2-unit count right after the opcode. */
6243 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6244 {
6245 *arg1 = GET2(cc, 0);
6246 cc += IMM2_SIZE;
6247 }
6248
/* OP_TYPE* iterators: the repeated type opcode is stored at cc. */
6249 if (*type == 0)
6250 {
6251 *type = *cc;
6252 if (end != NULL)
6253 *end = next_opcode(common, cc);
6254 cc++;
6255 return cc;
6256 }
6257
/* Character iterators: the item ends after one character, including its
   extra code units when UTF mode is active. */
6258 if (end != NULL)
6259 {
6260 *end = cc + 1;
6261 #ifdef SUPPORT_UTF
6262 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6263 #endif
6264 }
6265 return cc;
6266 }
6267
/* Emits the matching path for an iterator over a single item (character,
   negated character, character type or class), as decoded by
   get_iterator_parameters.

   common - shared JIT compiler state.
   cc     - points at the iterator (or class) opcode.
   parent - backtrack node the new iterator_backtrack is pushed under.

   Returns the 'end' address reported by get_iterator_parameters, i.e. the
   position after the whole item. */
6268 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6269 {
6270 DEFINE_COMPILER;
6271 backtrack_common *backtrack;
6272 pcre_uchar opcode;
6273 pcre_uchar type;
6274 int arg1 = -1, arg2 = -1;
6275 pcre_uchar* end;
6276 jump_list *nomatch = NULL;
6277 struct sljit_jump *jump = NULL;
6278 struct sljit_label *label;
/* Iterator state (two words) lives in the locals area at private_data_ptr
   when PRIVATE_DATA(cc) is non-zero, otherwise in the stack slots STACK(0)
   and STACK(1), which are allocated in the individual cases below. */
6279 int private_data_ptr = PRIVATE_DATA(cc);
6280 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6281 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6282 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6283 int tmp_base, tmp_offset;
6284
6285 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6286
6287 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6288
/* Choose the scratch location used by the OP_EXACT and possessive cases:
   the TMP3 register for the first group of types, the POSSESSIVE0 local for
   the second group (presumably because matching those types may clobber
   TMP3 -- confirm against compile_char1_matchingpath). */
6289 switch (type)
6290 {
6291 case OP_NOT_DIGIT:
6292 case OP_DIGIT:
6293 case OP_NOT_WHITESPACE:
6294 case OP_WHITESPACE:
6295 case OP_NOT_WORDCHAR:
6296 case OP_WORDCHAR:
6297 case OP_ANY:
6298 case OP_ALLANY:
6299 case OP_ANYBYTE:
6300 case OP_ANYNL:
6301 case OP_NOT_HSPACE:
6302 case OP_HSPACE:
6303 case OP_NOT_VSPACE:
6304 case OP_VSPACE:
6305 case OP_CHAR:
6306 case OP_CHARI:
6307 case OP_NOT:
6308 case OP_NOTI:
6309 case OP_CLASS:
6310 case OP_NCLASS:
6311 tmp_base = TMP3;
6312 tmp_offset = 0;
6313 break;
6314
6315 default:
6316 SLJIT_ASSERT_STOP();
6317 /* Fall through. */
6318
6319 case OP_EXTUNI:
6320 case OP_XCLASS:
6321 case OP_NOTPROP:
6322 case OP_PROP:
6323 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6324 tmp_offset = POSSESSIVE0;
6325 break;
6326 }
6327
6328 switch(opcode)
6329 {
/* Greedy forms. */
6330 case OP_STAR:
6331 case OP_PLUS:
6332 case OP_UPTO:
6333 case OP_CRRANGE:
6334 if (type == OP_ANYNL || type == OP_EXTUNI)
6335 {
/* For these two types every position reached is pushed onto the stack
   individually (one allocate_stack(common, 1) per loop pass). A 0 entry is
   pushed first, presumably as the end marker for the backtracking path;
   OP_STAR and OP_UPTO additionally push the starting STR_PTR. */
6336 SLJIT_ASSERT(private_data_ptr == 0);
6337 if (opcode == OP_STAR || opcode == OP_UPTO)
6338 {
6339 allocate_stack(common, 2);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6342 }
6343 else
6344 {
6345 allocate_stack(common, 1);
6346 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6347 }
6348
/* The counted forms keep the number of matches in the POSSESSIVE0 local. */
6349 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6350 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6351
6352 label = LABEL();
6353 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6354 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6355 {
/* Increment the count; loop again without pushing while it is below arg2,
   and leave the loop through 'jump' once it reaches arg1. */
6356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6357 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6358 if (opcode == OP_CRRANGE && arg2 > 0)
6359 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6360 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6361 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6363 }
6364
6365 /* We cannot use TMP3 because of this allocate_stack. */
6366 allocate_stack(common, 1);
6367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6368 JUMPTO(SLJIT_JUMP, label);
6369 if (jump != NULL)
6370 JUMPHERE(jump);
6371 }
6372 else
6373 {
/* General greedy loop. offset0 always holds the last position reached.
   offset1 holds the starting STR_PTR when opcode <= OP_PLUS (presumably
   OP_STAR and OP_PLUS), otherwise a counter that starts at 1 and is
   incremented after every successful match. */
6374 if (opcode == OP_PLUS)
6375 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6376 if (private_data_ptr == 0)
6377 allocate_stack(common, 2);
6378 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6379 if (opcode <= OP_PLUS)
6380 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6381 else
6382 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6383 label = LABEL();
6384 compile_char1_matchingpath(common, type, cc, &nomatch);
6385 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6386 if (opcode <= OP_PLUS)
6387 JUMPTO(SLJIT_JUMP, label);
6388 else if (opcode == OP_CRRANGE && arg1 == 0)
6389 {
/* arg1 == 0: the counter is still maintained but no upper limit is tested. */
6390 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6391 JUMPTO(SLJIT_JUMP, label);
6392 }
6393 else
6394 {
6395 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6396 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6397 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6398 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6399 }
6400 set_jumps(nomatch, LABEL());
/* The counter is one above the number of matches, so a value below
   arg2 + 1 means fewer than arg2 matches were found: backtrack. */
6401 if (opcode == OP_CRRANGE)
6402 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Continue from the last position that matched. */
6403 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6404 }
6405 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6406 break;
6407
/* Minimizing star/plus: only the mandatory first match of OP_MINPLUS is
   emitted here and the current STR_PTR is saved; further matches are
   presumably attempted from the backtracking path, which re-enters at the
   matchingpath label. */
6408 case OP_MINSTAR:
6409 case OP_MINPLUS:
6410 if (opcode == OP_MINPLUS)
6411 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6412 if (private_data_ptr == 0)
6413 allocate_stack(common, 1);
6414 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6415 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6416 break;
6417
/* Minimizing counted forms: save STR_PTR and start the counter at 1.
   OP_CRMINRANGE enters the backtrack list straight away through an
   unconditional jump. */
6418 case OP_MINUPTO:
6419 case OP_CRMINRANGE:
6420 if (private_data_ptr == 0)
6421 allocate_stack(common, 2);
6422 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6423 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6424 if (opcode == OP_CRMINRANGE)
6425 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6426 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6427 break;
6428
/* Optional item: the position before the item is saved; OP_QUERY tries the
   item immediately, OP_MINQUERY emits no match attempt here. */
6429 case OP_QUERY:
6430 case OP_MINQUERY:
6431 if (private_data_ptr == 0)
6432 allocate_stack(common, 1);
6433 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6434 if (opcode == OP_QUERY)
6435 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6436 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6437 break;
6438
/* Exact count: the scratch location counts down from arg1 to zero; any
   failed match goes to the backtrack list. */
6439 case OP_EXACT:
6440 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6441 label = LABEL();
6442 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6443 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6444 JUMPTO(SLJIT_C_NOT_ZERO, label);
6445 break;
6446
/* Possessive forms: the scratch location tracks the last position that
   matched, and STR_PTR is restored from it when a match attempt fails. No
   state is kept on the stack. OP_POSUPTO counts down from arg1 in the
   POSSESSIVE1 local. */
6447 case OP_POSSTAR:
6448 case OP_POSPLUS:
6449 case OP_POSUPTO:
6450 if (opcode == OP_POSPLUS)
6451 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6452 if (opcode == OP_POSUPTO)
6453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6454 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6455 label = LABEL();
6456 compile_char1_matchingpath(common, type, cc, &nomatch);
6457 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6458 if (opcode != OP_POSUPTO)
6459 JUMPTO(SLJIT_JUMP, label);
6460 else
6461 {
6462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6463 JUMPTO(SLJIT_C_NOT_ZERO, label);
6464 }
6465 set_jumps(nomatch, LABEL());
6466 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6467 break;
6468
/* Possessive optional item: a single match attempt; on failure STR_PTR is
   restored to the saved position. */
6469 case OP_POSQUERY:
6470 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6471 compile_char1_matchingpath(common, type, cc, &nomatch);
6472 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6473 set_jumps(nomatch, LABEL());
6474 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6475 break;
6476
6477 default:
6478 SLJIT_ASSERT_STOP();
6479 break;
6480 }
6481
6482 decrease_call_count(common);
6483 return end;
6484 }
6485
/* Emits the code for OP_FAIL, OP_ASSERT_ACCEPT and the remaining accept
   opcode handled here (presumably OP_ACCEPT -- the caller is not visible in
   this part of the file).

   common - shared JIT compiler state.
   cc     - points at the opcode.
   parent - backtrack node the new entry is pushed under.

   Returns cc + 1 in every case. */
6486 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6487 {
6488 DEFINE_COMPILER;
6489 backtrack_common *backtrack;
6490
6491 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6492
/* OP_FAIL: unconditionally enter the backtrack list. */
6493 if (*cc == OP_FAIL)
6494 {
6495 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6496 return cc + 1;
6497 }
6498
/* Accept without further checks. The jump targets common->acceptlabel when
   it is already known, otherwise it is queued on common->accept. A non-NULL
   common->currententry presumably means a recursion entry is being
   compiled -- confirm against compile_recurse. */
6499 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6500 {
6501 /* No need to check notempty conditions. */
6502 if (common->acceptlabel == NULL)
6503 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6504 else
6505 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6506 return cc + 1;
6507 }
6508
/* Decision sequence emitted below:
     1. STR_PTR differs from OVECTOR(0)          -> accept;
     2. arguments->notempty is non-zero          -> backtrack;
     3. arguments->notempty_atstart is zero      -> accept;
     4. arguments->str differs from STR_PTR      -> accept;
     5. otherwise                                -> backtrack. */
6509 if (common->acceptlabel == NULL)
6510 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6511 else
6512 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6513 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6514 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6515 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6516 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6517 if (common->acceptlabel == NULL)
6518 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6519 else
6520 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6521 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6522 if (common->acceptlabel == NULL)
6523 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6524 else
6525 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6526 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6527 return cc + 1;
6528 }
6529
/* Emits the code that records a capture group in the output vector, for an
   opcode whose capture number is stored as a 2-unit operand at cc + 1
   (presumably OP_CLOSE -- the caller is not visible in this part of the
   file).

   common - shared JIT compiler state.
   cc     - points at the opcode.

   Returns cc + 1 + IMM2_SIZE. */
6530 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6531 {
6532 DEFINE_COMPILER;
6533 int offset = GET2(cc, 1);
6534 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6535
6536 /* Data will be discarded anyway... */
6537 if (common->currententry != NULL)
6538 return cc + 1 + IMM2_SIZE;
6539
/* For a bracket that is not marked optimized, the value kept in
   OVECTOR_PRIV(offset) is fetched into TMP1 and copied to OVECTOR(offset)
   after 'offset' has been doubled into an ovector index. The load has to be
   emitted first because OVECTOR_PRIV takes the capture number, not the
   doubled index. */
6540 if (!optimized_cbracket)
6541 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6542 offset <<= 1;
/* The second word of the ovector pair always receives the current STR_PTR. */
6543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6544 if (!optimized_cbracket)
6545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6546 return cc + 1 + IMM2_SIZE;
6547 }
6548
6549 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6550 {
6551 DEFINE_COMPILER;
6552 backtrack_common *backtrack;
6553
6554 while (cc < ccend)
6555 {
6556 switch(*cc)
6557 {
6558 case OP_SOD:
6559 case OP_SOM:
6560 case OP_NOT_WORD_BOUNDARY:
6561 case OP_WORD_BOUNDARY:
6562 case OP_NOT_DIGIT:
6563 case OP_DIGIT:
6564 case OP_NOT_WHITESPACE:
6565 case OP_WHITESPACE:
6566 case OP_NOT_WORDCHAR:
6567 case OP_WORDCHAR:
6568 case OP_ANY:
6569 case OP_ALLANY:
6570 case OP_ANYBYTE:
6571 case OP_NOTPROP:
6572 case OP_PROP:
6573 case OP_ANYNL:
6574 case OP_NOT_HSPACE:
6575 case OP_HSPACE:
6576 case OP_NOT_VSPACE:
6577 case OP_VSPACE:
6578 case OP_EXTUNI:
6579 case OP_EODN:
6580 case OP_EOD:
6581 case OP_CIRC:
6582 case OP_CIRCM:
6583 case OP_DOLL:
6584 case OP_DOLLM:
6585 case OP_NOT:
6586 case OP_NOTI:
6587 case OP_REVERSE:
6588 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6589 break;
6590
6591 case OP_SET_SOM:
6592 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6593 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6594 allocate_stack(common, 1);
6595 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6597 cc++;
6598 break;
6599
6600 case OP_CHAR:
6601 case OP_CHARI:
6602 if (common->mode == JIT_COMPILE)
6603 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6604 else
6605 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6606 break;
6607
6608 case OP_STAR:
6609 case OP_MINSTAR:
6610 case OP_PLUS:
6611 case OP_MINPLUS:
6612 case OP_QUERY:
6613 case OP_MINQUERY:
6614 case OP_UPTO:
6615 case OP_MINUPTO:
6616 case OP_EXACT:
6617 case OP_POSSTAR:
6618 case OP_POSPLUS:
6619 case OP_POSQUERY:
6620 case OP_POSUPTO:
6621 case OP_STARI:
6622 case OP_MINSTARI:
6623 case OP_PLUSI:
6624 case OP_MINPLUSI:
6625 case OP_QUERYI:
6626 case OP_MINQUERYI:
6627 case OP_UPTOI:
6628 case OP_MINUPTOI:
6629 case OP_EXACTI:
6630 case OP_POSSTARI:
6631 case OP_POSPLUSI:
6632 case OP_POSQUERYI:
6633 case OP_POSUPTOI:
6634 case OP_NOTSTAR:
6635 case OP_NOTMINSTAR:
6636 case OP_NOTPLUS:
6637 case OP_NOTMINPLUS:
6638 case OP_NOTQUERY:
6639 case OP_NOTMINQUERY:
6640 case OP_NOTUPTO:
6641 case OP_NOTMINUPTO:
6642 case OP_NOTEXACT:
6643 case OP_NOTPOSSTAR:
6644 case OP_NOTPOSPLUS:
6645 case OP_NOTPOSQUERY:
6646 case OP_NOTPOSUPTO:
6647 case OP_NOTSTARI:
6648 case OP_NOTMINSTARI:
6649 case OP_NOTPLUSI:
6650 case OP_NOTMINPLUSI:
6651 case OP_NOTQUERYI:
6652 case OP_NOTMINQUERYI:
6653 case OP_NOTUPTOI:
6654 case OP_NOTMINUPTOI:
6655 case OP_NOTEXACTI:
6656 case OP_NOTPOSSTARI:
6657 case OP_NOTPOSPLUSI:
6658 case OP_NOTPOSQUERYI:
6659 case OP_NOTPOSUPTOI:
6660 case OP_TYPESTAR:
6661 case OP_TYPEMINSTAR:
6662 case OP_TYPEPLUS:
6663 case OP_TYPEMINPLUS:
6664 case OP_TYPEQUERY:
6665 case OP_TYPEMINQUERY:
6666 case OP_TYPEUPTO:
6667 case OP_TYPEMINUPTO:
6668 case OP_TYPEEXACT:
6669 case OP_TYPEPOSSTAR:
6670 case OP_TYPEPOSPLUS:
6671 case OP_TYPEPOSQUERY:
6672 case OP_TYPEPOSUPTO:
6673 cc = compile_iterator_matchingpath(common, cc, parent);
6674 break;
6675
6676 case OP_CLASS:
6677 case OP_NCLASS:
6678 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6679 cc = compile_iterator_matchingpath(common, cc, parent);
6680 else
6681 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6682 break;
6683
6684 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
6685 case OP_XCLASS:
6686 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6687 cc = compile_iterator_matchingpath(common, cc, parent);
6688 else
6689 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6690 break;
6691 #endif
6692
6693 case OP_REF:
6694 case OP_REFI:
6695 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6696 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6697 else
6698 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6699 break;
6700
6701 case OP_RECURSE:
6702 cc = compile_recurse_matchingpath(common, cc, parent);
6703 break;
6704
6705 case OP_ASSERT:
6706 case OP_ASSERT_NOT:
6707 case OP_ASSERTBACK:
6708 case OP_ASSERTBACK_NOT:
6709 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6710 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6711 break;
6712
6713 case OP_BRAMINZERO:
6714 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6715 cc = bracketend(cc + 1);
6716 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6717 {
6718 allocate_stack(common, 1);
6719 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6720 }
6721 else
6722 {
6723 allocate_stack(common, 2);
6724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6726 }
6727 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6728 if (cc[1] > OP_ASSERTBACK_NOT)
6729 decrease_call_count(common);
6730 break;
6731
6732 case OP_ONCE:
6733 case OP_ONCE_NC:
6734 case OP_BRA:
6735 case OP_CBRA:
6736 case OP_COND:
6737 case OP_SBRA:
6738 case OP_SCBRA:
6739 case OP_SCOND:
6740 cc = compile_bracket_matchingpath(common, cc, parent);
6741 break;
6742
6743 case OP_BRAZERO:
6744 if (cc[1] > OP_ASSERTBACK_NOT)
6745 cc = compile_bracket_matchingpath(common, cc, parent);
6746 else
6747 {
6748 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6749 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6750 }
6751 break;
6752
6753 case OP_BRAPOS:
6754 case OP_CBRAPOS:
6755 case OP_SBRAPOS:
6756 case OP_SCBRAPOS:
6757 case OP_BRAPOSZERO:
6758 cc = compile_bracketpos_matchingpath(common, cc, parent);
6759 break;
6760
6761 case OP_MARK:
6762 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6763 SLJIT_ASSERT(common->mark_ptr != 0);
6764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6765 allocate_stack(common, 1);
6766 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6768 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6770 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6771 cc += 1 + 2 + cc[1];
6772 break;
6773