/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 974 - (show annotations)
Sat Jun 2 05:56:58 2012 UTC (7 years, 6 months ago) by zherczeg
File MIME type: text/plain
File size: 232169 byte(s)
Improve JIT code generation for greedy plus quantifier
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* Configuration consumed by the sljit sources included below: memory is
obtained and released through PUBL(malloc) / PUBL(free), and the verbose
and debug switches are set to 0. NOTE(review): SLJIT_CONFIG_AUTO and
SLJIT_CONFIG_STATIC presumably select automatic target detection and
file-local linkage of the sljit functions - confirm in the sljit headers. */
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size. */
69 #define LOCAL_SPACE_SIZE 32768
70
/* NOTE(review): presumably the step by which the backtracking stack is
grown - confirm the unit at the place where it is used. */
71 #define STACK_GROWTH_RATE 8192
72
73 /* Enable to check whether the allocation could destroy temporaries.
Defined automatically whenever SLJIT_DEBUG is defined and non-zero. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the JIT code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct backtrack_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the try path (expected path), and the other is called
93 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the try path.
95
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
117
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
121 C expected path
122 jump to D try path
123 C backtrack path
124 A backtrack path
125
126 Notice that the order of the backtrack code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current backtrack code path. The backtrack path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the try path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the backtrack code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
/* Argument block received (by pointer) by functions of type jit_function.
NOTE(review): the meaning of the individual members is inferred from their
names only; confirm against the code that fills in the structure. */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
/* Holds one executable pointer and one size for each of the
JIT_NUMBER_OF_COMPILE_MODES compile modes, plus a callback / userdata pair.
NOTE(review): ownership and release of these members is not visible here. */
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Node of a singly linked list of sljit jumps. set_jumps() walks such a
list and binds every jump to one label. */
173 typedef struct jump_list {
174 struct sljit_jump *jump;
175 struct jump_list *next; /* NULL terminates the list. */
176 } jump_list;
177
/* Kinds of stub; stack_alloc is the only one defined. */
178 enum stub_types { stack_alloc };
179
/* Node of a singly linked list of stubs: the stub kind, an integer payload,
a jump ('start') and a label ('leave'). NOTE(review): presumably 'start'
jumps into the stub and 'leave' is where it returns - confirm at the code
that emits the stubs. */
180 typedef struct stub_list {
181 enum stub_types type;
182 int data;
183 struct sljit_jump *start;
184 struct sljit_label *leave;
185 struct stub_list *next;
186 } stub_list;
187
/* Pointer to a function that takes a jit_arguments block and returns an int,
declared with the SLJIT_CALL calling convention. */
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the arguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
/* Every *_backtrack structure declared after this one embeds it as its
first member, named 'common'. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
204
/* Backtrack record for assertions; also referenced from bracket_backtrack
through its 'assert' union member. */
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
215
/* Backtrack record for brackets. Which member of the union 'u' is valid
depends on the opcode, as noted on each member. */
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to continue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
235
/* Backtrack record for the *POS bracket opcodes (judging by the name;
NOTE(review): confirm at the code that allocates it). */
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
245
/* Backtrack record carrying a single try path label. NOTE(review):
presumably used for OP_BRAMINZERO - confirm at its allocation site. */
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
250
/* Backtrack record for iterators: remembers the label of the next
iteration. */
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
256
/* Node of a singly linked list with one element per recursion target
function. */
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
/* Backtrack record for recursion; it adds nothing to the common part. */
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
270
/* State shared by the code generating functions; nearly all of them receive
it as their 'common' argument, and the shortcut macros take the sljit
compiler from common->compiler.
recursive_head and mark_ptr stay 0 until get_localspace meets the first
OP_RECURSE / OP_MARK, then receive the current ovector_start, which is moved
up by one word. NOTE(review): the remaining int members in the first group
presumably hold offsets into the locals area as well - confirm. */
271 typedef struct compiler_common {
272 struct sljit_compiler *compiler;
273 pcre_uchar *start; /* Base for the localptrs index (see PRIV_DATA). */
274
275 /* Opcode local area direct map. */
276 int *localptrs;
277 int cbraptr;
278 /* OVector starting point. Must be divisible by 2. */
279 int ovector_start;
280 /* Last known position of the requested byte. */
281 int req_char_ptr;
282 /* Head of the last recursion. */
283 int recursive_head;
284 /* First inspected character for partial matching. */
285 int start_used_ptr;
286 /* Starting pointer for partial soft matches. */
287 int hit_start;
288 /* End pointer of the first line. */
289 int first_line_end;
290 /* Points to the marked string. */
291 int mark_ptr;
292
293 /* Other */
294 const pcre_uint8 *fcc;
295 sljit_w lcc;
296 int mode;
297 int nltype;
298 int newline;
299 int bsr_nltype;
300 int endonly;
301 BOOL has_set_som; /* Set by get_localspace when OP_SET_SOM is present. */
302 sljit_w ctypes;
303 sljit_uw name_table;
304 sljit_w name_count;
305 sljit_w name_entry_size;
306
307 /* Labels and jump lists. */
308 struct sljit_label *partialmatchlabel;
309 struct sljit_label *leavelabel;
310 struct sljit_label *acceptlabel;
311 stub_list *stubs;
312 recurse_entry *entries;
313 recurse_entry *currententry;
314 jump_list *partialmatch;
315 jump_list *leave;
316 jump_list *accept;
317 jump_list *calllimit;
318 jump_list *stackalloc;
319 jump_list *revertframes;
320 jump_list *wordboundary;
321 jump_list *anynewline;
322 jump_list *hspace;
323 jump_list *vspace;
324 jump_list *casefulcmp;
325 jump_list *caselesscmp;
326 BOOL jscript_compat;
327 #ifdef SUPPORT_UTF
328 BOOL utf; /* Checked by next_opcode before skipping extra character units. */
329 #ifdef SUPPORT_UCP
330 BOOL use_ucp;
331 #endif
332 jump_list *utfreadchar;
333 #ifdef COMPILE_PCRE8
334 jump_list *utfreadtype8;
335 #endif
336 #endif /* SUPPORT_UTF */
337 #ifdef SUPPORT_UCP
338 jump_list *getucd;
339 #endif
340 } compiler_common;
341
342 /* For byte_sequence_compare. */
343
/* Only 'length' and 'sourcereg' exist unconditionally; the remaining members
are compiled in when SLJIT_UNALIGNED is defined and non-zero. The two unions
overlay the same storage as an int, an unsigned short and an array of
character units (4 units in the 8-bit library, 2 in the 16-bit one).
NOTE(review): 'c' and 'oc' presumably hold the characters and their
other-case forms - confirm in byte_sequence_compare. */
344 typedef struct compare_context {
345 int length;
346 int sourcereg;
347 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
348 int ucharptr;
349 union {
350 sljit_i asint;
351 sljit_uh asushort;
352 #ifdef COMPILE_PCRE8
353 sljit_ub asbyte;
354 sljit_ub asuchars[4];
355 #else
356 #ifdef COMPILE_PCRE16
357 sljit_uh asuchars[2];
358 #endif
359 #endif
360 } c;
361 union {
362 sljit_i asint;
363 sljit_uh asushort;
364 #ifdef COMPILE_PCRE8
365 sljit_ub asbyte;
366 sljit_ub asuchars[4];
367 #else
368 #ifdef COMPILE_PCRE16
369 sljit_uh asuchars[2];
370 #endif
371 #endif
372 } oc;
373 #endif
374 } compare_context;
375
/* Tag words written into saved stack frames by init_frame. frame_end closes
a frame; frame_setstrbegin precedes a saved OVECTOR(0) value and
frame_setmark precedes a saved mark_ptr value. Capturing brackets are tagged
with their OVECTOR offset instead of one of these constants. */
376 enum {
377 frame_end = 0,
378 frame_setstrbegin = -1,
379 frame_setmark = -2
380 };
381
382 /* Undefine sljit macros. CMP is redefined further down as a shortcut
for sljit_emit_cmp. */
383 #undef CMP
384
385 /* Used for accessing the elements of the stack. Turns the element index i
into a negative byte offset: STACK(0) is -sizeof(sljit_w), STACK(1) is
-2 * sizeof(sljit_w), and so on. */
386 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
387
/* Symbolic names for the sljit registers used by the code generator.
TMP1, TMP2, TMP3, STACK_TOP and RETURN_ADDR map to temporary registers;
STR_PTR, STR_END, STACK_LIMIT, ARGUMENTS and CALL_COUNT map to saved
registers. */
388 #define TMP1 SLJIT_TEMPORARY_REG1
389 #define TMP2 SLJIT_TEMPORARY_REG3
390 #define TMP3 SLJIT_TEMPORARY_EREG2
391 #define STR_PTR SLJIT_SAVED_REG1
392 #define STR_END SLJIT_SAVED_REG2
393 #define STACK_TOP SLJIT_TEMPORARY_REG2
394 #define STACK_LIMIT SLJIT_SAVED_REG3
395 #define ARGUMENTS SLJIT_SAVED_EREG1
396 #define CALL_COUNT SLJIT_SAVED_EREG2
397 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
398
399 /* Locals layout. All values below are byte offsets, expressed in
multiples of sizeof(sljit_w). */
400 /* These two locals can be used by the current opcode. */
401 #define LOCALS0 (0 * sizeof(sljit_w))
402 #define LOCALS1 (1 * sizeof(sljit_w))
403 /* Two local variables for possessive quantifiers (char1 cannot use them). */
404 #define POSSESSIVE0 (2 * sizeof(sljit_w))
405 #define POSSESSIVE1 (3 * sizeof(sljit_w))
406 /* Max limit of recursions. */
407 #define CALL_LIMIT (4 * sizeof(sljit_w))
408 /* The output vector is stored on the stack, and contains pointers
409 to characters. The vector data is divided into two groups: the first
410 group contains the start / end character pointers, and the second is
411 the start pointers when the end of the capturing group has not yet been
reached. These macros expect a variable named 'common' in scope. */
412 #define OVECTOR_START (common->ovector_start)
413 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
414 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Entry of the common->localptrs map that belongs to the opcode at cc; the
index is the distance of cc from common->start. */
415 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
416
/* Move opcodes for a single pcre_uchar: the SLJIT_MOV_UB / SLJIT_MOVU_UB
pair when COMPILE_PCRE8 is defined, the SLJIT_MOV_UH / SLJIT_MOVU_UH pair
when COMPILE_PCRE16 is defined. Any other mode is a compile time error. */
417 #ifdef COMPILE_PCRE8
418 #define MOV_UCHAR SLJIT_MOV_UB
419 #define MOVU_UCHAR SLJIT_MOVU_UB
420 #else
421 #ifdef COMPILE_PCRE16
422 #define MOV_UCHAR SLJIT_MOV_UH
423 #define MOVU_UCHAR SLJIT_MOVU_UH
424 #else
425 #error Unsupported compiling mode
426 #endif
427 #endif
428
429 /* Shortcuts. DEFINE_COMPILER declares a local variable named 'compiler'
from common->compiler; every other macro below passes that variable as the
first argument of the sljit function it wraps. */
430 #define DEFINE_COMPILER \
431 struct sljit_compiler *compiler = common->compiler
432 #define OP1(op, dst, dstw, src, srcw) \
433 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
434 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
435 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
436 #define LABEL() \
437 sljit_emit_label(compiler)
438 #define JUMP(type) \
439 sljit_emit_jump(compiler, (type))
/* JUMPTO emits a jump and immediately binds it to an existing label. */
440 #define JUMPTO(type, label) \
441 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* JUMPHERE binds an earlier jump to a label emitted by this macro. */
442 #define JUMPHERE(jump) \
443 sljit_set_label((jump), sljit_emit_label(compiler))
444 #define CMP(type, src1, src1w, src2, src2w) \
445 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* CMPTO emits a compare-and-jump and binds it to an existing label. */
446 #define CMPTO(type, src1, src1w, src2, src2w, label) \
447 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
448 #define COND_VALUE(op, dst, dstw, type) \
449 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
450 #define GET_LOCAL_BASE(dst, dstw, offset) \
451 sljit_get_local_base(compiler, (dst), (dstw), (offset))
452
453 static pcre_uchar* bracketend(pcre_uchar* cc)
454 {
455 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
456 do cc += GET(cc, 1); while (*cc == OP_ALT);
457 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
458 cc += 1 + LINK_SIZE;
459 return cc;
460 }
461
462 /* Functions which might need modification for all new supported opcodes:
463 next_opcode
464 get_localspace
465 set_localptrs
466 get_framesize
467 init_frame
468 get_localsize
469 copy_locals
470 compile_trypath
471 compile_backtrackpath
472 */
473
474 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
475 {
476 SLJIT_UNUSED_ARG(common);
477 switch(*cc)
478 {
479 case OP_SOD:
480 case OP_SOM:
481 case OP_SET_SOM:
482 case OP_NOT_WORD_BOUNDARY:
483 case OP_WORD_BOUNDARY:
484 case OP_NOT_DIGIT:
485 case OP_DIGIT:
486 case OP_NOT_WHITESPACE:
487 case OP_WHITESPACE:
488 case OP_NOT_WORDCHAR:
489 case OP_WORDCHAR:
490 case OP_ANY:
491 case OP_ALLANY:
492 case OP_ANYNL:
493 case OP_NOT_HSPACE:
494 case OP_HSPACE:
495 case OP_NOT_VSPACE:
496 case OP_VSPACE:
497 case OP_EXTUNI:
498 case OP_EODN:
499 case OP_EOD:
500 case OP_CIRC:
501 case OP_CIRCM:
502 case OP_DOLL:
503 case OP_DOLLM:
504 case OP_TYPESTAR:
505 case OP_TYPEMINSTAR:
506 case OP_TYPEPLUS:
507 case OP_TYPEMINPLUS:
508 case OP_TYPEQUERY:
509 case OP_TYPEMINQUERY:
510 case OP_TYPEPOSSTAR:
511 case OP_TYPEPOSPLUS:
512 case OP_TYPEPOSQUERY:
513 case OP_CRSTAR:
514 case OP_CRMINSTAR:
515 case OP_CRPLUS:
516 case OP_CRMINPLUS:
517 case OP_CRQUERY:
518 case OP_CRMINQUERY:
519 case OP_DEF:
520 case OP_BRAZERO:
521 case OP_BRAMINZERO:
522 case OP_BRAPOSZERO:
523 case OP_COMMIT:
524 case OP_FAIL:
525 case OP_ACCEPT:
526 case OP_ASSERT_ACCEPT:
527 case OP_SKIPZERO:
528 return cc + 1;
529
530 case OP_ANYBYTE:
531 #ifdef SUPPORT_UTF
532 if (common->utf) return NULL;
533 #endif
534 return cc + 1;
535
536 case OP_CHAR:
537 case OP_CHARI:
538 case OP_NOT:
539 case OP_NOTI:
540 case OP_STAR:
541 case OP_MINSTAR:
542 case OP_PLUS:
543 case OP_MINPLUS:
544 case OP_QUERY:
545 case OP_MINQUERY:
546 case OP_POSSTAR:
547 case OP_POSPLUS:
548 case OP_POSQUERY:
549 case OP_STARI:
550 case OP_MINSTARI:
551 case OP_PLUSI:
552 case OP_MINPLUSI:
553 case OP_QUERYI:
554 case OP_MINQUERYI:
555 case OP_POSSTARI:
556 case OP_POSPLUSI:
557 case OP_POSQUERYI:
558 case OP_NOTSTAR:
559 case OP_NOTMINSTAR:
560 case OP_NOTPLUS:
561 case OP_NOTMINPLUS:
562 case OP_NOTQUERY:
563 case OP_NOTMINQUERY:
564 case OP_NOTPOSSTAR:
565 case OP_NOTPOSPLUS:
566 case OP_NOTPOSQUERY:
567 case OP_NOTSTARI:
568 case OP_NOTMINSTARI:
569 case OP_NOTPLUSI:
570 case OP_NOTMINPLUSI:
571 case OP_NOTQUERYI:
572 case OP_NOTMINQUERYI:
573 case OP_NOTPOSSTARI:
574 case OP_NOTPOSPLUSI:
575 case OP_NOTPOSQUERYI:
576 cc += 2;
577 #ifdef SUPPORT_UTF
578 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
579 #endif
580 return cc;
581
582 case OP_UPTO:
583 case OP_MINUPTO:
584 case OP_EXACT:
585 case OP_POSUPTO:
586 case OP_UPTOI:
587 case OP_MINUPTOI:
588 case OP_EXACTI:
589 case OP_POSUPTOI:
590 case OP_NOTUPTO:
591 case OP_NOTMINUPTO:
592 case OP_NOTEXACT:
593 case OP_NOTPOSUPTO:
594 case OP_NOTUPTOI:
595 case OP_NOTMINUPTOI:
596 case OP_NOTEXACTI:
597 case OP_NOTPOSUPTOI:
598 cc += 2 + IMM2_SIZE;
599 #ifdef SUPPORT_UTF
600 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
601 #endif
602 return cc;
603
604 case OP_NOTPROP:
605 case OP_PROP:
606 return cc + 1 + 2;
607
608 case OP_TYPEUPTO:
609 case OP_TYPEMINUPTO:
610 case OP_TYPEEXACT:
611 case OP_TYPEPOSUPTO:
612 case OP_REF:
613 case OP_REFI:
614 case OP_CREF:
615 case OP_NCREF:
616 case OP_RREF:
617 case OP_NRREF:
618 case OP_CLOSE:
619 cc += 1 + IMM2_SIZE;
620 return cc;
621
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 return cc + 1 + 2 * IMM2_SIZE;
625
626 case OP_CLASS:
627 case OP_NCLASS:
628 return cc + 1 + 32 / sizeof(pcre_uchar);
629
630 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
631 case OP_XCLASS:
632 return cc + GET(cc, 1);
633 #endif
634
635 case OP_RECURSE:
636 case OP_ASSERT:
637 case OP_ASSERT_NOT:
638 case OP_ASSERTBACK:
639 case OP_ASSERTBACK_NOT:
640 case OP_REVERSE:
641 case OP_ONCE:
642 case OP_ONCE_NC:
643 case OP_BRA:
644 case OP_BRAPOS:
645 case OP_COND:
646 case OP_SBRA:
647 case OP_SBRAPOS:
648 case OP_SCOND:
649 case OP_ALT:
650 case OP_KET:
651 case OP_KETRMAX:
652 case OP_KETRMIN:
653 case OP_KETRPOS:
654 return cc + 1 + LINK_SIZE;
655
656 case OP_CBRA:
657 case OP_CBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 return cc + 1 + LINK_SIZE + IMM2_SIZE;
661
662 case OP_MARK:
663 return cc + 1 + 2 + cc[1];
664
665 default:
666 return NULL;
667 }
668 }
669
670 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
671 {
672 int localspace = 0;
673 pcre_uchar *alternative;
674 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
675 while (cc < ccend)
676 {
677 switch(*cc)
678 {
679 case OP_SET_SOM:
680 common->has_set_som = TRUE;
681 cc += 1;
682 break;
683
684 case OP_ASSERT:
685 case OP_ASSERT_NOT:
686 case OP_ASSERTBACK:
687 case OP_ASSERTBACK_NOT:
688 case OP_ONCE:
689 case OP_ONCE_NC:
690 case OP_BRAPOS:
691 case OP_SBRA:
692 case OP_SBRAPOS:
693 case OP_SCOND:
694 localspace += sizeof(sljit_w);
695 cc += 1 + LINK_SIZE;
696 break;
697
698 case OP_CBRAPOS:
699 case OP_SCBRAPOS:
700 localspace += sizeof(sljit_w);
701 cc += 1 + LINK_SIZE + IMM2_SIZE;
702 break;
703
704 case OP_COND:
705 /* Might be a hidden SCOND. */
706 alternative = cc + GET(cc, 1);
707 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
708 localspace += sizeof(sljit_w);
709 cc += 1 + LINK_SIZE;
710 break;
711
712 case OP_RECURSE:
713 /* Set its value only once. */
714 if (common->recursive_head == 0)
715 {
716 common->recursive_head = common->ovector_start;
717 common->ovector_start += sizeof(sljit_w);
718 }
719 cc += 1 + LINK_SIZE;
720 break;
721
722 case OP_MARK:
723 if (common->mark_ptr == 0)
724 {
725 common->mark_ptr = common->ovector_start;
726 common->ovector_start += sizeof(sljit_w);
727 }
728 cc += 1 + 2 + cc[1];
729 break;
730
731 default:
732 cc = next_opcode(common, cc);
733 if (cc == NULL)
734 return -1;
735 break;
736 }
737 }
738 return localspace;
739 }
740
741 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
742 {
743 pcre_uchar *cc = common->start;
744 pcre_uchar *alternative;
745 while (cc < ccend)
746 {
747 switch(*cc)
748 {
749 case OP_ASSERT:
750 case OP_ASSERT_NOT:
751 case OP_ASSERTBACK:
752 case OP_ASSERTBACK_NOT:
753 case OP_ONCE:
754 case OP_ONCE_NC:
755 case OP_BRAPOS:
756 case OP_SBRA:
757 case OP_SBRAPOS:
758 case OP_SCOND:
759 common->localptrs[cc - common->start] = localptr;
760 localptr += sizeof(sljit_w);
761 cc += 1 + LINK_SIZE;
762 break;
763
764 case OP_CBRAPOS:
765 case OP_SCBRAPOS:
766 common->localptrs[cc - common->start] = localptr;
767 localptr += sizeof(sljit_w);
768 cc += 1 + LINK_SIZE + IMM2_SIZE;
769 break;
770
771 case OP_COND:
772 /* Might be a hidden SCOND. */
773 alternative = cc + GET(cc, 1);
774 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
775 {
776 common->localptrs[cc - common->start] = localptr;
777 localptr += sizeof(sljit_w);
778 }
779 cc += 1 + LINK_SIZE;
780 break;
781
782 default:
783 cc = next_opcode(common, cc);
784 SLJIT_ASSERT(cc != NULL);
785 break;
786 }
787 }
788 }
789
790 /* Returns with -1 if no need for frame. */
791 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
792 {
793 pcre_uchar *ccend = bracketend(cc);
794 int length = 0;
795 BOOL possessive = FALSE;
796 BOOL setsom_found = recursive;
797 BOOL setmark_found = recursive;
798
799 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
800 {
801 length = 3;
802 possessive = TRUE;
803 }
804
805 cc = next_opcode(common, cc);
806 SLJIT_ASSERT(cc != NULL);
807 while (cc < ccend)
808 switch(*cc)
809 {
810 case OP_SET_SOM:
811 SLJIT_ASSERT(common->has_set_som);
812 if (!setsom_found)
813 {
814 length += 2;
815 setsom_found = TRUE;
816 }
817 cc += 1;
818 break;
819
820 case OP_MARK:
821 SLJIT_ASSERT(common->mark_ptr != 0);
822 if (!setmark_found)
823 {
824 length += 2;
825 setmark_found = TRUE;
826 }
827 cc += 1 + 2 + cc[1];
828 break;
829
830 case OP_RECURSE:
831 if (common->has_set_som && !setsom_found)
832 {
833 length += 2;
834 setsom_found = TRUE;
835 }
836 if (common->mark_ptr != 0 && !setmark_found)
837 {
838 length += 2;
839 setmark_found = TRUE;
840 }
841 cc += 1 + LINK_SIZE;
842 break;
843
844 case OP_CBRA:
845 case OP_CBRAPOS:
846 case OP_SCBRA:
847 case OP_SCBRAPOS:
848 length += 3;
849 cc += 1 + LINK_SIZE + IMM2_SIZE;
850 break;
851
852 default:
853 cc = next_opcode(common, cc);
854 SLJIT_ASSERT(cc != NULL);
855 break;
856 }
857
858 /* Possessive quantifiers can use a special case. */
859 if (SLJIT_UNLIKELY(possessive) && length == 3)
860 return -1;
861
862 if (length > 0)
863 return length + 1;
864 return -1;
865 }
866
/* Emits the machine code that saves a stack frame for the bracket at cc.
The frame is written relative to STACK_TOP, starting at element 'stackpos'
(converted to a byte offset with STACK()), as a sequence of tagged items:
  frame_setstrbegin, saved OVECTOR(0)                (at most once)
  frame_setmark, saved common->mark_ptr local        (at most once)
  OVECTOR offset, saved start, saved end             (per capturing bracket)
and is closed by a frame_end word. The set of items mirrors what
get_framesize counts. When 'recursive' is set the first two kinds are
never emitted. 'stacktop' is used only by the assertions. TMP1 and TMP2
are overwritten by the emitted code. */
867 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
868 {
869 DEFINE_COMPILER;
870 pcre_uchar *ccend = bracketend(cc);
871 BOOL setsom_found = recursive;
872 BOOL setmark_found = recursive;
873 int offset;
874
875 /* >= 1 + shortest item size (2) */
876 SLJIT_UNUSED_ARG(stacktop);
877 SLJIT_ASSERT(stackpos >= stacktop + 2);
878
879 stackpos = STACK(stackpos); /* From here on stackpos is a byte offset. */
/* For a non-recursive possessive capturing bracket the opening opcode is
kept, so that the loop below saves its own capture as well. */
880 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
881 cc = next_opcode(common, cc);
882 SLJIT_ASSERT(cc != NULL);
883 while (cc < ccend)
884 switch(*cc)
885 {
886 case OP_SET_SOM:
887 SLJIT_ASSERT(common->has_set_som);
888 if (!setsom_found)
889 {
/* Tag word first, then the current OVECTOR(0) value. */
890 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
892 stackpos += (int)sizeof(sljit_w);
893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
894 stackpos += (int)sizeof(sljit_w);
895 setsom_found = TRUE;
896 }
897 cc += 1;
898 break;
899
900 case OP_MARK:
901 SLJIT_ASSERT(common->mark_ptr != 0);
902 if (!setmark_found)
903 {
/* Tag word first, then the current value of the mark local. */
904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
906 stackpos += (int)sizeof(sljit_w);
907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
908 stackpos += (int)sizeof(sljit_w);
909 setmark_found = TRUE;
910 }
911 cc += 1 + 2 + cc[1];
912 break;
913
914 case OP_RECURSE:
/* Saves whichever of the two values the pattern uses and that has not
been saved yet. */
915 if (common->has_set_som && !setsom_found)
916 {
917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
919 stackpos += (int)sizeof(sljit_w);
920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
921 stackpos += (int)sizeof(sljit_w);
922 setsom_found = TRUE;
923 }
924 if (common->mark_ptr != 0 && !setmark_found)
925 {
926 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
928 stackpos += (int)sizeof(sljit_w);
929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
930 stackpos += (int)sizeof(sljit_w);
931 setmark_found = TRUE;
932 }
933 cc += 1 + LINK_SIZE;
934 break;
935
936 case OP_CBRA:
937 case OP_CBRAPOS:
938 case OP_SCBRA:
939 case OP_SCBRAPOS:
/* The bracket number is doubled to index the start / end pair. The item
is tagged with the OVECTOR offset of the pair itself. */
940 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
941 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
942 stackpos += (int)sizeof(sljit_w);
943 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
944 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
945 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
946 stackpos += (int)sizeof(sljit_w);
947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
948 stackpos += (int)sizeof(sljit_w);
949
950 cc += 1 + LINK_SIZE + IMM2_SIZE;
951 break;
952
953 default:
954 cc = next_opcode(common, cc);
955 SLJIT_ASSERT(cc != NULL);
956 break;
957 }
958
/* Terminator word; the frame must end exactly at 'stacktop'. */
959 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
960 SLJIT_ASSERT(stackpos == STACK(stacktop));
961 }
962
963 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
964 {
965 int localsize = 2;
966 pcre_uchar *alternative;
967 /* Calculate the sum of the local variables. */
968 while (cc < ccend)
969 {
970 switch(*cc)
971 {
972 case OP_ASSERT:
973 case OP_ASSERT_NOT:
974 case OP_ASSERTBACK:
975 case OP_ASSERTBACK_NOT:
976 case OP_ONCE:
977 case OP_ONCE_NC:
978 case OP_BRAPOS:
979 case OP_SBRA:
980 case OP_SBRAPOS:
981 case OP_SCOND:
982 localsize++;
983 cc += 1 + LINK_SIZE;
984 break;
985
986 case OP_CBRA:
987 case OP_SCBRA:
988 localsize++;
989 cc += 1 + LINK_SIZE + IMM2_SIZE;
990 break;
991
992 case OP_CBRAPOS:
993 case OP_SCBRAPOS:
994 localsize += 2;
995 cc += 1 + LINK_SIZE + IMM2_SIZE;
996 break;
997
998 case OP_COND:
999 /* Might be a hidden SCOND. */
1000 alternative = cc + GET(cc, 1);
1001 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1002 localsize++;
1003 cc += 1 + LINK_SIZE;
1004 break;
1005
1006 default:
1007 cc = next_opcode(common, cc);
1008 SLJIT_ASSERT(cc != NULL);
1009 break;
1010 }
1011 }
1012 SLJIT_ASSERT(cc == ccend);
1013 return localsize;
1014 }
1015
/* Emits the machine code that copies the local variables belonging to the
opcodes in [cc, ccend) between the locals area (SLJIT_LOCALS_REG) and the
stack addressed through STACK_TOP.
  save != 0: locals -> stack; the recursive_head local is copied first.
  save == 0: stack -> locals; the recursive_head local is not written.
The locals visited are the same ones get_localsize counts. TMP1 and TMP2
are used alternately as staging registers, so every load is separated from
the store of the same value by another instruction. 'stackptr' and
'stacktop' are stack element indexes and are converted to byte offsets on
entry. */
1016 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1017 BOOL save, int stackptr, int stacktop)
1018 {
1019 DEFINE_COMPILER;
1020 int srcw[2]; /* Offsets of the locals found at the current opcode. */
1021 int count; /* Number of valid entries in srcw. */
1022 BOOL tmp1next = TRUE; /* Which staging register is used next. */
1023 BOOL tmp1empty = TRUE;
1024 BOOL tmp2empty = TRUE;
1025 pcre_uchar *alternative;
1026 enum {
1027 start,
1028 loop,
1029 end
1030 } status;
1031
/* The 'start' state exists only to handle recursive_head when saving. */
1032 status = save ? start : loop;
1033 stackptr = STACK(stackptr - 2);
1034 stacktop = STACK(stacktop - 1);
1035
1036 if (!save)
1037 {
/* Restoring: step over one stack word, then preload up to two values. */
1038 stackptr += sizeof(sljit_w);
1039 if (stackptr < stacktop)
1040 {
1041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1042 stackptr += sizeof(sljit_w);
1043 tmp1empty = FALSE;
1044 }
1045 if (stackptr < stacktop)
1046 {
1047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1048 stackptr += sizeof(sljit_w);
1049 tmp2empty = FALSE;
1050 }
1051 /* The tmp1next must be TRUE in either way. */
1052 }
1053
1054 while (status != end)
1055 {
/* Step 1: find out which locals (0, 1 or 2) the next item owns. */
1056 count = 0;
1057 switch(status)
1058 {
1059 case start:
1060 SLJIT_ASSERT(save && common->recursive_head != 0);
1061 count = 1;
1062 srcw[0] = common->recursive_head;
1063 status = loop;
1064 break;
1065
1066 case loop:
1067 if (cc >= ccend)
1068 {
1069 status = end;
1070 break;
1071 }
1072
1073 switch(*cc)
1074 {
1075 case OP_ASSERT:
1076 case OP_ASSERT_NOT:
1077 case OP_ASSERTBACK:
1078 case OP_ASSERTBACK_NOT:
1079 case OP_ONCE:
1080 case OP_ONCE_NC:
1081 case OP_BRAPOS:
1082 case OP_SBRA:
1083 case OP_SBRAPOS:
1084 case OP_SCOND:
1085 count = 1;
1086 srcw[0] = PRIV_DATA(cc);
1087 SLJIT_ASSERT(srcw[0] != 0);
1088 cc += 1 + LINK_SIZE;
1089 break;
1090
1091 case OP_CBRA:
1092 case OP_SCBRA:
1093 count = 1;
1094 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1095 cc += 1 + LINK_SIZE + IMM2_SIZE;
1096 break;
1097
1098 case OP_CBRAPOS:
1099 case OP_SCBRAPOS:
/* Two locals; srcw[1] is processed before srcw[0] by the loop below. */
1100 count = 2;
1101 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1102 srcw[0] = PRIV_DATA(cc);
1103 SLJIT_ASSERT(srcw[0] != 0);
1104 cc += 1 + LINK_SIZE + IMM2_SIZE;
1105 break;
1106
1107 case OP_COND:
1108 /* Might be a hidden SCOND. */
1109 alternative = cc + GET(cc, 1);
1110 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1111 {
1112 count = 1;
1113 srcw[0] = PRIV_DATA(cc);
1114 SLJIT_ASSERT(srcw[0] != 0);
1115 }
1116 cc += 1 + LINK_SIZE;
1117 break;
1118
1119 default:
1120 cc = next_opcode(common, cc);
1121 SLJIT_ASSERT(cc != NULL);
1122 break;
1123 }
1124 break;
1125
1126 case end:
1127 SLJIT_ASSERT_STOP();
1128 break;
1129 }
1130
/* Step 2: move the locals found above, highest srcw index first. */
1131 while (count > 0)
1132 {
1133 count--;
1134 if (save)
1135 {
/* Saving: flush the staging register if it holds an earlier value, then
load the next local into it. */
1136 if (tmp1next)
1137 {
1138 if (!tmp1empty)
1139 {
1140 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1141 stackptr += sizeof(sljit_w);
1142 }
1143 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1144 tmp1empty = FALSE;
1145 tmp1next = FALSE;
1146 }
1147 else
1148 {
1149 if (!tmp2empty)
1150 {
1151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1152 stackptr += sizeof(sljit_w);
1153 }
1154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1155 tmp2empty = FALSE;
1156 tmp1next = TRUE;
1157 }
1158 }
1159 else
1160 {
/* Restoring: store the preloaded value into the local, then refill the
register from the stack unless the stack data is exhausted. */
1161 if (tmp1next)
1162 {
1163 SLJIT_ASSERT(!tmp1empty);
1164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1165 tmp1empty = stackptr >= stacktop;
1166 if (!tmp1empty)
1167 {
1168 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1169 stackptr += sizeof(sljit_w);
1170 }
1171 tmp1next = FALSE;
1172 }
1173 else
1174 {
1175 SLJIT_ASSERT(!tmp2empty);
1176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1177 tmp2empty = stackptr >= stacktop;
1178 if (!tmp2empty)
1179 {
1180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1181 stackptr += sizeof(sljit_w);
1182 }
1183 tmp1next = TRUE;
1184 }
1185 }
1186 }
1187 }
1188
1189 if (save)
1190 {
/* Flush what is still held in the staging registers, oldest value first:
the register that would be used next holds the older one. */
1191 if (tmp1next)
1192 {
1193 if (!tmp1empty)
1194 {
1195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1196 stackptr += sizeof(sljit_w);
1197 }
1198 if (!tmp2empty)
1199 {
1200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1201 stackptr += sizeof(sljit_w);
1202 }
1203 }
1204 else
1205 {
1206 if (!tmp2empty)
1207 {
1208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1209 stackptr += sizeof(sljit_w);
1210 }
1211 if (!tmp1empty)
1212 {
1213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1214 stackptr += sizeof(sljit_w);
1215 }
1216 }
1217 }
/* All opcodes consumed, all stack words used, nothing left in flight. */
1218 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1219 }
1220
1221 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1222 {
1223 return (value & (value - 1)) == 0;
1224 }
1225
1226 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1227 {
1228 while (list)
1229 {
1230 /* sljit_set_label is clever enough to do nothing
1231 if either the jump or the label is NULL */
1232 sljit_set_label(list->jump, label);
1233 list = list->next;
1234 }
1235 }
1236
1237 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1238 {
1239 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1240 if (list_item)
1241 {
1242 list_item->next = *list;
1243 list_item->jump = jump;
1244 *list = list_item;
1245 }
1246 }
1247
1248 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1249 {
1250 DEFINE_COMPILER;
1251 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1252
1253 if (list_item)
1254 {
1255 list_item->type = type;
1256 list_item->data = data;
1257 list_item->start = start;
1258 list_item->leave = LABEL();
1259 list_item->next = common->stubs;
1260 common->stubs = list_item;
1261 }
1262 }
1263
1264 static void flush_stubs(compiler_common *common)
1265 {
1266 DEFINE_COMPILER;
1267 stub_list* list_item = common->stubs;
1268
1269 while (list_item)
1270 {
1271 JUMPHERE(list_item->start);
1272 switch(list_item->type)
1273 {
1274 case stack_alloc:
1275 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1276 break;
1277 }
1278 JUMPTO(SLJIT_JUMP, list_item->leave);
1279 list_item = list_item->next;
1280 }
1281 common->stubs = NULL;
1282 }
1283
1284 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1285 {
1286 DEFINE_COMPILER;
1287
1288 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1289 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1290 }
1291
1292 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1293 {
1294 /* May destroy all locals and registers except TMP2. */
1295 DEFINE_COMPILER;
1296
1297 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1298 #ifdef DESTROY_REGISTERS
1299 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1300 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1301 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1304 #endif
1305 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1306 }
1307
1308 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1309 {
1310 DEFINE_COMPILER;
1311 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1312 }
1313
1314 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1315 {
1316 DEFINE_COMPILER;
1317 struct sljit_label *loop;
1318 int i;
1319 /* At this point we can freely use all temporary registers. */
1320 /* TMP1 returns with begin - 1. */
1321 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1322 if (length < 8)
1323 {
1324 for (i = 0; i < length; i++)
1325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1326 }
1327 else
1328 {
1329 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1330 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1331 loop = LABEL();
1332 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1333 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1334 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1335 }
1336 }
1337
1338 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1339 {
1340 DEFINE_COMPILER;
1341 struct sljit_label *loop;
1342 struct sljit_jump *earlyexit;
1343
1344 /* At this point we can freely use all registers. */
1345 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1347
1348 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1349 if (common->mark_ptr != 0)
1350 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1351 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1352 if (common->mark_ptr != 0)
1353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1354 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1355 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1356 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1357 /* Unlikely, but possible */
1358 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1359 loop = LABEL();
1360 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1361 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1362 /* Copy the integer value to the output buffer */
1363 #ifdef COMPILE_PCRE16
1364 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1365 #endif
1366 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1367 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1368 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1369 JUMPHERE(earlyexit);
1370
1371 /* Calculate the return value, which is the maximum ovector value. */
1372 if (topbracket > 1)
1373 {
1374 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1375 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1376
1377 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1378 loop = LABEL();
1379 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1380 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1381 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1382 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1383 }
1384 else
1385 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1386 }
1387
1388 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1389 {
1390 DEFINE_COMPILER;
1391
1392 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1393 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1394
1395 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1396 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1397 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1398 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1399
1400 /* Store match begin and end. */
1401 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1402 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1403 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1404 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1405 #ifdef COMPILE_PCRE16
1406 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1407 #endif
1408 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1409
1410 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1411 #ifdef COMPILE_PCRE16
1412 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1413 #endif
1414 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1415
1416 JUMPTO(SLJIT_JUMP, leave);
1417 }
1418
1419 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1420 {
1421 /* May destroy TMP1. */
1422 DEFINE_COMPILER;
1423 struct sljit_jump *jump;
1424
1425 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1426 {
1427 /* The value of -1 must be kept for start_used_ptr! */
1428 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1429 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1430 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1431 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1433 JUMPHERE(jump);
1434 }
1435 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1436 {
1437 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1439 JUMPHERE(jump);
1440 }
1441 }
1442
1443 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1444 {
1445 /* Detects if the character has an othercase. */
1446 unsigned int c;
1447
1448 #ifdef SUPPORT_UTF
1449 if (common->utf)
1450 {
1451 GETCHAR(c, cc);
1452 if (c > 127)
1453 {
1454 #ifdef SUPPORT_UCP
1455 return c != UCD_OTHERCASE(c);
1456 #else
1457 return FALSE;
1458 #endif
1459 }
1460 #ifndef COMPILE_PCRE8
1461 return common->fcc[c] != c;
1462 #endif
1463 }
1464 else
1465 #endif
1466 c = *cc;
1467 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1468 }
1469
1470 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1471 {
1472 /* Returns with the othercase. */
1473 #ifdef SUPPORT_UTF
1474 if (common->utf && c > 127)
1475 {
1476 #ifdef SUPPORT_UCP
1477 return UCD_OTHERCASE(c);
1478 #else
1479 return c;
1480 #endif
1481 }
1482 #endif
1483 return TABLE_GET(c, common->fcc, c);
1484 }
1485
1486 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1487 {
1488 /* Detects if the character and its othercase has only 1 bit difference. */
1489 unsigned int c, oc, bit;
1490 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1491 int n;
1492 #endif
1493
1494 #ifdef SUPPORT_UTF
1495 if (common->utf)
1496 {
1497 GETCHAR(c, cc);
1498 if (c <= 127)
1499 oc = common->fcc[c];
1500 else
1501 {
1502 #ifdef SUPPORT_UCP
1503 oc = UCD_OTHERCASE(c);
1504 #else
1505 oc = c;
1506 #endif
1507 }
1508 }
1509 else
1510 {
1511 c = *cc;
1512 oc = TABLE_GET(c, common->fcc, c);
1513 }
1514 #else
1515 c = *cc;
1516 oc = TABLE_GET(c, common->fcc, c);
1517 #endif
1518
1519 SLJIT_ASSERT(c != oc);
1520
1521 bit = c ^ oc;
1522 /* Optimized for English alphabet. */
1523 if (c <= 127 && bit == 0x20)
1524 return (0 << 8) | 0x20;
1525
1526 /* Since c != oc, they must have at least 1 bit difference. */
1527 if (!ispowerof2(bit))
1528 return 0;
1529
1530 #ifdef COMPILE_PCRE8
1531
1532 #ifdef SUPPORT_UTF
1533 if (common->utf && c > 127)
1534 {
1535 n = GET_EXTRALEN(*cc);
1536 while ((bit & 0x3f) == 0)
1537 {
1538 n--;
1539 bit >>= 6;
1540 }
1541 return (n << 8) | bit;
1542 }
1543 #endif /* SUPPORT_UTF */
1544 return (0 << 8) | bit;
1545
1546 #else /* COMPILE_PCRE8 */
1547
1548 #ifdef COMPILE_PCRE16
1549 #ifdef SUPPORT_UTF
1550 if (common->utf && c > 65535)
1551 {
1552 if (bit >= (1 << 10))
1553 bit >>= 10;
1554 else
1555 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1556 }
1557 #endif /* SUPPORT_UTF */
1558 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1559 #endif /* COMPILE_PCRE16 */
1560
1561 #endif /* COMPILE_PCRE8 */
1562 }
1563
1564 static void check_partial(compiler_common *common, BOOL force)
1565 {
1566 /* Checks whether a partial matching is occured. Does not modify registers. */
1567 DEFINE_COMPILER;
1568 struct sljit_jump *jump = NULL;
1569
1570 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1571
1572 if (common->mode == JIT_COMPILE)
1573 return;
1574
1575 if (!force)
1576 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1577 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1578 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1579
1580 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1582 else
1583 {
1584 if (common->partialmatchlabel != NULL)
1585 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1586 else
1587 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1588 }
1589
1590 if (jump != NULL)
1591 JUMPHERE(jump);
1592 }
1593
1594 static struct sljit_jump *check_str_end(compiler_common *common)
1595 {
1596 /* Does not affect registers. Usually used in a tight spot. */
1597 DEFINE_COMPILER;
1598 struct sljit_jump *jump;
1599 struct sljit_jump *nohit;
1600 struct sljit_jump *return_value;
1601
1602 if (common->mode == JIT_COMPILE)
1603 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1604
1605 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1606 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1607 {
1608 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1610 JUMPHERE(nohit);
1611 return_value = JUMP(SLJIT_JUMP);
1612 }
1613 else
1614 {
1615 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1616 if (common->partialmatchlabel != NULL)
1617 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1618 else
1619 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1620 }
1621 JUMPHERE(jump);
1622 return return_value;
1623 }
1624
1625 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
1626 {
1627 DEFINE_COMPILER;
1628 struct sljit_jump *jump;
1629
1630 if (common->mode == JIT_COMPILE)
1631 {
1632 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1633 return;
1634 }
1635
1636 /* Partial matching mode. */
1637 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1638 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1639 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1640 {
1641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1642 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
1643 }
1644 else
1645 {
1646 if (common->partialmatchlabel != NULL)
1647 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1648 else
1649 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1650 }
1651 JUMPHERE(jump);
1652 }
1653
1654 static void read_char(compiler_common *common)
1655 {
1656 /* Reads the character into TMP1, updates STR_PTR.
1657 Does not check STR_END. TMP2 Destroyed. */
1658 DEFINE_COMPILER;
1659 #ifdef SUPPORT_UTF
1660 struct sljit_jump *jump;
1661 #endif
1662
1663 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1664 #ifdef SUPPORT_UTF
1665 if (common->utf)
1666 {
1667 #ifdef COMPILE_PCRE8
1668 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1669 #else
1670 #ifdef COMPILE_PCRE16
1671 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1672 #endif
1673 #endif /* COMPILE_PCRE8 */
1674 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1675 JUMPHERE(jump);
1676 }
1677 #endif
1678 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1679 }
1680
1681 static void peek_char(compiler_common *common)
1682 {
1683 /* Reads the character into TMP1, keeps STR_PTR.
1684 Does not check STR_END. TMP2 Destroyed. */
1685 DEFINE_COMPILER;
1686 #ifdef SUPPORT_UTF
1687 struct sljit_jump *jump;
1688 #endif
1689
1690 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1691 #ifdef SUPPORT_UTF
1692 if (common->utf)
1693 {
1694 #ifdef COMPILE_PCRE8
1695 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1696 #else
1697 #ifdef COMPILE_PCRE16
1698 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1699 #endif
1700 #endif /* COMPILE_PCRE8 */
1701 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1702 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1703 JUMPHERE(jump);
1704 }
1705 #endif
1706 }
1707
1708 static void read_char8_type(compiler_common *common)
1709 {
1710 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1711 DEFINE_COMPILER;
1712 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1713 struct sljit_jump *jump;
1714 #endif
1715
1716 #ifdef SUPPORT_UTF
1717 if (common->utf)
1718 {
1719 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1721 #ifdef COMPILE_PCRE8
1722 /* This can be an extra read in some situations, but hopefully
1723 it is needed in most cases. */
1724 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1725 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1726 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1727 JUMPHERE(jump);
1728 #else
1729 #ifdef COMPILE_PCRE16
1730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1732 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1733 JUMPHERE(jump);
1734 /* Skip low surrogate if necessary. */
1735 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1736 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1737 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1738 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1739 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1740 #endif
1741 #endif /* COMPILE_PCRE8 */
1742 return;
1743 }
1744 #endif
1745 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1747 #ifdef COMPILE_PCRE16
1748 /* The ctypes array contains only 256 values. */
1749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1750 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1751 #endif
1752 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1753 #ifdef COMPILE_PCRE16
1754 JUMPHERE(jump);
1755 #endif
1756 }
1757
1758 static void skip_char_back(compiler_common *common)
1759 {
1760 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1761 DEFINE_COMPILER;
1762 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1763 struct sljit_label *label;
1764
1765 if (common->utf)
1766 {
1767 label = LABEL();
1768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1769 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1770 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1771 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1772 return;
1773 }
1774 #endif
1775 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1776 if (common->utf)
1777 {
1778 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1779 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1780 /* Skip low surrogate if necessary. */
1781 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1782 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1783 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1784 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1785 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1786 return;
1787 }
1788 #endif
1789 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1790 }
1791
1792 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
1793 {
1794 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1795 DEFINE_COMPILER;
1796
1797 if (nltype == NLTYPE_ANY)
1798 {
1799 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1800 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1801 }
1802 else if (nltype == NLTYPE_ANYCRLF)
1803 {
1804 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1805 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1806 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1807 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1808 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1809 }
1810 else
1811 {
1812 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1813 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1814 }
1815 }
1816
1817 #ifdef SUPPORT_UTF
1818
1819 #ifdef COMPILE_PCRE8
1820 static void do_utfreadchar(compiler_common *common)
1821 {
1822 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1823 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1824 DEFINE_COMPILER;
1825 struct sljit_jump *jump;
1826
1827 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1828 /* Searching for the first zero. */
1829 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1830 jump = JUMP(SLJIT_C_NOT_ZERO);
1831 /* Two byte sequence. */
1832 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1835 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1836 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1837 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1838 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1839 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1840 JUMPHERE(jump);
1841
1842 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1843 jump = JUMP(SLJIT_C_NOT_ZERO);
1844 /* Three byte sequence. */
1845 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1846 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1847 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1848 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1849 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1850 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1851 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1852 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1853 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1854 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1855 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1856 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1857 JUMPHERE(jump);
1858
1859 /* Four byte sequence. */
1860 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1862 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1863 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1864 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1865 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1866 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1867 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1868 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1869 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1870 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1871 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1872 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1873 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1874 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1875 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1876 }
1877
1878 static void do_utfreadtype8(compiler_common *common)
1879 {
1880 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1881 of the character (>= 0xc0). Return value in TMP1. */
1882 DEFINE_COMPILER;
1883 struct sljit_jump *jump;
1884 struct sljit_jump *compare;
1885
1886 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1887
1888 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1889 jump = JUMP(SLJIT_C_NOT_ZERO);
1890 /* Two byte sequence. */
1891 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1893 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1894 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1895 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1896 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1897 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1898 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1899 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1900
1901 JUMPHERE(compare);
1902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1903 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1904 JUMPHERE(jump);
1905
1906 /* We only have types for characters less than 256. */
1907 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1909 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1910 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1911 }
1912
1913 #else /* COMPILE_PCRE8 */
1914
1915 #ifdef COMPILE_PCRE16
1916 static void do_utfreadchar(compiler_common *common)
1917 {
1918 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1919 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1920 DEFINE_COMPILER;
1921 struct sljit_jump *jump;
1922
1923 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1924 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1925 /* Do nothing, only return. */
1926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1927
1928 JUMPHERE(jump);
1929 /* Combine two 16 bit characters. */
1930 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1931 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1932 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1933 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1934 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1935 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1936 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1937 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1938 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1939 }
1940 #endif /* COMPILE_PCRE16 */
1941
1942 #endif /* COMPILE_PCRE8 */
1943
1944 #endif /* SUPPORT_UTF */
1945
1946 #ifdef SUPPORT_UCP
1947
1948 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1949 #define UCD_BLOCK_MASK 127
1950 #define UCD_BLOCK_SHIFT 7
1951
1952 static void do_getucd(compiler_common *common)
1953 {
1954 /* Search the UCD record for the character comes in TMP1.
1955 Returns chartype in TMP1 and UCD offset in TMP2. */
1956 DEFINE_COMPILER;
1957
1958 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1959
1960 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1961 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1962 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1963 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1964 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1965 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1966 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1967 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1969 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1970 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1971 }
1972 #endif
1973
1974 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1975 {
1976 DEFINE_COMPILER;
1977 struct sljit_label *mainloop;
1978 struct sljit_label *newlinelabel = NULL;
1979 struct sljit_jump *start;
1980 struct sljit_jump *end = NULL;
1981 struct sljit_jump *nl = NULL;
1982 #ifdef SUPPORT_UTF
1983 struct sljit_jump *singlechar;
1984 #endif
1985 jump_list *newline = NULL;
1986 BOOL newlinecheck = FALSE;
1987 BOOL readuchar = FALSE;
1988
1989 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1990 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1991 newlinecheck = TRUE;
1992
1993 if (firstline)
1994 {
1995 /* Search for the end of the first line. */
1996 SLJIT_ASSERT(common->first_line_end != 0);
1997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
1999
2000 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2001 {
2002 mainloop = LABEL();
2003 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2004 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2005 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2006 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2007 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2008 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2009 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2010 }
2011 else
2012 {
2013 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2014 mainloop = LABEL();
2015 /* Continual stores does not cause data dependency. */
2016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2017 read_char(common);
2018 check_newlinechar(common, common->nltype, &newline, TRUE);
2019 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2021 set_jumps(newline, LABEL());
2022 }
2023
2024 JUMPHERE(end);
2025 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2026 }
2027
2028 start = JUMP(SLJIT_JUMP);
2029
2030 if (newlinecheck)
2031 {
2032 newlinelabel = LABEL();
2033 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2034 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2035 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2036 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2037 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2038 #ifdef COMPILE_PCRE16
2039 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2040 #endif
2041 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2042 nl = JUMP(SLJIT_JUMP);
2043 }
2044
2045 mainloop = LABEL();
2046
2047 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2048 #ifdef SUPPORT_UTF
2049 if (common->utf) readuchar = TRUE;
2050 #endif
2051 if (newlinecheck) readuchar = TRUE;
2052
2053 if (readuchar)
2054 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2055
2056 if (newlinecheck)
2057 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2058
2059 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2060 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2061 if (common->utf)
2062 {
2063 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2064 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2065 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2066 JUMPHERE(singlechar);
2067 }
2068 #endif
2069 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2070 if (common->utf)
2071 {
2072 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2073 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2074 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2075 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2076 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2077 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2078 JUMPHERE(singlechar);
2079 }
2080 #endif
2081 JUMPHERE(start);
2082
2083 if (newlinecheck)
2084 {
2085 JUMPHERE(end);
2086 JUMPHERE(nl);
2087 }
2088
2089 return mainloop;
2090 }
2091
/* Emits the fast-forward loop for a known first character. STR_PTR is advanced
until the code unit it points to equals first_char (or, when caseless is set,
the other-case form computed below), or until STR_PTR reaches STR_END. When
firstline is set, STR_END is replaced for the duration of the loop by the value
stored at common->first_line_end; the original STR_END is parked in the
POSSESSIVE0 slot and reloaded before the function returns. */
2092 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2093 {
2094 DEFINE_COMPILER;
2095 struct sljit_label *start;
2096 struct sljit_jump *leave;
2097 struct sljit_jump *found;
2098 pcre_uchar oc, bit;
2099
2100 if (firstline)
2101 {
2102 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2103 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2104 }
2105
/* Loop head: exit when STR_PTR has reached STR_END, otherwise load the current
code unit into TMP1. */
2106 start = LABEL();
2107 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2108 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2109
/* oc is the other-case form of first_char; it stays equal to first_char when
the match is caseful. */
2110 oc = first_char;
2111 if (caseless)
2112 {
2113 oc = TABLE_GET(first_char, common->fcc, first_char);
2114 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2115 if (first_char > 127 && common->utf)
2116 oc = UCD_OTHERCASE(first_char);
2117 #endif
2118 }
2119 if (first_char == oc)
2120 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2121 else
2122 {
2123 bit = first_char ^ oc;
/* If the two forms differ in a single bit, force that bit on in a copy of the
loaded unit and use one compare. */
2124 if (ispowerof2(bit))
2125 {
2126 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2127 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2128 }
2129 else
2130 {
/* Otherwise compare against both forms and OR the two outcomes in TMP2. */
2131 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2132 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2134 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2135 found = JUMP(SLJIT_C_NOT_ZERO);
2136 }
2137 }
2138
/* No match at this position: step over the current character. */
2139 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2140 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2141 if (common->utf)
2142 {
/* Units below 0xc0 need no extra step. For the others, a byte is loaded from
utf8_table4 (indexed by the unit minus 0xc0) and added to STR_PTR. */
2143 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2144 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2146 }
2147 #endif
2148 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2149 if (common->utf)
2150 {
/* Units below 0xd800 need no extra step. Otherwise TMP1 receives the outcome
of the (unit & 0xfc00) == 0xd800 test (presumably 0 or 1), which is doubled
and added to STR_PTR. */
2151 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2152 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2154 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2155 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2157 }
2158 #endif
2159 JUMPTO(SLJIT_JUMP, start);
/* Both exits (character found, subject exhausted) continue here. */
2160 JUMPHERE(found);
2161 JUMPHERE(leave);
2162
2163 if (firstline)
2164 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2165 }
2166
/* Emits the fast-forward code that moves STR_PTR to a position directly
preceded by a newline. Nothing is skipped when STR_PTR is at or before
jit_arguments.str. A fixed two-unit newline (common->newline > 255) is handled
by an explicit loop over pairs of code units; every other newline type goes
through skip_char_back, read_char and check_newlinechar, which are defined
elsewhere. When firstline is set, STR_END is temporarily replaced by the value
stored at common->first_line_end and restored from POSSESSIVE0 at the end. */
2167 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2168 {
2169 DEFINE_COMPILER;
2170 struct sljit_label *loop;
2171 struct sljit_jump *lastchar;
2172 struct sljit_jump *firstchar;
2173 struct sljit_jump *leave;
2174 struct sljit_jump *foundcr = NULL;
2175 struct sljit_jump *notfoundnl;
2176 jump_list *newline = NULL;
2177
2178 if (firstline)
2179 {
2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2181 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2182 }
2183
2184 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2185 {
/* Exit at once when STR_PTR is at or past STR_END, or at or before
jit_arguments.str (loaded into TMP2). TMP1 receives jit_arguments.begin. */
2186 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2187 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2190 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2191
/* TMP2 becomes the outcome of STR_PTR >= begin + 2 units (presumably 0 or 1),
scaled to the code unit size; STR_PTR is moved back by that amount so that the
increment at the loop head brings it back to the starting position. */
2192 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2193 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2194 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2195 #ifdef COMPILE_PCRE16
2196 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2197 #endif
2198 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2199
/* Advance one unit at a time until the two units before STR_PTR are the high
and low byte of common->newline, or STR_END is reached. */
2200 loop = LABEL();
2201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2202 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2203 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2204 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2205 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2206 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2207
2208 JUMPHERE(leave);
2209 JUMPHERE(firstchar);
2210 JUMPHERE(lastchar);
2211
2212 if (firstline)
2213 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2214 return;
2215 }
2216
/* Other newline types: nothing to do when STR_PTR is at or before
jit_arguments.str; otherwise step back one character so that the first
read_char in the loop examines the character before the starting position. */
2217 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2219 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2220 skip_char_back(common);
2221
2222 loop = LABEL();
2223 read_char(common);
2224 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR gets its own exit for NLTYPE_ANY / NLTYPE_ANYCRLF so that a following NL
can be consumed as well. */
2225 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2226 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in the newline list are bound to the loop head
(presumably they are the "not a newline" exits of check_newlinechar). */
2227 check_newlinechar(common, common->nltype, &newline, FALSE);
2228 set_jumps(newline, loop);
2229
2230 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2231 {
2232 leave = JUMP(SLJIT_JUMP);
2233 JUMPHERE(foundcr);
/* After a CR: if there is a unit left and it is CHAR_NL, step over it too.
TMP1 holds the comparison outcome, scaled to the code unit size. */
2234 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2235 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2236 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2237 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2238 #ifdef COMPILE_PCRE16
2239 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2240 #endif
2241 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2242 JUMPHERE(notfoundnl);
2243 JUMPHERE(leave);
2244 }
2245 JUMPHERE(lastchar);
2246 JUMPHERE(firstchar);
2247
2248 if (firstline)
2249 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2250 }
2251
/* Emits the fast-forward loop driven by a start bitmap. start_bits is used as
the base address of a byte table: for each code unit the byte at index
(unit >> 3) is loaded and bit (unit & 7) is tested. STR_PTR is advanced
character by character until a unit with its bit set is found or STR_END is
reached. When firstline is set, STR_END is temporarily replaced by the value
stored at common->first_line_end and restored from POSSESSIVE0 at the end. */
2252 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2253 {
2254 DEFINE_COMPILER;
2255 struct sljit_label *start;
2256 struct sljit_jump *leave;
2257 struct sljit_jump *found;
2258 #ifndef COMPILE_PCRE8
2259 struct sljit_jump *jump;
2260 #endif
2261
2262 if (firstline)
2263 {
2264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2265 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2266 }
2267
2268 start = LABEL();
2269 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2270 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode the loaded unit is kept in TMP3, because TMP1 is overwritten by
the bitmap lookup and is needed again for the character-length step below. */
2271 #ifdef SUPPORT_UTF
2272 if (common->utf)
2273 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2274 #endif
/* Outside the 8-bit library, any unit that is not below 255 is looked up as
255. */
2275 #ifndef COMPILE_PCRE8
2276 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2277 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2278 JUMPHERE(jump);
2279 #endif
/* TMP2 = 1 << (unit & 7); TMP1 = table byte at (unit >> 3). A non-zero AND
means the unit may start a match. */
2280 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2281 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2282 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2283 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2284 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2285 found = JUMP(SLJIT_C_NOT_ZERO);
2286
2287 #ifdef SUPPORT_UTF
2288 if (common->utf)
2289 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2290 #endif
/* Step over the current character (same scheme as in
fast_forward_first_char). */
2291 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2292 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2293 if (common->utf)
2294 {
2295 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2296 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2297 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2298 }
2299 #endif
2300 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2301 if (common->utf)
2302 {
2303 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2304 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2305 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2306 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2307 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2309 }
2310 #endif
2311 JUMPTO(SLJIT_JUMP, start);
2312 JUMPHERE(found);
2313 JUMPHERE(leave);
2314
2315 if (firstline)
2316 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2317 }
2318
/* Emits a forward scan for req_char (or its other-case form when caseless is
set). The scan starts at STR_PTR, or one code unit later when has_firstchar is
set, and uses TMP1 as the scan pointer so STR_PTR itself is not moved. The
position where the character is found is stored in the local slot
common->req_char_ptr. The scan is skipped when STR_PTR + REQ_BYTE_MAX is below
STR_END, or when STR_PTR is below the position already stored in that slot.
Returns the jump that is taken when the scan reaches STR_END without finding
the character; it is left unbound here, for the caller to bind. */
2319 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2320 {
2321 DEFINE_COMPILER;
2322 struct sljit_label *loop;
2323 struct sljit_jump *toolong;
2324 struct sljit_jump *alreadyfound;
2325 struct sljit_jump *found;
2326 struct sljit_jump *foundoc = NULL;
2327 struct sljit_jump *notfound;
2328 pcre_uchar oc, bit;
2329
2330 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = previously stored position; TMP1 = STR_PTR + REQ_BYTE_MAX. */
2331 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2332 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2333 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2334 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2335
2336 if (has_firstchar)
2337 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2338 else
2339 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2340
2341 loop = LABEL();
2342 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2343
2344 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other-case form of req_char; it stays equal to req_char when the
match is caseful. */
2345 oc = req_char;
2346 if (caseless)
2347 {
2348 oc = TABLE_GET(req_char, common->fcc, req_char);
2349 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2350 if (req_char > 127 && common->utf)
2351 oc = UCD_OTHERCASE(req_char);
2352 #endif
2353 }
2354 if (req_char == oc)
2355 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2356 else
2357 {
2358 bit = req_char ^ oc;
/* A single differing bit allows one masked compare; otherwise two separate
compares are emitted, each with its own exit jump. */
2359 if (ispowerof2(bit))
2360 {
2361 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2362 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2363 }
2364 else
2365 {
2366 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2367 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2368 }
2369 }
2370 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2371 JUMPTO(SLJIT_JUMP, loop);
2372
/* Found: remember the position. The two "skip the scan" jumps land after the
store, so they leave the stored position unchanged. */
2373 JUMPHERE(found);
2374 if (foundoc)
2375 JUMPHERE(foundoc);
2376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2377 JUMPHERE(alreadyfound);
2378 JUMPHERE(toolong);
2379 return notfound;
2380 }
2381
/* Emits the shared routine that walks the frame data starting at STACK_TOP
and writes the saved values back. The routine is entered with
sljit_emit_fast_enter (return address kept in RETURN_ADDR) and uses TMP1 as the
read cursor. Each entry starts with a word in TMP2:
  - a value greater than frame_end (signed compare) is treated as an offset
    from the local base in TMP3; the next two words are stored at that
    location and the cursor advances by three words;
  - frame_end ends the walk and returns;
  - frame_setstrbegin stores the next word into OVECTOR(0) and advances by two
    words;
  - frame_setmark (only when common->mark_ptr != 0) stores the next word into
    the common->mark_ptr slot and advances by two words;
  - anything else is skipped by advancing two words. */
2382 static void do_revertframes(compiler_common *common)
2383 {
2384 DEFINE_COMPILER;
2385 struct sljit_jump *jump;
2386 struct sljit_label *mainloop;
2387
2388 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2389 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2390 GET_LOCAL_BASE(TMP3, 0, 0);
2391
2392 /* Drop frames until we reach STACK_TOP. */
2393 mainloop = LABEL();
2394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2395 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Ordinary entry: TMP2 becomes the destination address (local base + offset)
and receives the two saved words that follow. */
2396 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2397 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2398 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2399 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2400 JUMPTO(SLJIT_JUMP, mainloop);
2401
2402 JUMPHERE(jump);
2403 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2404 /* End of dropping frames. */
2405 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2406
2407 JUMPHERE(jump);
2408 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2409 /* Set string begin. */
2410 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2411 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2412 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2413 JUMPTO(SLJIT_JUMP, mainloop);
2414
2415 JUMPHERE(jump);
2416 if (common->mark_ptr != 0)
2417 {
/* Set mark: same layout as the string-begin command, but the value goes to
the mark slot. */
2418 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
2419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2420 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2421 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
2422 JUMPTO(SLJIT_JUMP, mainloop);
2423
2424 JUMPHERE(jump);
2425 }
2426
2427 /* Unknown command. */
2428 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2429 JUMPTO(SLJIT_JUMP, mainloop);
2430 }
2431
/* Emits the shared word-boundary routine. It is entered with
sljit_emit_fast_enter, keeping the return address in the LOCALS0 slot. The
routine computes a 0/1 "word character" value for the character before STR_PTR
(stored in LOCALS1; left 0 when STR_PTR is at or before jit_arguments.begin)
and for the character at STR_PTR (kept in TMP2; left 0 when check_str_end
reports the end), then XORs the two with SLJIT_SET_E so that the flags tell the
caller whether the two values differ. */
2432 static void check_wordboundary(compiler_common *common)
2433 {
2434 DEFINE_COMPILER;
2435 struct sljit_jump *skipread;
2436 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2437 struct sljit_jump *jump;
2438 #endif
2439
/* The non-UCP paths shift the ctypes byte right by 4 to isolate the word bit,
which is only correct while ctype_word is 0x10. */
2440 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2441
2442 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2443 /* Get type of the previous char, and put it to LOCALS1. */
2444 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2446 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2447 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2448 skip_char_back(common);
2449 check_start_used_ptr(common);
2450 read_char(common);
2451
2452 /* Testing char type. */
2453 #ifdef SUPPORT_UCP
2454 if (common->use_ucp)
2455 {
/* TMP2 starts as 1 and stays 1 for an underscore. Otherwise the getucd
routine is called (presumably leaving the character type in TMP1) and TMP2
becomes 1 when the type lies in [ucp_Ll, ucp_Lu] or [ucp_Nd, ucp_No], each
tested with a subtract and an unsigned compare. */
2456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2457 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2458 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2460 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2461 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2462 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2463 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2464 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2465 JUMPHERE(jump);
2466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2467 }
2468 else
2469 #endif
2470 {
/* Table lookup: characters above 255 skip the lookup and keep the 0 already
stored in LOCALS1; the others use bit 4 of their common->ctypes entry. */
2471 #ifndef COMPILE_PCRE8
2472 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2473 #elif defined SUPPORT_UTF
2474 /* Here LOCALS1 has already been zeroed. */
2475 jump = NULL;
2476 if (common->utf)
2477 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2478 #endif /* COMPILE_PCRE8 */
2479 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2480 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2481 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2483 #ifndef COMPILE_PCRE8
2484 JUMPHERE(jump);
2485 #elif defined SUPPORT_UTF
2486 if (jump != NULL)
2487 JUMPHERE(jump);
2488 #endif /* COMPILE_PCRE8 */
2489 }
2490 JUMPHERE(skipread);
2491
/* Second half: classify the character at STR_PTR into TMP2, which stays 0
when the jump returned by check_str_end is taken. */
2492 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2493 skipread = check_str_end(common);
2494 peek_char(common);
2495
2496 /* Testing char type. This is a code duplication. */
2497 #ifdef SUPPORT_UCP
2498 if (common->use_ucp)
2499 {
2500 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2501 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2502 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2503 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2504 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2505 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2507 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2508 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2509 JUMPHERE(jump);
2510 }
2511 else
2512 #endif
2513 {
2514 #ifndef COMPILE_PCRE8
2515 /* TMP2 may be destroyed by peek_char. */
2516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2517 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2518 #elif defined SUPPORT_UTF
2519 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2520 jump = NULL;
2521 if (common->utf)
2522 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2523 #endif
2524 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2525 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2526 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2527 #ifndef COMPILE_PCRE8
2528 JUMPHERE(jump);
2529 #elif defined SUPPORT_UTF
2530 if (jump != NULL)
2531 JUMPHERE(jump);
2532 #endif /* COMPILE_PCRE8 */
2533 }
2534 JUMPHERE(skipread);
2535
/* The result is delivered in the flags: the XOR is non-zero exactly when the
two word-character values differ. */
2536 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2537 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2538 }
2539
/* Emits the shared routine that tests the character in TMP1 against the "any
newline" set: 0x0a-0x0d and 0x85 in every build, plus 0x2028 and 0x2029 in the
16-bit library or when common->utf is set in the 8-bit library. The outcome is
accumulated in TMP2 and the final OR sets the flags for the caller. */
2540 static void check_anynewline(compiler_common *common)
2541 {
2542 /* Check whether TMP1 contains a newline character. TMP1 is modified, TMP2 destroyed. */
2543 DEFINE_COMPILER;
2544
2545 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2546
/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes the unsigned range 0..3. */
2547 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2548 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2549 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2550 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2551 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2552 #ifdef COMPILE_PCRE8
2553 if (common->utf)
2554 {
2555 #endif
/* Fold in the 0x85 result, then set the lowest bit of TMP1 so that 0x2028 and
0x2029 are both matched by a single compare against 0x2029. */
2556 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2557 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2558 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2559 #ifdef COMPILE_PCRE8
2560 }
2561 #endif
2562 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the last compare; SLJIT_SET_E makes the flags reflect TMP2. */
2563 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2564 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2565 }
2566
/* Emits the shared routine that tests the character in TMP1 against the
horizontal whitespace set: 0x09, 0x20 and 0xa0 in every build, plus 0x1680,
0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 in the 16-bit library or when
common->utf is set in the 8-bit library. The outcome is accumulated in TMP2 and
the final OR sets the flags for the caller. */
2567 static void check_hspace(compiler_common *common)
2568 {
2569 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed; TMP1 is modified on the wide-character path. */
2570 DEFINE_COMPILER;
2571
2572 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2573
2574 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2575 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2576 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2577 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2578 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2579 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2580 #ifdef COMPILE_PCRE8
2581 if (common->utf)
2582 {
2583 #endif
2584 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2585 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2586 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2587 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2588 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 to 0x2000: the range 0x2000-0x200a becomes an unsigned compare
and the remaining constants are given relative to 0x2000. */
2589 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2590 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2591 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2592 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2593 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2594 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2595 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2596 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2597 #ifdef COMPILE_PCRE8
2598 }
2599 #endif
2600 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the last compare; SLJIT_SET_E makes the flags reflect TMP2. */
2601 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2602
2603 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2604 }
2605
/* Emits the shared routine that tests the character in TMP1 against the
vertical whitespace set: 0x0a-0x0d and 0x85 in every build, plus 0x2028 and
0x2029 in the 16-bit library or when common->utf is set in the 8-bit library.
The outcome is accumulated in TMP2 and the final OR sets the flags for the
caller. */
2606 static void check_vspace(compiler_common *common)
2607 {
2608 /* Check whether TMP1 contains a vertical whitespace character. TMP1 is modified, TMP2 destroyed. */
2609 DEFINE_COMPILER;
2610
2611 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2612
/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes the unsigned range 0..3. */
2613 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2614 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2615 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2617 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2618 #ifdef COMPILE_PCRE8
2619 if (common->utf)
2620 {
2621 #endif
/* Fold in the 0x85 result, then set the lowest bit of TMP1 so that 0x2028 and
0x2029 are both matched by a single compare against 0x2029. */
2622 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2623 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2624 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2625 #ifdef COMPILE_PCRE8
2626 }
2627 #endif
2628 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the last compare; SLJIT_SET_E makes the flags reflect TMP2. */
2629 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2630
2631 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2632 }
2633
/* The compare routines below borrow the STR_END and STACK_TOP registers as
scratch registers for the two code units being compared; each routine saves
the original values on entry and restores them before returning. */
2634 #define CHAR1 STR_END
2635 #define CHAR2 STACK_TOP
2636
/* Emits the shared caseful compare routine. On entry TMP1 addresses the
reference data and TMP2 holds the length to compare; the routine first moves
STR_PTR back by TMP2 and then compares unit by unit until a mismatch is seen or
TMP2 has been counted down to zero. CHAR1 is preserved in TMP3 and CHAR2 in
the LOCALS0 slot. How the caller distinguishes match from mismatch is not
visible here -- NOTE(review): presumably via TMP2 or the flags on exit;
confirm at the call site. */
2637 static void do_casefulcmp(compiler_common *common)
2638 {
2639 DEFINE_COMPILER;
2640 struct sljit_jump *jump;
2641 struct sljit_label *label;
2642
2643 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2644 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2645 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2646 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are biased one unit back because the loads in the loop use a
one-unit displacement (MOVU_UCHAR presumably also updates the base register
-- confirm against its definition). The bias on STR_PTR is undone after the
loop. */
2647 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2648 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649
2650 label = LABEL();
2651 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2652 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2653 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2654 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2655 JUMPTO(SLJIT_C_NOT_ZERO, label);
2656
2657 JUMPHERE(jump);
2658 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2659 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2660 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2661 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2662 }
2663
/* do_caselesscmp additionally borrows the STACK_LIMIT register to hold the
address of the lower-case table. */
2664 #define LCC_TABLE STACK_LIMIT
2665
/* Emits the shared caseless compare routine. It has the same structure as
do_casefulcmp, but each loaded unit is first mapped through the common->lcc
table; outside the 8-bit library, units above 255 skip the table and are
compared unchanged. LCC_TABLE is preserved in TMP3, CHAR1 in the LOCALS0 slot
and CHAR2 in the LOCALS1 slot. */
2666 static void do_caselesscmp(compiler_common *common)
2667 {
2668 DEFINE_COMPILER;
2669 struct sljit_jump *jump;
2670 struct sljit_label *label;
2671
2672 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2673 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2674
2675 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2677 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2678 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased one unit back to match the one-unit displacement
of the loads in the loop; the bias on STR_PTR is undone after the loop. */
2679 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2680 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2681
2682 label = LABEL();
2683 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2684 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2685 #ifndef COMPILE_PCRE8
2686 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2687 #endif
2688 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2689 #ifndef COMPILE_PCRE8
2690 JUMPHERE(jump);
2691 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2692 #endif
2693 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2694 #ifndef COMPILE_PCRE8
2695 JUMPHERE(jump);
2696 #endif
2697 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2698 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2699 JUMPTO(SLJIT_C_NOT_ZERO, label);
2700
2701 JUMPHERE(jump);
2702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2703 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2704 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2705 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2706 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2707 }
2708
/* The register aliases are only meaningful inside the two compare routines. */
2709 #undef LCC_TABLE
2710 #undef CHAR1
2711 #undef CHAR2
2712
2713 #if defined SUPPORT_UTF && defined SUPPORT_UCP
2714
2715 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2716 {
2717 /* This function would be ineffective to do in JIT level. */
2718 int c1, c2;
2719 const pcre_uchar *src2 = args->uchar_ptr;
2720 const pcre_uchar *end2 = args->end;
2721
2722 while (src1 < end1)
2723 {
2724 if (src2 >= end2)
2725 return (pcre_uchar*)1;
2726 GETCHARINC(c1, src1);
2727 GETCHARINC(c2, src2);
2728 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2729 }
2730 return src2;
2731 }
2732
2733 #endif /* SUPPORT_UTF && SUPPORT_UCP */
2734
/* Emits the comparison for one character of a literal sequence. The character
starts at cc; when common->utf is set and the lead unit has extra length, all
of its code units are processed. Subject data is addressed relative to STR_PTR
at the negative offset context->length, which is decremented by one code unit
per processed unit. Mismatch jumps are added to backtracks.

context carries state across successive calls for the same sequence: the
remaining length, the register that will receive the next load (sourcereg, -1
before the first call), and, when unaligned reads are enabled, the expected
value and case mask assembled so far (c, oc, ucharptr).

For a caseless compare of a character with another case, the differing bit is
ORed into the loaded data and into the expected value, so one compare covers
both cases. Returns cc advanced past the processed code units. */
2735 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2736 compare_context* context, jump_list **backtracks)
2737 {
2738 DEFINE_COMPILER;
2739 unsigned int othercasebit = 0;
2740 pcre_uchar *othercasechar = NULL;
2741 #ifdef SUPPORT_UTF
2742 int utflength;
2743 #endif
2744
2745 if (caseless && char_has_othercase(common, cc))
2746 {
2747 othercasebit = char_get_othercase_bit(common, cc);
2748 SLJIT_ASSERT(othercasebit);
2749 /* Extracting bit difference info. */
/* The upper part of the returned value selects the code unit (relative to cc)
that carries the differing bit; the low bits give the bit itself. In the
16-bit library bit 0x100 says the bit lies in the upper byte of that unit. */
2750 #ifdef COMPILE_PCRE8
2751 othercasechar = cc + (othercasebit >> 8);
2752 othercasebit &= 0xff;
2753 #else
2754 #ifdef COMPILE_PCRE16
2755 othercasechar = cc + (othercasebit >> 9);
2756 if ((othercasebit & 0x100) != 0)
2757 othercasebit = (othercasebit & 0xff) << 8;
2758 else
2759 othercasebit &= 0xff;
2760 #endif
2761 #endif
2762 }
2763
/* First call for this sequence: load the first chunk into TMP1 (as wide as
the remaining length allows when unaligned reads are enabled) and make TMP2
the target of the next load. */
2764 if (context->sourcereg == -1)
2765 {
2766 #ifdef COMPILE_PCRE8
2767 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2768 if (context->length >= 4)
2769 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2770 else if (context->length >= 2)
2771 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2772 else
2773 #endif
2774 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2775 #else
2776 #ifdef COMPILE_PCRE16
2777 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2778 if (context->length >= 4)
2779 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2780 else
2781 #endif
2782 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2783 #endif
2784 #endif /* COMPILE_PCRE8 */
2785 context->sourcereg = TMP2;
2786 }
2787
/* utflength is the number of code units that make up the character at cc. */
2788 #ifdef SUPPORT_UTF
2789 utflength = 1;
2790 if (common->utf && HAS_EXTRALEN(*cc))
2791 utflength += GET_EXTRALEN(*cc);
2792
2793 do
2794 {
2795 #endif
2796
2797 context->length -= IN_UCHARS(1);
2798 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2799
2800 /* Unaligned read is supported. */
/* Append the expected unit (with the case bit forced on) and the case mask
for this unit to the chunk being assembled. */
2801 if (othercasebit != 0 && othercasechar == cc)
2802 {
2803 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2804 context->oc.asuchars[context->ucharptr] = othercasebit;
2805 }
2806 else
2807 {
2808 context->c.asuchars[context->ucharptr] = *cc;
2809 context->oc.asuchars[context->ucharptr] = 0;
2810 }
2811 context->ucharptr++;
2812
/* The chunk is complete when it fills the load width chosen for it or when
the sequence ends. */
2813 #ifdef COMPILE_PCRE8
2814 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2815 #else
2816 if (context->ucharptr >= 2 || context->length == 0)
2817 #endif
2818 {
/* Load the following chunk into the spare register first, then switch to the
register that already holds the chunk to be compared now. */
2819 if (context->length >= 4)
2820 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2821 #ifdef COMPILE_PCRE8
2822 else if (context->length >= 2)
2823 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2824 else if (context->length >= 1)
2825 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2826 #else
2827 else if (context->length >= 2)
2828 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2829 #endif
2830 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2831
/* The case labels are in code units: 4 and 2 bytes of data, plus a single
byte in the 8-bit library. */
2832 switch(context->ucharptr)
2833 {
2834 case 4 / sizeof(pcre_uchar):
2835 if (context->oc.asint != 0)
2836 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2837 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2838 break;
2839
2840 case 2 / sizeof(pcre_uchar):
2841 if (context->oc.asushort != 0)
2842 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2843 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2844 break;
2845
2846 #ifdef COMPILE_PCRE8
2847 case 1:
2848 if (context->oc.asbyte != 0)
2849 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2850 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2851 break;
2852 #endif
2853
2854 default:
2855 SLJIT_ASSERT_STOP();
2856 break;
2857 }
2858 context->ucharptr = 0;
2859 }
2860
2861 #else
2862
2863 /* Unaligned read is unsupported. */
/* One unit at a time: load the next unit into the spare register (if any is
left), switch registers, and compare the unit loaded earlier. */
2864 #ifdef COMPILE_PCRE8
2865 if (context->length > 0)
2866 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2867 #else
2868 if (context->length > 0)
2869 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2870 #endif
2871 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2872
2873 if (othercasebit != 0 && othercasechar == cc)
2874 {
2875 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2876 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2877 }
2878 else
2879 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2880
2881 #endif
2882
2883 cc++;
2884 #ifdef SUPPORT_UTF
2885 utflength--;
2886 }
2887 while (utflength > 0);
2888 #endif
2889
2890 return cc;
2891 }
2892
2893 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2894
/* Rebases the run-time value in typereg: emits a SUB (when value is larger
than the current typeoffset) or an ADD (when it is smaller) for the difference,
and then records value as the new typeoffset. Nothing is emitted when the two
are already equal. The macro expands to an if statement followed by an
assignment, relies on typereg and typeoffset being in scope, and evaluates
value more than once. */
2895 #define SET_TYPE_OFFSET(value) \
2896 if ((value) != typeoffset) \
2897 { \
2898 if ((value) > typeoffset) \
2899 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2900 else \
2901 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2902 } \
2903 typeoffset = (value);
2904
/* Rebases the run-time character value in TMP1: emits a SUB (when value is
larger than the current charoffset) or an ADD (when it is smaller) for the
difference, and then records value as the new charoffset. Nothing is emitted
when the two are already equal. The macro expands to an if statement followed
by an assignment, relies on charoffset being in scope, and evaluates value
more than once. */
2905 #define SET_CHAR_OFFSET(value) \
2906 if ((value) != charoffset) \
2907 { \
2908 if ((value) > charoffset) \
2909 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2910 else \
2911 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2912 } \
2913 charoffset = (value);
2914
2915 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
2916 {
2917 DEFINE_COMPILER;
2918 jump_list *found = NULL;
2919 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
2920 unsigned int c;
2921 int compares;
2922 struct sljit_jump *jump = NULL;
2923 pcre_uchar *ccbegin;
2924 #ifdef SUPPORT_UCP
2925 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2926 BOOL charsaved = FALSE;
2927 int typereg = TMP1, scriptreg = TMP1;
2928 unsigned int typeoffset;
2929 #endif
2930 int invertcmp, numberofcmps;
2931 unsigned int charoffset;
2932
2933 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2934 detect_partial_match(common, backtracks);
2935 read_char(common);
2936
2937 if ((*cc++ & XCL_MAP) != 0)
2938 {
2939 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2940 #ifndef COMPILE_PCRE8
2941 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2942 #elif defined SUPPORT_UTF
2943 if (common->utf)
2944 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2945 #endif
2946
2947 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2948 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2949 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2950 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2951 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2952 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2953
2954 #ifndef COMPILE_PCRE8
2955 JUMPHERE(jump);
2956 #elif defined SUPPORT_UTF
2957 if (common->utf)
2958 JUMPHERE(jump);
2959 #endif
2960 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2961 #ifdef SUPPORT_UCP
2962 charsaved = TRUE;
2963 #endif
2964 cc += 32 / sizeof(pcre_uchar);
2965 }
2966
2967 /* Scanning the necessary info. */
2968 ccbegin = cc;
2969 compares = 0;
2970 while (*cc != XCL_END)
2971 {
2972 compares++;
2973 if (*cc == XCL_SINGLE)
2974 {
2975 cc += 2;
2976 #ifdef SUPPORT_UTF
2977 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2978 #endif
2979 #ifdef SUPPORT_UCP
2980 needschar = TRUE;
2981 #endif
2982 }
2983 else if (*cc == XCL_RANGE)
2984 {
2985 cc += 2;
2986 #ifdef SUPPORT_UTF
2987 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2988 #endif
2989 cc++;
2990 #ifdef SUPPORT_UTF
2991 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2992 #endif
2993 #ifdef SUPPORT_UCP
2994 needschar = TRUE;
2995 #endif
2996 }
2997 #ifdef SUPPORT_UCP
2998 else
2999 {
3000 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3001 cc++;
3002 switch(*cc)
3003 {
3004 case PT_ANY:
3005 break;
3006
3007 case PT_LAMP:
3008 case PT_GC:
3009 case PT_PC:
3010 case PT_ALNUM:
3011 needstype = TRUE;
3012 break;
3013
3014 case PT_SC:
3015 needsscript = TRUE;
3016 break;
3017
3018 case PT_SPACE:
3019 case PT_PXSPACE:
3020 case PT_WORD:
3021 needstype = TRUE;
3022 needschar = TRUE;
3023 break;
3024
3025 default:
3026 SLJIT_ASSERT_STOP();
3027 break;
3028 }
3029 cc += 2;
3030 }
3031 #endif
3032 }
3033
3034 #ifdef SUPPORT_UCP
3035 /* Simple register allocation. TMP1 is preferred if possible. */
3036 if (needstype || needsscript)
3037 {
3038 if (needschar && !charsaved)
3039 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3040 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3041 if (needschar)
3042 {
3043 if (needstype)
3044 {
3045 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3046 typereg = RETURN_ADDR;
3047 }
3048
3049 if (needsscript)
3050 scriptreg = TMP3;
3051 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3052 }
3053 else if (needstype && needsscript)
3054 scriptreg = TMP3;
3055 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3056
3057 if (needsscript)
3058 {
3059 if (scriptreg == TMP1)
3060 {
3061 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3062 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3063 }
3064 else
3065 {
3066 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3067 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3068 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3069 }
3070 }
3071 }
3072 #endif
3073
3074 /* Generating code. */
3075 cc = ccbegin;
3076 charoffset = 0;
3077 numberofcmps = 0;
3078 #ifdef SUPPORT_UCP
3079 typeoffset = 0;
3080 #endif
3081
3082 while (*cc != XCL_END)
3083 {
3084 compares--;
3085 invertcmp = (compares == 0 && list != backtracks);
3086 jump = NULL;
3087
3088 if (*cc == XCL_SINGLE)
3089 {
3090 cc ++;
3091 #ifdef SUPPORT_UTF
3092 if (common->utf)
3093 {
3094 GETCHARINC(c, cc);
3095 }
3096 else
3097 #endif
3098 c = *cc++;
3099
3100 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3101 {
3102 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3103 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3104 numberofcmps++;
3105 }
3106 else if (numberofcmps > 0)
3107 {
3108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3109 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3110 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3111 numberofcmps = 0;
3112 }
3113 else
3114 {
3115 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3116 numberofcmps = 0;
3117 }
3118 }
3119 else if (*cc == XCL_RANGE)
3120 {
3121 cc ++;
3122 #ifdef SUPPORT_UTF
3123 if (common->utf)
3124 {
3125 GETCHARINC(c, cc);
3126 }
3127 else
3128 #endif
3129 c = *cc++;
3130 SET_CHAR_OFFSET(c);
3131 #ifdef SUPPORT_UTF
3132 if (common->utf)
3133 {
3134 GETCHARINC(c, cc);
3135 }
3136 else
3137 #endif
3138 c = *cc++;
3139 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3140 {
3141 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3142 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3143 numberofcmps++;
3144 }
3145 else if (numberofcmps > 0)
3146 {
3147 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3148 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3149 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3150 numberofcmps = 0;
3151 }
3152 else
3153 {
3154 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3155 numberofcmps = 0;
3156 }
3157 }
3158 #ifdef SUPPORT_UCP
3159 else
3160 {
3161 if (*cc == XCL_NOTPROP)
3162 invertcmp ^= 0x1;
3163 cc++;
3164 switch(*cc)
3165 {
3166 case PT_ANY:
3167 if (list != backtracks)
3168 {
3169 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3170 continue;
3171 }
3172 else if (cc[-1] == XCL_NOTPROP)
3173 continue;
3174 jump = JUMP(SLJIT_JUMP);
3175 break;
3176
3177 case PT_LAMP:
3178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3179 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3181 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3182 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3183 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3184 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3185 break;
3186
3187 case PT_GC:
3188 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3189 SET_TYPE_OFFSET(c);
3190 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3191 break;
3192
3193 case PT_PC:
3194 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3195 break;
3196
3197 case PT_SC:
3198 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3199 break;
3200
3201 case PT_SPACE:
3202 case PT_PXSPACE:
3203 if (*cc == PT_SPACE)
3204 {
3205 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3206 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3207 }
3208 SET_CHAR_OFFSET(9);
3209 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3210 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3211 if (*cc == PT_SPACE)
3212 JUMPHERE(jump);
3213
3214 SET_TYPE_OFFSET(ucp_Zl);
3215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3216 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3217 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3218 break;
3219
3220 case PT_WORD:
3221 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3222 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3223 /* ... fall through */
3224
3225 case PT_ALNUM:
3226 SET_TYPE_OFFSET(ucp_Ll);
3227 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3228 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3229 SET_TYPE_OFFSET(ucp_Nd);
3230 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3231 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3232 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3233 break;
3234 }
3235 cc += 2;
3236 }
3237 #endif
3238
3239 if (jump != NULL)
3240 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3241 }
3242
3243 if (found != NULL)
3244 set_jumps(found, LABEL());
3245 }
3246
3247 #undef SET_TYPE_OFFSET
3248 #undef SET_CHAR_OFFSET
3249
3250 #endif
3251
3252 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3253 {
3254 DEFINE_COMPILER;
3255 int length;
3256 unsigned int c, oc, bit;
3257 compare_context context;
3258 struct sljit_jump *jump[4];
3259 #ifdef SUPPORT_UTF
3260 struct sljit_label *label;
3261 #ifdef SUPPORT_UCP
3262 pcre_uchar propdata[5];
3263 #endif
3264 #endif
3265
3266 switch(type)
3267 {
3268 case OP_SOD:
3269 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3271 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3272 return cc;
3273
3274 case OP_SOM:
3275 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3277 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3278 return cc;
3279
3280 case OP_NOT_WORD_BOUNDARY:
3281 case OP_WORD_BOUNDARY:
3282 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3283 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3284 return cc;
3285
3286 case OP_NOT_DIGIT:
3287 case OP_DIGIT:
3288 detect_partial_match(common, backtracks);
3289 read_char8_type(common);
3290 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3291 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3292 return cc;
3293
3294 case OP_NOT_WHITESPACE:
3295 case OP_WHITESPACE:
3296 detect_partial_match(common, backtracks);
3297 read_char8_type(common);
3298 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3299 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3300 return cc;
3301
3302 case OP_NOT_WORDCHAR:
3303 case OP_WORDCHAR:
3304 detect_partial_match(common, backtracks);
3305 read_char8_type(common);
3306 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3307 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3308 return cc;
3309
3310 case OP_ANY:
3311 detect_partial_match(common, backtracks);
3312 read_char(common);
3313 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3314 {
3315 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3316 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3317 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3318 else
3319 jump[1] = check_str_end(common);
3320
3321 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3322 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3323 if (jump[1] != NULL)
3324 JUMPHERE(jump[1]);
3325 JUMPHERE(jump[0]);
3326 }
3327 else
3328 check_newlinechar(common, common->nltype, backtracks, TRUE);
3329 return cc;
3330
3331 case OP_ALLANY:
3332 detect_partial_match(common, backtracks);
3333 #ifdef SUPPORT_UTF
3334 if (common->utf)
3335 {
3336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3338 #ifdef COMPILE_PCRE8
3339 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3340 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3341 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3342 #else /* COMPILE_PCRE8 */
3343 #ifdef COMPILE_PCRE16
3344 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3345 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3347 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3348 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3349 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3350 #endif /* COMPILE_PCRE16 */
3351 #endif /* COMPILE_PCRE8 */
3352 JUMPHERE(jump[0]);
3353 return cc;
3354 }
3355 #endif
3356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3357 return cc;
3358
3359 case OP_ANYBYTE:
3360 detect_partial_match(common, backtracks);
3361 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3362 return cc;
3363
3364 #ifdef SUPPORT_UTF
3365 #ifdef SUPPORT_UCP
3366 case OP_NOTPROP:
3367 case OP_PROP:
3368 propdata[0] = 0;
3369 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3370 propdata[2] = cc[0];
3371 propdata[3] = cc[1];
3372 propdata[4] = XCL_END;
3373 compile_xclass_trypath(common, propdata, backtracks);
3374 return cc + 2;
3375 #endif
3376 #endif
3377
3378 case OP_ANYNL:
3379 detect_partial_match(common, backtracks);
3380 read_char(common);
3381 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3382 /* We don't need to handle soft partial matching case. */
3383 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3384 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3385 else
3386 jump[1] = check_str_end(common);
3387 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3388 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3390 jump[3] = JUMP(SLJIT_JUMP);
3391 JUMPHERE(jump[0]);
3392 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
3393 JUMPHERE(jump[1]);
3394 JUMPHERE(jump[2]);
3395 JUMPHERE(jump[3]);
3396 return cc;
3397
3398 case OP_NOT_HSPACE:
3399 case OP_HSPACE:
3400 detect_partial_match(common, backtracks);
3401 read_char(common);
3402 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3403 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3404 return cc;
3405
3406 case OP_NOT_VSPACE:
3407 case OP_VSPACE:
3408 detect_partial_match(common, backtracks);
3409 read_char(common);
3410 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3411 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3412 return cc;
3413
3414 #ifdef SUPPORT_UCP
3415 case OP_EXTUNI:
3416 detect_partial_match(common, backtracks);
3417 read_char(common);
3418 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3419 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3420 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3421
3422 label = LABEL();
3423 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3424 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3425 read_char(common);
3426 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3427 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3428 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3429
3430 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3431 JUMPHERE(jump[0]);
3432 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3433 {
3434 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3435 /* Since we successfully read a char above, partial matching must occure. */
3436 check_partial(common, TRUE);
3437 JUMPHERE(jump[0]);
3438 }
3439 return cc;
3440 #endif
3441
3442 case OP_EODN:
3443 /* Requires rather complex checks. */
3444 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3445 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3446 {
3447 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3448 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3449 if (common->mode == JIT_COMPILE)
3450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3451 else
3452 {
3453 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3454 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3455 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3456 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3457 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3458 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
3459 check_partial(common, TRUE);
3460 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3461 JUMPHERE(jump[1]);
3462 }
3463 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3465 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3466 }
3467 else if (common->nltype == NLTYPE_FIXED)
3468 {
3469 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3471 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3472 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3473 }
3474 else
3475 {
3476 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3477 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3478 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3479 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3480 jump[2] = JUMP(SLJIT_C_GREATER);
3481 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
3482 /* Equal. */
3483 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3484 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3485 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3486
3487 JUMPHERE(jump[1]);
3488 if (common->nltype == NLTYPE_ANYCRLF)
3489 {
3490 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3491 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3492 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3493 }
3494 else
3495 {
3496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3497 read_char(common);
3498 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3499 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3500 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3501 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3502 }
3503 JUMPHERE(jump[2]);
3504 JUMPHERE(jump[3]);
3505 }
3506 JUMPHERE(jump[0]);
3507 check_partial(common, FALSE);
3508 return cc;
3509
3510 case OP_EOD:
3511 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3512 check_partial(common, FALSE);
3513 return cc;
3514
3515 case OP_CIRC:
3516 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3518 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3519 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3520 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3521 return cc;
3522
3523 case OP_CIRCM:
3524 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3525 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3526 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3527 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3528 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3529 jump[0] = JUMP(SLJIT_JUMP);
3530 JUMPHERE(jump[1]);
3531
3532 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3533 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3534 {
3535 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3536 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3538 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3539 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3540 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3541 }
3542 else
3543 {
3544 skip_char_back(common);
3545 read_char(common);
3546 check_newlinechar(common, common->nltype, backtracks, FALSE);
3547 }
3548 JUMPHERE(jump[0]);
3549 return cc;
3550
3551 case OP_DOLL:
3552 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3553 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3554 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3555
3556 if (!common->endonly)
3557 compile_char1_trypath(common, OP_EODN, cc, backtracks);
3558 else
3559 {
3560 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3561 check_partial(common, FALSE);
3562 }
3563 return cc;
3564
3565 case OP_DOLLM:
3566 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3567 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3568 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3569 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3570 check_partial(common, FALSE);
3571 jump[0] = JUMP(SLJIT_JUMP);
3572 JUMPHERE(jump[1]);
3573
3574 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3575 {
3576 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3577 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3578 if (common->mode == JIT_COMPILE)
3579 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3580 else
3581 {
3582 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3583 /* STR_PTR = STR_END - IN_UCHARS(1) */
3584 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3585 check_partial(common, TRUE);
3586 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3587 JUMPHERE(jump[1]);
3588 }
3589
3590 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3591 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3592 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3593 }
3594 else
3595 {
3596 peek_char(common);
3597 check_newlinechar(common, common->nltype, backtracks, FALSE);
3598 }
3599 JUMPHERE(jump[0]);
3600 return cc;
3601
3602 case OP_CHAR:
3603 case OP_CHARI:
3604 length = 1;
3605 #ifdef SUPPORT_UTF
3606 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3607 #endif
3608 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3609 {
3610 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3611 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3612
3613 context.length = IN_UCHARS(length);
3614 context.sourcereg = -1;
3615 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3616 context.ucharptr = 0;
3617 #endif
3618 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
3619 }
3620 detect_partial_match(common, backtracks);
3621 read_char(common);
3622 #ifdef SUPPORT_UTF
3623 if (common->utf)
3624 {
3625 GETCHAR(c, cc);
3626 }
3627 else
3628 #endif
3629 c = *cc;
3630 if (type == OP_CHAR || !char_has_othercase(common, cc))
3631 {
3632 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3633 return cc + length;
3634 }
3635 oc = char_othercase(common, c);
3636 bit = c ^ oc;
3637 if (ispowerof2(bit))
3638 {
3639 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3640 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3641 return cc + length;
3642 }
3643 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3644 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3645 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3646 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3647 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3648 return cc + length;
3649
3650 case OP_NOT:
3651 case OP_NOTI:
3652 detect_partial_match(common, backtracks);
3653 length = 1;
3654 #ifdef SUPPORT_UTF
3655 if (common->utf)
3656 {
3657 #ifdef COMPILE_PCRE8
3658 c = *cc;
3659 if (c < 128)
3660 {
3661 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3662 if (type == OP_NOT || !char_has_othercase(common, cc))
3663 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3664 else
3665 {
3666 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3667 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3668 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3669 }
3670 /* Skip the variable-length character. */
3671 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3672 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3673 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3674 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3675 JUMPHERE(jump[0]);
3676 return cc + 1;
3677 }
3678 else
3679 #endif /* COMPILE_PCRE8 */
3680 {
3681 GETCHARLEN(c, cc, length);
3682 read_char(common);
3683 }
3684 }
3685 else
3686 #endif /* SUPPORT_UTF */
3687 {
3688 read_char(common);
3689 c = *cc;
3690 }
3691
3692 if (type == OP_NOT || !char_has_othercase(common, cc))
3693 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3694 else
3695 {
3696 oc = char_othercase(common, c);
3697 bit = c ^ oc;
3698 if (ispowerof2(bit))
3699 {
3700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3701 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3702 }
3703 else
3704 {
3705 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3706 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3707 }
3708 }
3709 return cc + length;
3710
3711 case OP_CLASS:
3712 case OP_NCLASS:
3713 detect_partial_match(common, backtracks);
3714 read_char(common);
3715 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3716 jump[0] = NULL;
3717 #ifdef COMPILE_PCRE8
3718 /* This check only affects 8 bit mode. In other modes, we
3719 always need to compare the value with 255. */
3720 if (common->utf)
3721 #endif /* COMPILE_PCRE8 */
3722 {
3723 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3724 if (type == OP_CLASS)
3725 {
3726 add_jump(compiler, backtracks, jump[0]);
3727 jump[0] = NULL;
3728 }
3729 }
3730 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3731 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3732 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3733 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3734 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3735 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3736 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
3737 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3738 if (jump[0] != NULL)
3739 JUMPHERE(jump[0]);
3740 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3741 return cc + 32 / sizeof(pcre_uchar);
3742
3743 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3744 case OP_XCLASS:
3745 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
3746 return cc + GET(cc, 0) - 1;
3747 #endif
3748
3749 case OP_REVERSE:
3750 length = GET(cc, 0);
3751 if (length == 0)
3752 return cc + LINK_SIZE;
3753 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3754 #ifdef SUPPORT_UTF
3755 if (common->utf)
3756 {
3757 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3758 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3759 label = LABEL();
3760 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3761 skip_char_back(common);
3762 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3763 JUMPTO(SLJIT_C_NOT_ZERO, label);
3764 }
3765 else
3766 #endif
3767 {
3768 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3769 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3770 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3771 }
3772 check_start_used_ptr(common);
3773 return cc + LINK_SIZE;
3774 }
3775 SLJIT_ASSERT_STOP();
3776 return cc;
3777 }
3778
3779 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
3780 {
3781 /* This function consumes at least one input character. */
3782 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3783 DEFINE_COMPILER;
3784 pcre_uchar *ccbegin = cc;
3785 compare_context context;
3786 int size;
3787
3788 context.length = 0;
3789 do
3790 {
3791 if (cc >= ccend)
3792 break;
3793
3794 if (*cc == OP_CHAR)
3795 {
3796 size = 1;
3797 #ifdef SUPPORT_UTF
3798 if (common->utf && HAS_EXTRALEN(cc[1]))
3799 size += GET_EXTRALEN(cc[1]);
3800 #endif
3801 }
3802 else if (*cc == OP_CHARI)
3803 {
3804 size = 1;
3805 #ifdef SUPPORT_UTF
3806 if (common->utf)
3807 {
3808 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3809 size = 0;
3810 else if (HAS_EXTRALEN(cc[1]))
3811 size += GET_EXTRALEN(cc[1]);
3812 }
3813 else
3814 #endif
3815 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3816 size = 0;
3817 }
3818 else
3819 size = 0;
3820
3821 cc += 1 + size;
3822 context.length += IN_UCHARS(size);
3823 }
3824 while (size > 0 && context.length <= 128);
3825
3826 cc = ccbegin;
3827 if (context.length > 0)
3828 {
3829 /* We have a fixed-length byte sequence. */
3830 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3831 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3832
3833 context.sourcereg = -1;
3834 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3835 context.ucharptr = 0;
3836 #endif
3837 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
3838 return cc;
3839 }
3840
3841 /* A non-fixed length character will be checked if length == 0. */
3842 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
3843 }
3844
3845 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3846 {
3847 DEFINE_COMPILER;
3848 int offset = GET2(cc, 1) << 1;
3849
3850 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3851 if (!common->jscript_compat)
3852 {
3853 if (backtracks == NULL)
3854 {
3855 /* OVECTOR(1) contains the "string begin - 1" constant. */
3856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3857 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3858 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3859 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3860 return JUMP(SLJIT_C_NOT_ZERO);
3861 }
3862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3863 }
3864 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3865 }
3866
3867 /* Forward declarations, so that these two functions can be called before their definitions. */
3868 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
3869 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
3870
/* Allocates a zero-filled backtrack record of "size" bytes from the compiler
and pushes it onto the list headed by parent->top.

Arguments:
  size     size of the record in bytes (evaluated twice: no side effects)
  ccstart  value stored in the cc member of the new record
  error    value returned from the enclosing function when the compiler
             reports an error after the allocation

The macro expects the variables compiler, backtrack and parent to be in scope
at the point of expansion; the new record is left in backtrack. */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
3883
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
zero-filled backtrack record of `size` bytes, stores `ccstart` in it and makes
it the new top of `parent`. On a compiler error the enclosing function simply
returns. Requires `compiler`, `backtrack` and `parent` to be in scope. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
3896
/* Casts the local variable `backtrack` to a pointer to the given backtrack
record type, so its type-specific fields can be accessed. */
3897 #define BACKTRACK_AS(type) ((type *)backtrack)
3898
/* Emits the try-path code that matches one back reference (OP_REF or OP_REFI)
located at cc against the subject at STR_PTR.

  common      compiler state
  cc          points to the reference opcode; the group number follows it
  backtracks  list that receives every "match failed" jump
  withchecks  when TRUE, also emit the unset-group test (skipped in
              jscript_compat mode) and the empty-reference test
  emptyfail   only meaningful with withchecks: when TRUE an empty reference
              jumps to backtracks, otherwise it falls through to the end of
              the emitted code

Returns the address of the opcode following the reference
(cc + 1 + IMM2_SIZE). */
3899 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
3900 {
3901 DEFINE_COMPILER;
3902 int offset = GET2(cc, 1) << 1;
3903 struct sljit_jump *jump = NULL;
3904 struct sljit_jump *partial;
3905 struct sljit_jump *nopartial;
3906
/* TMP1 = first OVECTOR slot of the referenced group. */
3907 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3908 /* OVECTOR(1) contains the "string begin - 1" constant. */
3909 if (withchecks && !common->jscript_compat)
3910 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3911
3912 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3913 if (common->utf && *cc == OP_REFI)
3914 {
/* Caseless reference in UTF mode: the comparison is delegated to the C helper
do_utf_caselesscmp, called with three arguments. */
3915 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
/* `jump` is taken when both OVECTOR slots are equal (empty reference). */
3917 if (withchecks)
3918 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3919
3920 /* Needed to save important temporary registers. */
3921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* STACK_TOP's register is reused for the arguments pointer, and the current
STR_PTR is stored into its uchar_ptr field before the call. */
3922 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
3924 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3925 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are treated as failures; any other value becomes the
new STR_PTR. NOTE(review): 1 presumably means the subject ended during the
comparison (it triggers check_partial below) - confirm in do_utf_caselesscmp. */
3926 if (common->mode == JIT_COMPILE)
3927 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3928 else
3929 {
3930 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3931 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3932 check_partial(common, FALSE);
3933 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3934 JUMPHERE(nopartial);
3935 }
3936 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3937 }
3938 else
3939 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3940 {
/* TMP2 = second slot - first slot, i.e. the length of the referenced text;
the equal flag is set when that length is zero. */
3941 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3942 if (withchecks)
3943 jump = JUMP(SLJIT_C_ZERO);
3944
/* Advance STR_PTR by the reference length; `partial` is taken when that
moves it past STR_END. */
3945 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3946 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3947 if (common->mode == JIT_COMPILE)
3948 add_jump(compiler, backtracks, partial);
3949
/* Call the shared caseful/caseless compare routine and backtrack when TMP2
is non-zero afterwards. */
3950 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3951 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3952
3953 if (common->mode != JIT_COMPILE)
3954 {
/* Partial matching modes: when the reference is longer than the remaining
subject, compare only the part that is available (skipped when nothing is
left), then run check_partial and backtrack. */
3955 nopartial = JUMP(SLJIT_JUMP);
3956 JUMPHERE(partial);
3957 /* TMP2 -= STR_END - STR_PTR */
3958 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3959 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3960 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3961 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3962 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3963 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3964 JUMPHERE(partial);
3965 check_partial(common, FALSE);
3966 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3967 JUMPHERE(nopartial);
3968 }
3969 }
3970
/* Resolve the empty-reference jump (only created when withchecks is TRUE):
either it is a failure, or it lands here with STR_PTR unchanged. */
3971 if (jump != NULL)
3972 {
3973 if (emptyfail)
3974 add_jump(compiler, backtracks, jump);
3975 else
3976 JUMPHERE(jump);
3977 }
3978 return cc + 1 + IMM2_SIZE;
3979 }
3980
/* Emits the try-path code for a back reference that is followed by a repeat
opcode (OP_CRSTAR .. OP_CRMINRANGE).

  common  compiler state
  cc      points to the reference opcode; the repeat opcode follows at
          cc[1 + IMM2_SIZE]
  parent  backtrack record that receives the new iterator_backtrack

Returns the address of the opcode after the repeat, or NULL when the
backtrack record cannot be allocated (see PUSH_BACKTRACK). */
3981 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
3982 {
3983 DEFINE_COMPILER;
3984 backtrack_common *backtrack;
3985 pcre_uchar type;
3986 struct sljit_label *label;
3987 struct sljit_jump *zerolength;
3988 struct sljit_jump *jump = NULL;
3989 pcre_uchar *ccbegin = cc;
3990 int min = 0, max = 0;
3991 BOOL minimize;
3992
3993 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
3994
3995 type = cc[1 + IMM2_SIZE];
/* NOTE(review): relies on the minimizing OP_CRMIN* opcodes being the
odd-numbered ones - confirm against the opcode table. */
3996 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc past the repeat opcode. max == 0 is
used below as "no upper limit". */
3997 switch(type)
3998 {
3999 case OP_CRSTAR:
4000 case OP_CRMINSTAR:
4001 min = 0;
4002 max = 0;
4003 cc += 1 + IMM2_SIZE + 1;
4004 break;
4005 case OP_CRPLUS:
4006 case OP_CRMINPLUS:
4007 min = 1;
4008 max = 0;
4009 cc += 1 + IMM2_SIZE + 1;
4010 break;
4011 case OP_CRQUERY:
4012 case OP_CRMINQUERY:
4013 min = 0;
4014 max = 1;
4015 cc += 1 + IMM2_SIZE + 1;
4016 break;
4017 case OP_CRRANGE:
4018 case OP_CRMINRANGE:
4019 min = GET2(cc, 1 + IMM2_SIZE + 1);
4020 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4021 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4022 break;
4023 default:
4024 SLJIT_ASSERT_STOP();
4025 break;
4026 }
4027
/* Greedy (maximizing) repeat. */
4028 if (!minimize)
4029 {
4030 if (min == 0)
4031 {
/* Two slots: STACK(0) holds the current STR_PTR, STACK(1) a 0 marker. */
4032 allocate_stack(common, 2);
4033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4034 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4035 /* Temporary release of STR_PTR. */
4036 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4037 zerolength = compile_ref_checks(common, ccbegin, NULL);
4038 /* Restore if not zero length. */
4039 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4040 }
4041 else
4042 {
/* At least one iteration is required: push only the 0 marker; an unset
reference goes to the backtrack list via compile_ref_checks. */
4043 allocate_stack(common, 1);
4044 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4045 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4046 }
4047
/* The local slot POSSESSIVE0 serves as the iteration counter when a count is
needed. */
4048 if (min > 1 || max > 1)
4049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4050
/* Loop head: match one copy of the reference (checks already done above). */
4051 label = LABEL();
4052 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4053
4054 if (min > 1 || max > 1)
4055 {
4056 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4057 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing a backtrack position. */
4059 if (min > 1)
4060 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4061 if (max > 1)
4062 {
/* Below the maximum: push the current STR_PTR and try another copy. */
4063 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4064 allocate_stack(common, 1);
4065 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4066 JUMPTO(SLJIT_JUMP, label);
4067 JUMPHERE(jump);
4068 }
4069 }
4070
4071 if (max == 0)
4072 {
4073 /* Includes min > 1 case as well. */
4074 allocate_stack(common, 1);
4075 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4076 JUMPTO(SLJIT_JUMP, label);
4077 }
4078
/* A zero-length reference skips the whole loop and lands here. */
4079 JUMPHERE(zerolength);
4080 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4081
4082 decrease_call_count(common);
4083 return cc;
4084 }
4085
/* Minimizing repeat: STACK(0) holds the saved STR_PTR (0 initially) and
STACK(1) the iteration counter; the counter is not initialized for
OP_CRMINSTAR. */
4086 allocate_stack(common, 2);
4087 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4088 if (type != OP_CRMINSTAR)
4089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4090
4091 if (min == 0)
4092 {
/* Zero iterations are allowed: save STR_PTR and jump over the first match
attempt. */
4093 zerolength = compile_ref_checks(common, ccbegin, NULL);
4094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4095 jump = JUMP(SLJIT_JUMP);
4096 }
4097 else
4098 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4099
/* Label stored in the backtrack record: one more iteration starts here. */
4100 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
/* Fail when the counter in STACK(1) has already reached max. */
4101 if (max > 0)
4102 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4103
4104 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4105 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4106
4107 if (min > 1)
4108 {
/* Increment the counter and loop until the minimum is reached. */
4109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4110 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4111 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4112 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4113 }
4114 else if (max > 0)
4115 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4116
4117 if (jump != NULL)
4118 JUMPHERE(jump);
4119 JUMPHERE(zerolength);
4120
4121 decrease_call_count(common);
4122 return cc;
4123 }
4124
4125 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4126 {
4127 DEFINE_COMPILER;
4128 backtrack_common *backtrack;
4129 recurse_entry *entry = common->entries;
4130 recurse_entry *prev = NULL;
4131 int start = GET(cc, 1);
4132
4133 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4134 while (entry != NULL)
4135 {
4136 if (entry->start == start)
4137 break;
4138 prev = entry;
4139 entry = entry->next;
4140 }
4141
4142 if (entry == NULL)
4143 {
4144 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4145 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4146 return NULL;
4147 entry->next = NULL;
4148 entry->entry = NULL;
4149 entry->calls = NULL;
4150 entry->start = start;
4151
4152 if (prev != NULL)
4153 prev->next = entry;
4154 else
4155 common->entries = entry;
4156 }
4157
4158 if (common->has_set_som && common->mark_ptr != 0)
4159 {
4160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4161 allocate_stack(common, 2);
4162 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4163 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4164 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4165 }
4166 else if (common->has_set_som || common->mark_ptr != 0)
4167 {
4168 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4169 allocate_stack(common, 1);
4170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4171 }
4172
4173 if (entry->entry == NULL)
4174 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4175 else
4176 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4177 /* Leave if the match is failed. */
4178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4179 return cc + 1 + LINK_SIZE;
4180 }
4181
/* Emits the try-path code for an assertion: OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

  common       compiler state
  cc           points to the assertion opcode (or the BRAZERO/BRAMINZERO
               opcode in front of it)
  backtrack    assert_backtrack record to fill in (framesize, localptr,
               trypath and jump lists)
  conditional  TRUE when the assertion is used as a condition; failures are
               then collected in backtrack->condfailed instead of
               backtrack->common.topbacktracks

Each alternative is compiled with its own temporary backtrack_common, and its
backtrack path is emitted immediately after it. The leave/accept labels and
lists in `common` are replaced while the assertion is compiled and restored
on every exit.

Returns the address after the closing bracket of the assertion
(cc + 1 + LINK_SIZE), or NULL if the compiler reported an error. */
4182 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4183 {
4184 DEFINE_COMPILER;
4185 int framesize;
4186 int localptr;
4187 backtrack_common altbacktrack;
4188 pcre_uchar *ccbegin;
4189 pcre_uchar opcode;
4190 pcre_uchar bra = OP_BRA;
4191 jump_list *tmp = NULL;
/* `target` is where the "assertion does not hold" jumps are collected. */
4192 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4193 jump_list **found;
4194 /* Saving previous accept variables. */
4195 struct sljit_label *save_leavelabel = common->leavelabel;
4196 struct sljit_label *save_acceptlabel = common->acceptlabel;
4197 jump_list *save_leave = common->leave;
4198 jump_list *save_accept = common->accept;
4199 struct sljit_jump *jump;
4200 struct sljit_jump *brajump = NULL;
4201
4202 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4203 {
4204 SLJIT_ASSERT(!conditional);
4205 bra = *cc;
4206 cc++;
4207 }
4208 localptr = PRIV_DATA(cc);
4209 SLJIT_ASSERT(localptr != 0);
4210 framesize = get_framesize(common, cc, FALSE);
4211 backtrack->framesize = framesize;
4212 backtrack->localptr = localptr;
4213 opcode = *cc;
4214 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* `found` collects the jumps taken when an alternative matched: for positive
assertions that is the local success list `tmp`, for negative ones it is the
failure target. */
4215 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4216 ccbegin = cc;
4217 cc += GET(cc, 1);
4218
4219 if (bra == OP_BRAMINZERO)
4220 {
4221 /* This is a braminzero backtrack path. */
4222 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4223 free_stack(common, 1);
4224 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4225 }
4226
/* Stack set-up. With framesize < 0 only STR_PTR is pushed and the local slot
remembers the previous STACK_TOP. Otherwise framesize + 2 slots are
allocated: STACK(0) = STR_PTR, STACK(1) = previous local slot value, and
init_frame fills the rest. */
4227 if (framesize < 0)
4228 {
4229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4230 allocate_stack(common, 1);
4231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4232 }
4233 else
4234 {
4235 allocate_stack(common, framesize + 2);
4236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4237 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4241 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4242 }
4243
4244 memset(&altbacktrack, 0, sizeof(backtrack_common));
4245 common->leavelabel = NULL;
4246 common->leave = NULL;
/* One iteration per alternative of the assertion. */
4247 while (1)
4248 {
4249 common->acceptlabel = NULL;
4250 common->accept = NULL;
4251 altbacktrack.top = NULL;
4252 altbacktrack.topbacktracks = NULL;
4253
/* Second and later alternatives restart from the saved STR_PTR. */
4254 if (*ccbegin == OP_ALT)
4255 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4256
4257 altbacktrack.cc = ccbegin;
4258 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
4259 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4260 {
4261 common->leavelabel = save_leavelabel;
4262 common->acceptlabel = save_acceptlabel;
4263 common->leave = save_leave;
4264 common->accept = save_accept;
4265 return NULL;
4266 }
/* Accept jumps collected inside this alternative land at the end of it. */
4267 common->acceptlabel = LABEL();
4268 if (common->accept != NULL)
4269 set_jumps(common->accept, common->acceptlabel);
4270
4271 /* Reset stack. */
4272 if (framesize < 0)
4273 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4274 else {
4275 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4276 {
4277 /* We don't need to keep the STR_PTR, only the previous localptr. */
4278 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4279 }
4280 else
4281 {
4282 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4283 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4284 }
4285 }
4286
/* For a negative assertion a matching alternative means failure, so the
state is restored here before jumping to `found` (== target). */
4287 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4288 {
4289 /* We know that STR_PTR was stored on the top of the stack. */
4290 if (conditional)
4291 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4292 else if (bra == OP_BRAZERO)
4293 {
4294 if (framesize < 0)
4295 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4296 else
4297 {
4298 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4299 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4301 }
4302 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4304 }
4305 else if (framesize >= 0)
4306 {
4307 /* For OP_BRA and OP_BRAMINZERO. */
4308 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4309 }
4310 }
4311 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4312
/* Emit this alternative's backtrack path right away; its failure jumps fall
through to the next alternative (or to the "none matched" code). */
4313 compile_backtrackpath(common, altbacktrack.top);
4314 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4315 {
4316 common->leavelabel = save_leavelabel;
4317 common->acceptlabel = save_acceptlabel;
4318 common->leave = save_leave;
4319 common->accept = save_accept;
4320 return NULL;
4321 }
4322 set_jumps(altbacktrack.topbacktracks, LABEL());
4323
4324 if (*cc != OP_ALT)
4325 break;
4326
4327 ccbegin = cc;
4328 cc += GET(cc, 1);
4329 }
4330 /* None of them matched. */
4331 if (common->leave != NULL)
4332 set_jumps(common->leave, LABEL());
4333
4334 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4335 {
4336 /* Assert is failed. */
4337 if (conditional || bra == OP_BRAZERO)
4338 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4339
4340 if (framesize < 0)
4341 {
4342 /* The topmost item should be 0. */
4343 if (bra == OP_BRAZERO)
4344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4345 else
4346 free_stack(common, 1);
4347 }
4348 else
4349 {
4350 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4351 /* The topmost item should be 0. */
4352 if (bra == OP_BRAZERO)
4353 {
4354 free_stack(common, framesize + 1);
4355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4356 }
4357 else
4358 free_stack(common, framesize + 2);
4359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4360 }
/* With OP_BRAZERO a failed assertion is not a failure of the whole item, so
this jump is bound to the trypath label below instead of going to `target`. */
4361 jump = JUMP(SLJIT_JUMP);
4362 if (bra != OP_BRAZERO)
4363 add_jump(compiler, target, jump);
4364
4365 /* Assert is successful. */
4366 set_jumps(tmp, LABEL());
4367 if (framesize < 0)
4368 {
4369 /* We know that STR_PTR was stored on the top of the stack. */
4370 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4371 /* Keep the STR_PTR on the top of the stack. */
4372 if (bra == OP_BRAZERO)
4373 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4374 else if (bra == OP_BRAMINZERO)
4375 {
4376 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4378 }
4379 }
4380 else
4381 {
4382 if (bra == OP_BRA)
4383 {
4384 /* We don't need to keep the STR_PTR, only the previous localptr. */
4385 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4386 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4387 }
4388 else
4389 {
4390 /* We don't need to keep the STR_PTR, only the previous localptr. */
4391 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4392 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot receives STR_PTR for OP_BRAZERO and an immediate 0 for
OP_BRAMINZERO. */
4393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4394 }
4395 }
4396
4397 if (bra == OP_BRAZERO)
4398 {
4399 backtrack->trypath = LABEL();
4400 sljit_set_label(jump, backtrack->trypath);
4401 }
4402 else if (bra == OP_BRAMINZERO)
4403 {
/* NOTE(review): backtrack->trypath is not assigned in this function for the
OP_BRAMINZERO case; presumably the caller sets it beforehand - confirm. */
4404 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4405 JUMPHERE(brajump);
4406 if (framesize >= 0)
4407 {
4408 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4409 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4411 }
4412 set_jumps(backtrack->common.topbacktracks, LABEL());
4413 }
4414 }
4415 else
4416 {
4417 /* AssertNot is successful. */
4418 if (framesize < 0)
4419 {
4420 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4421 if (bra != OP_BRA)
4422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4423 else
4424 free_stack(common, 1);
4425 }
4426 else
4427 {
4428 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4430 /* The topmost item should be 0. */
4431 if (bra != OP_BRA)
4432 {
4433 free_stack(common, framesize + 1);
4434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4435 }
4436 else
4437 free_stack(common, framesize + 2);
4438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4439 }
4440
4441 if (bra == OP_BRAZERO)
4442 backtrack->trypath = LABEL();
4443 else if (bra == OP_BRAMINZERO)
4444 {
4445 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4446 JUMPHERE(brajump);
4447 }
4448
/* For the optional forms the failure jumps are resolved right here and the
list is cleared. */
4449 if (bra != OP_BRA)
4450 {
4451 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
4452 set_jumps(backtrack->common.topbacktracks, LABEL());
4453 backtrack->common.topbacktracks = NULL;
4454 }
4455 }
4456
/* Restore the caller's leave/accept state. */
4457 common->leavelabel = save_leavelabel;
4458 common->acceptlabel = save_acceptlabel;
4459 common->leave = save_leave;
4460 common->accept = save_accept;
4461 return cc + 1 + LINK_SIZE;
4462 }
4463
4464 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4465 {
4466 int condition = FALSE;
4467 pcre_uchar *slotA = name_table;
4468 pcre_uchar *slotB;
4469 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4470 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4471 sljit_w no_capture;
4472 int i;
4473
4474 locals += refno & 0xff;
4475 refno >>= 8;
4476 no_capture = locals[1];
4477
4478 for (i = 0; i < name_count; i++)
4479 {
4480 if (GET2(slotA, 0) == refno) break;
4481 slotA += name_entry_size;
4482 }
4483
4484 if (i < name_count)
4485 {
4486 /* Found a name for the number - there can be only one; duplicate names
4487 for different numbers are allowed, but not vice versa. First scan down
4488 for duplicates. */
4489
4490 slotB = slotA;
4491 while (slotB > name_table)
4492 {
4493 slotB -= name_entry_size;
4494 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4495 {
4496 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4497 if (condition) break;
4498 }
4499 else break;
4500 }
4501
4502 /* Scan up for duplicates */
4503 if (!condition)
4504 {
4505 slotB = slotA;
4506 for (i++; i < name_count; i++)
4507 {
4508 slotB += name_entry_size;
4509 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4510 {
4511 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4512 if (condition) break;
4513 }
4514 else break;
4515 }
4516 }
4517 }
4518 return condition;
4519 }
4520
4521 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4522 {
4523 int condition = FALSE;
4524 pcre_uchar *slotA = name_table;
4525 pcre_uchar *slotB;
4526 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4527 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4528 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4529 int i;
4530
4531 for (i = 0; i < name_count; i++)
4532 {
4533 if (GET2(slotA, 0) == recno) break;
4534 slotA += name_entry_size;
4535 }
4536
4537 if (i < name_count)
4538 {
4539 /* Found a name for the number - there can be only one; duplicate
4540 names for different numbers are allowed, but not vice versa. First
4541 scan down for duplicates. */
4542
4543 slotB = slotA;
4544 while (slotB > name_table)
4545 {
4546 slotB -= name_entry_size;
4547 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4548 {
4549 condition = GET2(slotB, 0) == group_num;
4550 if (condition) break;
4551 }
4552 else break;
4553 }
4554
4555 /* Scan up for duplicates */
4556 if (!condition)
4557 {
4558 slotB = slotA;
4559 for (i++; i < name_count; i++)
4560 {
4561 slotB += name_entry_size;
4562 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4563 {
4564 condition = GET2(slotB, 0) == group_num;
4565 if (condition) break;
4566 }
4567 else break;
4568 }
4569 }
4570 }
4571 return condition;
4572 }
4573
4574 /*
4575 Handling bracketed expressions is probably the most complex part.
4576
4577 Stack layout naming characters:
4578 S - Push the current STR_PTR
4579 0 - Push a 0 (NULL)
4580 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4581 before the next alternative. Not pushed if there are no alternatives.
4582 M - Any values pushed by the current alternative. Can be empty, or anything.
4583 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4584 L - Push the previous local (pointed by localptr) to the stack
4585 () - optional values stored on the stack
4586 ()* - optional, can be stored multiple times
4587
4588 The following list shows the regular expression templates, their PCRE byte codes
4589 and stack layout supported by pcre-sljit.
4590
4591 (?:) OP_BRA | OP_KET A M
4592 () OP_CBRA | OP_KET C M
4593 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4594 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4595 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4596 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4597 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4598 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4599 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4600 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4601 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4602 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4603 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4604 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4605 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4606 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4607 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4608 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4609 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4610 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4611 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4612 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4613
4614
4615 Stack layout naming characters:
4616 A - Push the alternative index (starting from 0) on the stack.
4617 Not pushed if there are no alternatives.
4618 M - Any values pushed by the current alternative. Can be empty, or anything.
4619
4620 The next list shows the possible content of a bracket:
4621 (|) OP_*BRA | OP_ALT ... M A
4622 (?()|) OP_*COND | OP_ALT M A
4623 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4624 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4625 Or nothing, if trace is unnecessary
4626 */
4627
4628 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4629 {
4630 DEFINE_COMPILER;
4631 backtrack_common *backtrack;
4632 pcre_uchar opcode;
4633 int localptr = 0;
4634 int offset = 0;
4635 int stacksize;
4636 pcre_uchar *ccbegin;
4637 pcre_uchar *trypath;
4638 pcre_uchar bra = OP_BRA;
4639 pcre_uchar ket;
4640 assert_backtrack *assert;
4641 BOOL has_alternatives;
4642 struct sljit_jump *jump;
4643 struct sljit_jump *skip;
4644 struct sljit_label *rmaxlabel = NULL;
4645 struct sljit_jump *braminzerojump = NULL;
4646
4647 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
4648
4649 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4650 {
4651 bra = *cc;
4652 cc++;
4653 opcode = *cc;
4654 }
4655
4656 opcode = *cc;
4657 ccbegin = cc;
4658 trypath = ccbegin + 1 + LINK_SIZE;
4659
4660 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4661 {
4662 /* Drop this bracket_backtrack. */
4663 parent->top = backtrack->prev;
4664 return bracketend(cc);
4665 }
4666
4667 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4668 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4669 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4670 cc += GET(cc, 1);
4671
4672 has_alternatives = *cc == OP_ALT;
4673 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4674 {
4675 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
4676 if (*trypath == OP_NRREF)
4677 {
4678 stacksize = GET2(trypath, 1);
4679 if (common->currententry == NULL || stacksize == RREF_ANY)
4680 has_alternatives = FALSE;
4681 else if (common->currententry->start == 0)
4682 has_alternatives = stacksize != 0;
4683 else
4684 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4685 }
4686 }
4687
4688 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4689 opcode = OP_SCOND;
4690 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4691 opcode = OP_ONCE;
4692
4693 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4694 {
4695 /* Capturing brackets has a pre-allocated space. */
4696 offset = GET2(ccbegin, 1 + LINK_SIZE);
4697 localptr = OVECTOR_PRIV(offset);
4698 offset <<= 1;
4699 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
4700 trypath += IMM2_SIZE;
4701 }
4702 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4703 {
4704 /* Other brackets simply allocate the next entry. */
4705 localptr = PRIV_DATA(ccbegin);
4706 SLJIT_ASSERT(localptr != 0);
4707 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
4708 if (opcode == OP_ONCE)
4709 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
4710 }
4711
4712 /* Instructions before the first alternative. */
4713 stacksize = 0;
4714 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4715 stacksize++;
4716 if (bra == OP_BRAZERO)
4717 stacksize++;
4718
4719 if (stacksize > 0)
4720 allocate_stack(common, stacksize);
4721
4722 stacksize = 0;
4723 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4724 {
4725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4726 stacksize++;
4727 }
4728
4729 if (bra == OP_BRAZERO)
4730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4731
4732 if (bra == OP_BRAMINZERO)
4733 {
4734 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
4735 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4736 if (ket != OP_KETRMIN)
4737 {
4738 free_stack(common, 1);
4739 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4740 }
4741 else
4742 {
4743 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4744 {
4745 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4746 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4747 /* Nothing stored during the first run. */
4748 skip = JUMP(SLJIT_JUMP);
4749 JUMPHERE(jump);
4750 /* Checking zero-length iteration. */
4751 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4752 {
4753 /* When we come from outside, localptr contains the previous STR_PTR. */
4754 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4755 }
4756 else
4757 {
4758 /* Except when the whole stack frame must be saved. */
4759 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4760 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
4761 }
4762 JUMPHERE(skip);
4763 }
4764 else
4765 {
4766 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4767 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4768 JUMPHERE(jump);
4769 }
4770 }
4771 }
4772
4773 if (ket == OP_KETRMIN)
4774 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
4775
4776 if (ket == OP_KETRMAX)
4777 {
4778 rmaxlabel = LABEL();
4779 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4780 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
4781 }
4782
4783 /* Handling capturing brackets and alternatives. */
4784 if (opcode == OP_ONCE)
4785 {
4786 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4787 {
4788 /* Neither capturing brackets nor recursions are found in the block. */
4789 if (ket == OP_KETRMIN)
4790 {
4791 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4792 allocate_stack(common, 2);
4793 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4795 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4796 }
4797 else if (ket == OP_KETRMAX || has_alternatives)
4798 {
4799 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4800 allocate_stack(common, 1);
4801 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4802 }
4803 else
4804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4805 }
4806 else
4807 {
4808 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4809 {
4810 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
4811 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4812 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
4813 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4814 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4816 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
4817 }
4818 else
4819 {
4820 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
4821 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4822 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
4823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4825 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
4826 }
4827 }
4828 }
4829 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4830 {
4831 /* Saving the previous values. */
4832 allocate_stack(common, 3);
4833 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4834 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4835 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4837 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4838 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4839 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4840 }
4841 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4842 {
4843 /* Saving the previous value. */
4844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4845 allocate_stack(common, 1);
4846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4848 }
4849 else if (has_alternatives)
4850 {
4851 /* Pushing the starting string pointer. */
4852 allocate_stack(common, 1);
4853 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4854 }
4855
4856 /* Generating code for the first alternative. */
4857 if (opcode == OP_COND || opcode == OP_SCOND)
4858 {
4859 if (*trypath == OP_CREF)
4860 {
4861 SLJIT_ASSERT(has_alternatives);
4862 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
4863 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4864 trypath += 1 + IMM2_SIZE;
4865 }
4866 else if (*trypath == OP_NCREF)
4867 {
4868 SLJIT_ASSERT(has_alternatives);
4869 stacksize = GET2(trypath, 1);
4870 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4871
4872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4874 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4875 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
4876 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
4877 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4878 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4879 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4880 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4881
4882 JUMPHERE(jump);
4883 trypath += 1 + IMM2_SIZE;
4884 }
4885 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
4886 {
4887 /* Never has other case. */
4888 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
4889
4890 stacksize = GET2(trypath, 1);
4891 if (common->currententry == NULL)
4892 stacksize = 0;
4893 else if (stacksize == RREF_ANY)
4894 stacksize = 1;
4895 else if (common->currententry->start == 0)
4896 stacksize = stacksize == 0;
4897 else
4898 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4899
4900 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
4901 {
4902 SLJIT_ASSERT(!has_alternatives);
4903 if (stacksize != 0)
4904 trypath += 1 + IMM2_SIZE;
4905 else
4906 {
4907 if (*cc == OP_ALT)
4908 {
4909 trypath = cc + 1 + LINK_SIZE;
4910 cc += GET(cc, 1);
4911 }
4912 else
4913 trypath = cc;
4914 }
4915 }
4916 else
4917 {
4918 SLJIT_ASSERT(has_alternatives);
4919
4920 stacksize = GET2(trypath, 1);
4921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4922 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4925 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4926 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
4927 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4928 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4929 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4930 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4931 trypath += 1 + IMM2_SIZE;
4932 }
4933 }
4934 else
4935 {
4936 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
4937 /* Similar code as PUSH_BACKTRACK macro. */
4938 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
4939 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4940 return NULL;
4941 memset(assert, 0, sizeof(assert_backtrack));
4942 assert->common.cc = trypath;
4943 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
4944 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
4945 }
4946 }
4947
4948 compile_trypath(common, trypath, cc, backtrack);
4949 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4950 return NULL;
4951
4952 if (opcode == OP_ONCE)
4953 {
4954 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
4955 {
4956 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4957 /* TMP2, which is set here, is used by OP_KETRMAX below. */
4958 if (ket == OP_KETRMAX)
4959 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4960 else if (ket == OP_KETRMIN)
4961 {
4962 /* Move the STR_PTR to the localptr. */
4963 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4964 }
4965 }
4966 else
4967 {
4968 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4969 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
4970 if (ket == OP_KETRMAX)
4971 {
4972 /* TMP2, which is set here, is used by OP_KETRMAX below. */
4973 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4974 }
4975 }
4976 }
4977
4978 stacksize = 0;
4979 if (ket != OP_KET || bra != OP_BRA)
4980 stacksize++;
4981 if (has_alternatives && opcode != OP_ONCE)
4982 stacksize++;
4983
4984 if (stacksize > 0)
4985 allocate_stack(common, stacksize);
4986
4987 stacksize = 0;
4988 if (ket != OP_KET)
4989 {
4990 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4991 stacksize++;
4992 }
4993 else if (bra != OP_BRA)
4994 {
4995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4996 stacksize++;
4997 }
4998
4999 if (has_alternatives)
5000 {
5001 if (opcode != OP_ONCE)
5002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5003 if (ket != OP_KETRMAX)
5004 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5005 }
5006
5007 /* Must be after the trypath label. */
5008 if (offset != 0)
5009 {
5010 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5011 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5012 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5013 }
5014
5015 if (ket == OP_KETRMAX)
5016 {
5017 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5018 {
5019 if (has_alternatives)
5020 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5021 /* Checking zero-length iteration. */
5022 if (opcode != OP_ONCE)
5023 {
5024 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5025 /* Drop STR_PTR for greedy plus quantifier. */
5026 if (bra != OP_BRAZERO)
5027 free_stack(common, 1);
5028 }
5029 else
5030 /* TMP2 must contain the starting STR_PTR. */
5031 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5032 }
5033 else
5034 JUMPTO(SLJIT_JUMP, rmaxlabel);
5035 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5036 }
5037
5038 if (bra == OP_BRAZERO)
5039 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5040
5041 if (bra == OP_BRAMINZERO)
5042 {
5043 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5044 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5045 if (braminzerojump != NULL)
5046 {
5047 JUMPHERE(braminzerojump);
5048 /* We need to release the end pointer to perform the
5049 backtrack for the zero-length iteration. When
5050 framesize is < 0, OP_ONCE will do the release itself. */
5051 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5052 {
5053 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5054 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5055 }
5056 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5057 free_stack(common, 1);
5058 }
5059 /* Continue to the normal backtrack. */
5060 }
5061
5062 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5063 decrease_call_count(common);
5064
5065 /* Skip the other alternatives. */
5066 while (*cc == OP_ALT)
5067 cc += GET(cc, 1);
5068 cc += 1 + LINK_SIZE;
5069 return cc;
5070 }
5071
5072 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5073 {
5074 DEFINE_COMPILER;
5075 backtrack_common *backtrack;
5076 pcre_uchar opcode;
5077 int localptr;
5078 int cbraprivptr = 0;
5079 int framesize;
5080 int stacksize;
5081 int offset = 0;
5082 BOOL zero = FALSE;
5083 pcre_uchar *ccbegin = NULL;
5084 int stack;
5085 struct sljit_label *loop = NULL;
5086 struct jump_list *emptymatch = NULL;
5087
5088 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5089 if (*cc == OP_BRAPOSZERO)
5090 {
5091 zero = TRUE;
5092 cc++;
5093 }
5094
5095 opcode = *cc;
5096 localptr = PRIV_DATA(cc);
5097 SLJIT_ASSERT(localptr != 0);
5098 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5099 switch(opcode)
5100 {
5101 case OP_BRAPOS:
5102 case OP_SBRAPOS:
5103 ccbegin = cc + 1 + LINK_SIZE;
5104 break;
5105
5106 case OP_CBRAPOS:
5107 case OP_SCBRAPOS:
5108 offset = GET2(cc, 1 + LINK_SIZE);
5109 cbraprivptr = OVECTOR_PRIV(offset);
5110 offset <<= 1;
5111 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5112 break;
5113
5114 default:
5115 SLJIT_ASSERT_STOP();
5116 break;
5117 }
5118
5119 framesize = get_framesize(common, cc, FALSE);
5120 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5121 if (framesize < 0)
5122 {
5123 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5124 if (!zero)
5125 stacksize++;
5126 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5127 allocate_stack(common, stacksize);
5128 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5129
5130 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5131 {
5132 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5133 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5135 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5136 }
5137 else
5138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5139
5140 if (!zero)
5141 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5142 }
5143 else
5144 {
5145 stacksize = framesize + 1;
5146 if (!zero)
5147 stacksize++;
5148 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5149 stacksize++;
5150 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5151 allocate_stack(common, stacksize);
5152
5153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5154 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5156 stack = 0;
5157 if (!zero)
5158 {
5159 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5160 stack++;
5161 }
5162 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5163 {
5164 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5165 stack++;
5166 }
5167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5168 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5169 }
5170
5171 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5173
5174 loop = LABEL();
5175 while (*cc != OP_KETRPOS)
5176 {
5177 backtrack->top = NULL;
5178 backtrack->topbacktracks = NULL;
5179 cc += GET(cc, 1);
5180
5181 compile_trypath(common, ccbegin, cc, backtrack);
5182 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5183 return NULL;
5184
5185 if (framesize < 0)
5186 {
5187 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5188
5189 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5190 {
5191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5193 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5195 }
5196 else
5197 {
5198 if (opcode == OP_SBRAPOS)
5199 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5201 }
5202
5203 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5204 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5205
5206 if (!zero)
5207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5208 }
5209 else
5210 {
5211 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5212 {
5213 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5218 }
5219 else
5220 {
5221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5222 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5223 if (opcode == OP_SBRAPOS)
5224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5225 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5226 }
5227
5228 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5229 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5230
5231 if (!zero)
5232 {
5233 if (framesize < 0)
5234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5235 else
5236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5237 }
5238 }
5239 JUMPTO(SLJIT_JUMP, loop);
5240 flush_stubs(common);
5241
5242 compile_backtrackpath(common, backtrack->top);
5243 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5244 return NULL;
5245 set_jumps(backtrack->topbacktracks, LABEL());
5246
5247 if (framesize < 0)
5248 {
5249 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5250 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5251 else
5252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5253 }
5254 else
5255 {
5256 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5257 {
5258 /* Last alternative. */
5259 if (*cc == OP_KETRPOS)
5260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5261 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5262 }
5263 else
5264 {
5265 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5266 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5267 }
5268 }
5269
5270 if (*cc == OP_KETRPOS)
5271 break;
5272 ccbegin = cc + 1 + LINK_SIZE;
5273 }
5274
5275 backtrack->topbacktracks = NULL;
5276 if (!zero)
5277 {
5278 if (framesize < 0)
5279 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5280 else /* TMP2 is set to [localptr] above. */
5281 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5282 }
5283
5284 /* None of them matched. */
5285 set_jumps(emptymatch, LABEL());
5286 decrease_call_count(common);
5287 return cc + 1 + LINK_SIZE;
5288 }
5289
5290 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5291 {
5292 int class_len;
5293
5294 *opcode = *cc;
5295 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5296 {
5297 cc++;
5298 *type = OP_CHAR;
5299 }
5300 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5301 {
5302 cc++;
5303 *type = OP_CHARI;
5304 *opcode -= OP_STARI - OP_STAR;
5305 }
5306 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5307 {
5308 cc++;
5309 *type = OP_NOT;
5310 *opcode -= OP_NOTSTAR - OP_STAR;
5311 }
5312 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5313 {
5314 cc++;
5315 *type = OP_NOTI;
5316 *opcode -= OP_NOTSTARI - OP_STAR;
5317 }
5318 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5319 {
5320 cc++;
5321 *opcode -= OP_TYPESTAR - OP_STAR;
5322 *type = 0;
5323 }
5324 else
5325 {
5326 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5327 *type = *opcode;
5328 cc++;
5329 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5330 *opcode = cc[class_len - 1];
5331 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5332 {
5333 *opcode -= OP_CRSTAR - OP_STAR;
5334 if (end != NULL)
5335 *end = cc + class_len;
5336 }
5337 else
5338 {
5339 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5340 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5341 *arg2 = GET2(cc, class_len);
5342
5343 if (*arg2 == 0)
5344 {
5345 SLJIT_ASSERT(*arg1 != 0);
5346 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5347 }
5348 if (*arg1 == *arg2)
5349 *opcode = OP_EXACT;
5350
5351 if (end != NULL)
5352 *end = cc + class_len + 2 * IMM2_SIZE;
5353 }
5354 return cc;
5355 }
5356
5357 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5358 {
5359 *arg1 = GET2(cc, 0);
5360 cc += IMM2_SIZE;
5361 }
5362
5363 if (*type == 0)
5364 {
5365 *type = *cc;
5366 if (end != NULL)
5367 *end = next_opcode(common, cc);
5368 cc++;
5369 return cc;
5370 }
5371
5372 if (end != NULL)
5373 {
5374 *end = cc + 1;
5375 #ifdef SUPPORT_UTF
5376 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5377 #endif
5378 }
5379 return cc;
5380 }
5381
5382 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5383 {
5384 DEFINE_COMPILER;
5385 backtrack_common *backtrack;
5386 pcre_uchar opcode;
5387 pcre_uchar type;
5388 int arg1 = -1, arg2 = -1;
5389 pcre_uchar* end;
5390 jump_list *nomatch = NULL;
5391 struct sljit_jump *jump = NULL;
5392 struct sljit_label *label;
5393
5394 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5395
5396 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5397
5398 switch(opcode)
5399 {
5400 case OP_STAR:
5401 case OP_PLUS:
5402 case OP_UPTO:
5403 case OP_CRRANGE:
5404 if (type == OP_ANYNL || type == OP_EXTUNI)
5405 {
5406 if (opcode == OP_STAR || opcode == OP_UPTO)
5407 {
5408 allocate_stack(common, 2);
5409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5411 }
5412 else
5413 {
5414 allocate_stack(common, 1);
5415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5416 }
5417 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5418 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5419
5420 label = LABEL();
5421 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5422 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5423 {
5424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5425 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5426 if (opcode == OP_CRRANGE && arg2 > 0)
5427 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
5428 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5429 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5431 }
5432
5433 allocate_stack(common, 1);
5434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5435 JUMPTO(SLJIT_JUMP, label);
5436 if (jump != NULL)
5437 JUMPHERE(jump);
5438 }
5439 else
5440 {
5441 allocate_stack(common, 2);
5442 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5444 label = LABEL();
5445 compile_char1_trypath(common, type, cc, &nomatch);
5446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5447 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5448 {
5449 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5450 JUMPTO(SLJIT_JUMP, label);
5451 }
5452 else
5453 {
5454 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5455 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5457 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5458 }
5459 set_jumps(nomatch, LABEL());
5460 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5461 add_jump(compiler, &backtrack->topbacktracks,
5462 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
5463 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5464 }
5465 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5466 break;
5467
5468 case OP_MINSTAR:
5469 case OP_MINPLUS:
5470 allocate_stack(common, 1);
5471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5472 if (opcode == OP_MINPLUS)
5473 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5474 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5475 break;
5476
5477 case OP_MINUPTO:
5478 case OP_CRMINRANGE:
5479 allocate_stack(common, 2);
5480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5482 if (opcode == OP_CRMINRANGE)
5483 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5484 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5485 break;
5486
5487 case OP_QUERY:
5488 case OP_MINQUERY:
5489 allocate_stack(common, 1);
5490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5491 if (opcode == OP_QUERY)
5492 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5493 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
5494 break;
5495
5496 case OP_EXACT:
5497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5498 label = LABEL();
5499 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
5500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5501 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5503 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5504 break;
5505
5506 case OP_POSSTAR:
5507 case OP_POSPLUS:
5508 case OP_POSUPTO:
5509 if (opcode != OP_POSSTAR)
5510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5512 label = LABEL();
5513 compile_char1_trypath(common, type, cc, &nomatch);
5514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5515 if (opcode != OP_POSUPTO)
5516 {
5517 if (opcode == OP_POSPLUS)
5518 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5519 JUMPTO(SLJIT_JUMP, label);
5520 }
5521 else
5522 {
5523 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5524 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5525 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5526 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5527 }
5528 set_jumps(nomatch, LABEL());
5529 if (opcode == OP_POSPLUS)
5530 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5531 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5532 break;
5533
5534 case OP_POSQUERY:
5535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5536 compile_char1_trypath(common, type, cc, &nomatch);
5537 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5538 set_jumps(nomatch, LABEL());
5539 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5540 break;
5541
5542 default:
5543 SLJIT_ASSERT_STOP();
5544 break;
5545 }
5546
5547 decrease_call_count(common);
5548 return end;
5549 }
5550
5551 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5552 {
5553 DEFINE_COMPILER;
5554 backtrack_common *backtrack;
5555
5556 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5557
5558 if (*cc == OP_FAIL)
5559 {
5560 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5561 return cc + 1;
5562 }
5563
5564 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5565 {
5566 /* No need to check notempty conditions. */
5567 if (common->acceptlabel == NULL)
5568 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5569 else
5570 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5571 return cc + 1;
5572 }
5573
5574 if (common->acceptlabel == NULL)
5575 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5576 else
5577 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5578 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5579 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5580 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5581 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5582 if (common->acceptlabel == NULL)
5583 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5584 else
5585 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5586 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5587 if (common->acceptlabel == NULL)
5588 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5589 else
5590 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5591 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
5592 return cc + 1;
5593 }
5594
5595 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
5596 {
5597 DEFINE_COMPILER;
5598 int offset = GET2(cc, 1);
5599
5600 /* Data will be discarded anyway... */
5601 if (common->currententry != NULL)
5602 return cc + 1 + IMM2_SIZE;
5603
5604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5605 offset <<= 1;
5606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5608 return cc + 1 + IMM2_SIZE;
5609 }
5610
5611 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
5612 {
5613 DEFINE_COMPILER;
5614 backtrack_common *backtrack;
5615
5616 while (cc < ccend)
5617 {
5618 switch(*cc)
5619 {
5620 case OP_SOD:
5621 case OP_SOM:
5622 case OP_NOT_WORD_BOUNDARY:
5623 case OP_WORD_BOUNDARY:
5624 case OP_NOT_DIGIT:
5625 case OP_DIGIT:
5626 case OP_NOT_WHITESPACE:
5627 case OP_WHITESPACE:
5628 case OP_NOT_WORDCHAR:
5629 case OP_WORDCHAR:
5630 case OP_ANY:
5631 case OP_ALLANY:
5632 case OP_ANYBYTE:
5633 case OP_NOTPROP:
5634 case OP_PROP:
5635 case OP_ANYNL:
5636 case OP_NOT_HSPACE:
5637 case OP_HSPACE:
5638 case OP_NOT_VSPACE:
5639 case OP_VSPACE:
5640 case OP_EXTUNI:
5641 case OP_EODN:
5642 case OP_EOD:
5643 case OP_CIRC:
5644 case OP_CIRCM:
5645 case OP_DOLL:
5646 case OP_DOLLM:
5647 case OP_NOT:
5648 case OP_NOTI:
5649 case OP_REVERSE:
5650 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5651 break;
5652
5653 case OP_SET_SOM:
5654 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5655 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5656 allocate_stack(common, 1);
5657 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5658 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5659 cc++;
5660 break;
5661
5662 case OP_CHAR:
5663 case OP_CHARI:
5664 if (common->mode == JIT_COMPILE)
5665 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5666 else
5667 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5668 break;
5669
5670 case OP_STAR:
5671 case OP_MINSTAR:
5672 case OP_PLUS:
5673 case OP_MINPLUS:
5674 case OP_QUERY:
5675 case OP_MINQUERY:
5676 case OP_UPTO:
5677 case OP_MINUPTO:
5678 case OP_EXACT:
5679 case OP_POSSTAR:
5680 case OP_POSPLUS:
5681 case OP_POSQUERY:
5682 case OP_POSUPTO:
5683 case OP_STARI:
5684 case OP_MINSTARI:
5685 case OP_PLUSI:
5686 case OP_MINPLUSI:
5687 case OP_QUERYI:
5688 case OP_MINQUERYI:
5689 case OP_UPTOI:
5690 case OP_MINUPTOI:
5691 case OP_EXACTI:
5692 case OP_POSSTARI:
5693 case OP_POSPLUSI:
5694 case OP_POSQUERYI:
5695 case OP_POSUPTOI:
5696 case OP_NOTSTAR:
5697 case OP_NOTMINSTAR:
5698 case OP_NOTPLUS:
5699 case OP_NOTMINPLUS:
5700 case OP_NOTQUERY:
5701 case OP_NOTMINQUERY:
5702 case OP_NOTUPTO:
5703 case OP_NOTMINUPTO:
5704 case OP_NOTEXACT:
5705 case OP_NOTPOSSTAR:
5706 case OP_NOTPOSPLUS:
5707 case OP_NOTPOSQUERY:
5708 case OP_NOTPOSUPTO:
5709 case OP_NOTSTARI:
5710 case OP_NOTMINSTARI:
5711 case OP_NOTPLUSI:
5712 case OP_NOTMINPLUSI:
5713 case OP_NOTQUERYI:
5714 case OP_NOTMINQUERYI:
5715 case OP_NOTUPTOI:
5716 case OP_NOTMINUPTOI:
5717 case OP_NOTEXACTI:
5718 case OP_NOTPOSSTARI:
5719 case OP_NOTPOSPLUSI:
5720 case OP_NOTPOSQUERYI:
5721 case OP_NOTPOSUPTOI:
5722 case OP_TYPESTAR:
5723 case OP_TYPEMINSTAR:
5724 case OP_TYPEPLUS:
5725 case OP_TYPEMINPLUS:
5726 case OP_TYPEQUERY:
5727 case OP_TYPEMINQUERY:
5728 case OP_TYPEUPTO:
5729 case OP_TYPEMINUPTO:
5730 case OP_TYPEEXACT:
5731 case OP_TYPEPOSSTAR:
5732 case OP_TYPEPOSPLUS:
5733 case OP_TYPEPOSQUERY:
5734 case OP_TYPEPOSUPTO:
5735 cc = compile_iterator_trypath(common, cc, parent);
5736 break;
5737
5738 case OP_CLASS:
5739 case OP_NCLASS:
5740 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5741 cc = compile_iterator_trypath(common, cc, parent);
5742 else
5743 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5744 break;
5745
5746 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5747 case OP_XCLASS:
5748 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5749 cc = compile_iterator_trypath(common, cc, parent);
5750 else
5751 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
5752 break;
5753 #endif
5754
5755 case OP_REF:
5756 case OP_REFI:
5757 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5758 cc = compile_ref_iterator_trypath(common, cc, parent);
5759 else
5760 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
5761 break;
5762
5763 case OP_RECURSE:
5764 cc = compile_recurse_trypath(common, cc, parent);
5765 break;
5766
5767 case OP_ASSERT:
5768 case OP_ASSERT_NOT:
5769 case OP_ASSERTBACK:
5770 case OP_ASSERTBACK_NOT:
5771 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
5772 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
5773 break;
5774
5775 case OP_BRAMINZERO:
5776 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
5777 cc = bracketend(cc + 1);
5778 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5779 {
5780 allocate_stack(common, 1);
5781 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5782 }
5783 else
5784 {
5785 allocate_stack(common, 2);
5786 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5787 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5788 }
5789 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
5790 if (cc[1] > OP_ASSERTBACK_NOT)
5791 decrease_call_count(common);
5792 break;
5793
5794 case OP_ONCE:
5795 case OP_ONCE_NC:
5796 case OP_BRA:
5797 case OP_CBRA:
5798 case OP_COND:
5799 case OP_SBRA:
5800 case OP_SCBRA:
5801 case OP_SCOND:
5802 cc = compile_bracket_trypath(common, cc, parent);
5803 break;
5804
5805 case OP_BRAZERO:
5806 if (cc[1] > OP_ASSERTBACK_NOT)
5807 cc = compile_bracket_trypath(common, cc, parent);
5808 else
5809 {
5810 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
5811 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
5812 }
5813 break;
5814
5815 case OP_BRAPOS:
5816 case OP_CBRAPOS:
5817 case OP_SBRAPOS:
5818 case OP_SCBRAPOS:
5819 case OP_BRAPOSZERO:
5820 cc = compile_bracketpos_trypath(common, cc, parent);
5821 break;
5822
5823 case OP_MARK:
5824 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5825 SLJIT_ASSERT(common->mark_ptr != 0);
5826 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5827 allocate_stack(common, 1);
5828 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5830 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
5831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
5832 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
5833 cc += 1 + 2 + cc[1];
5834 break;
5835
5836 case OP_COMMIT:
5837 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
5838 cc += 1;
5839 break;
5840
5841 case OP_FAIL:
5842 case OP_ACCEPT:
5843 case OP_ASSERT_ACCEPT:
5844 cc = compile_fail_accept_trypath(common, cc, parent);
5845 break;
5846
5847 case OP_CLOSE:
5848 cc = compile_close_trypath(common, cc);
5849 break;
5850
5851 case OP_SKIPZERO:
5852 cc = bracketend(cc + 1);
5853 break;
5854
5855 default:
5856 SLJIT_ASSERT_STOP();
5857 return;
5858 }
5859 if (cc == NULL)
5860 return;
5861 }
5862 SLJIT_ASSERT(cc == ccend);
5863 }
5864
/* The backtrack-record helper macros used by the code above are not needed
   past this point, so they are removed here. */
5865 #undef PUSH_BACKTRACK
5866 #undef PUSH_BACKTRACK_NOVALUE
5867 #undef BACKTRACK_AS
5868
/* Calls compile_backtrackpath() for the given backtrack chain and returns
   from the enclosing function when the sljit compiler reports an error.
   The expansion uses a bare "return;", so it can only be used inside
   functions returning void, and it expects the locals "common" and
   "compiler" to be in scope. */
5869 #define COMPILE_BACKTRACKPATH(current) \
5870 do \
5871 { \
5872 compile_backtrackpath(common, (current)); \
5873 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5874 return; \
5875 } \
5876 while (0)
5877
/* Reinterprets the local variable "current" as a pointer to the given
   backtrack record type. Note that "current" is taken from the scope of
   the caller; it is not a macro argument. */
5878 #define CURRENT_AS(type) ((type *)current)
5879
/* Generates the backtrack path of a character / type / class iterator.

   The iterator is decoded with get_iterator_parameters(), which supplies
   the normalized opcode, the character type and the two numeric arguments
   (arg1, arg2). For every opcode the generated code reloads STR_PTR from the
   backtrack stack and either jumps back to the try path label stored in the
   iterator_backtrack record (to retry with a different repeat count) or
   releases the stack slots of the iterator and falls through to the
   backtrack code of the previous item.

   Arguments:
     common    the compiler state
     current   the backtrack record of the iterator (an iterator_backtrack) */
5880 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
5881 {
5882 DEFINE_COMPILER;
5883 pcre_uchar *cc = current->cc;
5884 pcre_uchar opcode;
5885 pcre_uchar type;
5886 int arg1 = -1, arg2 = -1;
5887 struct sljit_label *label = NULL;
5888 struct sljit_jump *jump = NULL;
5889
5890 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5891
5892 switch(opcode)
5893 {
/* Greedy iterators. */
5894 case OP_STAR:
5895 case OP_PLUS:
5896 case OP_UPTO:
5897 case OP_CRRANGE:
5898 if (type == OP_ANYNL || type == OP_EXTUNI)
5899 {
/* One stack slot is popped per backtrack; a non-zero value is used as the
   next STR_PTR to retry from, a zero value ends the retries. */
5900 set_jumps(current->topbacktracks, LABEL());
5901 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5902 free_stack(common, 1);
5903 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
5904 }
5905 else
5906 {
/* Two slots are used: STACK(0) holds the string position and STACK(1) a
   counter (presumably the number of matched characters plus one - confirm
   against the matching try path). arg2 is overridden with the lower limit
   for the opcodes which do not carry one explicitly. */
5907 if (opcode == OP_STAR || opcode == OP_UPTO)
5908 arg2 = 0;
5909 else if (opcode == OP_PLUS)
5910 arg2 = 1;
/* Stop giving characters back once the counter is not above arg2 + 1;
   otherwise decrement it, move the saved position back with
   skip_char_back() and retry the try path. */
5911 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5912 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5913 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5914 skip_char_back(common);
5915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5916 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
/* Only OP_PLUS and OP_CRRANGE bind their pending top backtrack jumps here,
   just before the slots are released. */
5917 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5918 set_jumps(current->topbacktracks, LABEL());
5919 JUMPHERE(jump);
5920 free_stack(common, 2);
5921 }
5922 break;
5923
/* Minimizing star / plus: try to match one more character from the saved
   position, store the new position and retry the try path. When the extra
   character does not match, the single stack slot is released. */
5924 case OP_MINSTAR:
5925 case OP_MINPLUS:
5926 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5927 if (opcode == OP_MINPLUS)
5928 {
/* The jumps collected so far are bound here and the list is cleared, so
   that the list can be reused for the character match emitted below. */
5929 set_jumps(current->topbacktracks, LABEL());
5930 current->topbacktracks = NULL;
5931 }
5932 compile_char1_trypath(common, type, cc, &current->topbacktracks);
5933 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5934 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5935 set_jumps(current->topbacktracks, LABEL());
5936 free_stack(common, 1);
5937 break;
5938
/* Minimizing iterators with limits. STACK(0) holds the string position and
   STACK(1) a counter which is incremented after each extra character. */
5939 case OP_MINUPTO:
5940 case OP_CRMINRANGE:
5941 if (opcode == OP_CRMINRANGE)
5942 {
5943 set_jumps(current->topbacktracks, LABEL());
5944 current->topbacktracks = NULL;
5945 label = LABEL();
5946 }
5947 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5948 compile_char1_trypath(common, type, cc, &current->topbacktracks);
5949
5950 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5952 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5954
/* For OP_CRMINRANGE keep matching characters (without returning to the try
   path) while the counter is below arg2 + 1. */
5955 if (opcode == OP_CRMINRANGE)
5956 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5957
/* arg1 == 0 makes the return to the try path unconditional for
   OP_CRMINRANGE; otherwise the try path is retried only while the counter
   is below arg1 + 2. */
5958 if (opcode == OP_CRMINRANGE && arg1 == 0)
5959 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5960 else
5961 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
5962
5963 set_jumps(current->topbacktracks, LABEL());
5964 free_stack(common, 2);
5965 break;
5966
/* Greedy query: the slot is read into STR_PTR and overwritten with zero, so
   a second backtrack finds zero and releases the slot. The pending top
   backtrack jumps land on a sequence which does the same reload/clear and
   returns to the try path unconditionally. */
5967 case OP_QUERY:
5968 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5970 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
5971 jump = JUMP(SLJIT_JUMP);
5972 set_jumps(current->topbacktracks, LABEL());
5973 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5974 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5975 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5976 JUMPHERE(jump);
5977 free_stack(common, 1);
5978 break;
5979
/* Minimizing query: on the first backtrack (non-zero slot) the character is
   matched and the try path is retried; the slot is cleared so the next
   backtrack, or a failed character match, releases it. */
5980 case OP_MINQUERY:
5981 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5983 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5984 compile_char1_trypath(common, type, cc, &current->topbacktracks);
5985 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
5986 set_jumps(current->topbacktracks, LABEL());
5987 JUMPHERE(jump);
5988 free_stack(common, 1);
5989 break;
5990
/* Nothing is restored from the stack for these; only the pending top
   backtrack jumps are bound to this position. */
5991 case OP_EXACT:
5992 case OP_POSPLUS:
5993 set_jumps(current->topbacktracks, LABEL());
5994 break;
5995
/* No backtrack code is generated for these possessive iterators. */
5996 case OP_POSSTAR:
5997 case OP_POSQUERY:
5998 case OP_POSUPTO:
5999 break;
6000
6001 default:
6002 SLJIT_ASSERT_STOP();
6003 break;
6004 }
6005 }
6006
6007 static void compile_ref_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6008 {
6009 DEFINE_COMPILER;
6010 pcre_uchar *cc = current->cc;
6011 pcre_uchar type;
6012
6013 type = cc[1 + IMM2_SIZE];
6014 if ((type & 0x1) == 0)
6015 {
6016 set_jumps(current->topbacktracks, LABEL());
6017 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6018 free_stack(common, 1);
6019 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6020 return;
6021 }
6022
6023 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6024 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6025 set_jumps(current->topbacktracks, LABEL());
6026 free_stack(common, 2);
6027 }
6028
6029 static void compile_recurse_backtrackpath(compiler_common *common, struct backtrack_common *current)
6030 {
6031 DEFINE_COMPILER;
6032
6033 set_jumps(current->topbacktracks, LABEL());
6034
6035 if (common->has_set_som && common->mark_ptr != 0)
6036 {
6037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6038 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6039 free_stack(common, 2);
6040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
6041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6042 }
6043 else if (common->has_set_som || common->mark_ptr != 0)
6044 {
6045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6046 free_stack(common, 1);
6047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
6048 }
6049 }
6050
6051 static void compile_assert_backtrackpath(compiler_common *common, struct backtrack_common *current)
6052 {
6053 DEFINE_COMPILER;
6054 pcre_uchar *cc = current->cc;
6055 pcre_uchar bra = OP_BRA;
6056 struct sljit_jump *brajump = NULL;
6057
6058 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
6059 if (*cc == OP_BRAZERO)
6060 {
6061 bra = *cc;
6062 cc++;
6063 }
6064
6065 if (bra == OP_BRAZERO)
6066 {
6067 SLJIT_ASSERT(current->topbacktracks == NULL);
6068 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6069 }
6070
6071 if (CURRENT_AS(assert_backtrack)->framesize < 0)
6072 {
6073 set_jumps(current->topbacktracks, LABEL());
6074
6075 if (bra == OP_BRAZERO)
6076 {
6077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6078 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6079 free_stack(common, 1);
6080 }
6081 return;
6082 }
6083
6084 if (bra == OP_BRAZERO)
6085 {
6086 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
6087 {
6088 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6089 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6090 free_stack(common, 1);
6091 return;
6092 }
6093 free_stack(common, 1);
6094 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6095 }
6096
6097 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
6098 {
6099 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr);
6100 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_w));
6102
6103 set_jumps(current->topbacktracks, LABEL());
6104 }
6105 else
6106 set_jumps(current->topbacktracks, LABEL());
6107
6108 if (bra == OP_BRAZERO)
6109 {
6110 /* We know there is enough place on the stack. */
6111 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6112 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6113 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->trypath);
6114 JUMPHERE(brajump);
6115 }
6116 }
6117
/* Generates the backtrack path of a bracket: OP_BRA, OP_CBRA, OP_ONCE,
   OP_ONCE_NC, OP_COND and the OP_S* variants, optionally prefixed by
   OP_BRAZERO or OP_BRAMINZERO.

   The generated code is laid out in this order:
     1. handling of the repeat kind (OP_KETRMAX / OP_KETRMIN) and of the
        zero prefix;
     2. for brackets with alternatives, a chain of compares which dispatches
        on the index of the alternative that matched last;
     3. the backtrack path of the items of the first alternative;
     4. for every further alternative: its try path, the code executed after
        it matches (ending with a jump to alttrypath) and its backtrack path;
     5. restoration of the saved capture offsets / localptr;
     6. the loop back to the try path for repeated brackets.

   The function returns early, leaving the code incomplete, when a jump list
   item cannot be allocated or when the compiler reports an error.

   Arguments:
     common    the compiler state
     current   the backtrack record of the bracket (a bracket_backtrack) */
6118 static void compile_bracket_backtrackpath(compiler_common *common, struct backtrack_common *current)
6119 {
6120 DEFINE_COMPILER;
6121 int opcode;
6122 int offset = 0;
6123 int localptr = CURRENT_AS(bracket_backtrack)->localptr;
6124 int stacksize;
6125 int count;
6126 pcre_uchar *cc = current->cc;
6127 pcre_uchar *ccbegin;
6128 pcre_uchar *ccprev;
6129 jump_list *jumplist = NULL;
6130 jump_list *jumplistitem = NULL;
6131 pcre_uchar bra = OP_BRA;
6132 pcre_uchar ket;
6133 assert_backtrack *assert;
6134 BOOL has_alternatives;
6135 struct sljit_jump *brazero = NULL;
6136 struct sljit_jump *once = NULL;
6137 struct sljit_jump *cond = NULL;
6138 struct sljit_label *rminlabel = NULL;
6139
/* Remember the zero prefix (if any) and step over it. */
6140 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6141 {
6142 bra = *cc;
6143 cc++;
6144 }
6145
/* Decode the bracket: its closing opcode, whether it has alternatives, the
   ovector offset of capturing brackets, and normalize the opcode
   (a repeated OP_COND is handled as OP_SCOND, OP_ONCE_NC as OP_ONCE). */
6146 opcode = *cc;
6147 ccbegin = cc;
6148 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
6149 cc += GET(cc, 1);
6150 has_alternatives = *cc == OP_ALT;
6151 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6152 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
6153 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6154 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
6155 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6156 opcode = OP_SCOND;
6157 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6158 opcode = OP_ONCE;
6159
6160 if (ket == OP_KETRMAX)
6161 {
6162 if (bra == OP_BRAZERO)
6163 {
/* A zero in the top slot skips the whole backtrack code of the bracket;
   the jump is bound in the OP_KETRMAX handling at the end. */
6164 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6165 free_stack(common, 1);
6166 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6167 }
6168 }
6169 else if (ket == OP_KETRMIN)
6170 {
6171 if (bra != OP_BRAMINZERO)
6172 {
6173 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6174 if (opcode >= OP_SBRA || opcode == OP_ONCE)
6175 {
6176 /* Checking zero-length iteration. */
6177 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6178 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6179 else
6180 {
6181 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6182 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_backtrack)->recursivetrypath);
6183 }
6184 if (opcode != OP_ONCE)
6185 free_stack(common, 1);
6186 }
6187 else
6188 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6189 }
/* Target of the loop emitted in the OP_KETRMIN handling at the end. */
6190 rminlabel = LABEL();
6191 }
6192 else if (bra == OP_BRAZERO)
6193 {
/* Non-repeated bracket: a non-zero top slot skips the backtrack code. */
6194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6195 free_stack(common, 1);
6196 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6197 }
6198
6199 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
6200 {
6201 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6202 {
6203 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6204 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6205 }
/* Jump over the code of the alternatives; bound by JUMPHERE(once) below. */
6206 once = JUMP(SLJIT_JUMP);
6207 }
6208 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6209 {
6210 if (has_alternatives)
6211 {
6212 /* Always exactly one alternative. */
6213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6214 free_stack(common, 1);
6215
6216 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6217 if (SLJIT_UNLIKELY(!jumplistitem))
6218 return;
6219 jumplist = jumplistitem;
6220 jumplistitem->next = NULL;
6221 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
6222 }
6223 }
6224 else if (*cc == OP_ALT)
6225 {
6226 /* Build a jump list. Get the last successfully matched branch index. */
6227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6228 free_stack(common, 1);
6229 count = 1;
6230 do
6231 {
6232 /* Append as the last item. */
6233 if (jumplist != NULL)
6234 {
6235 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6236 jumplistitem = jumplistitem->next;
6237 }
6238 else
6239 {
6240 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6241 jumplist = jumplistitem;
6242 }
6243
6244 if (SLJIT_UNLIKELY(!jumplistitem))
6245 return;
6246
6247 jumplistitem->next = NULL;
6248 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6249 cc += GET(cc, 1);
6250 }
6251 while (*cc == OP_ALT);
6252
/* Rewind to the end of the first alternative for the loop below. */
6253 cc = ccbegin + GET(ccbegin, 1);
6254 }
6255
/* Backtrack path of the items of the first alternative. */
6256 COMPILE_BACKTRACKPATH(current->top);
6257 if (current->topbacktracks)
6258 set_jumps(current->topbacktracks, LABEL());
6259
6260 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6261 {
6262 /* Conditional block always has at most one alternative. */
6263 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6264 {
6265 SLJIT_ASSERT(has_alternatives);
6266 assert = CURRENT_AS(bracket_backtrack)->u.assert;
6267 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6268 {
6269 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6270 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6271 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6272 }
6273 cond = JUMP(SLJIT_JUMP);
6274 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
6275 }
6276 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
6277 {
6278 SLJIT_ASSERT(has_alternatives);
6279 cond = JUMP(SLJIT_JUMP);
6280 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
6281 }
6282 else
6283 SLJIT_ASSERT(!has_alternatives);
6284 }
6285
6286 if (has_alternatives)
6287 {
6288 count = 1;
6289 do
6290 {
/* The record is reused for every alternative, so its lists are reset. */
6291 current->top = NULL;
6292 current->topbacktracks = NULL;
6293 current->nextbacktracks = NULL;
6294 if (*cc == OP_ALT)
6295 {
6296 ccprev = cc + 1 + LINK_SIZE;
6297 cc += GET(cc, 1);
6298 if (opcode != OP_COND && opcode != OP_SCOND)
6299 {
/* Reload the string position the bracket started from before trying the
   next alternative. */
6300 if (localptr != 0 && opcode != OP_ONCE)
6301 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6302 else
6303 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6304 }
6305 compile_trypath(common, ccprev, cc, current);
6306 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6307 return;
6308 }
6309
6310 /* Instructions executed after the current alternative is successfully matched. */
6311 /* There is a similar code in compile_bracket_trypath. */
6312 if (opcode == OP_ONCE)
6313 {
6314 if (CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6315 {
6316 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6317 /* TMP2 which is set here used by OP_KETRMAX below. */
6318 if (ket == OP_KETRMAX)
6319 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6320 else if (ket == OP_KETRMIN)
6321 {
6322 /* Move the STR_PTR to the localptr. */
6323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6324 }
6325 }
6326 else
6327 {
6328 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_w));
6329 if (ket == OP_KETRMAX)
6330 {
6331 /* TMP2 which is set here used by OP_KETRMAX below. */
6332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6333 }
6334 }
6335 }
6336
/* Count the slots to push: one for the alternative index (not for
   OP_ONCE) and one more for repeated or zero-prefixed brackets. */
6337 stacksize = 0;
6338 if (opcode != OP_ONCE)
6339 stacksize++;
6340 if (ket != OP_KET || bra != OP_BRA)
6341 stacksize++;
6342
6343 if (stacksize > 0) {
6344 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6345 allocate_stack(common, stacksize);
6346 else
6347 {
6348 /* We know we have room for at least one item on the top of the stack. */
6349 SLJIT_ASSERT(stacksize == 1);
6350 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6351 }
6352 }
6353
/* Fill the slots counted above; stacksize is reused as the slot index. */
6354 stacksize = 0;
6355 if (ket != OP_KET || bra != OP_BRA)
6356 {
6357 if (ket != OP_KET)
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6359 else
6360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6361 stacksize++;
6362 }
6363
6364 if (opcode != OP_ONCE)
6365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6366
/* Capturing bracket: record the captured range in the ovector. */
6367 if (offset != 0)
6368 {
6369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6370 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6372 }
6373
6374 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alttrypath);
6375
/* Bind the dispatch compare of this alternative: its backtrack code
   starts here. */
6376 if (opcode != OP_ONCE)
6377 {
6378 SLJIT_ASSERT(jumplist);
6379 JUMPHERE(jumplist->jump);
6380 jumplist = jumplist->next;
6381 }
6382
6383 COMPILE_BACKTRACKPATH(current->top);
6384 if (current->topbacktracks)
6385 set_jumps(current->topbacktracks, LABEL());
6386 SLJIT_ASSERT(!current->nextbacktracks);
6387 }
6388 while (*cc == OP_ALT);
6389 SLJIT_ASSERT(!jumplist);
6390
6391 if (cond != NULL)
6392 {
6393 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
/* u.assert is only dereferenced when the condition is a negative
   assertion; the opcode test is evaluated first. */
6394 assert = CURRENT_AS(bracket_backtrack)->u.assert;
6395 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6396
6397 {
6398 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6399 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6401 }
6402 JUMPHERE(cond);
6403 }
6404
6405 /* Free the STR_PTR. */
6406 if (localptr == 0)
6407 free_stack(common, 1);
6408 }
6409
6410 if (offset != 0)
6411 {
6412 /* Using both tmp register is better for instruction scheduling. */
6413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6414 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6416 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6417 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6418 free_stack(common, 3);
6419 }
6420 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6421 {
6422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6423 free_stack(common, 1);
6424 }
6425 else if (opcode == OP_ONCE)
6426 {
6427 cc = ccbegin + GET(ccbegin, 1);
6428 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6429 {
6430 /* Reset head and drop saved frame. */
6431 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6432 free_stack(common, CURRENT_AS(bracket_backtrack)->u.framesize + stacksize);
6433 }
6434 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6435 {
6436 /* The STR_PTR must be released. */
6437 free_stack(common, 1);
6438 }
6439
6440 JUMPHERE(once);
6441 /* Restore previous localptr */
6442 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
6443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_w));
6444 else if (ket == OP_KETRMIN)
6445 {
6446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6447 /* See the comment below. */
6448 free_stack(common, 2);
6449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6450 }
6451 }
6452
6453 if (ket == OP_KETRMAX)
6454 {
/* A non-zero saved position means a previous iteration exists: continue
   backtracking inside it through recursivetrypath. */
6455 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6456 if (bra != OP_BRAZERO)
6457 free_stack(common, 1);
6458 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6459 if (bra == OP_BRAZERO)
6460 {
6461 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6462 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath);
6463 JUMPHERE(brazero);
6464 free_stack(common, 1);
6465 }
6466 }
6467 else if (ket == OP_KETRMIN)
6468 {
6469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6470
6471 /* OP_ONCE removes everything in case of a backtrack, so we don't
6472 need to explicitly release the STR_PTR. The extra release would
6473 affect badly the free_stack(2) above. */
6474 if (opcode != OP_ONCE)
6475 free_stack(common, 1);
6476 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6477 if (opcode == OP_ONCE)
6478 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6479 else if (bra == OP_BRAMINZERO)
6480 free_stack(common, 1);
6481 }
6482 else if (bra == OP_BRAZERO)
6483 {
6484 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6485 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zerotrypath);
6486 JUMPHERE(brazero);
6487 }
6488 }
6489
6490 static void compile_bracketpos_backtrackpath(compiler_common *common, struct backtrack_common *current)
6491 {
6492 DEFINE_COMPILER;
6493 int offset;
6494 struct sljit_jump *jump;
6495
6496 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
6497 {
6498 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6499 {
6500 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6502 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6505 }
6506 set_jumps(current->topbacktracks, LABEL());
6507 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
6508 return;
6509 }
6510
6511 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr);
6512 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6513
6514 if (current->topbacktracks)
6515 {
6516 jump = JUMP(SLJIT_JUMP);
6517 set_jumps(current->topbacktracks, LABEL());
6518 /* Drop the stack frame. */
6519 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
6520 JUMPHERE(jump);
6521 }
6522 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_w));
6523 }
6524
6525 static void compile_braminzero_backtrackpath(compiler_common *common, struct backtrack_common *current)
6526 {
6527 assert_backtrack backtrack;
6528
6529 current->top = NULL;
6530 current->topbacktracks = NULL;
6531 current->nextbacktracks = NULL;
6532 if (current->cc[1] > OP_ASSERTBACK_NOT)
6533 {
6534 /* Manual call of compile_bracket_trypath and compile_bracket_backtrackpath. */
6535 compile_bracket_trypath(common, current->cc, current);
6536 compile_bracket_backtrackpath(common, current->top);
6537 }
6538 else
6539 {
6540 memset(&backtrack, 0, sizeof(backtrack));
6541 backtrack.common.cc = current->cc;
6542 backtrack.trypath = CURRENT_AS(braminzero_backtrack)->trypath;
6543 /* Manual call of compile_assert_trypath. */
6544 compile_assert_trypath(common, current->cc, &backtrack, FALSE);
6545 }
6546 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
6547 }
6548
6549 static void compile_backtrackpath(compiler_common *common, struct backtrack_common *current)
6550 {
6551 DEFINE_COMPILER;
6552
6553 while (current)
6554 {
6555 if (current->nextbacktracks != NULL)
6556 set_jumps(current->nextbacktracks, LABEL());
6557 switch(*current->cc)
6558 {
6559 case OP_SET_SOM:
6560 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6561 free_stack(common, 1);
6562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6563 break;
6564
6565 case OP_STAR:
6566 case OP_MINSTAR:
6567 case OP_PLUS:
6568 case OP_MINPLUS:
6569 case OP_QUERY:
6570 case OP_MINQUERY:
6571 case OP_UPTO:
6572 case OP_MINUPTO:
6573 case OP_EXACT:
6574 case OP_POSSTAR:
6575 case OP_POSPLUS:
6576 case OP_POSQUERY:
6577 case OP_POSUPTO:
6578 case OP_STARI:
6579 case OP_MINSTARI:
6580 case OP_PLUSI:
6581 case OP_MINPLUSI:
6582 case OP_QUERYI:
6583 case OP_MINQUERYI:
6584 case OP_UPTOI:
6585 case OP_MINUPTOI:
6586 case OP_EXACTI:
6587 case OP_POSSTARI:
6588 case OP_POSPLUSI:
6589 case OP_POSQUERYI:
6590 case OP_POSUPTOI:
6591 case OP_NOTSTAR:
6592 case OP_NOTMINSTAR:
6593 case OP_NOTPLUS:
6594 case OP_NOTMINPLUS:
6595 case OP_NOTQUERY:
6596 case OP_NOTMINQUERY:
6597 case OP_NOTUPTO:
6598 case OP_NOTMINUPTO:
6599 case OP_NOTEXACT:
6600 case OP_NOTPOSSTAR:
6601 case OP_NOTPOSPLUS:
6602 case OP_NOTPOSQUERY:
6603 case OP_NOTPOSUPTO:
6604 case OP_NOTSTARI:
6605 case OP_NOTMINSTARI:
6606 case OP_NOTPLUSI:
6607 case OP_NOTMINPLUSI:
6608 case OP_NOTQUERYI:
6609 case OP_NOTMINQUERYI:
6610 case OP_NOTUPTOI:
6611 case OP_NOTMINUPTOI:
6612 case OP_NOTEXACTI:
6613 case OP_NOTPOSSTARI:
6614 case OP_NOTPOSPLUSI:
6615 case OP_NOTPOSQUERYI:
6616 case OP_NOTPOSUPTOI:
6617 case OP_TYPESTAR:
6618 case OP_TYPEMINSTAR:
6619 case OP_TYPEPLUS:
6620 case OP_TYPEMINPLUS:
6621 case OP_TYPEQUERY:
6622 case OP_TYPEMINQUERY:
6623 case OP_TYPEUPTO:
6624 case OP_TYPEMINUPTO:
6625 case OP_TYPEEXACT:
6626 case OP_TYPEPOSSTAR:
6627 case OP_TYPEPOSPLUS:
6628 case OP_TYPEPOSQUERY:
6629 case OP_TYPEPOSUPTO:
6630 case OP_CLASS:
6631 case OP_NCLASS:
6632 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6633 case OP_XCLASS:
6634 #endif
6635 compile_iterator_backtrackpath(common, current);
6636 break;
6637
6638 case OP_REF:
6639 case OP_REFI:
6640 compile_ref_iterator_backtrackpath(common, current);
6641 break;
6642
6643 case OP_RECURSE:
6644 compile_recurse_backtrackpath(common, current);
6645 break;
6646
6647 case OP_ASSERT:
6648 case OP_ASSERT_NOT:
6649 case OP_ASSERTBACK:
6650 case OP_ASSERTBACK_NOT:
6651 compile_assert_backtrackpath(common, current);
6652 break;
6653
6654 case OP_ONCE:
6655 case OP_ONCE_NC:
6656 case OP_BRA:
6657 case OP_CBRA:
6658 case OP_COND:
6659 case OP_SBRA:
6660 case OP_SCBRA:
6661 case OP_SCOND:
6662 compile_bracket_backtrackpath(common, current);
6663 break;
6664
6665 case OP_BRAZERO:
6666 if (current->cc[1] > OP_ASSERTBACK_NOT)
6667 compile_bracket_backtrackpath(common, current);
6668 else
6669 compile_assert_backtrackpath(common, current);
6670 break;
6671
6672 case OP_BRAPOS:
6673 case OP_CBRAPOS:
6674 case OP_SBRAPOS:
6675 case OP_SCBRAPOS:
6676 case OP_BRAPOSZERO:
6677 compile_bracketpos_backtrackpath(common, current);
6678 break;
6679
6680 case OP_BRAMINZERO:
6681 compile_braminzero_backtrackpath(common, current);
6682 break;
6683
6684 case OP_MARK:
6685 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6686 free_stack(common, 1);
6687 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6688 break;
6689
6690 case OP_COMMIT:
6691 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6692 if (common->leavelabel == NULL)
6693 add_jump(compiler, &common->leave, JUMP(SLJIT_JUMP));
6694 else
6695 JUMPTO(SLJIT_JUMP, common->leavelabel);
6696 break;
6697
6698 case OP_FAIL:
6699 case OP_ACCEPT:
6700 case OP_ASSERT_ACCEPT:
6701 set_jumps(current->topbacktracks, LABEL());
6702 break;
6703
6704 default:
6705 SLJIT_ASSERT_STOP();
6706 break;
6707 }
6708 current = current->prev;
6709 }
6710 }
6711
6712 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6713 {
6714 DEFINE_COMPILER;
6715 pcre_uchar *cc = common->start + common->currententry->start;
6716 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6717 pcre_uchar *ccend = bracketend(cc);
6718 int localsize = get_localsize(common, ccbegin, ccend);
6719 int framesize = get_framesize(common, cc, TRUE);
6720 int alternativesize;
6721 BOOL needsframe;
6722 backtrack_common altbacktrack;
6723 struct sljit_label *save_leavelabel = common->leavelabel;
6724 jump_list *save_leave = common->leave;
6725 struct sljit_jump *jump;
6726
6727 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6728 needsframe = framesize >= 0;
6729 if (!needsframe)
6730 framesize = 0;
6731 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6732
6733 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0);
6734 common->currententry->entry = LABEL();
6735 set_jumps(common->currententry->calls, common->currententry->entry);
6736
6737 sljit_emit_fast_enter(compiler, TMP2, 0);
6738 allocate_stack(common, localsize + framesize + alternativesize);
6739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6740