/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 987 - (show annotations)
Sat Jul 7 04:11:29 2012 UTC (7 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 244686 byte(s)
Error occurred while calculating annotation data.
Supporting IBM XL C compilers for PPC architectures in the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Size limit of the fast, stack based local allocation. */
#define LOCAL_SPACE_SIZE 32768

#define STACK_GROWTH_RATE 8192

/* In sljit debug builds, enable the check that an allocation
could destroy temporaries. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
79 Short summary of the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct backtrack_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the try path (expected path), and the other is called
93 the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
94 branches on the try path.
95
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
117
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
121 C expected path
122 jump to D try path
123 C backtrack path
124 A backtrack path
125
126 Notice that the order of the backtrack code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current backtrack code path. The backtrack path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the try path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the backtrack code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
177
/* Kinds of stubs; stack_alloc is the only one. */
enum stub_types {
  stack_alloc
};

/* Singly linked list of stubs: the stub kind, an integer payload,
the jump that starts the stub and the label where it leaves. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *leave;
  struct stub_list *next;
} stub_list;
187
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the aguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
204
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
215
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to coninue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
235
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
245
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
250
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
256
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is not created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
270
271 typedef struct compiler_common {
272 struct sljit_compiler *compiler;
273 pcre_uchar *start;
274
275 /* Opcode local area direct map. */
276 int *localptrs;
277 int cbraptr;
278 /* OVector starting point. Must be divisible by 2. */
279 int ovector_start;
280 /* Last known position of the requested byte. */
281 int req_char_ptr;
282 /* Head of the last recursion. */
283 int recursive_head;
284 /* First inspected character for partial matching. */
285 int start_used_ptr;
286 /* Starting pointer for partial soft matches. */
287 int hit_start;
288 /* End pointer of the first line. */
289 int first_line_end;
290 /* Points to the marked string. */
291 int mark_ptr;
292
293 /* Other */
294 const pcre_uint8 *fcc;
295 sljit_w lcc;
296 int mode;
297 int nltype;
298 int newline;
299 int bsr_nltype;
300 int endonly;
301 BOOL has_set_som;
302 sljit_w ctypes;
303 sljit_uw name_table;
304 sljit_w name_count;
305 sljit_w name_entry_size;
306
307 /* Labels and jump lists. */
308 struct sljit_label *partialmatchlabel;
309 struct sljit_label *leavelabel;
310 struct sljit_label *acceptlabel;
311 stub_list *stubs;
312 recurse_entry *entries;
313 recurse_entry *currententry;
314 jump_list *partialmatch;
315 jump_list *leave;
316 jump_list *accept;
317 jump_list *calllimit;
318 jump_list *stackalloc;
319 jump_list *revertframes;
320 jump_list *wordboundary;
321 jump_list *anynewline;
322 jump_list *hspace;
323 jump_list *vspace;
324 jump_list *casefulcmp;
325 jump_list *caselesscmp;
326 BOOL jscript_compat;
327 #ifdef SUPPORT_UTF
328 BOOL utf;
329 #ifdef SUPPORT_UCP
330 BOOL use_ucp;
331 #endif
332 jump_list *utfreadchar;
333 #ifdef COMPILE_PCRE8
334 jump_list *utfreadtype8;
335 #endif
336 #endif /* SUPPORT_UTF */
337 #ifdef SUPPORT_UCP
338 jump_list *getucd;
339 #endif
340 } compiler_common;
341
/* Context used by byte_sequence_compare. The character buffers exist
only when sljit supports unaligned memory access. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
375
/* Tag values stored into saved stack frames by init_frame: frame_end
terminates a frame, the negative tags precede a saved start-of-match
or mark value. */
enum {
  frame_setmark = -2,
  frame_setstrbegin = -1,
  frame_end = 0
};
381
/* CMP is redefined below, so drop the sljit definition first. */
#undef CMP

/* Byte offset of the i-th stack element (element 0 is at -sizeof(sljit_w)). */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))

/* Symbolic names of the sljit registers used by the generated code. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
398
/* Layout of the locals. */

/* Two words which are free for the opcode currently compiled. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two words for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Maximum limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))
/* The output vector lives on the stack and holds character pointers.
Its data forms two groups: the first has the start / end character
pointers, the second has the start pointers of capturing groups whose
end has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
/* Local area of the opcode at cc, looked up by its offset from common->start. */
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
416
417 #ifdef COMPILE_PCRE8
418 #define MOV_UCHAR SLJIT_MOV_UB
419 #define MOVU_UCHAR SLJIT_MOVU_UB
420 #else
421 #ifdef COMPILE_PCRE16
422 #define MOV_UCHAR SLJIT_MOV_UH
423 #define MOVU_UCHAR SLJIT_MOVU_UH
424 #else
425 #error Unsupported compiling mode
426 #endif
427 #endif
428
/* Shorthands for the sljit emitter calls. All of them except DEFINE_COMPILER
expect a local variable called "compiler", which DEFINE_COMPILER declares
from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))
452
453 static pcre_uchar* bracketend(pcre_uchar* cc)
454 {
455 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
456 do cc += GET(cc, 1); while (*cc == OP_ALT);
457 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
458 cc += 1 + LINK_SIZE;
459 return cc;
460 }
461
462 /* Functions which might need modification for every newly supported opcode:
463 next_opcode
464 get_localspace
465 set_localptrs
466 get_framesize
467 init_frame
468 get_localsize
469 copy_locals
470 compile_trypath
471 compile_backtrackpath
472 */
473
474 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
475 {
476 SLJIT_UNUSED_ARG(common);
477 switch(*cc)
478 {
479 case OP_SOD:
480 case OP_SOM:
481 case OP_SET_SOM:
482 case OP_NOT_WORD_BOUNDARY:
483 case OP_WORD_BOUNDARY:
484 case OP_NOT_DIGIT:
485 case OP_DIGIT:
486 case OP_NOT_WHITESPACE:
487 case OP_WHITESPACE:
488 case OP_NOT_WORDCHAR:
489 case OP_WORDCHAR:
490 case OP_ANY:
491 case OP_ALLANY:
492 case OP_ANYNL:
493 case OP_NOT_HSPACE:
494 case OP_HSPACE:
495 case OP_NOT_VSPACE:
496 case OP_VSPACE:
497 case OP_EXTUNI:
498 case OP_EODN:
499 case OP_EOD:
500 case OP_CIRC:
501 case OP_CIRCM:
502 case OP_DOLL:
503 case OP_DOLLM:
504 case OP_TYPESTAR:
505 case OP_TYPEMINSTAR:
506 case OP_TYPEPLUS:
507 case OP_TYPEMINPLUS:
508 case OP_TYPEQUERY:
509 case OP_TYPEMINQUERY:
510 case OP_TYPEPOSSTAR:
511 case OP_TYPEPOSPLUS:
512 case OP_TYPEPOSQUERY:
513 case OP_CRSTAR:
514 case OP_CRMINSTAR:
515 case OP_CRPLUS:
516 case OP_CRMINPLUS:
517 case OP_CRQUERY:
518 case OP_CRMINQUERY:
519 case OP_DEF:
520 case OP_BRAZERO:
521 case OP_BRAMINZERO:
522 case OP_BRAPOSZERO:
523 case OP_COMMIT:
524 case OP_FAIL:
525 case OP_ACCEPT:
526 case OP_ASSERT_ACCEPT:
527 case OP_SKIPZERO:
528 return cc + 1;
529
530 case OP_ANYBYTE:
531 #ifdef SUPPORT_UTF
532 if (common->utf) return NULL;
533 #endif
534 return cc + 1;
535
536 case OP_CHAR:
537 case OP_CHARI:
538 case OP_NOT:
539 case OP_NOTI:
540 case OP_STAR:
541 case OP_MINSTAR:
542 case OP_PLUS:
543 case OP_MINPLUS:
544 case OP_QUERY:
545 case OP_MINQUERY:
546 case OP_POSSTAR:
547 case OP_POSPLUS:
548 case OP_POSQUERY:
549 case OP_STARI:
550 case OP_MINSTARI:
551 case OP_PLUSI:
552 case OP_MINPLUSI:
553 case OP_QUERYI:
554 case OP_MINQUERYI:
555 case OP_POSSTARI:
556 case OP_POSPLUSI:
557 case OP_POSQUERYI:
558 case OP_NOTSTAR:
559 case OP_NOTMINSTAR:
560 case OP_NOTPLUS:
561 case OP_NOTMINPLUS:
562 case OP_NOTQUERY:
563 case OP_NOTMINQUERY:
564 case OP_NOTPOSSTAR:
565 case OP_NOTPOSPLUS:
566 case OP_NOTPOSQUERY:
567 case OP_NOTSTARI:
568 case OP_NOTMINSTARI:
569 case OP_NOTPLUSI:
570 case OP_NOTMINPLUSI:
571 case OP_NOTQUERYI:
572 case OP_NOTMINQUERYI:
573 case OP_NOTPOSSTARI:
574 case OP_NOTPOSPLUSI:
575 case OP_NOTPOSQUERYI:
576 cc += 2;
577 #ifdef SUPPORT_UTF
578 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
579 #endif
580 return cc;
581
582 case OP_UPTO:
583 case OP_MINUPTO:
584 case OP_EXACT:
585 case OP_POSUPTO:
586 case OP_UPTOI:
587 case OP_MINUPTOI:
588 case OP_EXACTI:
589 case OP_POSUPTOI:
590 case OP_NOTUPTO:
591 case OP_NOTMINUPTO:
592 case OP_NOTEXACT:
593 case OP_NOTPOSUPTO:
594 case OP_NOTUPTOI:
595 case OP_NOTMINUPTOI:
596 case OP_NOTEXACTI:
597 case OP_NOTPOSUPTOI:
598 cc += 2 + IMM2_SIZE;
599 #ifdef SUPPORT_UTF
600 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
601 #endif
602 return cc;
603
604 case OP_NOTPROP:
605 case OP_PROP:
606 return cc + 1 + 2;
607
608 case OP_TYPEUPTO:
609 case OP_TYPEMINUPTO:
610 case OP_TYPEEXACT:
611 case OP_TYPEPOSUPTO:
612 case OP_REF:
613 case OP_REFI:
614 case OP_CREF:
615 case OP_NCREF:
616 case OP_RREF:
617 case OP_NRREF:
618 case OP_CLOSE:
619 cc += 1 + IMM2_SIZE;
620 return cc;
621
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 return cc + 1 + 2 * IMM2_SIZE;
625
626 case OP_CLASS:
627 case OP_NCLASS:
628 return cc + 1 + 32 / sizeof(pcre_uchar);
629
630 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
631 case OP_XCLASS:
632 return cc + GET(cc, 1);
633 #endif
634
635 case OP_RECURSE:
636 case OP_ASSERT:
637 case OP_ASSERT_NOT:
638 case OP_ASSERTBACK:
639 case OP_ASSERTBACK_NOT:
640 case OP_REVERSE:
641 case OP_ONCE:
642 case OP_ONCE_NC:
643 case OP_BRA:
644 case OP_BRAPOS:
645 case OP_COND:
646 case OP_SBRA:
647 case OP_SBRAPOS:
648 case OP_SCOND:
649 case OP_ALT:
650 case OP_KET:
651 case OP_KETRMAX:
652 case OP_KETRMIN:
653 case OP_KETRPOS:
654 return cc + 1 + LINK_SIZE;
655
656 case OP_CBRA:
657 case OP_CBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 return cc + 1 + LINK_SIZE + IMM2_SIZE;
661
662 case OP_MARK:
663 return cc + 1 + 2 + cc[1];
664
665 default:
666 return NULL;
667 }
668 }
669
/* Case label groups shared by get_localspace and set_localptrs. */

/* Single character iterators which get one local word. */
#define CASE_ITERATOR_LOCAL1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators which get two local words. */
#define CASE_ITERATOR_LOCAL2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Same as above, but the opcode has a repeat count as well. */
#define CASE_ITERATOR_LOCAL2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators which get one local word. */
#define CASE_ITERATOR_TYPE_LOCAL1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators which may get two local words. */
#define CASE_ITERATOR_TYPE_LOCAL2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Same as above, but the opcode has a repeat count as well. */
#define CASE_ITERATOR_TYPE_LOCAL2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
721
722 static int get_class_iterator_size(pcre_uchar *cc)
723 {
724 switch(*cc)
725 {
726 case OP_CRSTAR:
727 case OP_CRPLUS:
728 return 2;
729
730 case OP_CRMINSTAR:
731 case OP_CRMINPLUS:
732 case OP_CRQUERY:
733 case OP_CRMINQUERY:
734 return 1;
735
736 case OP_CRRANGE:
737 case OP_CRMINRANGE:
738 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
739 return 0;
740 return 2;
741
742 default:
743 return 0;
744 }
745 }
746
747 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
748 {
749 int localspace = 0;
750 pcre_uchar *alternative;
751 pcre_uchar *end = NULL;
752 int space, size, bracketlen;
753
754 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
755 while (cc < ccend)
756 {
757 space = 0;
758 size = 0;
759 bracketlen = 0;
760 switch(*cc)
761 {
762 case OP_SET_SOM:
763 common->has_set_som = TRUE;
764 cc += 1;
765 break;
766
767 case OP_ASSERT:
768 case OP_ASSERT_NOT:
769 case OP_ASSERTBACK:
770 case OP_ASSERTBACK_NOT:
771 case OP_ONCE:
772 case OP_ONCE_NC:
773 case OP_BRAPOS:
774 case OP_SBRA:
775 case OP_SBRAPOS:
776 case OP_SCOND:
777 localspace += sizeof(sljit_w);
778 bracketlen = 1 + LINK_SIZE;
779 break;
780
781 case OP_CBRAPOS:
782 case OP_SCBRAPOS:
783 localspace += sizeof(sljit_w);
784 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
785 break;
786
787 case OP_COND:
788 /* Might be a hidden SCOND. */
789 alternative = cc + GET(cc, 1);
790 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
791 localspace += sizeof(sljit_w);
792 bracketlen = 1 + LINK_SIZE;
793 break;
794
795 case OP_BRA:
796 bracketlen = 1 + LINK_SIZE;
797 break;
798
799 case OP_CBRA:
800 case OP_SCBRA:
801 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
802 break;
803
804 CASE_ITERATOR_LOCAL1
805 space = 1;
806 size = -2;
807 break;
808
809 CASE_ITERATOR_LOCAL2A
810 space = 2;
811 size = -2;
812 break;
813
814 CASE_ITERATOR_LOCAL2B
815 space = 2;
816 size = -(2 + IMM2_SIZE);
817 break;
818
819 CASE_ITERATOR_TYPE_LOCAL1
820 space = 1;
821 size = 1;
822 break;
823
824 CASE_ITERATOR_TYPE_LOCAL2A
825 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
826 space = 2;
827 size = 1;
828 break;
829
830 CASE_ITERATOR_TYPE_LOCAL2B
831 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
832 space = 2;
833 size = 1 + IMM2_SIZE;
834 break;
835
836 case OP_CLASS:
837 case OP_NCLASS:
838 size += 1 + 32 / sizeof(pcre_uchar);
839 space = get_class_iterator_size(cc + size);
840 break;
841
842 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
843 case OP_XCLASS:
844 size = GET(cc, 1);
845 space = get_class_iterator_size(cc + size);
846 break;
847 #endif
848
849 case OP_RECURSE:
850 /* Set its value only once. */
851 if (common->recursive_head == 0)
852 {
853 common->recursive_head = common->ovector_start;
854 common->ovector_start += sizeof(sljit_w);
855 }
856 cc += 1 + LINK_SIZE;
857 break;
858
859 case OP_MARK:
860 if (common->mark_ptr == 0)
861 {
862 common->mark_ptr = common->ovector_start;
863 common->ovector_start += sizeof(sljit_w);
864 }
865 cc += 1 + 2 + cc[1];
866 break;
867
868 default:
869 cc = next_opcode(common, cc);
870 if (cc == NULL)
871 return -1;
872 break;
873 }
874
875 if (space > 0 && cc >= end)
876 localspace += sizeof(sljit_w) * space;
877
878 if (size != 0)
879 {
880 if (size < 0)
881 {
882 cc += -size;
883 #ifdef SUPPORT_UTF
884 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
885 #endif
886 }
887 else
888 cc += size;
889 }
890
891 if (bracketlen > 0)
892 {
893 if (cc >= end)
894 {
895 end = bracketend(cc);
896 if (end[-1 - LINK_SIZE] == OP_KET)
897 end = NULL;
898 }
899 cc += bracketlen;
900 }
901 }
902 return localspace;
903 }
904
905 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
906 {
907 pcre_uchar *cc = common->start;
908 pcre_uchar *alternative;
909 pcre_uchar *end = NULL;
910 int space, size, bracketlen;
911
912 while (cc < ccend)
913 {
914 space = 0;
915 size = 0;
916 bracketlen = 0;
917 switch(*cc)
918 {
919 case OP_ASSERT:
920 case OP_ASSERT_NOT:
921 case OP_ASSERTBACK:
922 case OP_ASSERTBACK_NOT:
923 case OP_ONCE:
924 case OP_ONCE_NC:
925 case OP_BRAPOS:
926 case OP_SBRA:
927 case OP_SBRAPOS:
928 case OP_SCOND:
929 common->localptrs[cc - common->start] = localptr;
930 localptr += sizeof(sljit_w);
931 bracketlen = 1 + LINK_SIZE;
932 break;
933
934 case OP_CBRAPOS:
935 case OP_SCBRAPOS:
936 common->localptrs[cc - common->start] = localptr;
937 localptr += sizeof(sljit_w);
938 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
939 break;
940
941 case OP_COND:
942 /* Might be a hidden SCOND. */
943 alternative = cc + GET(cc, 1);
944 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
945 {
946 common->localptrs[cc - common->start] = localptr;
947 localptr += sizeof(sljit_w);
948 }
949 bracketlen = 1 + LINK_SIZE;
950 break;
951
952 case OP_BRA:
953 bracketlen = 1 + LINK_SIZE;
954 break;
955
956 case OP_CBRA:
957 case OP_SCBRA:
958 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
959 break;
960
961 CASE_ITERATOR_LOCAL1
962 space = 1;
963 size = -2;
964 break;
965
966 CASE_ITERATOR_LOCAL2A
967 space = 2;
968 size = -2;
969 break;
970
971 CASE_ITERATOR_LOCAL2B
972 space = 2;
973 size = -(2 + IMM2_SIZE);
974 break;
975
976 CASE_ITERATOR_TYPE_LOCAL1
977 space = 1;
978 size = 1;
979 break;
980
981 CASE_ITERATOR_TYPE_LOCAL2A
982 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
983 space = 2;
984 size = 1;
985 break;
986
987 CASE_ITERATOR_TYPE_LOCAL2B
988 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
989 space = 2;
990 size = 1 + IMM2_SIZE;
991 break;
992
993 case OP_CLASS:
994 case OP_NCLASS:
995 size += 1 + 32 / sizeof(pcre_uchar);
996 space = get_class_iterator_size(cc + size);
997 break;
998
999 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1000 case OP_XCLASS:
1001 size = GET(cc, 1);
1002 space = get_class_iterator_size(cc + size);
1003 break;
1004 #endif
1005
1006 default:
1007 cc = next_opcode(common, cc);
1008 SLJIT_ASSERT(cc != NULL);
1009 break;
1010 }
1011
1012 if (space > 0 && cc >= end)
1013 {
1014 common->localptrs[cc - common->start] = localptr;
1015 localptr += sizeof(sljit_w) * space;
1016 }
1017
1018 if (size != 0)
1019 {
1020 if (size < 0)
1021 {
1022 cc += -size;
1023 #ifdef SUPPORT_UTF
1024 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1025 #endif
1026 }
1027 else
1028 cc += size;
1029 }
1030
1031 if (bracketlen > 0)
1032 {
1033 if (cc >= end)
1034 {
1035 end = bracketend(cc);
1036 if (end[-1 - LINK_SIZE] == OP_KET)
1037 end = NULL;
1038 }
1039 cc += bracketlen;
1040 }
1041 }
1042 }
1043
1044 /* Returns with -1 if no need for frame. */
1045 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1046 {
1047 pcre_uchar *ccend = bracketend(cc);
1048 int length = 0;
1049 BOOL possessive = FALSE;
1050 BOOL setsom_found = recursive;
1051 BOOL setmark_found = recursive;
1052
1053 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1054 {
1055 length = 3;
1056 possessive = TRUE;
1057 }
1058
1059 cc = next_opcode(common, cc);
1060 SLJIT_ASSERT(cc != NULL);
1061 while (cc < ccend)
1062 switch(*cc)
1063 {
1064 case OP_SET_SOM:
1065 SLJIT_ASSERT(common->has_set_som);
1066 if (!setsom_found)
1067 {
1068 length += 2;
1069 setsom_found = TRUE;
1070 }
1071 cc += 1;
1072 break;
1073
1074 case OP_MARK:
1075 SLJIT_ASSERT(common->mark_ptr != 0);
1076 if (!setmark_found)
1077 {
1078 length += 2;
1079 setmark_found = TRUE;
1080 }
1081 cc += 1 + 2 + cc[1];
1082 break;
1083
1084 case OP_RECURSE:
1085 if (common->has_set_som && !setsom_found)
1086 {
1087 length += 2;
1088 setsom_found = TRUE;
1089 }
1090 if (common->mark_ptr != 0 && !setmark_found)
1091 {
1092 length += 2;
1093 setmark_found = TRUE;
1094 }
1095 cc += 1 + LINK_SIZE;
1096 break;
1097
1098 case OP_CBRA:
1099 case OP_CBRAPOS:
1100 case OP_SCBRA:
1101 case OP_SCBRAPOS:
1102 length += 3;
1103 cc += 1 + LINK_SIZE + IMM2_SIZE;
1104 break;
1105
1106 default:
1107 cc = next_opcode(common, cc);
1108 SLJIT_ASSERT(cc != NULL);
1109 break;
1110 }
1111
1112 /* Possessive quantifiers can use a special case. */
1113 if (SLJIT_UNLIKELY(possessive) && length == 3)
1114 return -1;
1115
1116 if (length > 0)
1117 return length + 1;
1118 return -1;
1119 }
1120
1121 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1122 {
1123 DEFINE_COMPILER;
1124 pcre_uchar *ccend = bracketend(cc);
1125 BOOL setsom_found = recursive;
1126 BOOL setmark_found = recursive;
1127 int offset;
1128
1129 /* >= 1 + shortest item size (2) */
1130 SLJIT_UNUSED_ARG(stacktop);
1131 SLJIT_ASSERT(stackpos >= stacktop + 2);
1132
1133 stackpos = STACK(stackpos);
1134 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1135 cc = next_opcode(common, cc);
1136 SLJIT_ASSERT(cc != NULL);
1137 while (cc < ccend)
1138 switch(*cc)
1139 {
1140 case OP_SET_SOM:
1141 SLJIT_ASSERT(common->has_set_som);
1142 if (!setsom_found)
1143 {
1144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1146 stackpos += (int)sizeof(sljit_w);
1147 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1148 stackpos += (int)sizeof(sljit_w);
1149 setsom_found = TRUE;
1150 }
1151 cc += 1;
1152 break;
1153
1154 case OP_MARK:
1155 SLJIT_ASSERT(common->mark_ptr != 0);
1156 if (!setmark_found)
1157 {
1158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1159 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1160 stackpos += (int)sizeof(sljit_w);
1161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1162 stackpos += (int)sizeof(sljit_w);
1163 setmark_found = TRUE;
1164 }
1165 cc += 1 + 2 + cc[1];
1166 break;
1167
1168 case OP_RECURSE:
1169 if (common->has_set_som && !setsom_found)
1170 {
1171 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1172 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1173 stackpos += (int)sizeof(sljit_w);
1174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1175 stackpos += (int)sizeof(sljit_w);
1176 setsom_found = TRUE;
1177 }
1178 if (common->mark_ptr != 0 && !setmark_found)
1179 {
1180 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1182 stackpos += (int)sizeof(sljit_w);
1183 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1184 stackpos += (int)sizeof(sljit_w);
1185 setmark_found = TRUE;
1186 }
1187 cc += 1 + LINK_SIZE;
1188 break;
1189
1190 case OP_CBRA:
1191 case OP_CBRAPOS:
1192 case OP_SCBRA:
1193 case OP_SCBRAPOS:
1194 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1196 stackpos += (int)sizeof(sljit_w);
1197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1200 stackpos += (int)sizeof(sljit_w);
1201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1202 stackpos += (int)sizeof(sljit_w);
1203
1204 cc += 1 + LINK_SIZE + IMM2_SIZE;
1205 break;
1206
1207 default:
1208 cc = next_opcode(common, cc);
1209 SLJIT_ASSERT(cc != NULL);
1210 break;
1211 }
1212
1213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1214 SLJIT_ASSERT(stackpos == STACK(stacktop));
1215 }
1216
1217 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1218 {
1219 int localsize = 2;
1220 int size;
1221 pcre_uchar *alternative;
1222 /* Calculate the sum of the local variables. */
1223 while (cc < ccend)
1224 {
1225 size = 0;
1226 switch(*cc)
1227 {
1228 case OP_ASSERT:
1229 case OP_ASSERT_NOT:
1230 case OP_ASSERTBACK:
1231 case OP_ASSERTBACK_NOT:
1232 case OP_ONCE:
1233 case OP_ONCE_NC:
1234 case OP_BRAPOS:
1235 case OP_SBRA:
1236 case OP_SBRAPOS:
1237 case OP_SCOND:
1238 localsize++;
1239 cc += 1 + LINK_SIZE;
1240 break;
1241
1242 case OP_CBRA:
1243 case OP_SCBRA:
1244 localsize++;
1245 cc += 1 + LINK_SIZE + IMM2_SIZE;
1246 break;
1247
1248 case OP_CBRAPOS:
1249 case OP_SCBRAPOS:
1250 localsize += 2;
1251 cc += 1 + LINK_SIZE + IMM2_SIZE;
1252 break;
1253
1254 case OP_COND:
1255 /* Might be a hidden SCOND. */
1256 alternative = cc + GET(cc, 1);
1257 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1258 localsize++;
1259 cc += 1 + LINK_SIZE;
1260 break;
1261
1262 CASE_ITERATOR_LOCAL1
1263 if (PRIV_DATA(cc))
1264 localsize++;
1265 cc += 2;
1266 #ifdef SUPPORT_UTF
1267 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1268 #endif
1269 break;
1270
1271 CASE_ITERATOR_LOCAL2A
1272 if (PRIV_DATA(cc))
1273 localsize += 2;
1274 cc += 2;
1275 #ifdef SUPPORT_UTF
1276 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1277 #endif
1278 break;
1279
1280 CASE_ITERATOR_LOCAL2B
1281 if (PRIV_DATA(cc))
1282 localsize += 2;
1283 cc += 2 + IMM2_SIZE;
1284 #ifdef SUPPORT_UTF
1285 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1286 #endif
1287 break;
1288
1289 CASE_ITERATOR_TYPE_LOCAL1
1290 if (PRIV_DATA(cc))
1291 localsize++;
1292 cc += 1;
1293 break;
1294
1295 CASE_ITERATOR_TYPE_LOCAL2A
1296 if (PRIV_DATA(cc))
1297 localsize += 2;
1298 cc += 1;
1299 break;
1300
1301 CASE_ITERATOR_TYPE_LOCAL2B
1302 if (PRIV_DATA(cc))
1303 localsize += 2;
1304 cc += 1 + IMM2_SIZE;
1305 break;
1306
1307 case OP_CLASS:
1308 case OP_NCLASS:
1309 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1310 case OP_XCLASS:
1311 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / sizeof(pcre_uchar);
1312 #else
1313 size = 1 + 32 / sizeof(pcre_uchar);
1314 #endif
1315 if (PRIV_DATA(cc))
1316 localsize += get_class_iterator_size(cc + size);
1317 cc += size;
1318 break;
1319
1320 default:
1321 cc = next_opcode(common, cc);
1322 SLJIT_ASSERT(cc != NULL);
1323 break;
1324 }
1325 }
1326 SLJIT_ASSERT(cc == ccend);
1327 return localsize;
1328 }
1329
/* Emits code that copies the local variables used by the bytecode in
[cc, ccend) between the locals area (SLJIT_LOCALS_REG based) and the
STACK_TOP based stack.

save == TRUE:  locals are read and stored to the stack; the very first
               value saved is common->recursive_head.
save == FALSE: values are read back from the stack and written to the
               locals; the first stack word is skipped.

Values travel through TMP1 and TMP2 alternately (tmp1next selects which
one is used next), so a load into one register can be emitted before the
store of the other. stackptr and stacktop are converted by STACK() into
offsets and stackptr must end up equal to stacktop. */
1330 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1331 BOOL save, int stackptr, int stacktop)
1332 {
1333 DEFINE_COMPILER;
/* Offsets (relative to SLJIT_LOCALS_REG) of the locals queued for the current item. */
1334 int srcw[2];
1335 int count, size;
1336 BOOL tmp1next = TRUE;
1337 BOOL tmp1empty = TRUE;
1338 BOOL tmp2empty = TRUE;
1339 pcre_uchar *alternative;
1340 enum {
1341 start,
1342 loop,
1343 end
1344 } status;
1345
/* Only the save direction goes through the 'start' state. */
1346 status = save ? start : loop;
1347 stackptr = STACK(stackptr - 2);
1348 stacktop = STACK(stacktop - 1);
1349
1350 if (!save)
1351 {
/* Skip the first word, then preload up to two values into TMP1 and TMP2.
NOTE(review): the skipped word presumably holds the saved recursive_head,
restored elsewhere - confirm against the caller. */
1352 stackptr += sizeof(sljit_w);
1353 if (stackptr < stacktop)
1354 {
1355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1356 stackptr += sizeof(sljit_w);
1357 tmp1empty = FALSE;
1358 }
1359 if (stackptr < stacktop)
1360 {
1361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1362 stackptr += sizeof(sljit_w);
1363 tmp2empty = FALSE;
1364 }
1365 /* The tmp1next must be TRUE in either way. */
1366 }
1367
/* Each iteration selects zero, one or two local offsets (count, srcw[])
and then emits the moves for them. */
1368 while (status != end)
1369 {
1370 count = 0;
1371 switch(status)
1372 {
1373 case start:
1374 SLJIT_ASSERT(save && common->recursive_head != 0);
1375 count = 1;
1376 srcw[0] = common->recursive_head;
1377 status = loop;
1378 break;
1379
1380 case loop:
1381 if (cc >= ccend)
1382 {
1383 status = end;
1384 break;
1385 }
1386
/* The opcode cases below mirror those of get_localsize. */
1387 switch(*cc)
1388 {
1389 case OP_ASSERT:
1390 case OP_ASSERT_NOT:
1391 case OP_ASSERTBACK:
1392 case OP_ASSERTBACK_NOT:
1393 case OP_ONCE:
1394 case OP_ONCE_NC:
1395 case OP_BRAPOS:
1396 case OP_SBRA:
1397 case OP_SBRAPOS:
1398 case OP_SCOND:
1399 count = 1;
1400 srcw[0] = PRIV_DATA(cc);
1401 SLJIT_ASSERT(srcw[0] != 0);
1402 cc += 1 + LINK_SIZE;
1403 break;
1404
1405 case OP_CBRA:
1406 case OP_SCBRA:
1407 count = 1;
1408 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1409 cc += 1 + LINK_SIZE + IMM2_SIZE;
1410 break;
1411
1412 case OP_CBRAPOS:
1413 case OP_SCBRAPOS:
1414 count = 2;
1415 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1416 srcw[1] = PRIV_DATA(cc);
1417 SLJIT_ASSERT(srcw[0] != 0);
1418 cc += 1 + LINK_SIZE + IMM2_SIZE;
1419 break;
1420
1421 case OP_COND:
1422 /* Might be a hidden SCOND. */
1423 alternative = cc + GET(cc, 1);
1424 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1425 {
1426 count = 1;
1427 srcw[0] = PRIV_DATA(cc);
1428 SLJIT_ASSERT(srcw[0] != 0);
1429 }
1430 cc += 1 + LINK_SIZE;
1431 break;
1432
1433 CASE_ITERATOR_LOCAL1
1434 if (PRIV_DATA(cc))
1435 {
1436 count = 1;
1437 srcw[0] = PRIV_DATA(cc);
1438 }
1439 cc += 2;
1440 #ifdef SUPPORT_UTF
1441 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1442 #endif
1443 break;
1444
1445 CASE_ITERATOR_LOCAL2A
1446 if (PRIV_DATA(cc))
1447 {
/* Two locals one machine word apart. */
1448 count = 2;
1449 srcw[0] = PRIV_DATA(cc);
1450 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1451 }
1452 cc += 2;
1453 #ifdef SUPPORT_UTF
1454 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1455 #endif
1456 break;
1457
1458 CASE_ITERATOR_LOCAL2B
1459 if (PRIV_DATA(cc))
1460 {
1461 count = 2;
1462 srcw[0] = PRIV_DATA(cc);
1463 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1464 }
1465 cc += 2 + IMM2_SIZE;
1466 #ifdef SUPPORT_UTF
1467 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1468 #endif
1469 break;
1470
1471 CASE_ITERATOR_TYPE_LOCAL1
1472 if (PRIV_DATA(cc))
1473 {
1474 count = 1;
1475 srcw[0] = PRIV_DATA(cc);
1476 }
1477 cc += 1;
1478 break;
1479
1480 CASE_ITERATOR_TYPE_LOCAL2A
1481 if (PRIV_DATA(cc))
1482 {
1483 count = 2;
1484 srcw[0] = PRIV_DATA(cc);
1485 srcw[1] = srcw[0] + sizeof(sljit_w);
1486 }
1487 cc += 1;
1488 break;
1489
1490 CASE_ITERATOR_TYPE_LOCAL2B
1491 if (PRIV_DATA(cc))
1492 {
1493 count = 2;
1494 srcw[0] = PRIV_DATA(cc);
1495 srcw[1] = srcw[0] + sizeof(sljit_w);
1496 }
1497 cc += 1 + IMM2_SIZE;
1498 break;
1499
1500 case OP_CLASS:
1501 case OP_NCLASS:
1502 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1503 case OP_XCLASS:
1504 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / sizeof(pcre_uchar);
1505 #else
1506 size = 1 + 32 / sizeof(pcre_uchar);
1507 #endif
/* The iterator following the class decides whether one or two locals are used. */
1508 if (PRIV_DATA(cc))
1509 switch(get_class_iterator_size(cc + size))
1510 {
1511 case 1:
1512 count = 1;
1513 srcw[0] = PRIV_DATA(cc);
1514 break;
1515
1516 case 2:
1517 count = 2;
1518 srcw[0] = PRIV_DATA(cc);
1519 srcw[1] = srcw[0] + sizeof(sljit_w);
1520 break;
1521
1522 default:
1523 SLJIT_ASSERT_STOP();
1524 break;
1525 }
1526 cc += size;
1527 break;
1528
1529 default:
1530 cc = next_opcode(common, cc);
1531 SLJIT_ASSERT(cc != NULL);
1532 break;
1533 }
1534 break;
1535
1536 case end:
1537 SLJIT_ASSERT_STOP();
1538 break;
1539 }
1540
/* Emit the moves for the queued locals. count is decremented before it
is used as an index, so srcw[1] (if present) is processed before srcw[0]. */
1541 while (count > 0)
1542 {
1543 count--;
1544 if (save)
1545 {
/* Save: write out the previous content of the chosen register (if any)
before loading the next local into it. */
1546 if (tmp1next)
1547 {
1548 if (!tmp1empty)
1549 {
1550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1551 stackptr += sizeof(sljit_w);
1552 }
1553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1554 tmp1empty = FALSE;
1555 tmp1next = FALSE;
1556 }
1557 else
1558 {
1559 if (!tmp2empty)
1560 {
1561 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1562 stackptr += sizeof(sljit_w);
1563 }
1564 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1565 tmp2empty = FALSE;
1566 tmp1next = TRUE;
1567 }
1568 }
1569 else
1570 {
/* Restore: store the preloaded register into the local, then refill the
register from the stack while words remain below stacktop. */
1571 if (tmp1next)
1572 {
1573 SLJIT_ASSERT(!tmp1empty);
1574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1575 tmp1empty = stackptr >= stacktop;
1576 if (!tmp1empty)
1577 {
1578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1579 stackptr += sizeof(sljit_w);
1580 }
1581 tmp1next = FALSE;
1582 }
1583 else
1584 {
1585 SLJIT_ASSERT(!tmp2empty);
1586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1587 tmp2empty = stackptr >= stacktop;
1588 if (!tmp2empty)
1589 {
1590 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1591 stackptr += sizeof(sljit_w);
1592 }
1593 tmp1next = TRUE;
1594 }
1595 }
1596 }
1597 }
1598
1599 if (save)
1600 {
/* Write out whatever is still held in the registers. The register that
would be used next holds the older value, so it is stored first. */
1601 if (tmp1next)
1602 {
1603 if (!tmp1empty)
1604 {
1605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1606 stackptr += sizeof(sljit_w);
1607 }
1608 if (!tmp2empty)
1609 {
1610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1611 stackptr += sizeof(sljit_w);
1612 }
1613 }
1614 else
1615 {
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 if (!tmp1empty)
1622 {
1623 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1624 stackptr += sizeof(sljit_w);
1625 }
1626 }
1627 }
/* The whole range must be consumed and the stack area exactly filled or drained. */
1628 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1629 }
1630
/* The iterator case-label helper macros are removed here, so they cannot be used further down in the file. */
1631 #undef CASE_ITERATOR_LOCAL1
1632 #undef CASE_ITERATOR_LOCAL2A
1633 #undef CASE_ITERATOR_LOCAL2B
1634 #undef CASE_ITERATOR_TYPE_LOCAL1
1635 #undef CASE_ITERATOR_TYPE_LOCAL2A
1636 #undef CASE_ITERATOR_TYPE_LOCAL2B
1637
1638 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1639 {
1640 return (value & (value - 1)) == 0;
1641 }
1642
1643 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1644 {
1645 while (list)
1646 {
1647 /* sljit_set_label is clever enough to do nothing
1648 if either the jump or the label is NULL */
1649 sljit_set_label(list->jump, label);
1650 list = list->next;
1651 }
1652 }
1653
1654 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1655 {
1656 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1657 if (list_item)
1658 {
1659 list_item->next = *list;
1660 list_item->jump = jump;
1661 *list = list_item;
1662 }
1663 }
1664
1665 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1666 {
1667 DEFINE_COMPILER;
1668 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1669
1670 if (list_item)
1671 {
1672 list_item->type = type;
1673 list_item->data = data;
1674 list_item->start = start;
1675 list_item->leave = LABEL();
1676 list_item->next = common->stubs;
1677 common->stubs = list_item;
1678 }
1679 }
1680
1681 static void flush_stubs(compiler_common *common)
1682 {
1683 DEFINE_COMPILER;
1684 stub_list* list_item = common->stubs;
1685
1686 while (list_item)
1687 {
1688 JUMPHERE(list_item->start);
1689 switch(list_item->type)
1690 {
1691 case stack_alloc:
1692 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1693 break;
1694 }
1695 JUMPTO(SLJIT_JUMP, list_item->leave);
1696 list_item = list_item->next;
1697 }
1698 common->stubs = NULL;
1699 }
1700
1701 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1702 {
1703 DEFINE_COMPILER;
1704
1705 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1706 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1707 }
1708
1709 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1710 {
1711 /* May destroy all locals and registers except TMP2. */
1712 DEFINE_COMPILER;
1713
1714 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1715 #ifdef DESTROY_REGISTERS
1716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1717 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1718 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1721 #endif
1722 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1723 }
1724
1725 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1726 {
1727 DEFINE_COMPILER;
1728 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1729 }
1730
1731 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1732 {
1733 DEFINE_COMPILER;
1734 struct sljit_label *loop;
1735 int i;
1736 /* At this point we can freely use all temporary registers. */
1737 /* TMP1 returns with begin - 1. */
1738 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1739 if (length < 8)
1740 {
1741 for (i = 0; i < length; i++)
1742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1743 }
1744 else
1745 {
1746 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1747 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1748 loop = LABEL();
1749 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1750 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1751 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1752 }
1753 }
1754
1755 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1756 {
1757 DEFINE_COMPILER;
1758 struct sljit_label *loop;
1759 struct sljit_jump *earlyexit;
1760
1761 /* At this point we can freely use all registers. */
1762 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1764
1765 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1766 if (common->mark_ptr != 0)
1767 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1768 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1769 if (common->mark_ptr != 0)
1770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1771 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1772 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1773 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1774 /* Unlikely, but possible */
1775 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1776 loop = LABEL();
1777 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1778 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1779 /* Copy the integer value to the output buffer */
1780 #ifdef COMPILE_PCRE16
1781 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1782 #endif
1783 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1784 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1785 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1786 JUMPHERE(earlyexit);
1787
1788 /* Calculate the return value, which is the maximum ovector value. */
1789 if (topbracket > 1)
1790 {
1791 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1792 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1793
1794 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1795 loop = LABEL();
1796 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1797 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1798 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1799 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1800 }
1801 else
1802 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1803 }
1804
1805 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1806 {
1807 DEFINE_COMPILER;
1808
1809 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1810 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1811
1812 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1813 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1814 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1815 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1816
1817 /* Store match begin and end. */
1818 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1819 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1820 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1821 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1822 #ifdef COMPILE_PCRE16
1823 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1824 #endif
1825 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1826
1827 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1828 #ifdef COMPILE_PCRE16
1829 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1830 #endif
1831 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1832
1833 JUMPTO(SLJIT_JUMP, leave);
1834 }
1835
1836 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1837 {
1838 /* May destroy TMP1. */
1839 DEFINE_COMPILER;
1840 struct sljit_jump *jump;
1841
1842 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1843 {
1844 /* The value of -1 must be kept for start_used_ptr! */
1845 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1846 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1847 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1848 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1849 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1850 JUMPHERE(jump);
1851 }
1852 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1853 {
1854 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1856 JUMPHERE(jump);
1857 }
1858 }
1859
1860 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1861 {
1862 /* Detects if the character has an othercase. */
1863 unsigned int c;
1864
1865 #ifdef SUPPORT_UTF
1866 if (common->utf)
1867 {
1868 GETCHAR(c, cc);
1869 if (c > 127)
1870 {
1871 #ifdef SUPPORT_UCP
1872 return c != UCD_OTHERCASE(c);
1873 #else
1874 return FALSE;
1875 #endif
1876 }
1877 #ifndef COMPILE_PCRE8
1878 return common->fcc[c] != c;
1879 #endif
1880 }
1881 else
1882 #endif
1883 c = *cc;
1884 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1885 }
1886
1887 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1888 {
1889 /* Returns with the othercase. */
1890 #ifdef SUPPORT_UTF
1891 if (common->utf && c > 127)
1892 {
1893 #ifdef SUPPORT_UCP
1894 return UCD_OTHERCASE(c);
1895 #else
1896 return c;
1897 #endif
1898 }
1899 #endif
1900 return TABLE_GET(c, common->fcc, c);
1901 }
1902
1903 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1904 {
1905 /* Detects if the character and its othercase has only 1 bit difference. */
1906 unsigned int c, oc, bit;
1907 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1908 int n;
1909 #endif
1910
1911 #ifdef SUPPORT_UTF
1912 if (common->utf)
1913 {
1914 GETCHAR(c, cc);
1915 if (c <= 127)
1916 oc = common->fcc[c];
1917 else
1918 {
1919 #ifdef SUPPORT_UCP
1920 oc = UCD_OTHERCASE(c);
1921 #else
1922 oc = c;
1923 #endif
1924 }
1925 }
1926 else
1927 {
1928 c = *cc;
1929 oc = TABLE_GET(c, common->fcc, c);
1930 }
1931 #else
1932 c = *cc;
1933 oc = TABLE_GET(c, common->fcc, c);
1934 #endif
1935
1936 SLJIT_ASSERT(c != oc);
1937
1938 bit = c ^ oc;
1939 /* Optimized for English alphabet. */
1940 if (c <= 127 && bit == 0x20)
1941 return (0 << 8) | 0x20;
1942
1943 /* Since c != oc, they must have at least 1 bit difference. */
1944 if (!ispowerof2(bit))
1945 return 0;
1946
1947 #ifdef COMPILE_PCRE8
1948
1949 #ifdef SUPPORT_UTF
1950 if (common->utf && c > 127)
1951 {
1952 n = GET_EXTRALEN(*cc);
1953 while ((bit & 0x3f) == 0)
1954 {
1955 n--;
1956 bit >>= 6;
1957 }
1958 return (n << 8) | bit;
1959 }
1960 #endif /* SUPPORT_UTF */
1961 return (0 << 8) | bit;
1962
1963 #else /* COMPILE_PCRE8 */
1964
1965 #ifdef COMPILE_PCRE16
1966 #ifdef SUPPORT_UTF
1967 if (common->utf && c > 65535)
1968 {
1969 if (bit >= (1 << 10))
1970 bit >>= 10;
1971 else
1972 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1973 }
1974 #endif /* SUPPORT_UTF */
1975 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1976 #endif /* COMPILE_PCRE16 */
1977
1978 #endif /* COMPILE_PCRE8 */
1979 }
1980
1981 static void check_partial(compiler_common *common, BOOL force)
1982 {
1983 /* Checks whether a partial matching is occured. Does not modify registers. */
1984 DEFINE_COMPILER;
1985 struct sljit_jump *jump = NULL;
1986
1987 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1988
1989 if (common->mode == JIT_COMPILE)
1990 return;
1991
1992 if (!force)
1993 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1994 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1995 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1996
1997 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1999 else
2000 {
2001 if (common->partialmatchlabel != NULL)
2002 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2003 else
2004 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2005 }
2006
2007 if (jump != NULL)
2008 JUMPHERE(jump);
2009 }
2010
2011 static struct sljit_jump *check_str_end(compiler_common *common)
2012 {
2013 /* Does not affect registers. Usually used in a tight spot. */
2014 DEFINE_COMPILER;
2015 struct sljit_jump *jump;
2016 struct sljit_jump *nohit;
2017 struct sljit_jump *return_value;
2018
2019 if (common->mode == JIT_COMPILE)
2020 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2021
2022 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2023 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2024 {
2025 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2027 JUMPHERE(nohit);
2028 return_value = JUMP(SLJIT_JUMP);
2029 }
2030 else
2031 {
2032 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2033 if (common->partialmatchlabel != NULL)
2034 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2035 else
2036 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2037 }
2038 JUMPHERE(jump);
2039 return return_value;
2040 }
2041
2042 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2043 {
2044 DEFINE_COMPILER;
2045 struct sljit_jump *jump;
2046
2047 if (common->mode == JIT_COMPILE)
2048 {
2049 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2050 return;
2051 }
2052
2053 /* Partial matching mode. */
2054 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2055 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2056 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2057 {
2058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2059 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2060 }
2061 else
2062 {
2063 if (common->partialmatchlabel != NULL)
2064 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2065 else
2066 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2067 }
2068 JUMPHERE(jump);
2069 }
2070
2071 static void read_char(compiler_common *common)
2072 {
2073 /* Reads the character into TMP1, updates STR_PTR.
2074 Does not check STR_END. TMP2 Destroyed. */
2075 DEFINE_COMPILER;
2076 #ifdef SUPPORT_UTF
2077 struct sljit_jump *jump;
2078 #endif
2079
2080 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2081 #ifdef SUPPORT_UTF
2082 if (common->utf)
2083 {
2084 #ifdef COMPILE_PCRE8
2085 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2086 #else
2087 #ifdef COMPILE_PCRE16
2088 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2089 #endif
2090 #endif /* COMPILE_PCRE8 */
2091 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2092 JUMPHERE(jump);
2093 }
2094 #endif
2095 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2096 }
2097
2098 static void peek_char(compiler_common *common)
2099 {
2100 /* Reads the character into TMP1, keeps STR_PTR.
2101 Does not check STR_END. TMP2 Destroyed. */
2102 DEFINE_COMPILER;
2103 #ifdef SUPPORT_UTF
2104 struct sljit_jump *jump;
2105 #endif
2106
2107 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2108 #ifdef SUPPORT_UTF
2109 if (common->utf)
2110 {
2111 #ifdef COMPILE_PCRE8
2112 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2113 #else
2114 #ifdef COMPILE_PCRE16
2115 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2116 #endif
2117 #endif /* COMPILE_PCRE8 */
2118 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2119 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2120 JUMPHERE(jump);
2121 }
2122 #endif
2123 }
2124
2125 static void read_char8_type(compiler_common *common)
2126 {
2127 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2128 DEFINE_COMPILER;
2129 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2130 struct sljit_jump *jump;
2131 #endif
2132
2133 #ifdef SUPPORT_UTF
2134 if (common->utf)
2135 {
2136 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2137 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2138 #ifdef COMPILE_PCRE8
2139 /* This can be an extra read in some situations, but hopefully
2140 it is needed in most cases. */
2141 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2142 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2143 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2144 JUMPHERE(jump);
2145 #else
2146 #ifdef COMPILE_PCRE16
2147 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2148 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2149 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2150 JUMPHERE(jump);
2151 /* Skip low surrogate if necessary. */
2152 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2154 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2155 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2157 #endif
2158 #endif /* COMPILE_PCRE8 */
2159 return;
2160 }
2161 #endif
2162 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2164 #ifdef COMPILE_PCRE16
2165 /* The ctypes array contains only 256 values. */
2166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2167 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2168 #endif
2169 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2170 #ifdef COMPILE_PCRE16
2171 JUMPHERE(jump);
2172 #endif
2173 }
2174
2175 static void skip_char_back(compiler_common *common)
2176 {
2177 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2178 DEFINE_COMPILER;
2179 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2180 struct sljit_label *label;
2181
2182 if (common->utf)
2183 {
2184 label = LABEL();
2185 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2186 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2187 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2188 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2189 return;
2190 }
2191 #endif
2192 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2193 if (common->utf)
2194 {
2195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2196 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2197 /* Skip low surrogate if necessary. */
2198 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2199 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2200 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2201 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2202 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2203 return;
2204 }
2205 #endif
2206 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2207 }
2208
2209 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2210 {
2211 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2212 DEFINE_COMPILER;
2213
2214 if (nltype == NLTYPE_ANY)
2215 {
2216 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2217 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2218 }
2219 else if (nltype == NLTYPE_ANYCRLF)
2220 {
2221 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2222 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2224 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2225 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2226 }
2227 else
2228 {
2229 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2230 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2231 }
2232 }
2233
2234 #ifdef SUPPORT_UTF
2235
2236 #ifdef COMPILE_PCRE8
/* Emits the out-of-line routine that decodes a multi-byte UTF-8 character.
   The routine is entered with sljit_emit_fast_enter() and left with
   sljit_emit_fast_return(), both using RETURN_ADDR. Continuation bytes are
   loaded from offsets 1..3 relative to STR_PTR, and STR_PTR is advanced by
   the number of continuation bytes that were consumed. Sequences longer
   than four bytes are not distinguished: everything that is neither a two
   nor a three byte sequence is decoded as a four byte sequence. */
2237 static void do_utfreadchar(compiler_common *common)
2238 {
2239 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2240 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2241 DEFINE_COMPILER;
2242 struct sljit_jump *jump;
2243
2244 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2245 /* Searching for the first zero. Bit 0x20 clear selects the two byte form. */
2246 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2247 jump = JUMP(SLJIT_C_NOT_ZERO);
2248 /* Two byte sequence. */
2249 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2250 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Result = (lead & 0x1f) << 6 | (second & 0x3f). */
2251 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2252 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2253 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2254 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2256 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2257 JUMPHERE(jump);
2258
/* Bit 0x10 clear selects the three byte form. */
2259 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2260 jump = JUMP(SLJIT_C_NOT_ZERO);
2261 /* Three byte sequence. */
2262 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Result = (lead & 0x0f) << 12 | (second & 0x3f) << 6 | (third & 0x3f). */
2263 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2264 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2265 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2266 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2267 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2268 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2269 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2270 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2271 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2272 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2273 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2274 JUMPHERE(jump);
2275
2276 /* Four byte sequence. */
2277 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Result = (lead & 0x07) << 18 | (second & 0x3f) << 12 |
   (third & 0x3f) << 6 | (fourth & 0x3f). */
2278 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2279 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2280 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2281 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2282 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2283 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2284 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2285 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2286 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2287 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2288 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2289 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2290 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2292 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2293 }
2294
/* Emits the out-of-line routine that fetches the ctypes entry of a
   multi-byte UTF-8 character. Only two byte sequences can decode to a value
   below 256; for these the type is loaded from common->ctypes. For every
   other character the returned type is 0. STR_PTR is advanced over the
   continuation bytes in all cases.
   NOTE(review): the second byte is loaded from offset 0 of STR_PTR, so the
   caller has presumably already stepped over the first byte - confirm. */
2295 static void do_utfreadtype8(compiler_common *common)
2296 {
2297 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2298 of the character (>= 0xc0). Return value in TMP1. */
2299 DEFINE_COMPILER;
2300 struct sljit_jump *jump;
2301 struct sljit_jump *compare;
2302
2303 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2304
/* Bit 0x20 set means the sequence is longer than two bytes. */
2305 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2306 jump = JUMP(SLJIT_C_NOT_ZERO);
2307 /* Two byte sequence. */
2308 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2309 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* TMP2 = (lead & 0x1f) << 6 | (second & 0x3f). */
2310 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2311 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2312 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2313 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* The ctypes table is only indexed with values up to 255. */
2314 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2315 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2316 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2317
2318 JUMPHERE(compare);
2319 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2320 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2321 JUMPHERE(jump);
2322
2323 /* We only have types for characters less than 256. */
/* The utf8_table4 entry selected by the first byte is added to STR_PTR. */
2324 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2325 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2327 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2328 }
2329
2330 #else /* COMPILE_PCRE8 */
2331
2332 #ifdef COMPILE_PCRE16
/* Emits the out-of-line routine that decodes a UTF-16 character whose first
   unit is 0xd800 or above. Units below 0xdc00 are combined with the unit
   that follows them; any other unit is returned unchanged, and TMP2 is not
   written on that path. */
2333 static void do_utfreadchar(compiler_common *common)
2334 {
2335 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2336 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2337 DEFINE_COMPILER;
2338 struct sljit_jump *jump;
2339
2340 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2341 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2342 /* Do nothing, only return. */
2343 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2344
2345 JUMPHERE(jump);
2346 /* Combine two 16 bit characters. */
/* Result = ((first & 0x3ff) << 10 | (second & 0x3ff)) + 0x10000; STR_PTR is
   advanced over the second unit. */
2347 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2349 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2350 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2351 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2352 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2353 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2354 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2355 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2356 }
2357 #endif /* COMPILE_PCRE16 */
2358
2359 #endif /* COMPILE_PCRE8 */
2360
2361 #endif /* SUPPORT_UTF */
2362
2363 #ifdef SUPPORT_UCP
2364
2365 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2366 #define UCD_BLOCK_MASK 127
2367 #define UCD_BLOCK_SHIFT 7
2368
/* Emits the out-of-line routine that looks up the Unicode character type of
   the character in TMP1 through the two stage UCD tables:
     block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]
     offset = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
     type   = ucd_records[offset].chartype
   The shifts and scale factors below rely on the asserted block size of 128
   and record size of 8 bytes. */
2369 static void do_getucd(compiler_common *common)
2370 {
2371 /* Search the UCD record for the character comes in TMP1.
2372 Returns chartype in TMP1 and UCD offset in TMP2. */
2373 DEFINE_COMPILER;
2374
2375 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2376
2377 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: one byte per block of 128 characters. */
2378 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2379 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2380 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2381 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2382 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Stage 2: half word entries; the trailing 1 is presumably the index shift
   of the two register addressing form - confirm against the sljit API. */
2383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2384 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Records: the chartype byte of the selected record; the trailing 3 would
   then scale the index by the 8 byte record size. */
2385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2386 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2387 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2388 }
2389 #endif
2390
/* Emits the code at the top of the main matching loop and returns the label
   that advances STR_PTR to the next start position.

   - When firstline is set, code is emitted first that scans for the end of
     the first line and stores it in common->first_line_end. STR_PTR is kept
     in LOCALS0 during the scan and reloaded afterwards.
   - The unconditional "start" jump makes the first pass skip the advancing
     code; it lands after that code, at the end of this function.
   - newlinecheck is enabled when neither hascrorlf nor firstline is set and
     the newline type is ANY, ANYCRLF or a two unit fixed newline. In that
     case a start position on the first unit of the newline is advanced over
     both units when the second unit follows.
   - In UTF mode the advance steps over a whole character. */
2391 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2392 {
2393 DEFINE_COMPILER;
2394 struct sljit_label *mainloop;
2395 struct sljit_label *newlinelabel = NULL;
2396 struct sljit_jump *start;
2397 struct sljit_jump *end = NULL;
2398 struct sljit_jump *nl = NULL;
2399 #ifdef SUPPORT_UTF
2400 struct sljit_jump *singlechar;
2401 #endif
2402 jump_list *newline = NULL;
2403 BOOL newlinecheck = FALSE;
2404 BOOL readuchar = FALSE;
2405
2406 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2407 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2408 newlinecheck = TRUE;
2409
2410 if (firstline)
2411 {
2412 /* Search for the end of the first line. */
2413 SLJIT_ASSERT(common->first_line_end != 0);
2414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
2415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
2416
2417 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2418 {
/* Two unit fixed newline: compare the units at STR_PTR - 1 and STR_PTR
   with the high and low byte of common->newline. */
2419 mainloop = LABEL();
2420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2421 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2423 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2424 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2425 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2426 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 }
2428 else
2429 {
2430 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2431 mainloop = LABEL();
2432 /* Continual stores does not cause data dependency. */
2433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2434 read_char(common);
2435 check_newlinechar(common, common->nltype, &newline, TRUE);
2436 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
/* No newline was found: the first line ends where the scan stopped. */
2437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2438 set_jumps(newline, LABEL());
2439 }
2440
2441 JUMPHERE(end);
2442 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2443 }
2444
/* The first pass must not advance STR_PTR. */
2445 start = JUMP(SLJIT_JUMP);
2446
2447 if (newlinecheck)
2448 {
/* Reached from the main loop when the unit at STR_PTR equals the first
   newline unit: step over it, then over the second unit as well when it
   is present (TMP1 becomes 0 or 1, scaled to bytes in the 16 bit build). */
2449 newlinelabel = LABEL();
2450 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2451 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2452 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2454 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2455 #ifdef COMPILE_PCRE16
2456 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2457 #endif
2458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2459 nl = JUMP(SLJIT_JUMP);
2460 }
2461
2462 mainloop = LABEL();
2463
2464 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2465 #ifdef SUPPORT_UTF
2466 if (common->utf) readuchar = TRUE;
2467 #endif
2468 if (newlinecheck) readuchar = TRUE;
2469
/* The current unit is only loaded when one of the checks below needs it. */
2470 if (readuchar)
2471 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2472
2473 if (newlinecheck)
2474 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2475
2476 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2478 if (common->utf)
2479 {
/* For lead bytes (>= 0xc0) add the utf8_table4 entry of the byte. */
2480 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2481 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2482 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2483 JUMPHERE(singlechar);
2484 }
2485 #endif
2486 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2487 if (common->utf)
2488 {
/* Units whose top six bits equal those of 0xd800 are followed by a second
   unit: add one more unit (two bytes) for them. */
2489 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2490 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2491 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2492 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2493 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2495 JUMPHERE(singlechar);
2496 }
2497 #endif
2498 JUMPHERE(start);
2499
2500 if (newlinecheck)
2501 {
2502 JUMPHERE(end);
2503 JUMPHERE(nl);
2504 }
2505
2506 return mainloop;
2507 }
2508
2509 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2510 {
2511 DEFINE_COMPILER;
2512 struct sljit_label *start;
2513 struct sljit_jump *leave;
2514 struct sljit_jump *found;
2515 pcre_int32 chars[4];
2516 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2517 int index = 0;
2518 pcre_int32 len, c, bit;
2519 unsigned int caseless;
2520 BOOL must_end;
2521
2522 #ifdef COMPILE_PCRE8
2523 union {
2524 sljit_uh ascombined;
2525 sljit_ub asuchars[2];
2526 } pair;
2527 #else
2528 union {
2529 sljit_ui ascombined;
2530 sljit_uh asuchars[2];
2531 } pair;
2532 #endif
2533
2534 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2535 return FALSE;
2536
2537 while (TRUE)
2538 {
2539 caseless = 0;
2540 must_end = TRUE;
2541 switch(*cc)
2542 {
2543 case OP_CHAR:
2544 must_end = FALSE;
2545 cc++;
2546 break;
2547
2548 case OP_CHARI:
2549 caseless = 1;
2550 must_end = FALSE;
2551 cc++;
2552 break;
2553
2554 case OP_SOD:
2555 case OP_SOM:
2556 case OP_SET_SOM:
2557 case OP_NOT_WORD_BOUNDARY:
2558 case OP_WORD_BOUNDARY:
2559 case OP_EODN:
2560 case OP_EOD:
2561 case OP_CIRC:
2562 case OP_CIRCM:
2563 case OP_DOLL:
2564 case OP_DOLLM:
2565 /* Zero width assertions. */
2566 cc++;
2567 continue;
2568
2569 case OP_PLUS:
2570 case OP_MINPLUS:
2571 case OP_POSPLUS:
2572 cc++;
2573 break;
2574
2575 case OP_EXACT:
2576 cc += 1 + IMM2_SIZE;
2577 break;
2578
2579 case OP_PLUSI:
2580 case OP_MINPLUSI:
2581 case OP_POSPLUSI:
2582 caseless = 1;
2583 cc++;
2584 break;
2585
2586 case OP_EXACTI:
2587 caseless = 1;
2588 cc += 1 + IMM2_SIZE;
2589 break;
2590
2591 default:
2592 return FALSE;
2593 }
2594
2595 len = 1;
2596 #ifdef SUPPORT_UTF
2597 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2598 #endif
2599
2600 if (caseless && char_has_othercase(common, cc))
2601 {
2602 caseless = char_get_othercase_bit(common, cc);
2603 if (caseless == 0)
2604 return FALSE;
2605 #ifdef COMPILE_PCRE8
2606 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2607 #else
2608 if ((caseless & 0x100) != 0)
2609 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2610 else
2611 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2612 #endif
2613 }
2614 else
2615 caseless = 0;
2616
2617 while (len > 0 && index < 2 * 2)
2618 {
2619 c = *cc;
2620 bit = 0;
2621 if (len == (caseless & 0xff))
2622 {
2623 bit = caseless >> 8;
2624 c |= bit;
2625 }
2626
2627 chars[index] = c;
2628 chars[index + 1] = bit;
2629
2630 len--;
2631 index += 2;
2632 cc++;
2633 }
2634
2635 if (index == 2 * 2)
2636 break;
2637 else if (must_end)
2638 return FALSE;
2639 }
2640
2641 if (firstline)
2642 {
2643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2644 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2645 }
2646 else
2647 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2648
2649 start = LABEL();
2650 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2651 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2652 #ifdef COMPILE_PCRE8
2653 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2654 #else /* COMPILE_PCRE8 */
2655 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2656 #endif
2657
2658 #else /* SLJIT_UNALIGNED */
2659
2660 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2661 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2662 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2663 #else /* SLJIT_BIG_ENDIAN */
2664 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2665 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2666 #endif /* SLJIT_BIG_ENDIAN */
2667
2668 #ifdef COMPILE_PCRE8
2669 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2670 #else /* COMPILE_PCRE8 */
2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2672 #endif
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674
2675 #endif
2676
2677 if (chars[1] != 0 || chars[3] != 0)
2678 {
2679 pair.asuchars[0] = chars[1];
2680 pair.asuchars[1] = chars[3];
2681 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2682 }
2683
2684 pair.asuchars[0] = chars[0];
2685 pair.asuchars[1] = chars[2];
2686 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2687
2688 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2689 JUMPTO(SLJIT_JUMP, start);
2690 JUMPHERE(found);
2691 JUMPHERE(leave);
2692
2693 if (firstline)
2694 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2695 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2696 return TRUE;
2697 }
2698
2699 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2700 {
2701 DEFINE_COMPILER;
2702 struct sljit_label *start;
2703 struct sljit_jump *leave;
2704 struct sljit_jump *found;
2705 pcre_uchar oc, bit;
2706
2707 if (firstline)
2708 {
2709 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2710 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2711 }
2712
2713 start = LABEL();
2714 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2715 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2716
2717 oc = first_char;
2718 if (caseless)
2719 {
2720 oc = TABLE_GET(first_char, common->fcc, first_char);
2721 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2722 if (first_char > 127 && common->utf)
2723 oc = UCD_OTHERCASE(first_char);
2724 #endif
2725 }
2726 if (first_char == oc)
2727 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2728 else
2729 {
2730 bit = first_char ^ oc;
2731 if (ispowerof2(bit))
2732 {
2733 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2734 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2735 }
2736 else
2737 {
2738 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2739 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2740 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2741 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2742 found = JUMP(SLJIT_C_NOT_ZERO);
2743 }
2744 }
2745
2746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2747 JUMPTO(SLJIT_JUMP, start);
2748 JUMPHERE(found);
2749 JUMPHERE(leave);
2750
2751 if (firstline)
2752 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2753 }
2754
/* Emits code that moves STR_PTR forward until it is just after a newline.
   Nothing is skipped when STR_PTR is not beyond jit_arguments.str, or when it
   is already at STR_END. When firstline is set the scan is limited by
   common->first_line_end; the real STR_END is kept in POSSESSIVE0 meanwhile
   and reloaded before leaving.

   Two variants are generated: a unit pair compare for a two unit fixed
   newline (common->newline > 255), and a generic loop built on read_char()
   and check_newlinechar() for every other newline type. */
2755 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2756 {
2757 DEFINE_COMPILER;
2758 struct sljit_label *loop;
2759 struct sljit_jump *lastchar;
2760 struct sljit_jump *firstchar;
2761 struct sljit_jump *leave;
2762 struct sljit_jump *foundcr = NULL;
2763 struct sljit_jump *notfoundnl;
2764 jump_list *newline = NULL;
2765
2766 if (firstline)
2767 {
2768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2769 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2770 }
2771
2772 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2773 {
2774 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2775 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2776 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2778 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2779
/* Step back one unit when STR_PTR is at least two units after "begin"
   (TMP2 becomes 0 or 1, scaled to bytes in the 16 bit build), so that the
   loop below may look at the two units before the incremented STR_PTR. */
2780 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2781 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2782 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2783 #ifdef COMPILE_PCRE16
2784 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2785 #endif
2786 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2787
2788 loop = LABEL();
2789 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2790 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2791 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2792 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2793 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2794 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2795
2796 JUMPHERE(leave);
2797 JUMPHERE(firstchar);
2798 JUMPHERE(lastchar);
2799
2800 if (firstline)
2801 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2802 return;
2803 }
2804
2805 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2806 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2807 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Start with the character before STR_PTR. */
2808 skip_char_back(common);
2809
2810 loop = LABEL();
2811 read_char(common);
2812 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2813 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2814 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* Characters that are not newlines send control back to the loop. */
2815 check_newlinechar(common, common->nltype, &newline, FALSE);
2816 set_jumps(newline, loop);
2817
2818 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2819 {
2820 leave = JUMP(SLJIT_JUMP);
2821 JUMPHERE(foundcr);
/* A CR was read: also step over an NL that follows it directly (TMP1
   becomes 0 or 1, scaled to bytes in the 16 bit build). */
2822 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2823 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2824 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2825 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2826 #ifdef COMPILE_PCRE16
2827 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2828 #endif
2829 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2830 JUMPHERE(notfoundnl);
2831 JUMPHERE(leave);
2832 }
2833 JUMPHERE(lastchar);
2834 JUMPHERE(firstchar);
2835
2836 if (firstline)
2837 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2838 }
2839
/* Emits a loop that moves STR_PTR forward until the code unit at STR_PTR has
   its bit set in the bitmap located at start_bits (one bit per value, eight
   values per byte), or until STR_END is reached. In builds other than the
   8 bit one, every value of 255 or above is tested as 255. In UTF mode the
   loop steps over whole characters; TMP3 keeps the original unit for that.
   When firstline is set the scan is limited by common->first_line_end; the
   real STR_END is kept in POSSESSIVE0 meanwhile and reloaded at the end. */
2840 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2841 {
2842 DEFINE_COMPILER;
2843 struct sljit_label *start;
2844 struct sljit_jump *leave;
2845 struct sljit_jump *found;
2846 #ifndef COMPILE_PCRE8
2847 struct sljit_jump *jump;
2848 #endif
2849
2850 if (firstline)
2851 {
2852 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2853 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2854 }
2855
2856 start = LABEL();
2857 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2858 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2859 #ifdef SUPPORT_UTF
2860 if (common->utf)
2861 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2862 #endif
2863 #ifndef COMPILE_PCRE8
/* Clamp the value to 255 before indexing the bitmap. */
2864 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2865 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2866 JUMPHERE(jump);
2867 #endif
/* Test bit (c & 7) of byte (c >> 3) of the bitmap. */
2868 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2869 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2870 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2871 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2872 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2873 found = JUMP(SLJIT_C_NOT_ZERO);
2874
2875 #ifdef SUPPORT_UTF
2876 if (common->utf)
2877 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2878 #endif
2879 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2880 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2881 if (common->utf)
2882 {
/* For lead bytes (>= 0xc0) add the utf8_table4 entry of the byte. */
2883 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2884 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2885 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2886 }
2887 #endif
2888 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2889 if (common->utf)
2890 {
/* Units whose top six bits equal those of 0xd800 are followed by a second
   unit: add one more unit (two bytes) for them. */
2891 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2892 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2893 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2894 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2895 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2896 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2897 }
2898 #endif
2899 JUMPTO(SLJIT_JUMP, start);
2900 JUMPHERE(found);
2901 JUMPHERE(leave);
2902
2903 if (firstline)
2904 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2905 }
2906
2907 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2908 {
2909 DEFINE_COMPILER;
2910 struct sljit_label *loop;
2911 struct sljit_jump *toolong;
2912 struct sljit_jump *alreadyfound;
2913 struct sljit_jump *found;
2914 struct sljit_jump *foundoc = NULL;
2915 struct sljit_jump *notfound;
2916 pcre_uchar oc, bit;
2917
2918 SLJIT_ASSERT(common->req_char_ptr != 0);
2919 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2920 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2921 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2922 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2923
2924 if (has_firstchar)
2925 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2926 else
2927 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2928
2929 loop = LABEL();
2930 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2931
2932 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2933 oc = req_char;
2934 if (caseless)
2935 {
2936 oc = TABLE_GET(req_char, common->fcc, req_char);
2937 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2938 if (req_char > 127 && common->utf)
2939 oc = UCD_OTHERCASE(req_char);
2940 #endif
2941 }
2942 if (req_char == oc)
2943 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2944 else
2945 {
2946 bit = req_char ^ oc;
2947 if (ispowerof2(bit))
2948 {
2949 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2950 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2951 }
2952 else
2953 {
2954 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2955 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2956 }
2957 }
2958 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2959 JUMPTO(SLJIT_JUMP, loop);
2960
2961 JUMPHERE(found);
2962 if (foundoc)
2963 JUMPHERE(foundoc);
2964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2965 JUMPHERE(alreadyfound);
2966 JUMPHERE(toolong);
2967 return notfound;
2968 }
2969
/* Emits the out-of-line routine that walks the frame entries starting at
   STACK_TOP and undoes them until a frame_end entry is found. The first word
   of every entry selects its kind:
     - a value above frame_end (signed compare) is an offset from the local
       base; the two words that follow it are copied to that local and to
       the one after it (three word entry);
     - frame_end finishes the routine;
     - frame_setstrbegin stores the word that follows into OVECTOR(0)
       (two word entry);
     - frame_setmark, only handled when common->mark_ptr is in use, stores
       the word that follows into common->mark_ptr (two word entry);
     - anything else is stepped over as a two word entry.
   TMP1 is the walking pointer, TMP2 the current word, TMP3 the local base. */
2970 static void do_revertframes(compiler_common *common)
2971 {
2972 DEFINE_COMPILER;
2973 struct sljit_jump *jump;
2974 struct sljit_label *mainloop;
2975
2976 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2977 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2978 GET_LOCAL_BASE(TMP3, 0, 0);
2979
2980 /* Drop frames until we reach STACK_TOP. */
2981 mainloop = LABEL();
2982 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2983 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Restore two consecutive locals from the entry. */
2984 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2985 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2986 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2987 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2988 JUMPTO(SLJIT_JUMP, mainloop);
2989
2990 JUMPHERE(jump);
2991 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2992 /* End of dropping frames. */
2993 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2994
2995 JUMPHERE(jump);
2996 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2997 /* Set string begin. */
2998 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2999 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3000 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3001 JUMPTO(SLJIT_JUMP, mainloop);
3002
3003 JUMPHERE(jump);
3004 if (common->mark_ptr != 0)
3005 {
/* Set mark. */
3006 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3008 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3010 JUMPTO(SLJIT_JUMP, mainloop);
3011
3012 JUMPHERE(jump);
3013 }
3014
3015 /* Unknown command. */
3016 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3017 JUMPTO(SLJIT_JUMP, mainloop);
3018 }
3019
/* Emits the out-of-line routine that tests for a word boundary at STR_PTR.
   The return address is kept in LOCALS0. The routine works out whether the
   character before STR_PTR is a word character (0 or 1, stored in LOCALS1;
   0 when STR_PTR is not beyond jit_arguments.begin) and whether the
   character at STR_PTR is one (0 or 1 in TMP2; 0 when check_str_end() makes
   the read be skipped). The final XOR of the two values sets the flags for
   the caller: the values differ exactly at a word boundary.

   With use_ucp a word character is an underscore or a character whose UCD
   type lies in the ucp_Ll..ucp_Lu or ucp_Nd..ucp_No range; otherwise the
   ctype_word bit of the ctypes table is used and characters above 255 are
   never word characters. */
3020 static void check_wordboundary(compiler_common *common)
3021 {
3022 DEFINE_COMPILER;
3023 struct sljit_jump *skipread;
3024 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3025 struct sljit_jump *jump;
3026 #endif
3027
/* The shift by 4 below is only right while ctype_word is 0x10. */
3028 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3029
3030 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3031 /* Get type of the previous char, and put it to LOCALS1. */
3032 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3033 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3035 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which brings STR_PTR back. */
3036 skip_char_back(common);
3037 check_start_used_ptr(common);
3038 read_char(common);
3039
3040 /* Testing char type. */
3041 #ifdef SUPPORT_UCP
3042 if (common->use_ucp)
3043 {
/* TMP2 is preset to 1 for the underscore, which skips the UCD lookup. */
3044 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3045 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3046 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks on the character type. */
3047 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3048 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3049 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3050 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3051 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3052 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3053 JUMPHERE(jump);
3054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3055 }
3056 else
3057 #endif
3058 {
3059 #ifndef COMPILE_PCRE8
3060 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3061 #elif defined SUPPORT_UTF
3062 /* Here LOCALS1 has already been zeroed. */
3063 jump = NULL;
3064 if (common->utf)
3065 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3066 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit of the ctypes entry as 0 or 1. */
3067 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3068 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3069 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3071 #ifndef COMPILE_PCRE8
3072 JUMPHERE(jump);
3073 #elif defined SUPPORT_UTF
3074 if (jump != NULL)
3075 JUMPHERE(jump);
3076 #endif /* COMPILE_PCRE8 */
3077 }
3078 JUMPHERE(skipread);
3079
/* Now the character at STR_PTR; TMP2 stays 0 when the read is skipped. */
3080 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3081 skipread = check_str_end(common);
3082 peek_char(common);
3083
3084 /* Testing char type. This is a code duplication. */
3085 #ifdef SUPPORT_UCP
3086 if (common->use_ucp)
3087 {
3088 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3089 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3090 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3091 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3092 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3093 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3094 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3095 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3096 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3097 JUMPHERE(jump);
3098 }
3099 else
3100 #endif
3101 {
3102 #ifndef COMPILE_PCRE8
3103 /* TMP2 may be destroyed by peek_char. */
3104 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3105 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3106 #elif defined SUPPORT_UTF
3107 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3108 jump = NULL;
3109 if (common->utf)
3110 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3111 #endif
3112 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3113 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3114 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3115 #ifndef COMPILE_PCRE8
3116 JUMPHERE(jump);
3117 #elif defined SUPPORT_UTF
3118 if (jump != NULL)
3119 JUMPHERE(jump);
3120 #endif /* COMPILE_PCRE8 */
3121 }
3122 JUMPHERE(skipread);
3123
/* Compare the two results; only the flags are produced. */
3124 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3125 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3126 }
3127
/* Emits the out-of-line routine behind common->anynewline. It accepts the
   range 0x0a..0x0d and 0x85, and additionally 0x2028 and 0x2029 in the
   16 bit build or when common->utf is set. The answer is left in TMP2 and in
   the flags set by the last OR (non-zero means newline). TMP1 is modified as
   well: 0x0a is subtracted from it and, on the wide character path, its
   lowest bit is set. */
3128 static void check_anynewline(compiler_common *common)
3129 {
3130 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3131 DEFINE_COMPILER;
3132
3133 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3134
/* One unsigned compare covers the whole 0x0a..0x0d range. */
3135 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3136 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3137 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3139 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3140 #ifdef COMPILE_PCRE8
3141 if (common->utf)
3142 {
3143 #endif
3144 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit lets one compare match both 0x2028 and 0x2029. */
3145 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3147 #ifdef COMPILE_PCRE8
3148 }
3149 #endif
3150 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the result of the last compare emitted above. */
3151 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3152 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3153 }
3154
/* Emits the out-of-line routine that tests the character in TMP1 against the
   horizontal space characters 0x09, 0x20 and 0xa0, and additionally, in the
   16 bit build or when common->utf is set, against 0x1680, 0x180e,
   0x2000..0x200a, 0x202f, 0x205f and 0x3000. The answer is left in TMP2 and
   in the flags set by the last OR (non-zero means match). On the wide
   character path TMP1 is modified as well: 0x2000 is subtracted from it. */
3155 static void check_hspace(compiler_common *common)
3156 {
3157 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3158 DEFINE_COMPILER;
3159
3160 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3161
3162 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3163 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3164 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3165 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3166 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3167 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3168 #ifdef COMPILE_PCRE8
3169 if (common->utf)
3170 {
3171 #endif
3172 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3173 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3174 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3175 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3176 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is relative to 0x2000; one unsigned compare covers the
   whole 0x2000..0x200a range. */
3177 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3178 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3179 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3181 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3182 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3183 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3185 #ifdef COMPILE_PCRE8
3186 }
3187 #endif
3188 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the result of the last compare emitted above. */
3189 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3190
3191 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3192 }
3193
3194 static void check_vspace(compiler_common *common)
3195 {
3196 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3197 DEFINE_COMPILER;
3198
3199 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3200
3201 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3202 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3203 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3204 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3205 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3206 #ifdef COMPILE_PCRE8
3207 if (common->utf)
3208 {
3209 #endif
3210 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3211 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3212 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3213 #ifdef COMPILE_PCRE8
3214 }
3215 #endif
3216 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3217 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3218
3219 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3220 }
3221
3222 #define CHAR1 STR_END
3223 #define CHAR2 STACK_TOP
3224
3225 static void do_casefulcmp(compiler_common *common)
3226 {
3227 DEFINE_COMPILER;
3228 struct sljit_jump *jump;
3229 struct sljit_label *label;
3230
3231 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3232 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3233 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3234 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3235 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3236 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3237
3238 label = LABEL();
3239 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3240 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3241 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3242 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3243 JUMPTO(SLJIT_C_NOT_ZERO, label);
3244
3245 JUMPHERE(jump);
3246 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3247 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3248 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3249 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3250 }
3251
3252 #define LCC_TABLE STACK_LIMIT
3253
3254 static void do_caselesscmp(compiler_common *common)
3255 {
3256 DEFINE_COMPILER;
3257 struct sljit_jump *jump;
3258 struct sljit_label *label;
3259
3260 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3261 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3262
3263 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3266 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3267 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3268 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3269
3270 label = LABEL();
3271 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3272 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3273 #ifndef COMPILE_PCRE8
3274 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3275 #endif
3276 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3277 #ifndef COMPILE_PCRE8
3278 JUMPHERE(jump);
3279 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3280 #endif
3281 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3282 #ifndef COMPILE_PCRE8
3283 JUMPHERE(jump);
3284 #endif
3285 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3286 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3287 JUMPTO(SLJIT_C_NOT_ZERO, label);
3288
3289 JUMPHERE(jump);
3290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3291 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3292 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3293 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3294 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3295 }
3296
3297 #undef LCC_TABLE
3298 #undef CHAR1
3299 #undef CHAR2
3300
#if defined SUPPORT_UTF && defined SUPPORT_UCP

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* Caseless comparison of the characters in [src1, end1) against the text that
starts at args->uchar_ptr and is limited by args->end. It is a plain C helper
because generating this at JIT level would be inefficient.

Returns:
  the position reached in the second text when every character matched,
  NULL at the first character that differs even after case folding,
  (pcre_uchar*)1 when the second text ran out before the first one did. */
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
int c1, c2;

for (;;)
  {
  if (src1 >= end1)
    return src2;
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  if (c1 == c2 || c1 == UCD_OTHERCASE(c2))
    continue;
  return NULL;
  }
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3322
3323 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3324 compare_context* context, jump_list **backtracks)
3325 {
3326 DEFINE_COMPILER;
3327 unsigned int othercasebit = 0;
3328 pcre_uchar *othercasechar = NULL;
3329 #ifdef SUPPORT_UTF
3330 int utflength;
3331 #endif
3332
3333 if (caseless && char_has_othercase(common, cc))
3334 {
3335 othercasebit = char_get_othercase_bit(common, cc);
3336 SLJIT_ASSERT(othercasebit);
3337 /* Extracting bit difference info. */
3338 #ifdef COMPILE_PCRE8
3339 othercasechar = cc + (othercasebit >> 8);
3340 othercasebit &= 0xff;
3341 #else
3342 #ifdef COMPILE_PCRE16
3343 othercasechar = cc + (othercasebit >> 9);
3344 if ((othercasebit & 0x100) != 0)
3345 othercasebit = (othercasebit & 0xff) << 8;
3346 else
3347 othercasebit &= 0xff;
3348 #endif
3349 #endif
3350 }
3351
3352 if (context->sourcereg == -1)
3353 {
3354 #ifdef COMPILE_PCRE8
3355 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3356 if (context->length >= 4)
3357 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3358 else if (context->length >= 2)
3359 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3360 else
3361 #endif
3362 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3363 #else
3364 #ifdef COMPILE_PCRE16
3365 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3366 if (context->length >= 4)
3367 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3368 else
3369 #endif
3370 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3371 #endif
3372 #endif /* COMPILE_PCRE8 */
3373 context->sourcereg = TMP2;
3374 }
3375
3376 #ifdef SUPPORT_UTF
3377 utflength = 1;
3378 if (common->utf && HAS_EXTRALEN(*cc))
3379 utflength += GET_EXTRALEN(*cc);
3380
3381 do
3382 {
3383 #endif
3384
3385 context->length -= IN_UCHARS(1);
3386 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3387
3388 /* Unaligned read is supported. */
3389 if (othercasebit != 0 && othercasechar == cc)
3390 {
3391 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3392 context->oc.asuchars[context->ucharptr] = othercasebit;
3393 }
3394 else
3395 {
3396 context->c.asuchars[context->ucharptr] = *cc;
3397 context->oc.asuchars[context->ucharptr] = 0;
3398 }
3399 context->ucharptr++;
3400
3401 #ifdef COMPILE_PCRE8
3402 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3403 #else
3404 if (context->ucharptr >= 2 || context->length == 0)
3405 #endif
3406 {
3407 if (context->length >= 4)
3408 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3409 #ifdef COMPILE_PCRE8
3410 else if (context->length >= 2)
3411 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3412 else if (context->length >= 1)
3413 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3414 #else
3415 else if (context->length >= 2)
3416 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3417 #endif
3418 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3419
3420 switch(context->ucharptr)
3421 {
3422 case 4 / sizeof(pcre_uchar):
3423 if (context->oc.asint != 0)
3424 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3425 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3426 break;
3427
3428 case 2 / sizeof(pcre_uchar):
3429 if (context->oc.asushort != 0)
3430 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3431 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3432 break;
3433
3434 #ifdef COMPILE_PCRE8
3435 case 1:
3436 if (context->oc.asbyte != 0)
3437 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3438 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3439 break;
3440 #endif
3441
3442 default:
3443 SLJIT_ASSERT_STOP();
3444 break;
3445 }
3446 context->ucharptr = 0;
3447 }
3448
3449 #else
3450
3451 /* Unaligned read is unsupported. */
3452 #ifdef COMPILE_PCRE8
3453 if (context->length > 0)
3454 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3455 #else
3456 if (context->length > 0)
3457 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3458 #endif
3459 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3460
3461 if (othercasebit != 0 && othercasechar == cc)
3462 {
3463 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3465 }
3466 else
3467 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3468
3469 #endif
3470
3471 cc++;
3472 #ifdef SUPPORT_UTF
3473 utflength--;
3474 }
3475 while (utflength > 0);
3476 #endif
3477
3478 return cc;
3479 }
3480
3481 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3482
/* Rebases the register that holds the character type so that it contains
"type - value" instead of "type - typeoffset", then records the new offset.
Nothing is emitted when the offset is unchanged. Expects typereg and typeoffset
to be in scope. The argument is evaluated more than once, and the do / while
wrapper makes the macro a single statement that is safe in unbraced if / else
bodies. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* Same as SET_TYPE_OFFSET, but for the character held in TMP1. Expects
charoffset to be in scope. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3502
3503 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3504 {
3505 DEFINE_COMPILER;
3506 jump_list *found = NULL;
3507 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3508 unsigned int c;
3509 int compares;
3510 struct sljit_jump *jump = NULL;
3511 pcre_uchar *ccbegin;
3512 #ifdef SUPPORT_UCP
3513 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3514 BOOL charsaved = FALSE;
3515 int typereg = TMP1, scriptreg = TMP1;
3516 unsigned int typeoffset;
3517 #endif
3518 int invertcmp, numberofcmps;
3519 unsigned int charoffset;
3520
3521 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
3522 detect_partial_match(common, backtracks);
3523 read_char(common);
3524
3525 if ((*cc++ & XCL_MAP) != 0)
3526 {
3527 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3528 #ifndef COMPILE_PCRE8
3529 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3530 #elif defined SUPPORT_UTF
3531 if (common->utf)
3532 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3533 #endif
3534
3535 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3536 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3537 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3538 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3539 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3540 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3541
3542 #ifndef COMPILE_PCRE8
3543 JUMPHERE(jump);
3544 #elif defined SUPPORT_UTF
3545 if (common->utf)
3546 JUMPHERE(jump);
3547 #endif
3548 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3549 #ifdef SUPPORT_UCP
3550 charsaved = TRUE;
3551 #endif
3552 cc += 32 / sizeof(pcre_uchar);
3553 }
3554
3555 /* Scanning the necessary info. */
3556 ccbegin = cc;
3557 compares = 0;
3558 while (*cc != XCL_END)
3559 {
3560 compares++;
3561 if (*cc == XCL_SINGLE)
3562 {
3563 cc += 2;
3564 #ifdef SUPPORT_UTF
3565 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3566 #endif
3567 #ifdef SUPPORT_UCP
3568 needschar = TRUE;
3569 #endif
3570 }
3571 else if (*cc == XCL_RANGE)
3572 {
3573 cc += 2;
3574 #ifdef SUPPORT_UTF
3575 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3576 #endif
3577 cc++;
3578 #ifdef SUPPORT_UTF
3579 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3580 #endif
3581 #ifdef SUPPORT_UCP
3582 needschar = TRUE;
3583 #endif
3584 }
3585 #ifdef SUPPORT_UCP
3586 else
3587 {
3588 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3589 cc++;
3590 switch(*cc)
3591 {
3592 case PT_ANY:
3593 break;
3594
3595 case PT_LAMP:
3596 case PT_GC:
3597 case PT_PC:
3598 case PT_ALNUM:
3599 needstype = TRUE;
3600 break;
3601
3602 case PT_SC:
3603 needsscript = TRUE;
3604 break;
3605
3606 case PT_SPACE:
3607 case PT_PXSPACE:
3608 case PT_WORD:
3609 needstype = TRUE;
3610 needschar = TRUE;
3611 break;
3612
3613 default:
3614 SLJIT_ASSERT_STOP();
3615 break;
3616 }
3617 cc += 2;
3618 }
3619 #endif
3620 }
3621
3622 #ifdef SUPPORT_UCP
3623 /* Simple register allocation. TMP1 is preferred if possible. */
3624 if (needstype || needsscript)
3625 {
3626 if (needschar && !charsaved)
3627 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3628 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3629 if (needschar)
3630 {
3631 if (needstype)
3632 {
3633 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3634 typereg = RETURN_ADDR;
3635 }
3636
3637 if (needsscript)
3638 scriptreg = TMP3;
3639 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3640 }
3641 else if (needstype && needsscript)
3642 scriptreg = TMP3;
3643 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3644
3645 if (needsscript)
3646 {
3647 if (scriptreg == TMP1)
3648 {
3649 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3650 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3651 }
3652 else
3653 {
3654 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3655 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3656 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3657 }
3658 }
3659 }
3660 #endif
3661
3662 /* Generating code. */
3663 cc = ccbegin;
3664 charoffset = 0;
3665 numberofcmps = 0;
3666 #ifdef SUPPORT_UCP
3667 typeoffset = 0;
3668 #endif
3669
3670 while (*cc != XCL_END)
3671 {
3672 compares--;
3673 invertcmp = (compares == 0 && list != backtracks);
3674 jump = NULL;
3675
3676 if (*cc == XCL_SINGLE)
3677 {
3678 cc ++;
3679 #ifdef SUPPORT_UTF
3680 if (common->utf)
3681 {
3682 GETCHARINC(c, cc);
3683 }
3684 else
3685 #endif
3686 c = *cc++;
3687
3688 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3689 {
3690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3691 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3692 numberofcmps++;
3693 }
3694 else if (numberofcmps > 0)
3695 {
3696 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3697 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3698 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3699 numberofcmps = 0;
3700 }
3701 else
3702 {
3703 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3704 numberofcmps = 0;
3705 }
3706 }
3707 else if (*cc == XCL_RANGE)
3708 {
3709 cc ++;
3710 #ifdef SUPPORT_UTF
3711 if (common->utf)
3712 {
3713 GETCHARINC(c, cc);
3714 }
3715 else
3716 #endif
3717 c = *cc++;
3718 SET_CHAR_OFFSET(c);
3719 #ifdef SUPPORT_UTF
3720 if (common->utf)
3721 {
3722 GETCHARINC(c, cc);
3723 }
3724 else
3725 #endif
3726 c = *cc++;
3727 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3728 {
3729 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3730 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3731 numberofcmps++;
3732 }
3733 else if (numberofcmps > 0)
3734 {
3735 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3736 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3737 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3738 numberofcmps = 0;
3739 }
3740 else
3741 {
3742 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3743 numberofcmps = 0;
3744 }
3745 }
3746 #ifdef SUPPORT_UCP
3747 else
3748 {
3749 if (*cc == XCL_NOTPROP)
3750 invertcmp ^= 0x1;
3751 cc++;
3752 switch(*cc)
3753 {
3754 case PT_ANY:
3755 if (list != backtracks)
3756 {
3757 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3758 continue;
3759 }
3760 else if (cc[-1] == XCL_NOTPROP)
3761 continue;
3762 jump = JUMP(SLJIT_JUMP);
3763 break;
3764
3765 case PT_LAMP:
3766 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3767 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3768 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3769 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3770 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3771 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3772 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3773 break;
3774
3775 case PT_GC:
3776 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3777 SET_TYPE_OFFSET(c);
3778 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3779 break;
3780
3781 case PT_PC:
3782 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3783 break;
3784
3785 case PT_SC:
3786 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3787 break;
3788
3789 case PT_SPACE:
3790 case PT_PXSPACE:
3791 if (*cc == PT_SPACE)
3792 {
3793 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3794 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3795 }
3796 SET_CHAR_OFFSET(9);
3797 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3798 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3799 if (*cc == PT_SPACE)
3800 JUMPHERE(jump);
3801
3802 SET_TYPE_OFFSET(ucp_Zl);
3803 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3804 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3805 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3806 break;
3807
3808 case PT_WORD:
3809 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3810 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3811 /* ... fall through */
3812
3813 case PT_ALNUM:
3814 SET_TYPE_OFFSET(ucp_Ll);
3815 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3816 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3817 SET_TYPE_OFFSET(ucp_Nd);
3818 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3819 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3820 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3821 break;
3822 }
3823 cc += 2;
3824 }
3825 #endif
3826
3827 if (jump != NULL)
3828 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3829 }
3830
3831 if (found != NULL)
3832 set_jumps(found, LABEL());
3833 }
3834
3835 #undef SET_TYPE_OFFSET
3836 #undef SET_CHAR_OFFSET
3837
3838 #endif
3839
3840 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3841 {
3842 DEFINE_COMPILER;
3843 int length;
3844 unsigned int c, oc, bit;
3845 compare_context context;
3846 struct sljit_jump *jump[4];
3847 #ifdef SUPPORT_UTF
3848 struct sljit_label *label;
3849 #ifdef SUPPORT_UCP
3850 pcre_uchar propdata[5];
3851 #endif
3852 #endif
3853
3854 switch(type)
3855 {
3856 case OP_SOD:
3857 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3858 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3859 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3860 return cc;
3861
3862 case OP_SOM:
3863 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3864 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3865 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3866 return cc;
3867
3868 case OP_NOT_WORD_BOUNDARY:
3869 case OP_WORD_BOUNDARY:
3870 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3871 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3872 return cc;
3873
3874 case OP_NOT_DIGIT:
3875 case OP_DIGIT:
3876 detect_partial_match(common, backtracks);
3877 read_char8_type(common);
3878 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3879 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3880 return cc;
3881
3882 case OP_NOT_WHITESPACE:
3883 case OP_WHITESPACE:
3884 detect_partial_match(common, backtracks);
3885 read_char8_type(common);
3886 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3887 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3888 return cc;
3889
3890 case OP_NOT_WORDCHAR:
3891 case OP_WORDCHAR:
3892 detect_partial_match(common, backtracks);
3893 read_char8_type(common);
3894 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3895 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3896 return cc;
3897
3898 case OP_ANY:
3899 detect_partial_match(common, backtracks);
3900 read_char(common);
3901 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3902 {
3903 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3904 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3905 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3906 else
3907 jump[1] = check_str_end(common);
3908
3909 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3910 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3911 if (jump[1] != NULL)
3912 JUMPHERE(jump[1]);
3913 JUMPHERE(jump[0]);
3914 }
3915 else
3916 check_newlinechar(common, common->nltype, backtracks, TRUE);
3917 return cc;
3918
3919 case OP_ALLANY:
3920 detect_partial_match(common, backtracks);
3921 #ifdef SUPPORT_UTF
3922 if (common->utf)
3923 {
3924 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3925 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3926 #ifdef COMPILE_PCRE8
3927 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3928 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3929 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3930 #else /* COMPILE_PCRE8 */
3931 #ifdef COMPILE_PCRE16
3932 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3933 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3934 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3935 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3936 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3937 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3938 #endif /* COMPILE_PCRE16 */
3939 #endif /* COMPILE_PCRE8 */
3940 JUMPHERE(jump[0]);
3941 return cc;
3942 }
3943 #endif
3944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3945 return cc;
3946
3947 case OP_ANYBYTE:
3948 detect_partial_match(common, backtracks);
3949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3950 return cc;
3951
3952 #ifdef SUPPORT_UTF
3953 #ifdef SUPPORT_UCP
3954 case OP_NOTPROP:
3955 case OP_PROP:
3956 propdata[0] = 0;
3957 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3958 propdata[2] = cc[0];
3959 propdata[3] = cc[1];
3960 propdata[4] = XCL_END;
3961 compile_xclass_trypath(common, propdata, backtracks);
3962 return cc + 2;
3963 #endif
3964 #endif
3965
3966 case OP_ANYNL:
3967 detect_partial_match(common, backtracks);
3968 read_char(common);
3969 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3970 /* We don't need to handle soft partial matching case. */
3971 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3972 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3973 else
3974 jump[1] = check_str_end(common);
3975 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3976 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3977 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3978 jump[3] = JUMP(SLJIT_JUMP);
3979 JUMPHERE(jump[0]);
3980 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
3981 JUMPHERE(jump[1]);
3982 JUMPHERE(jump[2]);
3983 JUMPHERE(jump[3]);
3984 return cc;
3985
3986 case OP_NOT_HSPACE:
3987 case OP_HSPACE:
3988 detect_partial_match(common, backtracks);
3989 read_char(common);
3990 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3991 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3992 return cc;
3993
3994 case OP_NOT_VSPACE:
3995 case OP_VSPACE:
3996 detect_partial_match(common, backtracks);
3997 read_char(common);
3998 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3999 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4000 return cc;
4001
4002 #ifdef SUPPORT_UCP
4003 case OP_EXTUNI:
4004 detect_partial_match(common, backtracks);
4005 read_char(common);
4006 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4007 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4008 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4009
4010 label = LABEL();
4011 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4012 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4013 read_char(common);
4014 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4015 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4016 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4017
4018 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4019 JUMPHERE(jump[0]);
4020 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4021 {
4022 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4023 /* Since we successfully read a char above, partial matching must occure. */
4024 check_partial(common, TRUE);
4025 JUMPHERE(jump[0]);
4026 }
4027 return cc;
4028 #endif
4029
4030 case OP_EODN:
4031 /* Requires rather complex checks. */
4032 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4033 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4034 {
4035 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4036 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4037 if (common->mode == JIT_COMPILE)
4038 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4039 else
4040 {
4041 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4042 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4043 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4044 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4045 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4046 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4047 check_partial(common, TRUE);
4048 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4049 JUMPHERE(jump[1]);
4050 }
4051 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4052 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4053 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4054 }
4055 else if (common->nltype == NLTYPE_FIXED)
4056 {
4057 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4058 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4059 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4060 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4061 }
4062 else
4063 {
4064 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4065 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4066 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4067 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4068 jump[2] = JUMP(SLJIT_C_GREATER);
4069 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4070 /* Equal. */
4071 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4072 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4073 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4074
4075 JUMPHERE(jump[1]);
4076 if (common->nltype == NLTYPE_ANYCRLF)
4077 {
4078 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4079 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4080 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4081 }
4082 else
4083 {
4084 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4085 read_char(common);
4086 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4087 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4088 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4089 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4090 }
4091 JUMPHERE(jump[2]);
4092 JUMPHERE(jump[3]);
4093 }
4094 JUMPHERE(jump[0]);
4095 check_partial(common, FALSE);
4096 return cc;
4097
4098 case OP_EOD:
4099 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4100 check_partial(common, FALSE);
4101 return cc;
4102
4103 case OP_CIRC:
4104 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4106 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4107 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4108 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4109 return cc;
4110
4111 case OP_CIRCM:
4112 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4114 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4115 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4116 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4117 jump[0] = JUMP(SLJIT_JUMP);
4118 JUMPHERE(jump[1]);
4119
4120 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4121 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4122 {
4123 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4124 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4125 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4126 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4127 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4128 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4129 }
4130 else
4131 {
4132 skip_char_back(common);
4133 read_char(common);
4134 check_newlinechar(common, common->nltype, backtracks, FALSE);
4135 }
4136 JUMPHERE(jump[0]);
4137 return cc;
4138
4139 case OP_DOLL:
4140 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4141 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4142 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4143
4144 if (!common->endonly)
4145 compile_char1_trypath(common, OP_EODN, cc, backtracks);
4146 else
4147 {
4148 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4149 check_partial(common, FALSE);
4150 }
4151 return cc;
4152
4153 case OP_DOLLM:
4154 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4155 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4156 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4157 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4158 check_partial(common, FALSE);
4159 jump[0] = JUMP(SLJIT_JUMP);
4160 JUMPHERE(jump[1]);
4161
4162 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4163 {
4164 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4165 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4166 if (common->mode == JIT_COMPILE)
4167 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4168 else
4169 {
4170 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4171 /* STR_PTR = STR_END - IN_UCHARS(1) */
4172 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4173 check_partial(common, TRUE);
4174 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4175 JUMPHERE(jump[1]);
4176 }
4177
4178 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4179 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4180 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4181 }
4182 else
4183 {
4184 peek_char(common);
4185 check_newlinechar(common, common->nltype, backtracks, FALSE);
4186 }
4187 JUMPHERE(jump[0]);
4188 return cc;
4189
4190 case OP_CHAR:
4191 case OP_CHARI:
4192 length = 1;
4193 #ifdef SUPPORT_UTF
4194 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4195 #endif
4196 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4197 {
4198 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4199 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4200
4201 context.length = IN_UCHARS(length);
4202 context.sourcereg = -1;
4203 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4204 context.ucharptr = 0;
4205 #endif
4206 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4207 }
4208 detect_partial_match(common, backtracks);
4209 read_char(common);
4210 #ifdef SUPPORT_UTF
4211 if (common->utf)
4212 {
4213 GETCHAR(c, cc);
4214 }
4215 else
4216 #endif
4217 c = *cc;
4218 if (type == OP_CHAR || !char_has_othercase(common, cc))
4219 {
4220 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4221 return cc + length;
4222 }
4223 oc = char_othercase(common, c);
4224 bit = c ^ oc;
4225 if (ispowerof2(bit))
4226 {
4227 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4228 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4229 return cc + length;
4230 }
4231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4232 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4233 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
4234 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4235 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4236 return cc + length;
4237
4238 case OP_NOT:
4239 case OP_NOTI:
4240 detect_partial_match(common, backtracks);
4241 length = 1;
4242 #ifdef SUPPORT_UTF
4243 if (common->utf)
4244 {
4245 #ifdef COMPILE_PCRE8
4246 c = *cc;
4247 if (c < 128)
4248 {
4249 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4250 if (type == OP_NOT || !char_has_othercase(common, cc))
4251 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4252 else
4253 {
4254 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4255 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4256 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4257 }
4258 /* Skip the variable-length character. */
4259 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4260 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4261 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4262 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4263 JUMPHERE(jump[0]);
4264 return cc + 1;
4265 }
4266 else
4267 #endif /* COMPILE_PCRE8 */
4268 {
4269 GETCHARLEN(c, cc, length);
4270 read_char(common);
4271 }
4272 }
4273 else
4274 #endif /* SUPPORT_UTF */
4275 {
4276 read_char(common);
4277 c = *cc;
4278 }
4279
4280 if (type == OP_NOT || !char_has_othercase(common, cc))
4281 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4282 else
4283 {
4284 oc = char_othercase(common, c);
4285 bit = c ^ oc;
4286 if (ispowerof2(bit))
4287 {
4288 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4289 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4290 }
4291 else
4292 {
4293 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4294 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4295 }
4296 }
4297 return cc + length;
4298
4299 case OP_CLASS:
4300 case OP_NCLASS:
4301 detect_partial_match(common, backtracks);
4302 read_char(common);
4303 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4304 jump[0] = NULL;
4305 #ifdef COMPILE_PCRE8
4306 /* This check only affects 8 bit mode. In other modes, we
4307 always need to compare the value with 255. */
4308 if (common->utf)
4309 #endif /* COMPILE_PCRE8 */
4310 {
4311 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4312 if (type == OP_CLASS)
4313 {
4314 add_jump(compiler, backtracks, jump[0]);
4315 jump[0] = NULL;
4316 }
4317 }
4318 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4319 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4320 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4321 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4322 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4323 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4324 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4325 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4326 if (jump[0] != NULL)
4327 JUMPHERE(jump[0]);
4328 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4329 return cc + 32 / sizeof(pcre_uchar);
4330
4331 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4332 case OP_XCLASS:
4333 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
4334 return cc + GET(cc, 0) - 1;
4335 #endif
4336
4337 case OP_REVERSE:
4338 length = GET(cc, 0);
4339 if (length == 0)
4340 return cc + LINK_SIZE;
4341 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4342 #ifdef SUPPORT_UTF
4343 if (common->utf)
4344 {
4345 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4346 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4347 label = LABEL();
4348 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4349 skip_char_back(common);
4350 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4351 JUMPTO(SLJIT_C_NOT_ZERO, label);
4352 }
4353 else
4354 #endif
4355 {
4356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4357 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4358 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4359 }
4360 check_start_used_ptr(common);
4361 return cc + LINK_SIZE;
4362 }
4363 SLJIT_ASSERT_STOP();
4364 return cc;
4365 }
4366
4367 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4368 {
4369 /* This function consumes at least one input character. */
4370 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4371 DEFINE_COMPILER;
4372 pcre_uchar *ccbegin = cc;
4373 compare_context context;
4374 int size;
4375
4376 context.length = 0;
4377 do
4378 {
4379 if (cc >= ccend)
4380 break;
4381
4382 if (*cc == OP_CHAR)
4383 {
4384 size = 1;
4385 #ifdef SUPPORT_UTF
4386 if (common->utf && HAS_EXTRALEN(cc[1]))
4387 size += GET_EXTRALEN(cc[1]);
4388 #endif
4389 }
4390 else if (*cc == OP_CHARI)
4391 {
4392 size = 1;
4393 #ifdef SUPPORT_UTF
4394 if (common->utf)
4395 {
4396 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4397 size = 0;
4398 else if (HAS_EXTRALEN(cc[1]))
4399 size += GET_EXTRALEN(cc[1]);
4400 }
4401 else
4402 #endif
4403 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4404 size = 0;
4405 }
4406 else
4407 size = 0;
4408
4409 cc += 1 + size;
4410 context.length += IN_UCHARS(size);
4411 }
4412 while (size > 0 && context.length <= 128);
4413
4414 cc = ccbegin;
4415 if (context.length > 0)
4416 {
4417 /* We have a fixed-length byte sequence. */
4418 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4419 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4420
4421 context.sourcereg = -1;
4422 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4423 context.ucharptr = 0;
4424 #endif
4425 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4426 return cc;
4427 }
4428
4429 /* A non-fixed length character will be checked if length == 0. */
4430 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
4431 }
4432
4433 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4434 {
4435 DEFINE_COMPILER;
4436 int offset = GET2(cc, 1) << 1;
4437
4438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4439 if (!common->jscript_compat)
4440 {
4441 if (backtracks == NULL)
4442 {
4443 /* OVECTOR(1) contains the "string begin - 1" constant. */
4444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4445 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4447 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4448 return JUMP(SLJIT_C_NOT_ZERO);
4449 }
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4451 }
4452 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4453 }
4454
4455 /* Forward definitions. */
4456 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4457 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
4458
/* Allocates a zero-filled backtrack record of `size` bytes from the compiler's
memory and pushes it on top of parent's list. Expects `compiler`, `parent` and
`backtrack` to be in scope; makes the enclosing function return `error` when
the compiler is in an error state after the allocation. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
4471
/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
4484
/* Reinterprets the in-scope `backtrack` pointer as a pointer to the given
backtrack structure type. */
4485 #define BACKTRACK_AS(type) ((type *)backtrack)
4486
/* Emits the try path for a single back reference (OP_REF / OP_REFI at cc).

withchecks: when TRUE, also emit the test against OVECTOR(1) (skipped in
  jscript_compat mode) and a test for a zero-length referenced text.
emptyfail: only meaningful with withchecks; TRUE sends the zero-length case to
  *backtracks, FALSE lets it continue after the code emitted here.

Returns the code pointer after the reference (cc + 1 + IMM2_SIZE). */
4487 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4488 {
4489 DEFINE_COMPILER;
/* OVECTOR(offset) is read as the start of the referenced text and
OVECTOR(offset + 1) as its end. */
4490 int offset = GET2(cc, 1) << 1;
/* Zero-length jump; stays NULL unless withchecks is set. Resolved at the end. */
4491 struct sljit_jump *jump = NULL;
4492 struct sljit_jump *partial;
4493 struct sljit_jump *nopartial;
4494
4495 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4496 /* OVECTOR(1) contains the "string begin - 1" constant. */
4497 if (withchecks && !common->jscript_compat)
4498 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4499
4500 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4501 if (common->utf && *cc == OP_REFI)
4502 {
/* Caseless reference in UTF mode: the comparison is delegated to
do_utf_caselesscmp through a three-argument call. The assert pins the
register assignment that call relies on. */
4503 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4504 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4505 if (withchecks)
4506 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4507
4508 /* Needed to save important temporary registers. */
4509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4510 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4512 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4513 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failures; any other value becomes the new STR_PTR.
NOTE(review): presumably 0 = mismatch and 1 = ran out of subject; confirm
against do_utf_caselesscmp. */
4514 if (common->mode == JIT_COMPILE)
4515 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4516 else
4517 {
/* Partial matching modes: value 1 additionally goes through check_partial
before backtracking. */
4518 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4519 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4520 check_partial(common, FALSE);
4521 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4522 JUMPHERE(nopartial);
4523 }
4524 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4525 }
4526 else
4527 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4528 {
/* TMP2 = end - start of the referenced text; the flags are set so that a
zero length can be tested right away. */
4529 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4530 if (withchecks)
4531 jump = JUMP(SLJIT_C_ZERO);
4532
/* Advance STR_PTR over the expected text up front; running past STR_END is
the "partial" case, which is a plain failure in JIT_COMPILE mode. */
4533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4534 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4535 if (common->mode == JIT_COMPILE)
4536 add_jump(compiler, backtracks, partial);
4537
/* Compare through the shared casefulcmp / caselesscmp routine; a non-zero
TMP2 afterwards is treated as a mismatch. */
4538 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4539 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4540
4541 if (common->mode != JIT_COMPILE)
4542 {
/* The full comparison succeeded: skip the overrun handling below. */
4543 nopartial = JUMP(SLJIT_JUMP);
4544 JUMPHERE(partial);
4545 /* TMP2 -= STR_END - STR_PTR */
4546 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4547 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing is left to compare: go straight to the partial-match check. */
4548 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4549 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4550 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4551 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4552 JUMPHERE(partial);
/* After check_partial this path always backtracks. */
4553 check_partial(common, FALSE);
4554 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4555 JUMPHERE(nopartial);
4556 }
4557 }
4558
/* Resolve the zero-length case according to emptyfail. */
4559 if (jump != NULL)
4560 {
4561 if (emptyfail)
4562 add_jump(compiler, backtracks, jump);
4563 else
4564 JUMPHERE(jump);
4565 }
4566 return cc + 1 + IMM2_SIZE;
4567 }
4568
/* Emits the try path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE, read from cc[1 + IMM2_SIZE]).

An iterator_backtrack record is pushed on parent; its trypath label is set
here. Returns the code pointer after the repeat operands, or NULL when
PUSH_BACKTRACK detects a compiler error. */
4569 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4570 {
4571 DEFINE_COMPILER;
4572 backtrack_common *backtrack;
4573 pcre_uchar type;
4574 struct sljit_label *label;
4575 struct sljit_jump *zerolength;
4576 struct sljit_jump *jump = NULL;
4577 pcre_uchar *ccbegin = cc;
/* Repeat limits; max == 0 is handled below as "no upper limit". */
4578 int min = 0, max = 0;
4579 BOOL minimize;
4580
4581 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4582
/* The low bit of the repeat opcode selects the minimizing variant. */
4583 type = cc[1 + IMM2_SIZE];
4584 minimize = (type & 0x1) != 0;
4585 switch(type)
4586 {
4587 case OP_CRSTAR:
4588 case OP_CRMINSTAR:
4589 min = 0;
4590 max = 0;
4591 cc += 1 + IMM2_SIZE + 1;
4592 break;
4593 case OP_CRPLUS:
4594 case OP_CRMINPLUS:
4595 min = 1;
4596 max = 0;
4597 cc += 1 + IMM2_SIZE + 1;
4598 break;
4599 case OP_CRQUERY:
4600 case OP_CRMINQUERY:
4601 min = 0;
4602 max = 1;
4603 cc += 1 + IMM2_SIZE + 1;
4604 break;
4605 case OP_CRRANGE:
4606 case OP_CRMINRANGE:
4607 min = GET2(cc, 1 + IMM2_SIZE + 1);
4608 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4609 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4610 break;
4611 default:
4612 SLJIT_ASSERT_STOP();
4613 break;
4614 }
4615
/* Non-minimizing repeat: match repeatedly, pushing a STR_PTR on the stack for
each position that may be backtracked to. */
4616 if (!minimize)
4617 {
4618 if (min == 0)
4619 {
/* Two slots: the current STR_PTR plus a 0 entry. */
4620 allocate_stack(common, 2);
4621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4623 /* Temporary release of STR_PTR. */
4624 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4625 zerolength = compile_ref_checks(common, ccbegin, NULL);
4626 /* Restore if not zero length. */
4627 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4628 }
4629 else
4630 {
/* At least one match is required, so the checks may backtrack directly. */
4631 allocate_stack(common, 1);
4632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4633 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4634 }
4635
/* A counter in the POSSESSIVE0 local is needed only when a limit above 1 is
involved. */
4636 if (min > 1 || max > 1)
4637 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4638
/* Loop head: one occurrence of the reference, without the checks that were
already emitted above. */
4639 label = LABEL();
4640 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4641
4642 if (min > 1 || max > 1)
4643 {
4644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4645 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4646 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without saving a backtrack position. */
4647 if (min > 1)
4648 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4649 if (max > 1)
4650 {
/* Stop at the maximum; otherwise save STR_PTR and try one more. */
4651 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4652 allocate_stack(common, 1);
4653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4654 JUMPTO(SLJIT_JUMP, label);
4655 JUMPHERE(jump);
4656 }
4657 }
4658
4659 if (max == 0)
4660 {
4661 /* Includes min > 1 case as well. */
4662 allocate_stack(common, 1);
4663 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4664 JUMPTO(SLJIT_JUMP, label);
4665 }
4666
/* The zero-length case skips the whole loop and lands here. */
4667 JUMPHERE(zerolength);
4668 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4669
4670 decrease_call_count(common);
4671 return cc;
4672 }
4673
/* Minimizing repeat: STACK(0) is written with the current STR_PTR after each
occurrence and STACK(1) is used as the occurrence counter (not initialised
for OP_CRMINSTAR, which never reads it because min <= 1 and max == 0). */
4674 allocate_stack(common, 2);
4675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4676 if (type != OP_CRMINSTAR)
4677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4678
4679 if (min == 0)
4680 {
/* Zero occurrences are acceptable: record STR_PTR and jump over the first
occurrence emitted below. */
4681 zerolength = compile_ref_checks(common, ccbegin, NULL);
4682 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4683 jump = JUMP(SLJIT_JUMP);
4684 }
4685 else
4686 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4687
/* trypath is the re-entry label for matching one more occurrence. */
4688 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4689 if (max > 0)
4690 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4691
4692 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4694
4695 if (min > 1)
4696 {
/* Count the occurrence and keep looping until the minimum is reached. */
4697 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4698 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4700 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4701 }
4702 else if (max > 0)
4703 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4704
4705 if (jump != NULL)
4706 JUMPHERE(jump);
4707 JUMPHERE(zerolength);
4708
4709 decrease_call_count(common);
4710 return cc;
4711 }
4712
4713 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4714 {
4715 DEFINE_COMPILER;
4716 backtrack_common *backtrack;
4717 recurse_entry *entry = common->entries;
4718 recurse_entry *prev = NULL;
4719 int start = GET(cc, 1);
4720
4721 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4722 while (entry != NULL)
4723 {
4724 if (entry->start == start)
4725 break;
4726 prev = entry;
4727 entry = entry->next;
4728 }
4729
4730 if (entry == NULL)
4731 {
4732 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4733 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4734 return NULL;
4735 entry->next = NULL;
4736 entry->entry = NULL;
4737 entry->calls = NULL;
4738 entry->start = start;
4739
4740 if (prev != NULL)
4741 prev->next = entry;
4742 else
4743 common->entries = entry;
4744 }
4745
4746 if (common->has_set_som && common->mark_ptr != 0)
4747 {
4748 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4749 allocate_stack(common, 2);
4750 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4753 }
4754 else if (common->has_set_som || common->mark_ptr != 0)
4755 {
4756 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4757 allocate_stack(common, 1);
4758 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4759 }
4760
4761 if (entry->entry == NULL)
4762 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4763 else
4764 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4765 /* Leave if the match is failed. */
4766 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4767 return cc + 1 + LINK_SIZE;
4768 }
4769
/* Emits the code for an assertion (OP_ASSERT, OP_ASSERT_NOT, OP_ASSERTBACK or
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

backtrack: the assert_backtrack record of this assertion; framesize, localptr
  and (for OP_BRAZERO) trypath are filled in here.
conditional: when TRUE, failure jumps are collected in backtrack->condfailed
  instead of backtrack->common.topbacktracks.

The accept / leave state in common is saved on entry and restored on every
exit. Returns the code pointer after the closing item of the assertion, or
NULL on a compiler error. */
4770 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4771 {
4772 DEFINE_COMPILER;
4773 int framesize;
4774 int localptr;
4775 backtrack_common altbacktrack;
4776 pcre_uchar *ccbegin;
4777 pcre_uchar opcode;
4778 pcre_uchar bra = OP_BRA;
/* Collects the "alternative matched" jumps of a positive assertion. */
4779 jump_list *tmp = NULL;
/* List that receives the jumps taken when the assertion fails. */
4780 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* List that receives the jump emitted after each alternative matched. */
4781 jump_list **found;
4782 /* Saving previous accept variables. */
4783 struct sljit_label *save_leavelabel = common->leavelabel;
4784 struct sljit_label *save_acceptlabel = common->acceptlabel;
4785 jump_list *save_leave = common->leave;
4786 jump_list *save_accept = common->accept;
4787 struct sljit_jump *jump;
4788 struct sljit_jump *brajump = NULL;
4789
4790 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4791 {
4792 SLJIT_ASSERT(!conditional);
4793 bra = *cc;
4794 cc++;
4795 }
4796 localptr = PRIV_DATA(cc);
4797 SLJIT_ASSERT(localptr != 0);
4798 framesize = get_framesize(common, cc, FALSE);
4799 backtrack->framesize = framesize;
4800 backtrack->localptr = localptr;
4801 opcode = *cc;
4802 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For a positive assertion a matching alternative means success (tmp); for a
negative one it means the assertion failed (target). */
4803 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimits the current alternative. */
4804 ccbegin = cc;
4805 cc += GET(cc, 1);
4806
4807 if (bra == OP_BRAMINZERO)
4808 {
4809 /* This is a braminzero backtrack path. */
4810 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4811 free_stack(common, 1);
4812 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4813 }
4814
/* Entry bookkeeping. Without a frame, only STACK_TOP (in the local) and
STR_PTR (on the stack) are saved; with a frame, the previous value of the
local is saved as well and the frame is initialised. */
4815 if (framesize < 0)
4816 {
4817 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4818 allocate_stack(common, 1);
4819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4820 }
4821 else
4822 {
4823 allocate_stack(common, framesize + 2);
4824 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4825 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4827 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4829 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4830 }
4831
4832 memset(&altbacktrack, 0, sizeof(backtrack_common));
4833 common->leavelabel = NULL;
4834 common->leave = NULL;
/* One iteration per alternative of the assertion. */
4835 while (1)
4836 {
4837 common->acceptlabel = NULL;
4838 common->accept = NULL;
4839 altbacktrack.top = NULL;
4840 altbacktrack.topbacktracks = NULL;
4841
/* Second and later alternatives restart from the saved STR_PTR. */
4842 if (*ccbegin == OP_ALT)
4843 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4844
4845 altbacktrack.cc = ccbegin;
4846 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
4847 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4848 {
4849 common->leavelabel = save_leavelabel;
4850 common->acceptlabel = save_acceptlabel;
4851 common->leave = save_leave;
4852 common->accept = save_accept;
4853 return NULL;
4854 }
/* Accept jumps recorded while compiling the alternative land here, at the
point where the alternative has matched. */
4855 common->acceptlabel = LABEL();
4856 if (common->accept != NULL)
4857 set_jumps(common->accept, common->acceptlabel);
4858
4859 /* Reset stack. */
4860 if (framesize < 0)
4861 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4862 else {
4863 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4864 {
4865 /* We don't need to keep the STR_PTR, only the previous localptr. */
4866 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4867 }
4868 else
4869 {
4870 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4871 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4872 }
4873 }
4874
/* A negative assertion whose body matched has failed: restore STR_PTR and
the local before jumping to the failure list. */
4875 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4876 {
4877 /* We know that STR_PTR was stored on the top of the stack. */
4878 if (conditional)
4879 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4880 else if (bra == OP_BRAZERO)
4881 {
4882 if (framesize < 0)
4883 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4884 else
4885 {
4886 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4887 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4888 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4889 }
4890 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4892 }
4893 else if (framesize >= 0)
4894 {
4895 /* For OP_BRA and OP_BRAMINZERO. */
4896 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4897 }
4898 }
4899 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4900
/* Backtrack path of this alternative; when it is exhausted, control falls
through to the next alternative (or to the code after the loop). */
4901 compile_backtrackpath(common, altbacktrack.top);
4902 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4903 {
4904 common->leavelabel = save_leavelabel;
4905 common->acceptlabel = save_acceptlabel;
4906 common->leave = save_leave;
4907 common->accept = save_accept;
4908 return NULL;
4909 }
4910 set_jumps(altbacktrack.topbacktracks, LABEL());
4911
4912 if (*cc != OP_ALT)
4913 break;
4914
4915 ccbegin = cc;
4916 cc += GET(cc, 1);
4917 }
4918 /* None of them matched. */
4919 if (common->leave != NULL)
4920 set_jumps(common->leave, LABEL());
4921
4922 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4923 {
4924 /* Assert is failed. */
4925 if (conditional || bra == OP_BRAZERO)
4926 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4927
4928 if (framesize < 0)
4929 {
4930 /* The topmost item should be 0. */
4931 if (bra == OP_BRAZERO)
4932 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4933 else
4934 free_stack(common, 1);
4935 }
4936 else
4937 {
4938 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4939 /* The topmost item should be 0. */
4940 if (bra == OP_BRAZERO)
4941 {
4942 free_stack(common, framesize + 1);
4943 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4944 }
4945 else
4946 free_stack(common, framesize + 2);
4947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4948 }
/* With OP_BRAZERO a failed assertion is not a failure of the whole item: the
jump is bound to trypath further down instead of being added to target. */
4949 jump = JUMP(SLJIT_JUMP);
4950 if (bra != OP_BRAZERO)
4951 add_jump(compiler, target, jump);
4952
4953 /* Assert is successful. */
4954 set_jumps(tmp, LABEL());
4955 if (framesize < 0)
4956 {
4957 /* We know that STR_PTR was stored on the top of the stack. */
4958 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4959 /* Keep the STR_PTR on the top of the stack. */
4960 if (bra == OP_BRAZERO)
4961 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4962 else if (bra == OP_BRAMINZERO)
4963 {
4964 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4965 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4966 }
4967 }
4968 else
4969 {
4970 if (bra == OP_BRA)
4971 {
4972 /* We don't need to keep the STR_PTR, only the previous localptr. */
4973 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4974 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4975 }
4976 else
4977 {
4978 /* We don't need to keep the STR_PTR, only the previous localptr. */
4979 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4980 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top item becomes STR_PTR for OP_BRAZERO and 0 for OP_BRAMINZERO. */
4981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4982 }
4983 }
4984
4985 if (bra == OP_BRAZERO)
4986 {
4987 backtrack->trypath = LABEL();
4988 sljit_set_label(jump, backtrack->trypath);
4989 }
4990 else if (bra == OP_BRAMINZERO)
4991 {
/* NOTE(review): backtrack->trypath is not assigned in this function for the
OP_BRAMINZERO case; presumably the caller sets it beforehand - confirm. */
4992 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4993 JUMPHERE(brajump);
4994 if (framesize >= 0)
4995 {
4996 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4997 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4999 }
5000 set_jumps(backtrack->common.topbacktracks, LABEL());
5001 }
5002 }
5003 else
5004 {
5005 /* AssertNot is successful. */
5006 if (framesize < 0)
5007 {
5008 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5009 if (bra != OP_BRA)
5010 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5011 else
5012 free_stack(common, 1);
5013 }
5014 else
5015 {
5016 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5018 /* The topmost item should be 0. */
5019 if (bra != OP_BRA)
5020 {
5021 free_stack(common, framesize + 1);
5022 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5023 }
5024 else
5025 free_stack(common, framesize + 2);
5026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5027 }
5028
5029 if (bra == OP_BRAZERO)
5030 backtrack->trypath = LABEL();
5031 else if (bra == OP_BRAMINZERO)
5032 {
5033 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5034 JUMPHERE(brajump);
5035 }
5036
/* With a zero-repeat prefix the "body matched" jumps are resolved right here
rather than being left on the list as backtracks. */
5037 if (bra != OP_BRA)
5038 {
5039 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5040 set_jumps(backtrack->common.topbacktracks, LABEL());
5041 backtrack->common.topbacktracks = NULL;
5042 }
5043 }
5044
/* Restore the accept / leave state saved on entry. */
5045 common->leavelabel = save_leavelabel;
5046 common->acceptlabel = save_acceptlabel;
5047 common->leave = save_leave;
5048 common->accept = save_accept;
5049 return cc + 1 + LINK_SIZE;
5050 }
5051
5052 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5053 {
5054 int condition = FALSE;
5055 pcre_uchar *slotA = name_table;
5056 pcre_uchar *slotB;
5057 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5058 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5059 sljit_w no_capture;
5060 int i;
5061
5062 locals += refno & 0xff;
5063 refno >>= 8;
5064 no_capture = locals[1];
5065
5066 for (i = 0; i < name_count; i++)
5067 {
5068 if (GET2(slotA, 0) == refno) break;
5069 slotA += name_entry_size;
5070 }
5071
5072 if (i < name_count)
5073 {
5074 /* Found a name for the number - there can be only one; duplicate names
5075 for different numbers are allowed, but not vice versa. First scan down
5076 for duplicates. */
5077
5078 slotB = slotA;
5079 while (slotB > name_table)
5080 {
5081 slotB -= name_entry_size;
5082 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5083 {
5084 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5085 if (condition) break;
5086 }
5087 else break;
5088 }
5089
5090 /* Scan up for duplicates */
5091 if (!condition)
5092 {
5093 slotB = slotA;
5094 for (i++; i < name_count; i++)
5095 {
5096 slotB += name_entry_size;
5097 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5098 {
5099 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5100 if (condition) break;
5101 }
5102 else break;
5103 }
5104 }
5105 }
5106 return condition;
5107 }
5108
5109 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5110 {
5111 int condition = FALSE;
5112 pcre_uchar *slotA = name_table;
5113 pcre_uchar *slotB;
5114 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5115 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5116 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5117 int i;
5118
5119 for (i = 0; i < name_count; i++)
5120 {
5121 if (GET2(slotA, 0) == recno) break;
5122 slotA += name_entry_size;
5123 }
5124
5125 if (i < name_count)
5126 {
5127 /* Found a name for the number - there can be only one; duplicate
5128 names for different numbers are allowed, but not vice versa. First
5129 scan down for duplicates. */
5130
5131 slotB = slotA;
5132 while (slotB > name_table)
5133 {
5134 slotB -= name_entry_size;
5135 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5136 {
5137 condition = GET2(slotB, 0) == group_num;
5138 if (condition) break;
5139 }
5140 else break;
5141 }
5142
5143 /* Scan up for duplicates */
5144 if (!condition)
5145 {
5146 slotB = slotA;
5147 for (i++; i < name_count; i++)
5148 {
5149 slotB += name_entry_size;
5150 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5151 {
5152 condition = GET2(slotB, 0) == group_num;
5153 if (condition) break;
5154 }
5155 else break;
5156 }
5157 }
5158 }
5159 return condition;
5160 }
5161
5162 /*
5163 Handling bracketed expressions is probably the most complex part.
5164
5165 Stack layout naming characters:
5166 S - Push the current STR_PTR
5167 0 - Push a 0 (NULL)
5168 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5169 before the next alternative. Not pushed if there are no alternatives.
5170 M - Any values pushed by the current alternative. Can be empty, or anything.
5171 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5172 L - Push the previous local (pointed by localptr) to the stack
5173 () - optional values stored on the stack
5174 ()* - optional, can be stored multiple times
5175
5176 The following list shows the regular expression templates, their PCRE byte codes
5177 and stack layout supported by pcre-sljit.
5178
5179 (?:) OP_BRA | OP_KET A M
5180 () OP_CBRA | OP_KET C M
5181 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5182 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5183 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5184 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5185 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5186 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5187 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5188 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5189 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5190 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5191 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5192 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5193 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5194 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5195 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5196 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5197 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5198 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5199 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5200 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5201
5202
5203 Stack layout naming characters:
5204 A - Push the alternative index (starting from 0) on the stack.
5205 Not pushed if there are no alternatives.
5206 M - Any values pushed by the current alternative. Can be empty, or anything.
5207
5208 The next list shows the possible content of a bracket:
5209 (|) OP_*BRA | OP_ALT ... M A
5210 (?()|) OP_*COND | OP_ALT M A
5211 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5212 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5213 Or nothing, if trace is unnecessary
5214 */
5215
5216 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5217 {
5218 DEFINE_COMPILER;
5219 backtrack_common *backtrack;
5220 pcre_uchar opcode;
5221 int localptr = 0;
5222 int offset = 0;
5223 int stacksize;
5224 pcre_uchar *ccbegin;
5225 pcre_uchar *trypath;
5226 pcre_uchar bra = OP_BRA;
5227 pcre_uchar ket;
5228 assert_backtrack *assert;
5229 BOOL has_alternatives;
5230 struct sljit_jump *jump;
5231 struct sljit_jump *skip;
5232 struct sljit_label *rmaxlabel = NULL;
5233 struct sljit_jump *braminzerojump = NULL;
5234
5235 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5236
5237 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5238 {
5239 bra = *cc;
5240 cc++;
5241 opcode = *cc;
5242 }
5243
5244 opcode = *cc;
5245 ccbegin = cc;
5246 trypath = ccbegin + 1 + LINK_SIZE;
5247
5248 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5249 {
5250 /* Drop this bracket_backtrack. */
5251 parent->top = backtrack->prev;
5252 return bracketend(cc);
5253 }
5254
5255 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5256 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5257 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5258 cc += GET(cc, 1);
5259
5260 has_alternatives = *cc == OP_ALT;
5261 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5262 {
5263 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
5264 if (*trypath == OP_NRREF)
5265 {
5266 stacksize = GET2(trypath, 1);
5267 if (common->currententry == NULL || stacksize == RREF_ANY)
5268 has_alternatives = FALSE;
5269 else if (common->currententry->start == 0)
5270 has_alternatives = stacksize != 0;
5271 else
5272 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5273 }
5274 }
5275
5276 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5277 opcode = OP_SCOND;
5278 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5279 opcode = OP_ONCE;
5280
5281 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5282 {
5283 /* Capturing brackets has a pre-allocated space. */
5284 offset = GET2(ccbegin, 1 + LINK_SIZE);
5285 localptr = OVECTOR_PRIV(offset);
5286 offset <<= 1;
5287 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5288 trypath += IMM2_SIZE;
5289 }
5290 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5291 {
5292 /* Other brackets simply allocate the next entry. */
5293 localptr = PRIV_DATA(ccbegin);
5294 SLJIT_ASSERT(localptr != 0);
5295 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5296 if (opcode == OP_ONCE)
5297 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5298 }
5299
5300 /* Instructions before the first alternative. */
5301 stacksize = 0;
5302 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5303 stacksize++;
5304 if (bra == OP_BRAZERO)
5305 stacksize++;
5306
5307 if (stacksize > 0)
5308 allocate_stack(common, stacksize);
5309
5310 stacksize = 0;
5311 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5312 {
5313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5314 stacksize++;
5315 }
5316
5317 if (bra == OP_BRAZERO)
5318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5319
5320 if (bra == OP_BRAMINZERO)
5321 {
5322 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5323 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5324 if (ket != OP_KETRMIN)
5325 {
5326 free_stack(common, 1);
5327 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5328 }
5329 else
5330 {
5331 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5332 {
5333 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5334 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5335 /* Nothing stored during the first run. */
5336 skip = JUMP(SLJIT_JUMP);
5337 JUMPHERE(jump);
5338 /* Checking zero-length iteration. */
5339 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5340 {
5341 /* When we come from outside, localptr contains the previous STR_PTR. */
5342 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5343 }
5344 else
5345 {
5346 /* Except when the whole stack frame must be saved. */
5347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5348 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5349 }
5350 JUMPHERE(skip);
5351 }
5352 else
5353 {
5354 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5355 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5356 JUMPHERE(jump);
5357 }
5358 }
5359 }
5360
5361 if (ket == OP_KETRMIN)
5362 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5363
5364 if (ket == OP_KETRMAX)
5365 {
5366 rmaxlabel = LABEL();
5367 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5368 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
5369 }
5370
5371 /* Handling capturing brackets and alternatives. */
5372 if (opcode == OP_ONCE)
5373 {
5374 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5375 {
5376 /* Neither capturing brackets nor recursions are not found in the block. */
5377 if (ket == OP_KETRMIN)
5378 {
5379 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5380 allocate_stack(common, 2);
5381 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5383 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5384 }
5385 else if (ket == OP_KETRMAX || has_alternatives)
5386 {
5387 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5388 allocate_stack(common, 1);
5389 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5390 }
5391 else
5392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5393 }
5394 else
5395 {
5396 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5397 {
5398 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5400 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5404 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5405 }
5406 else
5407 {
5408 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5410 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5413 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5414 }
5415 }
5416 }
5417 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5418 {
5419 /* Saving the previous values. */
5420 allocate_stack(common, 3);
5421 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5422 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5424 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5426 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5428 }
5429 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5430 {
5431 /* Saving the previous value. */
5432 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5433 allocate_stack(common, 1);
5434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5436 }
5437 else if (has_alternatives)
5438 {
5439 /* Pushing the starting string pointer. */
5440 allocate_stack(common, 1);
5441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5442 }
5443
5444 /* Generating code for the first alternative. */
5445 if (opcode == OP_COND || opcode == OP_SCOND)
5446 {
5447 if (*trypath == OP_CREF)
5448 {
5449 SLJIT_ASSERT(has_alternatives);
5450 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5451 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5452 trypath += 1 + IMM2_SIZE;
5453 }
5454 else if (*trypath == OP_NCREF)
5455 {
5456 SLJIT_ASSERT(has_alternatives);
5457 stacksize = GET2(trypath, 1);
5458 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5459
5460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5461 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5462 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5463 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5464 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5465 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5466 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5467 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5468 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5469
5470 JUMPHERE(jump);
5471 trypath += 1 + IMM2_SIZE;
5472 }
5473 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
5474 {
5475 /* Never has other case. */
5476 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5477
5478 stacksize = GET2(trypath, 1);
5479 if (common->currententry == NULL)
5480 stacksize = 0;
5481 else if (stacksize == RREF_ANY)
5482 stacksize = 1;
5483 else if (common->currententry->start == 0)
5484 stacksize = stacksize == 0;
5485 else
5486 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5487
5488 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
5489 {
5490 SLJIT_ASSERT(!has_alternatives);
5491 if (stacksize != 0)
5492 trypath += 1 + IMM2_SIZE;
5493 else
5494 {
5495 if (*cc == OP_ALT)
5496 {
5497 trypath = cc + 1 + LINK_SIZE;
5498 cc += GET(cc, 1);
5499 }
5500 else
5501 trypath = cc;
5502 }
5503 }
5504 else
5505 {
5506 SLJIT_ASSERT(has_alternatives);
5507
5508 stacksize = GET2(trypath, 1);
5509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5513 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5514 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5515 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5516 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5517 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5518 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5519 trypath += 1 + IMM2_SIZE;
5520 }
5521 }
5522 else
5523 {
5524 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
5525 /* Similar code as PUSH_BACKTRACK macro. */
5526 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5527 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5528 return NULL;
5529 memset(assert, 0, sizeof(assert_backtrack));
5530 assert->common.cc = trypath;
5531 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5532 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
5533 }
5534 }
5535
5536 compile_trypath(common, trypath, cc, backtrack);
5537 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5538 return NULL;
5539
5540 if (opcode == OP_ONCE)
5541 {
5542 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5543 {
5544 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5545 /* TMP2 which is set here used by OP_KETRMAX below. */
5546 if (ket == OP_KETRMAX)
5547 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5548 else if (ket == OP_KETRMIN)
5549 {
5550 /* Move the STR_PTR to the localptr. */
5551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5552 }
5553 }
5554 else
5555 {
5556 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5557 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5558 if (ket == OP_KETRMAX)
5559 {
5560 /* TMP2 which is set here used by OP_KETRMAX below. */
5561 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5562 }
5563 }
5564 }
5565
5566 stacksize = 0;
5567 if (ket != OP_KET || bra != OP_BRA)
5568 stacksize++;
5569 if (has_alternatives && opcode != OP_ONCE)
5570 stacksize++;
5571
5572 if (stacksize > 0)
5573 allocate_stack(common, stacksize);
5574
5575 stacksize = 0;
5576 if (ket != OP_KET)
5577 {
5578 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5579 stacksize++;
5580 }
5581 else if (bra != OP_BRA)
5582 {
5583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5584 stacksize++;
5585 }
5586
5587 if (has_alternatives)
5588 {
5589 if (opcode != OP_ONCE)
5590 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5591 if (ket != OP_KETRMAX)
5592 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5593 }
5594
5595 /* Must be after the trypath label. */
5596 if (offset != 0)
5597 {
5598 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5599 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5601 }
5602
5603 if (ket == OP_KETRMAX)
5604 {
5605 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5606 {
5607 if (has_alternatives)
5608 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5609 /* Checking zero-length iteration. */
5610 if (opcode != OP_ONCE)
5611 {
5612 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5613 /* Drop STR_PTR for greedy plus quantifier. */
5614 if (bra != OP_BRAZERO)
5615 free_stack(common, 1);
5616 }
5617 else
5618 /* TMP2 must contain the starting STR_PTR. */
5619 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5620 }
5621 else
5622 JUMPTO(SLJIT_JUMP, rmaxlabel);
5623 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5624 }
5625
5626 if (bra == OP_BRAZERO)
5627 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5628
5629 if (bra == OP_BRAMINZERO)
5630 {
5631 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5632 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5633 if (braminzerojump != NULL)
5634 {
5635 JUMPHERE(braminzerojump);
5636 /* We need to release the end pointer to perform the
5637 backtrack for the zero-length iteration. When
5638 framesize is < 0, OP_ONCE will do the release itself. */
5639 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5640 {
5641 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5642 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5643 }
5644 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5645 free_stack(common, 1);
5646 }
5647 /* Continue to the normal backtrack. */
5648 }
5649
5650 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5651 decrease_call_count(common);
5652
5653 /* Skip the other alternatives. */
5654 while (*cc == OP_ALT)
5655 cc += GET(cc, 1);
5656 cc += 1 + LINK_SIZE;
5657 return cc;
5658 }
5659
5660 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5661 {
5662 DEFINE_COMPILER;
5663 backtrack_common *backtrack;
5664 pcre_uchar opcode;
5665 int localptr;
5666 int cbraprivptr = 0;
5667 int framesize;
5668 int stacksize;
5669 int offset = 0;
5670 BOOL zero = FALSE;
5671 pcre_uchar *ccbegin = NULL;
5672 int stack;
5673 struct sljit_label *loop = NULL;
5674 struct jump_list *emptymatch = NULL;
5675
5676 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5677 if (*cc == OP_BRAPOSZERO)
5678 {
5679 zero = TRUE;
5680 cc++;
5681 }
5682
5683 opcode = *cc;
5684 localptr = PRIV_DATA(cc);
5685 SLJIT_ASSERT(localptr != 0);
5686 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5687 switch(opcode)
5688 {
5689 case OP_BRAPOS:
5690 case OP_SBRAPOS:
5691 ccbegin = cc + 1 + LINK_SIZE;
5692 break;
5693
5694 case OP_CBRAPOS:
5695 case OP_SCBRAPOS:
5696 offset = GET2(cc, 1 + LINK_SIZE);
5697 cbraprivptr = OVECTOR_PRIV(offset);
5698 offset <<= 1;
5699 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5700 break;
5701
5702 default:
5703 SLJIT_ASSERT_STOP();
5704 break;
5705 }
5706
5707 framesize = get_framesize(common, cc, FALSE);
5708 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5709 if (framesize < 0)
5710 {
5711 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5712 if (!zero)
5713 stacksize++;
5714 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5715 allocate_stack(common, stacksize);
5716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5717
5718 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5719 {
5720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5721 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5724 }
5725 else
5726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5727
5728 if (!zero)
5729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5730 }
5731 else
5732 {
5733 stacksize = framesize + 1;
5734 if (!zero)
5735 stacksize++;
5736 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5737 stacksize++;
5738 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5739 allocate_stack(common, stacksize);
5740
5741 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5742 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5743 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5744 stack = 0;
5745 if (!zero)
5746 {
5747 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5748 stack++;
5749 }
5750 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5751 {
5752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5753 stack++;
5754 }
5755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5756 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5757 }
5758
5759 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5761
5762 loop = LABEL();
5763 while (*cc != OP_KETRPOS)
5764 {
5765 backtrack->top = NULL;
5766 backtrack->topbacktracks = NULL;
5767 cc += GET(cc, 1);
5768
5769 compile_trypath(common, ccbegin, cc, backtrack);
5770 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5771 return NULL;
5772
5773 if (framesize < 0)
5774 {
5775 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5776
5777 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5778 {
5779 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5783 }
5784 else
5785 {
5786 if (opcode == OP_SBRAPOS)
5787 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5788 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5789 }
5790
5791 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5792 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5793
5794 if (!zero)
5795 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5796 }
5797 else
5798 {
5799 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5800 {
5801 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5802 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5803 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5806 }
5807 else
5808 {
5809 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5810 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5811 if (opcode == OP_SBRAPOS)
5812 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5813 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5814 }
5815
5816 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5817 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5818
5819 if (!zero)
5820 {
5821 if (framesize < 0)
5822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5823 else
5824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5825 }
5826 }
5827 JUMPTO(SLJIT_JUMP, loop);
5828 flush_stubs(common);
5829
5830 compile_backtrackpath(common, backtrack->top);
5831 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5832 return NULL;
5833 set_jumps(backtrack->topbacktracks, LABEL());
5834
5835 if (framesize < 0)
5836 {
5837 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5838 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5839 else
5840 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5841 }
5842 else
5843 {
5844 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5845 {
5846 /* Last alternative. */
5847 if (*cc == OP_KETRPOS)
5848 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5849 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5850 }
5851 else
5852 {
5853 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5854 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5855 }
5856 }
5857
5858 if (*cc == OP_KETRPOS)
5859 break;
5860 ccbegin = cc + 1 + LINK_SIZE;
5861 }
5862
5863 backtrack->topbacktracks = NULL;
5864 if (!zero)
5865 {
5866 if (framesize < 0)
5867 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5868 else /* TMP2 is set to [localptr] above. */
5869 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5870 }
5871
5872 /* None of them matched. */
5873 set_jumps(emptymatch, LABEL());
5874 decrease_call_count(common);
5875 return cc + 1 + LINK_SIZE;
5876 }
5877
5878 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5879 {
5880 int class_len;
5881
5882 *opcode = *cc;
5883 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5884 {
5885 cc++;
5886 *type = OP_CHAR;
5887 }
5888 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5889 {
5890 cc++;
5891 *type = OP_CHARI;
5892 *opcode -= OP_STARI - OP_STAR;
5893 }
5894 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5895 {
5896 cc++;
5897 *type = OP_NOT;
5898 *opcode -= OP_NOTSTAR - OP_STAR;
5899 }
5900 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5901 {
5902 cc++;
5903 *type = OP_NOTI;
5904 *opcode -= OP_NOTSTARI - OP_STAR;
5905 }
5906 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5907 {
5908 cc++;
5909 *opcode -= OP_TYPESTAR - OP_STAR;
5910 *type = 0;
5911 }
5912 else
5913 {
5914 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5915 *type = *opcode;
5916 cc++;
5917 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5918 *opcode = cc[class_len - 1];
5919 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5920 {
5921 *opcode -= OP_CRSTAR - OP_STAR;
5922 if (end != NULL)
5923 *end = cc + class_len;
5924 }
5925 else
5926 {
5927 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5928 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5929 *arg2 = GET2(cc, class_len);
5930
5931 if (*arg2 == 0)
5932 {
5933 SLJIT_ASSERT(*arg1 != 0);
5934 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5935 }
5936 if (*arg1 == *arg2)
5937 *opcode = OP_EXACT;
5938
5939 if (end != NULL)
5940 *end = cc + class_len + 2 * IMM2_SIZE;
5941 }
5942 return cc;
5943 }
5944
5945 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5946 {
5947 *arg1 = GET2(cc, 0);
5948 cc += IMM2_SIZE;
5949 }
5950
5951 if (*type == 0)
5952 {
5953 *type = *cc;
5954 if (end != NULL)
5955 *end = next_opcode(common, cc);
5956 cc++;
5957 return cc;
5958 }
5959
5960 if (end != NULL)
5961 {
5962 *end = cc + 1;
5963 #ifdef SUPPORT_UTF
5964 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5965 #endif
5966 }
5967 return cc;
5968 }
5969
/* Emits the try-path code for a repeated single item. The opcode, the item
   type, the bounds arg1/arg2 and the return value "end" are all obtained from
   get_iterator_parameters(). An iterator_backtrack record is pushed first; for
   the backtracking forms its trypath label is the point at which
   compile_iterator_backtrackpath() re-enters the code generated here.

   In the comparisons below arg1 acts as the upper repeat bound and arg2 as
   the lower repeat bound.

   NOTE(review): the NULL passed to PUSH_BACKTRACK is presumably the value
   returned when the record cannot be allocated - confirm against the macro. */
5970 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5971 {
5972 DEFINE_COMPILER;
5973 backtrack_common *backtrack;
5974 pcre_uchar opcode;
5975 pcre_uchar type;
5976 int arg1 = -1, arg2 = -1;
5977 pcre_uchar* end;
5978 jump_list *nomatch = NULL;
5979 struct sljit_jump *jump = NULL;
5980 struct sljit_label *label;
/* With localptr == 0 the two state words are STACK(0) and STACK(1) relative
   to STACK_TOP; otherwise they are localptr and localptr + sizeof(sljit_w)
   relative to SLJIT_LOCALS_REG. */
5981 int localptr = PRIV_DATA(cc);
5982 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
5983 int offset0 = (localptr == 0) ? STACK(0) : localptr;
5984 int offset1 = (localptr == 0) ? STACK(1) : localptr + sizeof(sljit_w);
5985
5986 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5987
/* From here on cc is the value returned by get_iterator_parameters(); it is
   what gets handed to compile_char1_trypath() for every single-item match. */
5988 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5989
5990 switch(opcode)
5991 {
/* Greedy repeats. */
5992 case OP_STAR:
5993 case OP_PLUS:
5994 case OP_UPTO:
5995 case OP_CRRANGE:
5996 if (type == OP_ANYNL || type == OP_EXTUNI)
5997 {
/* For these two types every successful match pushes its own STR_PTR on the
   stack. A zero word is stored first; the backtrack path pops one word at a
   time and stops re-entering the try path when it reads zero. */
5998 SLJIT_ASSERT(localptr == 0);
5999 if (opcode == OP_STAR || opcode == OP_UPTO)
6000 {
6001 allocate_stack(common, 2);
6002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6003 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6004 }
6005 else
6006 {
6007 allocate_stack(common, 1);
6008 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6009 }
/* Bounded forms keep a match counter, starting at zero, in POSSESSIVE0. */
6010 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6011 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6012
6013 label = LABEL();
6014 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6015 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6016 {
6017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6018 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* NOTE(review): this jump back to label is emitted before the incremented
   count is written back to POSSESSIVE0 below - confirm that is intended. */
6019 if (opcode == OP_CRRANGE && arg2 > 0)
6020 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Leave the loop, without pushing another position, once arg1 is reached. */
6021 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6022 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6024 }
6025
6026 allocate_stack(common, 1);
6027 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6028 JUMPTO(SLJIT_JUMP, label);
6029 if (jump != NULL)
6030 JUMPHERE(jump);
6031 }
6032 else
6033 {
/* Other types: offset0 holds the position after the last successful match
   and offset1 a counter that starts at 1 and is incremented per match. For
   OP_PLUS one mandatory match is emitted before the state is set up, so its
   failure is taken before any stack space has been allocated. */
6034 if (opcode == OP_PLUS)
6035 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6036 if (localptr == 0)
6037 allocate_stack(common, 2);
6038 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6039 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6040 label = LABEL();
/* A failed match inside the loop only ends the loop (nomatch list). */
6041 compile_char1_trypath(common, type, cc, &nomatch);
6042 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6043 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
6044 {
/* No upper bound is tested on this path: count and loop unconditionally. */
6045 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6046 JUMPTO(SLJIT_JUMP, label);
6047 }
6048 else
6049 {
/* Loop while the incremented counter is still below arg1 + 1. */
6050 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6051 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6052 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6053 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6054 }
6055 set_jumps(nomatch, LABEL());
/* OP_CRRANGE backtracks when the counter is below arg2 + 1. */
6056 if (opcode == OP_CRRANGE)
6057 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Resume from the position saved after the last successful match. */
6058 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6059 }
6060 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6061 break;
6062
/* Minimizing star/plus: only the current position is saved here (after one
   mandatory match for OP_MINPLUS); further matches are emitted by the
   backtrack path. */
6063 case OP_MINSTAR:
6064 case OP_MINPLUS:
6065 if (opcode == OP_MINPLUS)
6066 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6067 if (localptr == 0)
6068 allocate_stack(common, 1);
6069 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6070 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6071 break;
6072
/* Minimizing bounded repeats: save the position and a counter of 1.
   OP_CRMINRANGE then jumps straight to the backtrack path. */
6073 case OP_MINUPTO:
6074 case OP_CRMINRANGE:
6075 if (localptr == 0)
6076 allocate_stack(common, 2);
6077 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6078 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6079 if (opcode == OP_CRMINRANGE)
6080 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6081 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6082 break;
6083
/* Optional item: the position is saved first; only the greedy form attempts
   the match right away. */
6084 case OP_QUERY:
6085 case OP_MINQUERY:
6086 if (localptr == 0)
6087 allocate_stack(common, 1);
6088 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6089 if (opcode == OP_QUERY)
6090 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6091 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6092 break;
6093
/* Exact count: the counter in POSSESSIVE0 starts at 1 and the loop repeats
   while the incremented counter is below arg1 + 1; any failed match goes to
   the backtrack list. No trypath label is recorded for this case. */
6094 case OP_EXACT:
6095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
6096 label = LABEL();
6097 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6099 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6101 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6102 break;
6103
/* Possessive repeats: POSSESSIVE1 holds the position after the last
   successful match. POSSESSIVE0 is a 1/2 "matched at least once" marker for
   OP_POSPLUS and a counter for OP_POSUPTO. */
6104 case OP_POSSTAR:
6105 case OP_POSPLUS:
6106 case OP_POSUPTO:
6107 if (opcode != OP_POSSTAR)
6108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
6109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
6110 label = LABEL();
6111 compile_char1_trypath(common, type, cc, &nomatch);
6112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
6113 if (opcode != OP_POSUPTO)
6114 {
6115 if (opcode == OP_POSPLUS)
6116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
6117 JUMPTO(SLJIT_JUMP, label);
6118 }
6119 else
6120 {
6121 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6122 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6124 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6125 }
6126 set_jumps(nomatch, LABEL());
/* OP_POSPLUS backtracks if the marker never reached 2. */
6127 if (opcode == OP_POSPLUS)
6128 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
6129 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6130 break;
6131
/* Possessive optional item: one attempt; on failure STR_PTR is reloaded from
   the position saved before the attempt. */
6132 case OP_POSQUERY:
6133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
6134 compile_char1_trypath(common, type, cc, &nomatch);
6135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
6136 set_jumps(nomatch, LABEL());
6137 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6138 break;
6139
6140 default:
6141 SLJIT_ASSERT_STOP();
6142 break;
6143 }
6144
6145 decrease_call_count(common);
6146 return end;
6147 }
6148
6149 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6150 {
6151 DEFINE_COMPILER;
6152 backtrack_common *backtrack;
6153
6154 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6155
6156 if (*cc == OP_FAIL)
6157 {
6158 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6159 return cc + 1;
6160 }
6161
6162 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6163 {
6164 /* No need to check notempty conditions. */
6165 if (common->acceptlabel == NULL)
6166 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6167 else
6168 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6169 return cc + 1;
6170 }
6171
6172 if (common->acceptlabel == NULL)
6173 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6174 else
6175 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6177 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6179 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6180 if (common->acceptlabel == NULL)
6181 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6182 else
6183 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6185 if (common->acceptlabel == NULL)
6186 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6187 else
6188 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6189 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6190 return cc + 1;
6191 }
6192
6193 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
6194 {
6195 DEFINE_COMPILER;
6196 int offset = GET2(cc, 1);
6197
6198 /* Data will be discarded anyway... */
6199 if (common->currententry != NULL)
6200 return cc + 1 + IMM2_SIZE;
6201
6202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6203 offset <<= 1;
6204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6206 return cc + 1 + IMM2_SIZE;
6207 }
6208
/* Emits the try-path code for the opcodes in [cc, ccend). Each opcode is
   dispatched to its generator, which returns the address of the next opcode
   to process. Generation stops early when a generator returns NULL or when an
   unknown opcode is met.

   Single-item generators receive a jump list for match failures: the
   nextbacktracks list of the most recently pushed backtrack record when
   parent->top is set, otherwise parent's own topbacktracks list. */
6209 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6210 {
6211 DEFINE_COMPILER;
6212 backtrack_common *backtrack;
6213
6214 while (cc < ccend)
6215 {
6216 switch(*cc)
6217 {
/* Opcodes handled directly by compile_char1_trypath(). */
6218 case OP_SOD:
6219 case OP_SOM:
6220 case OP_NOT_WORD_BOUNDARY:
6221 case OP_WORD_BOUNDARY:
6222 case OP_NOT_DIGIT:
6223 case OP_DIGIT:
6224 case OP_NOT_WHITESPACE:
6225 case OP_WHITESPACE:
6226 case OP_NOT_WORDCHAR:
6227 case OP_WORDCHAR:
6228 case OP_ANY:
6229 case OP_ALLANY:
6230 case OP_ANYBYTE:
6231 case OP_NOTPROP:
6232 case OP_PROP:
6233 case OP_ANYNL:
6234 case OP_NOT_HSPACE:
6235 case OP_HSPACE:
6236 case OP_NOT_VSPACE:
6237 case OP_VSPACE:
6238 case OP_EXTUNI:
6239 case OP_EODN:
6240 case OP_EOD:
6241 case OP_CIRC:
6242 case OP_CIRCM:
6243 case OP_DOLL:
6244 case OP_DOLLM:
6245 case OP_NOT:
6246 case OP_NOTI:
6247 case OP_REVERSE:
6248 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6249 break;
6250
/* The previous OVECTOR(0) is pushed on the stack and OVECTOR(0) is replaced
   by the current STR_PTR. */
6251 case OP_SET_SOM:
6252 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6254 allocate_stack(common, 1);
6255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6257 cc++;
6258 break;
6259
/* Literal characters: compile_charn_trypath() is used only when
   common->mode is JIT_COMPILE; other modes use compile_char1_trypath(). */
6260 case OP_CHAR:
6261 case OP_CHARI:
6262 if (common->mode == JIT_COMPILE)
6263 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6264 else
6265 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6266 break;
6267
/* Every single-item repeat opcode goes to compile_iterator_trypath(). */
6268 case OP_STAR:
6269 case OP_MINSTAR:
6270 case OP_PLUS:
6271 case OP_MINPLUS:
6272 case OP_QUERY:
6273 case OP_MINQUERY:
6274 case OP_UPTO:
6275 case OP_MINUPTO:
6276 case OP_EXACT:
6277 case OP_POSSTAR:
6278 case OP_POSPLUS:
6279 case OP_POSQUERY:
6280 case OP_POSUPTO:
6281 case OP_STARI:
6282 case OP_MINSTARI:
6283 case OP_PLUSI:
6284 case OP_MINPLUSI:
6285 case OP_QUERYI:
6286 case OP_MINQUERYI:
6287 case OP_UPTOI:
6288 case OP_MINUPTOI:
6289 case OP_EXACTI:
6290 case OP_POSSTARI:
6291 case OP_POSPLUSI:
6292 case OP_POSQUERYI:
6293 case OP_POSUPTOI:
6294 case OP_NOTSTAR:
6295 case OP_NOTMINSTAR:
6296 case OP_NOTPLUS:
6297 case OP_NOTMINPLUS:
6298 case OP_NOTQUERY:
6299 case OP_NOTMINQUERY:
6300 case OP_NOTUPTO:
6301 case OP_NOTMINUPTO:
6302 case OP_NOTEXACT:
6303 case OP_NOTPOSSTAR:
6304 case OP_NOTPOSPLUS:
6305 case OP_NOTPOSQUERY:
6306 case OP_NOTPOSUPTO:
6307 case OP_NOTSTARI:
6308 case OP_NOTMINSTARI:
6309 case OP_NOTPLUSI:
6310 case OP_NOTMINPLUSI:
6311 case OP_NOTQUERYI:
6312 case OP_NOTMINQUERYI:
6313 case OP_NOTUPTOI:
6314 case OP_NOTMINUPTOI:
6315 case OP_NOTEXACTI:
6316 case OP_NOTPOSSTARI:
6317 case OP_NOTPOSPLUSI:
6318 case OP_NOTPOSQUERYI:
6319 case OP_NOTPOSUPTOI:
6320 case OP_TYPESTAR:
6321 case OP_TYPEMINSTAR:
6322 case OP_TYPEPLUS:
6323 case OP_TYPEMINPLUS:
6324 case OP_TYPEQUERY:
6325 case OP_TYPEMINQUERY:
6326 case OP_TYPEUPTO:
6327 case OP_TYPEMINUPTO:
6328 case OP_TYPEEXACT:
6329 case OP_TYPEPOSSTAR:
6330 case OP_TYPEPOSPLUS:
6331 case OP_TYPEPOSQUERY:
6332 case OP_TYPEPOSUPTO:
6333 cc = compile_iterator_trypath(common, cc, parent);
6334 break;
6335
/* A class is an iterator when the unit at index 1 + 32 / sizeof(pcre_uchar)
   is a repeat opcode in the range OP_CRSTAR..OP_CRMINRANGE; otherwise it is
   matched once. */
6336 case OP_CLASS:
6337 case OP_NCLASS:
6338 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6339 cc = compile_iterator_trypath(common, cc, parent);
6340 else
6341 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6342 break;
6343
6344 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* Same test for an extended class; the offset of the following opcode is
   read with GET(cc, 1). */
6345 case OP_XCLASS:
6346 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6347 cc = compile_iterator_trypath(common, cc, parent);
6348 else
6349 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6350 break;
6351 #endif
6352
/* Back references, with or without a following repeat opcode. */
6353 case OP_REF:
6354 case OP_REFI:
6355 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6356 cc = compile_ref_iterator_trypath(common, cc, parent);
6357 else
6358 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6359 break;
6360
6361 case OP_RECURSE:
6362 cc = compile_recurse_trypath(common, cc, parent);
6363 break;
6364
/* Assertions get an assert_backtrack record of their own. */
6365 case OP_ASSERT:
6366 case OP_ASSERT_NOT:
6367 case OP_ASSERTBACK:
6368 case OP_ASSERTBACK_NOT:
6369 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6370 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6371 break;
6372
/* The bracket itself is skipped here (cc moves to bracketend()); only the
   stack state and the trypath label are produced. One word is used unless
   the bracket is closed by OP_KETRMIN, in which case a zero word and STR_PTR
   are pushed. */
6373 case OP_BRAMINZERO:
6374 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6375 cc = bracketend(cc + 1);
6376 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6377 {
6378 allocate_stack(common, 1);
6379 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6380 }
6381 else
6382 {
6383 allocate_stack(common, 2);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6385 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6386 }
6387 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
/* NOTE(review): cc already points past the bracket here, so cc[1] looks at
   the code that follows it rather than at the bracket opcode - confirm that
   this is the intended operand of the test. */
6388 if (cc[1] > OP_ASSERTBACK_NOT)
6389 decrease_call_count(common);
6390 break;
6391
6392 case OP_ONCE:
6393 case OP_ONCE_NC:
6394 case OP_BRA:
6395 case OP_CBRA:
6396 case OP_COND:
6397 case OP_SBRA:
6398 case OP_SCBRA:
6399 case OP_SCOND:
6400 cc = compile_bracket_trypath(common, cc, parent);
6401 break;
6402
/* OP_BRAZERO is followed either by a bracket (opcode above
   OP_ASSERTBACK_NOT) or by an assertion. */
6403 case OP_BRAZERO:
6404 if (cc[1] > OP_ASSERTBACK_NOT)
6405 cc = compile_bracket_trypath(common, cc, parent);
6406 else
6407 {
6408 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6409 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6410 }
6411 break;
6412
6413 case OP_BRAPOS:
6414 case OP_CBRAPOS:
6415 case OP_SBRAPOS:
6416 case OP_SCBRAPOS:
6417 case OP_BRAPOSZERO:
6418 cc = compile_bracketpos_trypath(common, cc, parent);
6419 break;
6420
/* The previous value of the mark_ptr local is pushed on the stack; cc + 2 is
   then stored both in that local and in the mark_ptr field of jit_arguments.
   The opcode is skipped using cc[1] (presumably the length of the mark
   name - confirm against the opcode layout). */
6421 case OP_MARK:
6422 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6423 SLJIT_ASSERT(common->mark_ptr != 0);
6424 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6425 allocate_stack(common, 1);
6426 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6428 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6430 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6431 cc += 1 + 2 + cc[1];
6432 break;
6433
/* Only a backtrack record is pushed; no try-path code is emitted here. */
6434 case OP_COMMIT:
6435 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6436 cc += 1;
6437 break;
6438
6439 case OP_FAIL:
6440 case OP_ACCEPT:
6441 case OP_ASSERT_ACCEPT:
6442 cc = compile_fail_accept_trypath(common, cc, parent);
6443 break;
6444
6445 case OP_CLOSE:
6446 cc = compile_close_trypath(common, cc);
6447 break;
6448
/* The bracket that follows is skipped without emitting any code. */
6449 case OP_SKIPZERO:
6450 cc = bracketend(cc + 1);
6451 break;
6452
6453 default:
6454 SLJIT_ASSERT_STOP();
6455 return;
6456 }
/* A generator signals failure by returning NULL. */
6457 if (cc == NULL)
6458 return;
6459 }
6460 SLJIT_ASSERT(cc == ccend);
6461 }
6462
/* The try-path helper macros are removed here; the code that follows uses
   the backtrack-path macros defined below instead. */
6463 #undef PUSH_BACKTRACK
6464 #undef PUSH_BACKTRACK_NOVALUE
6465 #undef BACKTRACK_AS
6466
/* Calls compile_backtrackpath() for the given record and returns from the
   enclosing function (which must return void) as soon as the compiler reports
   an error. Requires "common" and "compiler" to be in scope at the point of
   use. */
6467 #define COMPILE_BACKTRACKPATH(current) \
6468 do \
6469 { \
6470 compile_backtrackpath(common, (current)); \
6471 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6472 return; \
6473 } \
6474 while (0)
6475
/* Casts the variable named "current" in the enclosing scope to a pointer to
   the given backtrack record type. */
6476 #define CURRENT_AS(type) ((type *)current)
6477
/* Emits the backtrack-path code for a single-item iterator. The opcode, item
   type and bounds are decoded again with get_iterator_parameters() (no end
   pointer is requested), and the state locations are computed exactly as in
   compile_iterator_trypath(). The trypath label stored in the
   iterator_backtrack record is the target used to resume matching. */
6478 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6479 {
6480 DEFINE_COMPILER;
6481 pcre_uchar *cc = current->cc;
6482 pcre_uchar opcode;
6483 pcre_uchar type;
6484 int arg1 = -1, arg2 = -1;
6485 struct sljit_label *label = NULL;
6486 struct sljit_jump *jump = NULL;
6487 jump_list *jumplist = NULL;
/* State words: STACK(0)/STACK(1) when localptr == 0, otherwise the two
   locals at localptr and localptr + sizeof(sljit_w). */
6488 int localptr = PRIV_DATA(cc);
6489 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6490 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6491 int offset1 = (localptr == 0) ? STACK(1) : localptr + sizeof(sljit_w);
6492
6493 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6494
6495 switch(opcode)
6496 {
/* Greedy repeats. */
6497 case OP_STAR:
6498 case OP_PLUS:
6499 case OP_UPTO:
6500 case OP_CRRANGE:
6501 if (type == OP_ANYNL || type == OP_EXTUNI)
6502 {
/* Pop one saved position; a non-zero value resumes the try path from it,
   a zero value means nothing is left and control falls through. */
6503 SLJIT_ASSERT(localptr == 0);
6504 set_jumps(current->topbacktracks, LABEL());
6505 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6506 free_stack(common, 1);
6507 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6508 }
6509 else
6510 {
/* STAR, PLUS and UPTO use a lower limit of zero here (this reading assumes
   OP_STAR is not above OP_PLUS - confirm against the opcode table). */
6511 if (opcode <= OP_PLUS || opcode == OP_UPTO)
6512 arg2 = 0;
/* Give up when the counter is not above arg2 + 1. Otherwise decrement it,
   step the saved position back by one character and resume the try path. */
6513 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6514 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
6515 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
6516 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6517 skip_char_back(common);
6518 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6519 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
/* OP_CRRANGE failures arrive before the stack words are released ... */
6520 if (opcode == OP_CRRANGE)
6521 set_jumps(current->topbacktracks, LABEL());
6522 JUMPHERE(jump);
6523 if (localptr == 0)
6524 free_stack(common, 2);
/* ... whereas the OP_PLUS failure arrives after the release, because its
   mandatory first match is emitted before the try path allocates them. */
6525 if (opcode == OP_PLUS)
6526 set_jumps(current->topbacktracks, LABEL());
6527 }
6528 break;
6529
/* Minimizing star/plus: reload the saved position and try one more item. On
   success save the new position and resume the try path; on failure release
   the state. The OP_MINPLUS first-match failure lands after the release. */
6530 case OP_MINSTAR:
6531 case OP_MINPLUS:
6532 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6533 compile_char1_trypath(common, type, cc, &jumplist);
6534 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6535 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6536 set_jumps(jumplist, LABEL());
6537 if (localptr == 0)
6538 free_stack(common, 1);
6539 if (opcode == OP_MINPLUS)
6540 set_jumps(current->topbacktracks, LABEL());
6541 break;
6542
/* Minimizing bounded repeats. For OP_CRMINRANGE the try path jumps straight
   here (via topbacktracks) and label marks the start of the match loop. */
6543 case OP_MINUPTO:
6544 case OP_CRMINRANGE:
6545 if (opcode == OP_CRMINRANGE)
6546 {
6547 label = LABEL();
6548 set_jumps(current->topbacktracks, label);
6549 }
6550 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6551 compile_char1_trypath(common, type, cc, &jumplist);
6552
/* One more item matched: save the new position and the incremented count. */
6553 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6554 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6555 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6556 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6557
/* Keep matching without resuming while the count is below arg2 + 1. */
6558 if (opcode == OP_CRMINRANGE)
6559 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
6560
/* Resume the try path: always when OP_CRMINRANGE has arg1 == 0, otherwise
   only while the count is below arg1 + 2. */
6561 if (opcode == OP_CRMINRANGE && arg1 == 0)
6562 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6563 else
6564 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
6565
6566 set_jumps(jumplist, LABEL());
6567 if (localptr == 0)
6568 free_stack(common, 2);
6569 break;
6570
/* Greedy optional item: offset0 holds the position saved before the item was
   tried and is cleared once it has been used. A non-zero value resumes the
   try path from that position; zero means that was already done. A failed
   match in the try path (topbacktracks) takes the same reload-and-resume
   route. */
6571 case OP_QUERY:
6572 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6573 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6574 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6575 jump = JUMP(SLJIT_JUMP);
6576 set_jumps(current->topbacktracks, LABEL());
6577 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6578 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6579 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6580 JUMPHERE(jump);
6581 if (localptr == 0)
6582 free_stack(common, 1);
6583 break;
6584
/* Minimizing optional item: the saved position is reloaded and cleared. If
   it was already zero there is nothing left to try; otherwise the item is
   matched now and the try path resumed on success. */
6585 case OP_MINQUERY:
6586 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6587 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6588 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6589 compile_char1_trypath(common, type, cc, &jumplist);
6590 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6591 set_jumps(jumplist, LABEL());
6592 JUMPHERE(jump);
6593 if (localptr == 0)
6594 free_stack(common, 1);
6595 break;
6596
/* These only need a landing point for the try-path failures. */
6597 case OP_EXACT:
6598 case OP_POSPLUS:
6599 set_jumps(current->topbacktracks, LABEL());
6600 break;
6601
/* Nothing is emitted for these possessive forms. */
6602 case OP_POSSTAR:
6603 case OP_POSQUERY:
6604 case OP_POSUPTO:
6605 break;
6606
6607 default:
6608 SLJIT_ASSERT_STOP();
6609 break;
6610 }
6611 }
6612
6613 static void compile_ref_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6614 {
6615 DEFINE_COMPILER;
6616 pcre_uchar *cc = current->cc;
6617 pcre_uchar type;
6618
6619 type = cc[1 + IMM2_SIZE];
6620 if ((type & 0x1) == 0)
6621 {
6622 set_jumps(current->topbacktracks, LABEL());
6623 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6624 free_stack(common, 1);
6625 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6626 return;
6627 }
6628
6629 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6630 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6631 set_jumps(current->topbacktracks, LABEL());
6632 free_stack(common, 2);
6633 }
6634
6635 static void compile_recurse_backtrackpath(compiler_common *common, struct backtrack_common *current)
6636 {
6637 DEFINE_COMPILER;
6638
6639 set_jumps(current->topbacktracks, LABEL());
6640
6641 if (common->has_set_som && common->mark_ptr != 0)
6642 {
6643 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6645 free_stack(common, 2);
6646 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
6647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6648 }
6649 else if (common->has_set_som || common->mark_ptr != 0)
6650 {
6651 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6652 free_stack(common, 1);
6653 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
6654 }
6655 }
6656
6657 static void compile_assert_backtrackpath(compiler_common *common, struct backtrack_common *current)
6658 {
6659 DEFINE_COMPILER;
6660 pcre_uchar *cc = current->cc;
6661 pcre_uchar bra = OP_BRA;
6662 struct sljit_jump *brajump = NULL;
6663
6664 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
6665 if (*cc == OP_BRAZERO)
6666 {
6667 bra = *cc;
6668 cc++;
6669 }
6670
6671 if (bra == OP_BRAZERO)
6672 {
6673 SLJIT_ASSERT(current->topbacktracks == NULL);
6674 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6675 }
6676
6677 if (CURRENT_AS(assert_backtrack)->framesize < 0)
6678 {
6679 set_jumps(current->topbacktracks, LABEL());
6680
6681 if (bra == OP_BRAZERO)
6682 {
6683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6684 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6685 free_stack(common, 1);
6686 }
6687 return;
6688 }
6689
6690 if (bra == OP_BRAZERO)
6691 {
6692 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
6693 {
6694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6695 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6696 free_stack(common, 1);
6697 return;
6698 }
6699 free_stack(common, 1);
6700 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6701 }
6702
6703 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
6704 {
6705 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr);
6706 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_w));
6708
6709 set_jumps(current->topbacktracks, LABEL());
6710 }
6711 else
6712 set_jumps(current->topbacktracks, LABEL());
6713
6714 if (bra == OP_BRAZERO)
6715 {
6716 /* We know there is enough place on the stack. */
6717 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6719 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->trypath);
6720 JUMPHERE(brajump);
6721 }
6722 }
6723
6724 static void compile_bracket_backtrackpath(compiler_common *common, struct backtrack_common *current)
6725 {
6726 DEFINE_COMPILER;
6727 int opcode;
6728 int offset = 0;
6729 int localptr = CURRENT_AS(bracket_backtrack)->localptr;
6730 int stacksize;
6731 int count;
6732 pcre_uchar *cc = current->cc;
6733 pcre_uchar *ccbegin;
6734 pcre_uchar *ccprev;
6735 jump_list *jumplist = NULL;
6736 jump_list *jumplistitem = NULL;
6737 pcre_uchar bra = OP_BRA;
6738 pcre_uchar ket;
6739 assert_backtrack *assert;
6740 BOOL has_alternatives;
6741 struct sljit_jump *brazero = NULL;
6742 struct sljit_jump *once = NULL;
6743 struct sljit_jump *cond = NULL;
6744 struct sljit_label *rminlabel = NULL;
6745
6746 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6747 {
6748 bra = *cc;
6749 cc++;
6750 }
6751
6752 opcode = *cc;
6753 ccbegin = cc;
6754 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
6755 cc += GET(cc, 1);
6756 has_alternatives = *cc == OP_ALT;
6757 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6758 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
6759 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6760 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
6761 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6762 opcode = OP_SCOND;
6763 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6764 opcode = OP_ONCE;
6765
6766 if (ket == OP_KETRMAX)
6767 {
6768 if (bra == OP_BRAZERO)
6769 {
6770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6771 free_stack(common, 1);
6772 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6773 }
6774 }
6775 else if (ket == OP_KETRMIN)
6776 {
6777 if (bra != OP_BRAMINZERO)
6778 {
6779 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6780 if (opcode >= OP_SBRA || opcode == OP_ONCE)
6781 {
6782 /* Checking zero-length iteration. */
6783 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
6784 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_backtrack)->recursivetrypath);
6785 else