/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 989 - (show annotations)
Sat Jul 7 11:11:02 2012 UTC (7 years, 4 months ago) by zherczeg
Original Path: code/trunk/pcre_jit_compile.c
File MIME type: text/plain
File size: 245234 byte(s)
Error occurred while calculating annotation data.
Single character iterator optimizations in the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Size of the memory area allocated on the stack. Fast, but its size is
limited. */
#define LOCAL_SPACE_SIZE 32768

/* NOTE(review): presumably the step used when the backtracking stack has to
grow; the users of this constant are outside this section - confirm. */
#define STACK_GROWTH_RATE 8192

/* Defined only when SLJIT_DEBUG is enabled: turns on the check that the
allocation could destroy temporaries. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct backtrack_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the try path (expected path), and the other is called
93 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the try path.
95
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
102
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
117
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
121 C try path
122 jump to D try path
123 C backtrack path
124 A backtrack path
125
126 Notice that the order of the backtrack code paths is the opposite of the order
127 of the try code paths. This way the topmost value on the stack always belongs
128 to the current backtrack code path. The backtrack path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the try path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the backtrack code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when backtracking is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  /* The jump stored in this node. */
  struct sljit_jump *jump;
  /* Following node of the list. */
  struct jump_list *next;
} jump_list;
177
/* Kinds of stubs that can be recorded in a stub_list entry. */
enum stub_types {
  stack_alloc
};
179
180 typedef struct stub_list {
181 enum stub_types type;
182 int data;
183 struct sljit_jump *start;
184 struct sljit_label *leave;
185 struct stub_list *next;
186 } stub_list;
187
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the aguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
204
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
215
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to coninue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
235
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
245
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
250
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
256
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is not created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
270
271 typedef struct compiler_common {
272 struct sljit_compiler *compiler;
273 pcre_uchar *start;
274
275 /* Opcode local area direct map. */
276 int *localptrs;
277 int cbraptr;
278 /* OVector starting point. Must be divisible by 2. */
279 int ovector_start;
280 /* Last known position of the requested byte. */
281 int req_char_ptr;
282 /* Head of the last recursion. */
283 int recursive_head;
284 /* First inspected character for partial matching. */
285 int start_used_ptr;
286 /* Starting pointer for partial soft matches. */
287 int hit_start;
288 /* End pointer of the first line. */
289 int first_line_end;
290 /* Points to the marked string. */
291 int mark_ptr;
292
293 /* Other */
294 const pcre_uint8 *fcc;
295 sljit_w lcc;
296 int mode;
297 int nltype;
298 int newline;
299 int bsr_nltype;
300 int endonly;
301 BOOL has_set_som;
302 sljit_w ctypes;
303 sljit_uw name_table;
304 sljit_w name_count;
305 sljit_w name_entry_size;
306
307 /* Labels and jump lists. */
308 struct sljit_label *partialmatchlabel;
309 struct sljit_label *leavelabel;
310 struct sljit_label *acceptlabel;
311 stub_list *stubs;
312 recurse_entry *entries;
313 recurse_entry *currententry;
314 jump_list *partialmatch;
315 jump_list *leave;
316 jump_list *accept;
317 jump_list *calllimit;
318 jump_list *stackalloc;
319 jump_list *revertframes;
320 jump_list *wordboundary;
321 jump_list *anynewline;
322 jump_list *hspace;
323 jump_list *vspace;
324 jump_list *casefulcmp;
325 jump_list *caselesscmp;
326 BOOL jscript_compat;
327 #ifdef SUPPORT_UTF
328 BOOL utf;
329 #ifdef SUPPORT_UCP
330 BOOL use_ucp;
331 #endif
332 jump_list *utfreadchar;
333 #ifdef COMPILE_PCRE8
334 jump_list *utfreadtype8;
335 #endif
336 #endif /* SUPPORT_UTF */
337 #ifdef SUPPORT_UCP
338 jump_list *getucd;
339 #endif
340 } compiler_common;
341
/* Context for byte_sequence_compare. The 'c' and 'oc' unions have the same
layout and exist only when unaligned access is available. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
375
/* Tags of the non-capture records of a saved stack frame (written by
init_frame). */
enum {
  /* Terminates the frame. */
  frame_end = 0,
  /* Followed by the saved value of OVECTOR(0). */
  frame_setstrbegin = -1,
  /* Followed by the saved mark pointer. */
  frame_setmark = -2
};
381
/* Undefine the sljit macro which is redefined among the shortcuts. */
#undef CMP

/* Byte offset of the index-th element of the stack: element 0 is at
-sizeof(sljit_w), element 1 one word below it, and so on. */
#define STACK(index) ((-(index) - 1) * (int)sizeof(sljit_w))

/* Names of the sljit registers used by the code generator. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
398
/* Layout of the locals (byte offsets, one machine word per slot). */
/* Slots 0 and 1 can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Slots 2 and 3 belong to possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Slot 4: max limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second holds
the start pointers while the end of the capturing group has not yet been
reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(index)   (OVECTOR_START + (index) * sizeof(sljit_w))
#define OVECTOR_PRIV(index) (common->cbraptr + (index) * sizeof(sljit_w))
/* Private data slot of the opcode at 'opcode_ptr'. */
#define PRIV_DATA(opcode_ptr) (common->localptrs[(opcode_ptr) - common->start])
416
417 #ifdef COMPILE_PCRE8
418 #define MOV_UCHAR SLJIT_MOV_UB
419 #define MOVU_UCHAR SLJIT_MOVU_UB
420 #else
421 #ifdef COMPILE_PCRE16
422 #define MOV_UCHAR SLJIT_MOV_UH
423 #define MOVU_UCHAR SLJIT_MOVU_UH
424 #else
425 #error Unsupported compiling mode
426 #endif
427 #endif
428
/* Shortcuts. DEFINE_COMPILER declares the local variable 'compiler' from
common->compiler; every other macro below passes that variable to the
sljit function it wraps. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(opcode, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (opcode), (dst), (dstw), (src), (srcw))
#define OP2(opcode, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (opcode), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(kind) \
  sljit_emit_jump(compiler, (kind))
#define JUMPTO(kind, target) \
  sljit_set_label(sljit_emit_jump(compiler, (kind)), (target))
#define JUMPHERE(pending) \
  sljit_set_label((pending), sljit_emit_label(compiler))

/* Compare and jump. */
#define CMP(kind, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (kind), (src1), (src1w), (src2), (src2w))
#define CMPTO(kind, src1, src1w, src2, src2w, target) \
  sljit_set_label(sljit_emit_cmp(compiler, (kind), (src1), (src1w), (src2), (src2w)), (target))

/* Miscellaneous. */
#define COND_VALUE(opcode, dst, dstw, kind) \
  sljit_emit_cond_value(compiler, (opcode), (dst), (dstw), (kind))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
452
453 static pcre_uchar* bracketend(pcre_uchar* cc)
454 {
455 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
456 do cc += GET(cc, 1); while (*cc == OP_ALT);
457 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
458 cc += 1 + LINK_SIZE;
459 return cc;
460 }
461
462 /* Functions which might need modification for every newly supported opcode:
463 next_opcode
464 get_localspace
465 set_localptrs
466 get_framesize
467 init_frame
468 get_localsize
469 copy_locals
470 compile_trypath
471 compile_backtrackpath
472 */
473
474 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
475 {
476 SLJIT_UNUSED_ARG(common);
477 switch(*cc)
478 {
479 case OP_SOD:
480 case OP_SOM:
481 case OP_SET_SOM:
482 case OP_NOT_WORD_BOUNDARY:
483 case OP_WORD_BOUNDARY:
484 case OP_NOT_DIGIT:
485 case OP_DIGIT:
486 case OP_NOT_WHITESPACE:
487 case OP_WHITESPACE:
488 case OP_NOT_WORDCHAR:
489 case OP_WORDCHAR:
490 case OP_ANY:
491 case OP_ALLANY:
492 case OP_ANYNL:
493 case OP_NOT_HSPACE:
494 case OP_HSPACE:
495 case OP_NOT_VSPACE:
496 case OP_VSPACE:
497 case OP_EXTUNI:
498 case OP_EODN:
499 case OP_EOD:
500 case OP_CIRC:
501 case OP_CIRCM:
502 case OP_DOLL:
503 case OP_DOLLM:
504 case OP_TYPESTAR:
505 case OP_TYPEMINSTAR:
506 case OP_TYPEPLUS:
507 case OP_TYPEMINPLUS:
508 case OP_TYPEQUERY:
509 case OP_TYPEMINQUERY:
510 case OP_TYPEPOSSTAR:
511 case OP_TYPEPOSPLUS:
512 case OP_TYPEPOSQUERY:
513 case OP_CRSTAR:
514 case OP_CRMINSTAR:
515 case OP_CRPLUS:
516 case OP_CRMINPLUS:
517 case OP_CRQUERY:
518 case OP_CRMINQUERY:
519 case OP_DEF:
520 case OP_BRAZERO:
521 case OP_BRAMINZERO:
522 case OP_BRAPOSZERO:
523 case OP_COMMIT:
524 case OP_FAIL:
525 case OP_ACCEPT:
526 case OP_ASSERT_ACCEPT:
527 case OP_SKIPZERO:
528 return cc + 1;
529
530 case OP_ANYBYTE:
531 #ifdef SUPPORT_UTF
532 if (common->utf) return NULL;
533 #endif
534 return cc + 1;
535
536 case OP_CHAR:
537 case OP_CHARI:
538 case OP_NOT:
539 case OP_NOTI:
540 case OP_STAR:
541 case OP_MINSTAR:
542 case OP_PLUS:
543 case OP_MINPLUS:
544 case OP_QUERY:
545 case OP_MINQUERY:
546 case OP_POSSTAR:
547 case OP_POSPLUS:
548 case OP_POSQUERY:
549 case OP_STARI:
550 case OP_MINSTARI:
551 case OP_PLUSI:
552 case OP_MINPLUSI:
553 case OP_QUERYI:
554 case OP_MINQUERYI:
555 case OP_POSSTARI:
556 case OP_POSPLUSI:
557 case OP_POSQUERYI:
558 case OP_NOTSTAR:
559 case OP_NOTMINSTAR:
560 case OP_NOTPLUS:
561 case OP_NOTMINPLUS:
562 case OP_NOTQUERY:
563 case OP_NOTMINQUERY:
564 case OP_NOTPOSSTAR:
565 case OP_NOTPOSPLUS:
566 case OP_NOTPOSQUERY:
567 case OP_NOTSTARI:
568 case OP_NOTMINSTARI:
569 case OP_NOTPLUSI:
570 case OP_NOTMINPLUSI:
571 case OP_NOTQUERYI:
572 case OP_NOTMINQUERYI:
573 case OP_NOTPOSSTARI:
574 case OP_NOTPOSPLUSI:
575 case OP_NOTPOSQUERYI:
576 cc += 2;
577 #ifdef SUPPORT_UTF
578 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
579 #endif
580 return cc;
581
582 case OP_UPTO:
583 case OP_MINUPTO:
584 case OP_EXACT:
585 case OP_POSUPTO:
586 case OP_UPTOI:
587 case OP_MINUPTOI:
588 case OP_EXACTI:
589 case OP_POSUPTOI:
590 case OP_NOTUPTO:
591 case OP_NOTMINUPTO:
592 case OP_NOTEXACT:
593 case OP_NOTPOSUPTO:
594 case OP_NOTUPTOI:
595 case OP_NOTMINUPTOI:
596 case OP_NOTEXACTI:
597 case OP_NOTPOSUPTOI:
598 cc += 2 + IMM2_SIZE;
599 #ifdef SUPPORT_UTF
600 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
601 #endif
602 return cc;
603
604 case OP_NOTPROP:
605 case OP_PROP:
606 return cc + 1 + 2;
607
608 case OP_TYPEUPTO:
609 case OP_TYPEMINUPTO:
610 case OP_TYPEEXACT:
611 case OP_TYPEPOSUPTO:
612 case OP_REF:
613 case OP_REFI:
614 case OP_CREF:
615 case OP_NCREF:
616 case OP_RREF:
617 case OP_NRREF:
618 case OP_CLOSE:
619 cc += 1 + IMM2_SIZE;
620 return cc;
621
622 case OP_CRRANGE:
623 case OP_CRMINRANGE:
624 return cc + 1 + 2 * IMM2_SIZE;
625
626 case OP_CLASS:
627 case OP_NCLASS:
628 return cc + 1 + 32 / sizeof(pcre_uchar);
629
630 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
631 case OP_XCLASS:
632 return cc + GET(cc, 1);
633 #endif
634
635 case OP_RECURSE:
636 case OP_ASSERT:
637 case OP_ASSERT_NOT:
638 case OP_ASSERTBACK:
639 case OP_ASSERTBACK_NOT:
640 case OP_REVERSE:
641 case OP_ONCE:
642 case OP_ONCE_NC:
643 case OP_BRA:
644 case OP_BRAPOS:
645 case OP_COND:
646 case OP_SBRA:
647 case OP_SBRAPOS:
648 case OP_SCOND:
649 case OP_ALT:
650 case OP_KET:
651 case OP_KETRMAX:
652 case OP_KETRMIN:
653 case OP_KETRPOS:
654 return cc + 1 + LINK_SIZE;
655
656 case OP_CBRA:
657 case OP_CBRAPOS:
658 case OP_SCBRA:
659 case OP_SCBRAPOS:
660 return cc + 1 + LINK_SIZE + IMM2_SIZE;
661
662 case OP_MARK:
663 return cc + 1 + 2 + cc[1];
664
665 default:
666 return NULL;
667 }
668 }
669
/* Case label lists shared by get_localspace and set_localptrs. The groups
differ in the number of local words and in the argument layout:
  LOCAL1  - single character iterators, one word
  LOCAL2A - single character iterators, two words
  LOCAL2B - single character iterators with a count, two words
The TYPE variants are the same groups for character type iterators. */

#define CASE_ITERATOR_LOCAL1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_LOCAL2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_LOCAL2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_LOCAL1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_LOCAL2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_LOCAL2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
721
722 static int get_class_iterator_size(pcre_uchar *cc)
723 {
724 switch(*cc)
725 {
726 case OP_CRSTAR:
727 case OP_CRPLUS:
728 return 2;
729
730 case OP_CRMINSTAR:
731 case OP_CRMINPLUS:
732 case OP_CRQUERY:
733 case OP_CRMINQUERY:
734 return 1;
735
736 case OP_CRRANGE:
737 case OP_CRMINRANGE:
738 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
739 return 0;
740 return 2;
741
742 default:
743 return 0;
744 }
745 }
746
747 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
748 {
749 int localspace = 0;
750 pcre_uchar *alternative;
751 pcre_uchar *end = NULL;
752 int space, size, bracketlen;
753
754 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
755 while (cc < ccend)
756 {
757 space = 0;
758 size = 0;
759 bracketlen = 0;
760 switch(*cc)
761 {
762 case OP_SET_SOM:
763 common->has_set_som = TRUE;
764 cc += 1;
765 break;
766
767 case OP_ASSERT:
768 case OP_ASSERT_NOT:
769 case OP_ASSERTBACK:
770 case OP_ASSERTBACK_NOT:
771 case OP_ONCE:
772 case OP_ONCE_NC:
773 case OP_BRAPOS:
774 case OP_SBRA:
775 case OP_SBRAPOS:
776 case OP_SCOND:
777 localspace += sizeof(sljit_w);
778 bracketlen = 1 + LINK_SIZE;
779 break;
780
781 case OP_CBRAPOS:
782 case OP_SCBRAPOS:
783 localspace += sizeof(sljit_w);
784 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
785 break;
786
787 case OP_COND:
788 /* Might be a hidden SCOND. */
789 alternative = cc + GET(cc, 1);
790 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
791 localspace += sizeof(sljit_w);
792 bracketlen = 1 + LINK_SIZE;
793 break;
794
795 case OP_BRA:
796 bracketlen = 1 + LINK_SIZE;
797 break;
798
799 case OP_CBRA:
800 case OP_SCBRA:
801 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
802 break;
803
804 CASE_ITERATOR_LOCAL1
805 space = 1;
806 size = -2;
807 break;
808
809 CASE_ITERATOR_LOCAL2A
810 space = 2;
811 size = -2;
812 break;
813
814 CASE_ITERATOR_LOCAL2B
815 space = 2;
816 size = -(2 + IMM2_SIZE);
817 break;
818
819 CASE_ITERATOR_TYPE_LOCAL1
820 space = 1;
821 size = 1;
822 break;
823
824 CASE_ITERATOR_TYPE_LOCAL2A
825 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
826 space = 2;
827 size = 1;
828 break;
829
830 CASE_ITERATOR_TYPE_LOCAL2B
831 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
832 space = 2;
833 size = 1 + IMM2_SIZE;
834 break;
835
836 case OP_CLASS:
837 case OP_NCLASS:
838 size += 1 + 32 / sizeof(pcre_uchar);
839 space = get_class_iterator_size(cc + size);
840 break;
841
842 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
843 case OP_XCLASS:
844 size = GET(cc, 1);
845 space = get_class_iterator_size(cc + size);
846 break;
847 #endif
848
849 case OP_RECURSE:
850 /* Set its value only once. */
851 if (common->recursive_head == 0)
852 {
853 common->recursive_head = common->ovector_start;
854 common->ovector_start += sizeof(sljit_w);
855 }
856 cc += 1 + LINK_SIZE;
857 break;
858
859 case OP_MARK:
860 if (common->mark_ptr == 0)
861 {
862 common->mark_ptr = common->ovector_start;
863 common->ovector_start += sizeof(sljit_w);
864 }
865 cc += 1 + 2 + cc[1];
866 break;
867
868 default:
869 cc = next_opcode(common, cc);
870 if (cc == NULL)
871 return -1;
872 break;
873 }
874
875 if (space > 0 && cc >= end)
876 localspace += sizeof(sljit_w) * space;
877
878 if (size != 0)
879 {
880 if (size < 0)
881 {
882 cc += -size;
883 #ifdef SUPPORT_UTF
884 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
885 #endif
886 }
887 else
888 cc += size;
889 }
890
891 if (bracketlen > 0)
892 {
893 if (cc >= end)
894 {
895 end = bracketend(cc);
896 if (end[-1 - LINK_SIZE] == OP_KET)
897 end = NULL;
898 }
899 cc += bracketlen;
900 }
901 }
902 return localspace;
903 }
904
905 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
906 {
907 pcre_uchar *cc = common->start;
908 pcre_uchar *alternative;
909 pcre_uchar *end = NULL;
910 int space, size, bracketlen;
911
912 while (cc < ccend)
913 {
914 space = 0;
915 size = 0;
916 bracketlen = 0;
917 switch(*cc)
918 {
919 case OP_ASSERT:
920 case OP_ASSERT_NOT:
921 case OP_ASSERTBACK:
922 case OP_ASSERTBACK_NOT:
923 case OP_ONCE:
924 case OP_ONCE_NC:
925 case OP_BRAPOS:
926 case OP_SBRA:
927 case OP_SBRAPOS:
928 case OP_SCOND:
929 common->localptrs[cc - common->start] = localptr;
930 localptr += sizeof(sljit_w);
931 bracketlen = 1 + LINK_SIZE;
932 break;
933
934 case OP_CBRAPOS:
935 case OP_SCBRAPOS:
936 common->localptrs[cc - common->start] = localptr;
937 localptr += sizeof(sljit_w);
938 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
939 break;
940
941 case OP_COND:
942 /* Might be a hidden SCOND. */
943 alternative = cc + GET(cc, 1);
944 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
945 {
946 common->localptrs[cc - common->start] = localptr;
947 localptr += sizeof(sljit_w);
948 }
949 bracketlen = 1 + LINK_SIZE;
950 break;
951
952 case OP_BRA:
953 bracketlen = 1 + LINK_SIZE;
954 break;
955
956 case OP_CBRA:
957 case OP_SCBRA:
958 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
959 break;
960
961 CASE_ITERATOR_LOCAL1
962 space = 1;
963 size = -2;
964 break;
965
966 CASE_ITERATOR_LOCAL2A
967 space = 2;
968 size = -2;
969 break;
970
971 CASE_ITERATOR_LOCAL2B
972 space = 2;
973 size = -(2 + IMM2_SIZE);
974 break;
975
976 CASE_ITERATOR_TYPE_LOCAL1
977 space = 1;
978 size = 1;
979 break;
980
981 CASE_ITERATOR_TYPE_LOCAL2A
982 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
983 space = 2;
984 size = 1;
985 break;
986
987 CASE_ITERATOR_TYPE_LOCAL2B
988 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
989 space = 2;
990 size = 1 + IMM2_SIZE;
991 break;
992
993 case OP_CLASS:
994 case OP_NCLASS:
995 size += 1 + 32 / sizeof(pcre_uchar);
996 space = get_class_iterator_size(cc + size);
997 break;
998
999 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1000 case OP_XCLASS:
1001 size = GET(cc, 1);
1002 space = get_class_iterator_size(cc + size);
1003 break;
1004 #endif
1005
1006 default:
1007 cc = next_opcode(common, cc);
1008 SLJIT_ASSERT(cc != NULL);
1009 break;
1010 }
1011
1012 if (space > 0 && cc >= end)
1013 {
1014 common->localptrs[cc - common->start] = localptr;
1015 localptr += sizeof(sljit_w) * space;
1016 }
1017
1018 if (size != 0)
1019 {
1020 if (size < 0)
1021 {
1022 cc += -size;
1023 #ifdef SUPPORT_UTF
1024 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1025 #endif
1026 }
1027 else
1028 cc += size;
1029 }
1030
1031 if (bracketlen > 0)
1032 {
1033 if (cc >= end)
1034 {
1035 end = bracketend(cc);
1036 if (end[-1 - LINK_SIZE] == OP_KET)
1037 end = NULL;
1038 }
1039 cc += bracketlen;
1040 }
1041 }
1042 }
1043
1044 /* Returns with -1 if no need for frame. */
1045 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1046 {
1047 pcre_uchar *ccend = bracketend(cc);
1048 int length = 0;
1049 BOOL possessive = FALSE;
1050 BOOL setsom_found = recursive;
1051 BOOL setmark_found = recursive;
1052
1053 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1054 {
1055 length = 3;
1056 possessive = TRUE;
1057 }
1058
1059 cc = next_opcode(common, cc);
1060 SLJIT_ASSERT(cc != NULL);
1061 while (cc < ccend)
1062 switch(*cc)
1063 {
1064 case OP_SET_SOM:
1065 SLJIT_ASSERT(common->has_set_som);
1066 if (!setsom_found)
1067 {
1068 length += 2;
1069 setsom_found = TRUE;
1070 }
1071 cc += 1;
1072 break;
1073
1074 case OP_MARK:
1075 SLJIT_ASSERT(common->mark_ptr != 0);
1076 if (!setmark_found)
1077 {
1078 length += 2;
1079 setmark_found = TRUE;
1080 }
1081 cc += 1 + 2 + cc[1];
1082 break;
1083
1084 case OP_RECURSE:
1085 if (common->has_set_som && !setsom_found)
1086 {
1087 length += 2;
1088 setsom_found = TRUE;
1089 }
1090 if (common->mark_ptr != 0 && !setmark_found)
1091 {
1092 length += 2;
1093 setmark_found = TRUE;
1094 }
1095 cc += 1 + LINK_SIZE;
1096 break;
1097
1098 case OP_CBRA:
1099 case OP_CBRAPOS:
1100 case OP_SCBRA:
1101 case OP_SCBRAPOS:
1102 length += 3;
1103 cc += 1 + LINK_SIZE + IMM2_SIZE;
1104 break;
1105
1106 default:
1107 cc = next_opcode(common, cc);
1108 SLJIT_ASSERT(cc != NULL);
1109 break;
1110 }
1111
1112 /* Possessive quantifiers can use a special case. */
1113 if (SLJIT_UNLIKELY(possessive) && length == 3)
1114 return -1;
1115
1116 if (length > 0)
1117 return length + 1;
1118 return -1;
1119 }
1120
1121 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1122 {
1123 DEFINE_COMPILER;
1124 pcre_uchar *ccend = bracketend(cc);
1125 BOOL setsom_found = recursive;
1126 BOOL setmark_found = recursive;
1127 int offset;
1128
1129 /* >= 1 + shortest item size (2) */
1130 SLJIT_UNUSED_ARG(stacktop);
1131 SLJIT_ASSERT(stackpos >= stacktop + 2);
1132
1133 stackpos = STACK(stackpos);
1134 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1135 cc = next_opcode(common, cc);
1136 SLJIT_ASSERT(cc != NULL);
1137 while (cc < ccend)
1138 switch(*cc)
1139 {
1140 case OP_SET_SOM:
1141 SLJIT_ASSERT(common->has_set_som);
1142 if (!setsom_found)
1143 {
1144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1146 stackpos += (int)sizeof(sljit_w);
1147 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1148 stackpos += (int)sizeof(sljit_w);
1149 setsom_found = TRUE;
1150 }
1151 cc += 1;
1152 break;
1153
1154 case OP_MARK:
1155 SLJIT_ASSERT(common->mark_ptr != 0);
1156 if (!setmark_found)
1157 {
1158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1159 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1160 stackpos += (int)sizeof(sljit_w);
1161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1162 stackpos += (int)sizeof(sljit_w);
1163 setmark_found = TRUE;
1164 }
1165 cc += 1 + 2 + cc[1];
1166 break;
1167
1168 case OP_RECURSE:
1169 if (common->has_set_som && !setsom_found)
1170 {
1171 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1172 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1173 stackpos += (int)sizeof(sljit_w);
1174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1175 stackpos += (int)sizeof(sljit_w);
1176 setsom_found = TRUE;
1177 }
1178 if (common->mark_ptr != 0 && !setmark_found)
1179 {
1180 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1182 stackpos += (int)sizeof(sljit_w);
1183 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1184 stackpos += (int)sizeof(sljit_w);
1185 setmark_found = TRUE;
1186 }
1187 cc += 1 + LINK_SIZE;
1188 break;
1189
1190 case OP_CBRA:
1191 case OP_CBRAPOS:
1192 case OP_SCBRA:
1193 case OP_SCBRAPOS:
1194 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1196 stackpos += (int)sizeof(sljit_w);
1197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1200 stackpos += (int)sizeof(sljit_w);
1201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1202 stackpos += (int)sizeof(sljit_w);
1203
1204 cc += 1 + LINK_SIZE + IMM2_SIZE;
1205 break;
1206
1207 default:
1208 cc = next_opcode(common, cc);
1209 SLJIT_ASSERT(cc != NULL);
1210 break;
1211 }
1212
1213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1214 SLJIT_ASSERT(stackpos == STACK(stacktop));
1215 }
1216
1217 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1218 {
1219 int localsize = 2;
1220 int size;
1221 pcre_uchar *alternative;
1222 /* Calculate the sum of the local variables. */
1223 while (cc < ccend)
1224 {
1225 size = 0;
1226 switch(*cc)
1227 {
1228 case OP_ASSERT:
1229 case OP_ASSERT_NOT:
1230 case OP_ASSERTBACK:
1231 case OP_ASSERTBACK_NOT:
1232 case OP_ONCE:
1233 case OP_ONCE_NC:
1234 case OP_BRAPOS:
1235 case OP_SBRA:
1236 case OP_SBRAPOS:
1237 case OP_SCOND:
1238 localsize++;
1239 cc += 1 + LINK_SIZE;
1240 break;
1241
1242 case OP_CBRA:
1243 case OP_SCBRA:
1244 localsize++;
1245 cc += 1 + LINK_SIZE + IMM2_SIZE;
1246 break;
1247
1248 case OP_CBRAPOS:
1249 case OP_SCBRAPOS:
1250 localsize += 2;
1251 cc += 1 + LINK_SIZE + IMM2_SIZE;
1252 break;
1253
1254 case OP_COND:
1255 /* Might be a hidden SCOND. */
1256 alternative = cc + GET(cc, 1);
1257 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1258 localsize++;
1259 cc += 1 + LINK_SIZE;
1260 break;
1261
1262 CASE_ITERATOR_LOCAL1
1263 if (PRIV_DATA(cc))
1264 localsize++;
1265 cc += 2;
1266 #ifdef SUPPORT_UTF
1267 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1268 #endif
1269 break;
1270
1271 CASE_ITERATOR_LOCAL2A
1272 if (PRIV_DATA(cc))
1273 localsize += 2;
1274 cc += 2;
1275 #ifdef SUPPORT_UTF
1276 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1277 #endif
1278 break;
1279
1280 CASE_ITERATOR_LOCAL2B
1281 if (PRIV_DATA(cc))
1282 localsize += 2;
1283 cc += 2 + IMM2_SIZE;
1284 #ifdef SUPPORT_UTF
1285 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1286 #endif
1287 break;
1288
1289 CASE_ITERATOR_TYPE_LOCAL1
1290 if (PRIV_DATA(cc))
1291 localsize++;
1292 cc += 1;
1293 break;
1294
1295 CASE_ITERATOR_TYPE_LOCAL2A
1296 if (PRIV_DATA(cc))
1297 localsize += 2;
1298 cc += 1;
1299 break;
1300
1301 CASE_ITERATOR_TYPE_LOCAL2B
1302 if (PRIV_DATA(cc))
1303 localsize += 2;
1304 cc += 1 + IMM2_SIZE;
1305 break;
1306
1307 case OP_CLASS:
1308 case OP_NCLASS:
1309 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1310 case OP_XCLASS:
1311 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1312 #else
1313 size = 1 + 32 / (int)sizeof(pcre_uchar);
1314 #endif
1315 if (PRIV_DATA(cc))
1316 localsize += get_class_iterator_size(cc + size);
1317 cc += size;
1318 break;
1319
1320 default:
1321 cc = next_opcode(common, cc);
1322 SLJIT_ASSERT(cc != NULL);
1323 break;
1324 }
1325 }
1326 SLJIT_ASSERT(cc == ccend);
1327 return localsize;
1328 }
1329
1330 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1331 BOOL save, int stackptr, int stacktop)
1332 {
1333 DEFINE_COMPILER;
1334 int srcw[2];
1335 int count, size;
1336 BOOL tmp1next = TRUE;
1337 BOOL tmp1empty = TRUE;
1338 BOOL tmp2empty = TRUE;
1339 pcre_uchar *alternative;
1340 enum {
1341 start,
1342 loop,
1343 end
1344 } status;
1345
1346 status = save ? start : loop;
1347 stackptr = STACK(stackptr - 2);
1348 stacktop = STACK(stacktop - 1);
1349
1350 if (!save)
1351 {
1352 stackptr += sizeof(sljit_w);
1353 if (stackptr < stacktop)
1354 {
1355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1356 stackptr += sizeof(sljit_w);
1357 tmp1empty = FALSE;
1358 }
1359 if (stackptr < stacktop)
1360 {
1361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1362 stackptr += sizeof(sljit_w);
1363 tmp2empty = FALSE;
1364 }
1365 /* The tmp1next must be TRUE in either way. */
1366 }
1367
1368 while (status != end)
1369 {
1370 count = 0;
1371 switch(status)
1372 {
1373 case start:
1374 SLJIT_ASSERT(save && common->recursive_head != 0);
1375 count = 1;
1376 srcw[0] = common->recursive_head;
1377 status = loop;
1378 break;
1379
1380 case loop:
1381 if (cc >= ccend)
1382 {
1383 status = end;
1384 break;
1385 }
1386
1387 switch(*cc)
1388 {
1389 case OP_ASSERT:
1390 case OP_ASSERT_NOT:
1391 case OP_ASSERTBACK:
1392 case OP_ASSERTBACK_NOT:
1393 case OP_ONCE:
1394 case OP_ONCE_NC:
1395 case OP_BRAPOS:
1396 case OP_SBRA:
1397 case OP_SBRAPOS:
1398 case OP_SCOND:
1399 count = 1;
1400 srcw[0] = PRIV_DATA(cc);
1401 SLJIT_ASSERT(srcw[0] != 0);
1402 cc += 1 + LINK_SIZE;
1403 break;
1404
1405 case OP_CBRA:
1406 case OP_SCBRA:
1407 count = 1;
1408 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1409 cc += 1 + LINK_SIZE + IMM2_SIZE;
1410 break;
1411
1412 case OP_CBRAPOS:
1413 case OP_SCBRAPOS:
1414 count = 2;
1415 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1416 srcw[1] = PRIV_DATA(cc);
1417 SLJIT_ASSERT(srcw[0] != 0);
1418 cc += 1 + LINK_SIZE + IMM2_SIZE;
1419 break;
1420
1421 case OP_COND:
1422 /* Might be a hidden SCOND. */
1423 alternative = cc + GET(cc, 1);
1424 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1425 {
1426 count = 1;
1427 srcw[0] = PRIV_DATA(cc);
1428 SLJIT_ASSERT(srcw[0] != 0);
1429 }
1430 cc += 1 + LINK_SIZE;
1431 break;
1432
1433 CASE_ITERATOR_LOCAL1
1434 if (PRIV_DATA(cc))
1435 {
1436 count = 1;
1437 srcw[0] = PRIV_DATA(cc);
1438 }
1439 cc += 2;
1440 #ifdef SUPPORT_UTF
1441 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1442 #endif
1443 break;
1444
1445 CASE_ITERATOR_LOCAL2A
1446 if (PRIV_DATA(cc))
1447 {
1448 count = 2;
1449 srcw[0] = PRIV_DATA(cc);
1450 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1451 }
1452 cc += 2;
1453 #ifdef SUPPORT_UTF
1454 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1455 #endif
1456 break;
1457
1458 CASE_ITERATOR_LOCAL2B
1459 if (PRIV_DATA(cc))
1460 {
1461 count = 2;
1462 srcw[0] = PRIV_DATA(cc);
1463 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1464 }
1465 cc += 2 + IMM2_SIZE;
1466 #ifdef SUPPORT_UTF
1467 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1468 #endif
1469 break;
1470
1471 CASE_ITERATOR_TYPE_LOCAL1
1472 if (PRIV_DATA(cc))
1473 {
1474 count = 1;
1475 srcw[0] = PRIV_DATA(cc);
1476 }
1477 cc += 1;
1478 break;
1479
1480 CASE_ITERATOR_TYPE_LOCAL2A
1481 if (PRIV_DATA(cc))
1482 {
1483 count = 2;
1484 srcw[0] = PRIV_DATA(cc);
1485 srcw[1] = srcw[0] + sizeof(sljit_w);
1486 }
1487 cc += 1;
1488 break;
1489
1490 CASE_ITERATOR_TYPE_LOCAL2B
1491 if (PRIV_DATA(cc))
1492 {
1493 count = 2;
1494 srcw[0] = PRIV_DATA(cc);
1495 srcw[1] = srcw[0] + sizeof(sljit_w);
1496 }
1497 cc += 1 + IMM2_SIZE;
1498 break;
1499
1500 case OP_CLASS:
1501 case OP_NCLASS:
1502 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1503 case OP_XCLASS:
1504 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1505 #else
1506 size = 1 + 32 / (int)sizeof(pcre_uchar);
1507 #endif
1508 if (PRIV_DATA(cc))
1509 switch(get_class_iterator_size(cc + size))
1510 {
1511 case 1:
1512 count = 1;
1513 srcw[0] = PRIV_DATA(cc);
1514 break;
1515
1516 case 2:
1517 count = 2;
1518 srcw[0] = PRIV_DATA(cc);
1519 srcw[1] = srcw[0] + sizeof(sljit_w);
1520 break;
1521
1522 default:
1523 SLJIT_ASSERT_STOP();
1524 break;
1525 }
1526 cc += size;
1527 break;
1528
1529 default:
1530 cc = next_opcode(common, cc);
1531 SLJIT_ASSERT(cc != NULL);
1532 break;
1533 }
1534 break;
1535
1536 case end:
1537 SLJIT_ASSERT_STOP();
1538 break;
1539 }
1540
1541 while (count > 0)
1542 {
1543 count--;
1544 if (save)
1545 {
1546 if (tmp1next)
1547 {
1548 if (!tmp1empty)
1549 {
1550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1551 stackptr += sizeof(sljit_w);
1552 }
1553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1554 tmp1empty = FALSE;
1555 tmp1next = FALSE;
1556 }
1557 else
1558 {
1559 if (!tmp2empty)
1560 {
1561 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1562 stackptr += sizeof(sljit_w);
1563 }
1564 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1565 tmp2empty = FALSE;
1566 tmp1next = TRUE;
1567 }
1568 }
1569 else
1570 {
1571 if (tmp1next)
1572 {
1573 SLJIT_ASSERT(!tmp1empty);
1574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1575 tmp1empty = stackptr >= stacktop;
1576 if (!tmp1empty)
1577 {
1578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1579 stackptr += sizeof(sljit_w);
1580 }
1581 tmp1next = FALSE;
1582 }
1583 else
1584 {
1585 SLJIT_ASSERT(!tmp2empty);
1586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1587 tmp2empty = stackptr >= stacktop;
1588 if (!tmp2empty)
1589 {
1590 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1591 stackptr += sizeof(sljit_w);
1592 }
1593 tmp1next = TRUE;
1594 }
1595 }
1596 }
1597 }
1598
1599 if (save)
1600 {
1601 if (tmp1next)
1602 {
1603 if (!tmp1empty)
1604 {
1605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1606 stackptr += sizeof(sljit_w);
1607 }
1608 if (!tmp2empty)
1609 {
1610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1611 stackptr += sizeof(sljit_w);
1612 }
1613 }
1614 else
1615 {
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 if (!tmp1empty)
1622 {
1623 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1624 stackptr += sizeof(sljit_w);
1625 }
1626 }
1627 }
1628 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1629 }
1630
/* The CASE_ITERATOR_* case-label helpers are not used below this point. */
#undef CASE_ITERATOR_LOCAL1
#undef CASE_ITERATOR_LOCAL2A
#undef CASE_ITERATOR_LOCAL2B
#undef CASE_ITERATOR_TYPE_LOCAL1
#undef CASE_ITERATOR_TYPE_LOCAL2A
#undef CASE_ITERATOR_TYPE_LOCAL2B
1637
1638 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1639 {
1640 return (value & (value - 1)) == 0;
1641 }
1642
1643 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1644 {
1645 while (list)
1646 {
1647 /* sljit_set_label is clever enough to do nothing
1648 if either the jump or the label is NULL */
1649 sljit_set_label(list->jump, label);
1650 list = list->next;
1651 }
1652 }
1653
1654 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1655 {
1656 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1657 if (list_item)
1658 {
1659 list_item->next = *list;
1660 list_item->jump = jump;
1661 *list = list_item;
1662 }
1663 }
1664
1665 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1666 {
1667 DEFINE_COMPILER;
1668 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1669
1670 if (list_item)
1671 {
1672 list_item->type = type;
1673 list_item->data = data;
1674 list_item->start = start;
1675 list_item->leave = LABEL();
1676 list_item->next = common->stubs;
1677 common->stubs = list_item;
1678 }
1679 }
1680
1681 static void flush_stubs(compiler_common *common)
1682 {
1683 DEFINE_COMPILER;
1684 stub_list* list_item = common->stubs;
1685
1686 while (list_item)
1687 {
1688 JUMPHERE(list_item->start);
1689 switch(list_item->type)
1690 {
1691 case stack_alloc:
1692 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1693 break;
1694 }
1695 JUMPTO(SLJIT_JUMP, list_item->leave);
1696 list_item = list_item->next;
1697 }
1698 common->stubs = NULL;
1699 }
1700
1701 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1702 {
1703 DEFINE_COMPILER;
1704
1705 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1706 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1707 }
1708
1709 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1710 {
1711 /* May destroy all locals and registers except TMP2. */
1712 DEFINE_COMPILER;
1713
1714 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1715 #ifdef DESTROY_REGISTERS
1716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1717 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1718 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1721 #endif
1722 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1723 }
1724
1725 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1726 {
1727 DEFINE_COMPILER;
1728 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1729 }
1730
1731 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1732 {
1733 DEFINE_COMPILER;
1734 struct sljit_label *loop;
1735 int i;
1736 /* At this point we can freely use all temporary registers. */
1737 /* TMP1 returns with begin - 1. */
1738 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1739 if (length < 8)
1740 {
1741 for (i = 0; i < length; i++)
1742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1743 }
1744 else
1745 {
1746 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1747 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1748 loop = LABEL();
1749 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1750 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1751 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1752 }
1753 }
1754
1755 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1756 {
1757 DEFINE_COMPILER;
1758 struct sljit_label *loop;
1759 struct sljit_jump *earlyexit;
1760
1761 /* At this point we can freely use all registers. */
1762 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1764
1765 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1766 if (common->mark_ptr != 0)
1767 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1768 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1769 if (common->mark_ptr != 0)
1770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1771 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1772 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1773 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1774 /* Unlikely, but possible */
1775 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1776 loop = LABEL();
1777 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1778 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1779 /* Copy the integer value to the output buffer */
1780 #ifdef COMPILE_PCRE16
1781 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1782 #endif
1783 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1784 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1785 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1786 JUMPHERE(earlyexit);
1787
1788 /* Calculate the return value, which is the maximum ovector value. */
1789 if (topbracket > 1)
1790 {
1791 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1792 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1793
1794 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1795 loop = LABEL();
1796 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1797 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1798 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1799 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1800 }
1801 else
1802 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1803 }
1804
1805 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1806 {
1807 DEFINE_COMPILER;
1808
1809 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1810 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1811
1812 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1813 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1814 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1815 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1816
1817 /* Store match begin and end. */
1818 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1819 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1820 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1821 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1822 #ifdef COMPILE_PCRE16
1823 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1824 #endif
1825 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1826
1827 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1828 #ifdef COMPILE_PCRE16
1829 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1830 #endif
1831 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1832
1833 JUMPTO(SLJIT_JUMP, leave);
1834 }
1835
1836 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1837 {
1838 /* May destroy TMP1. */
1839 DEFINE_COMPILER;
1840 struct sljit_jump *jump;
1841
1842 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1843 {
1844 /* The value of -1 must be kept for start_used_ptr! */
1845 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1846 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1847 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1848 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1849 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1850 JUMPHERE(jump);
1851 }
1852 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1853 {
1854 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1856 JUMPHERE(jump);
1857 }
1858 }
1859
1860 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1861 {
1862 /* Detects if the character has an othercase. */
1863 unsigned int c;
1864
1865 #ifdef SUPPORT_UTF
1866 if (common->utf)
1867 {
1868 GETCHAR(c, cc);
1869 if (c > 127)
1870 {
1871 #ifdef SUPPORT_UCP
1872 return c != UCD_OTHERCASE(c);
1873 #else
1874 return FALSE;
1875 #endif
1876 }
1877 #ifndef COMPILE_PCRE8
1878 return common->fcc[c] != c;
1879 #endif
1880 }
1881 else
1882 #endif
1883 c = *cc;
1884 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1885 }
1886
1887 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1888 {
1889 /* Returns with the othercase. */
1890 #ifdef SUPPORT_UTF
1891 if (common->utf && c > 127)
1892 {
1893 #ifdef SUPPORT_UCP
1894 return UCD_OTHERCASE(c);
1895 #else
1896 return c;
1897 #endif
1898 }
1899 #endif
1900 return TABLE_GET(c, common->fcc, c);
1901 }
1902
1903 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1904 {
1905 /* Detects if the character and its othercase has only 1 bit difference. */
1906 unsigned int c, oc, bit;
1907 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1908 int n;
1909 #endif
1910
1911 #ifdef SUPPORT_UTF
1912 if (common->utf)
1913 {
1914 GETCHAR(c, cc);
1915 if (c <= 127)
1916 oc = common->fcc[c];
1917 else
1918 {
1919 #ifdef SUPPORT_UCP
1920 oc = UCD_OTHERCASE(c);
1921 #else
1922 oc = c;
1923 #endif
1924 }
1925 }
1926 else
1927 {
1928 c = *cc;
1929 oc = TABLE_GET(c, common->fcc, c);
1930 }
1931 #else
1932 c = *cc;
1933 oc = TABLE_GET(c, common->fcc, c);
1934 #endif
1935
1936 SLJIT_ASSERT(c != oc);
1937
1938 bit = c ^ oc;
1939 /* Optimized for English alphabet. */
1940 if (c <= 127 && bit == 0x20)
1941 return (0 << 8) | 0x20;
1942
1943 /* Since c != oc, they must have at least 1 bit difference. */
1944 if (!ispowerof2(bit))
1945 return 0;
1946
1947 #ifdef COMPILE_PCRE8
1948
1949 #ifdef SUPPORT_UTF
1950 if (common->utf && c > 127)
1951 {
1952 n = GET_EXTRALEN(*cc);
1953 while ((bit & 0x3f) == 0)
1954 {
1955 n--;
1956 bit >>= 6;
1957 }
1958 return (n << 8) | bit;
1959 }
1960 #endif /* SUPPORT_UTF */
1961 return (0 << 8) | bit;
1962
1963 #else /* COMPILE_PCRE8 */
1964
1965 #ifdef COMPILE_PCRE16
1966 #ifdef SUPPORT_UTF
1967 if (common->utf && c > 65535)
1968 {
1969 if (bit >= (1 << 10))
1970 bit >>= 10;
1971 else
1972 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1973 }
1974 #endif /* SUPPORT_UTF */
1975 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1976 #endif /* COMPILE_PCRE16 */
1977
1978 #endif /* COMPILE_PCRE8 */
1979 }
1980
1981 static void check_partial(compiler_common *common, BOOL force)
1982 {
1983 /* Checks whether a partial matching is occured. Does not modify registers. */
1984 DEFINE_COMPILER;
1985 struct sljit_jump *jump = NULL;
1986
1987 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1988
1989 if (common->mode == JIT_COMPILE)
1990 return;
1991
1992 if (!force)
1993 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1994 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1995 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1996
1997 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1999 else
2000 {
2001 if (common->partialmatchlabel != NULL)
2002 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2003 else
2004 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2005 }
2006
2007 if (jump != NULL)
2008 JUMPHERE(jump);
2009 }
2010
2011 static struct sljit_jump *check_str_end(compiler_common *common)
2012 {
2013 /* Does not affect registers. Usually used in a tight spot. */
2014 DEFINE_COMPILER;
2015 struct sljit_jump *jump;
2016 struct sljit_jump *nohit;
2017 struct sljit_jump *return_value;
2018
2019 if (common->mode == JIT_COMPILE)
2020 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2021
2022 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2023 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2024 {
2025 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2027 JUMPHERE(nohit);
2028 return_value = JUMP(SLJIT_JUMP);
2029 }
2030 else
2031 {
2032 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2033 if (common->partialmatchlabel != NULL)
2034 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2035 else
2036 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2037 }
2038 JUMPHERE(jump);
2039 return return_value;
2040 }
2041
2042 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2043 {
2044 DEFINE_COMPILER;
2045 struct sljit_jump *jump;
2046
2047 if (common->mode == JIT_COMPILE)
2048 {
2049 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2050 return;
2051 }
2052
2053 /* Partial matching mode. */
2054 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2055 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2056 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2057 {
2058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2059 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2060 }
2061 else
2062 {
2063 if (common->partialmatchlabel != NULL)
2064 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2065 else
2066 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2067 }
2068 JUMPHERE(jump);
2069 }
2070
2071 static void read_char(compiler_common *common)
2072 {
2073 /* Reads the character into TMP1, updates STR_PTR.
2074 Does not check STR_END. TMP2 Destroyed. */
2075 DEFINE_COMPILER;
2076 #ifdef SUPPORT_UTF
2077 struct sljit_jump *jump;
2078 #endif
2079
2080 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2081 #ifdef SUPPORT_UTF
2082 if (common->utf)
2083 {
2084 #ifdef COMPILE_PCRE8
2085 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2086 #else
2087 #ifdef COMPILE_PCRE16
2088 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2089 #endif
2090 #endif /* COMPILE_PCRE8 */
2091 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2092 JUMPHERE(jump);
2093 }
2094 #endif
2095 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2096 }
2097
2098 static void peek_char(compiler_common *common)
2099 {
2100 /* Reads the character into TMP1, keeps STR_PTR.
2101 Does not check STR_END. TMP2 Destroyed. */
2102 DEFINE_COMPILER;
2103 #ifdef SUPPORT_UTF
2104 struct sljit_jump *jump;
2105 #endif
2106
2107 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2108 #ifdef SUPPORT_UTF
2109 if (common->utf)
2110 {
2111 #ifdef COMPILE_PCRE8
2112 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2113 #else
2114 #ifdef COMPILE_PCRE16
2115 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2116 #endif
2117 #endif /* COMPILE_PCRE8 */
2118 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2119 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2120 JUMPHERE(jump);
2121 }
2122 #endif
2123 }
2124
2125 static void read_char8_type(compiler_common *common)
2126 {
2127 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2128 DEFINE_COMPILER;
2129 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2130 struct sljit_jump *jump;
2131 #endif
2132
2133 #ifdef SUPPORT_UTF
2134 if (common->utf)
2135 {
2136 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2137 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2138 #ifdef COMPILE_PCRE8
2139 /* This can be an extra read in some situations, but hopefully
2140 it is needed in most cases. */
2141 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2142 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2143 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2144 JUMPHERE(jump);
2145 #else
2146 #ifdef COMPILE_PCRE16
2147 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2148 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2149 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2150 JUMPHERE(jump);
2151 /* Skip low surrogate if necessary. */
2152 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2154 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2155 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2157 #endif
2158 #endif /* COMPILE_PCRE8 */
2159 return;
2160 }
2161 #endif
2162 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2164 #ifdef COMPILE_PCRE16
2165 /* The ctypes array contains only 256 values. */
2166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2167 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2168 #endif
2169 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2170 #ifdef COMPILE_PCRE16
2171 JUMPHERE(jump);
2172 #endif
2173 }
2174
2175 static void skip_char_back(compiler_common *common)
2176 {
2177 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2178 DEFINE_COMPILER;
2179 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2180 struct sljit_label *label;
2181
2182 if (common->utf)
2183 {
2184 label = LABEL();
2185 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2186 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2187 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2188 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2189 return;
2190 }
2191 #endif
2192 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2193 if (common->utf)
2194 {
2195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2196 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2197 /* Skip low surrogate if necessary. */
2198 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2199 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2200 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2201 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2202 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2203 return;
2204 }
2205 #endif
2206 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2207 }
2208
2209 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2210 {
2211 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2212 DEFINE_COMPILER;
2213
2214 if (nltype == NLTYPE_ANY)
2215 {
2216 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2217 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2218 }
2219 else if (nltype == NLTYPE_ANYCRLF)
2220 {
2221 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2222 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2224 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2225 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2226 }
2227 else
2228 {
2229 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2230 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2231 }
2232 }
2233
2234 #ifdef SUPPORT_UTF
2235
2236 #ifdef COMPILE_PCRE8
/* Emits the shared UTF-8 decoding helper (8-bit library). The emitted code is
entered with a fast call and left with a fast return through RETURN_ADDR.
Bits 0x20 and 0x10 of the lead byte in TMP1 select the two-, three- or
four-byte path; any lead byte that has both bits set takes the four-byte path.
Each path masks the trailing bytes with 0x3f, merges them into TMP1, advances
STR_PTR by the number of trailing bytes and leaves that count in TMP2.
No validation of the trailing bytes is emitted here.
NOTE(review): trailing bytes are read at offsets 1..3 from STR_PTR, so STR_PTR
is presumably still addressing the lead byte on entry - confirm against the
caller. */
2237 static void do_utfreadchar(compiler_common *common)
2238 {
2239 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2240 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2241 DEFINE_COMPILER;
2242 struct sljit_jump *jump;
2243
2244 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2245 /* Searching for the first zero. */
2246 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2247 jump = JUMP(SLJIT_C_NOT_ZERO);
2248 /* Two byte sequence: (lead & 0x1f) << 6 | (byte1 & 0x3f). */
2249 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2250 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2251 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2252 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2253 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2254 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2256 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2257 JUMPHERE(jump);
2258
2259 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2260 jump = JUMP(SLJIT_C_NOT_ZERO);
2261 /* Three byte sequence: (lead & 0x0f) << 12 | (byte1 & 0x3f) << 6 | (byte2 & 0x3f). */
2262 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2263 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2264 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2265 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2266 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2267 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2268 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2269 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2270 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2271 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2272 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2273 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2274 JUMPHERE(jump);
2275
2276 /* Four byte sequence: (lead & 0x07) << 18 | three 6-bit trailing groups. */
2277 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2278 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2279 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2280 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2281 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2282 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2283 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2284 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2285 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2286 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2287 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2288 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2289 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2290 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2292 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2293 }
2294
/* Emits the shared helper that returns the character-type byte of a multi-byte
UTF-8 character (8-bit library). Only characters below 256 have an entry in
the common->ctypes table, so:
- for a two-byte sequence the character is decoded, STR_PTR is advanced by one
  unit, and the result is the ctypes entry when the value is <= 255, else 0;
- for longer sequences nothing is decoded: STR_PTR is advanced by the
  utf8_table4 entry selected by the lead byte and 0 is returned.
NOTE(review): the second byte is read at offset 0 from STR_PTR, so STR_PTR has
presumably already been moved past the lead byte by the caller - confirm. */
2295 static void do_utfreadtype8(compiler_common *common)
2296 {
2297 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2298 of the character (>= 0xc0). Return value in TMP1. */
2299 DEFINE_COMPILER;
2300 struct sljit_jump *jump;
2301 struct sljit_jump *compare;
2302
2303 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2304
2305 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2306 jump = JUMP(SLJIT_C_NOT_ZERO);
2307 /* Two byte sequence. */
2308 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2309 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2310 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2311 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2312 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2313 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded value is in TMP2; values above 255 have no table entry. */
2314 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2315 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2316 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2317
2318 JUMPHERE(compare);
2319 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2320 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2321 JUMPHERE(jump);
2322
2323 /* We only have types for characters less than 256. */
/* The table is indexed by (lead byte - 0xc0); the loaded value is added to STR_PTR. */
2324 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2325 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2327 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2328 }
2329
2330 #else /* COMPILE_PCRE8 */
2331
2332 #ifdef COMPILE_PCRE16
/* Emits the shared UTF-16 decoding helper (16-bit library), entered with a
fast call and left with a fast return through RETURN_ADDR.
- First unit >= 0xdc00: the emitted code returns immediately, leaving TMP1 and
  STR_PTR unchanged. Note that TMP2 is not written on this path.
- First unit < 0xdc00: the unit at offset 1 from STR_PTR is loaded, STR_PTR is
  advanced by one unit, and the two 10-bit payloads are combined as
  ((first & 0x3ff) << 10 | (second & 0x3ff)) + 0x10000. TMP2 is set to
  IN_UCHARS(1).
The second unit is not checked for being a valid low surrogate here. */
2333 static void do_utfreadchar(compiler_common *common)
2334 {
2335 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2336 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2337 DEFINE_COMPILER;
2338 struct sljit_jump *jump;
2339
2340 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2341 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2342 /* Do nothing, only return. */
2343 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2344
2345 JUMPHERE(jump);
2346 /* Combine two 16 bit characters. */
2347 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2349 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2350 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2351 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2352 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2353 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2354 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2355 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2356 }
2357 #endif /* COMPILE_PCRE16 */
2358
2359 #endif /* COMPILE_PCRE8 */
2360
2361 #endif /* SUPPORT_UTF */
2362
2363 #ifdef SUPPORT_UCP
2364
2365 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2366 #define UCD_BLOCK_MASK 127
2367 #define UCD_BLOCK_SHIFT 7
2368
/* Emits the shared helper that looks up the Unicode character type of the
character in TMP1 through the two-stage UCD tables:
  block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]            (byte load)
  record = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
                                                        (16-bit load)
  type   = ucd_records[record].chartype                 (byte load)
The index shifts passed with SLJIT_MEM2 (1 for stage2, 3 for the records)
match the element sizes checked by the assert below (16-bit stage2 entries
are presumed; 8-byte records are asserted). */
2369 static void do_getucd(compiler_common *common)
2370 {
2371 /* Search the UCD record for the character comes in TMP1.
2372 Returns chartype in TMP1 and UCD offset in TMP2. */
2373 DEFINE_COMPILER;
2374
2375 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2376
2377 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2378 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2379 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2380 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2381 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2382 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2384 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2386 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2387 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2388 }
2389 #endif
2390
/* Emits the prologue of the main matching loop and returns the label that the
loop jumps back to in order to try the next start position.

Emitted layout, in order:
1. If firstline is set: STR_PTR is saved in LOCALS0, the subject is scanned
   for the end of the first line, the result is stored in
   common->first_line_end, and STR_PTR is restored.
2. An unconditional jump (start) over everything below, so the first attempt
   is made at the unmodified STR_PTR.
3. If newlinecheck is set: a side block (newlinelabel) that steps one unit
   forward and, when the unit found there equals the low byte of
   common->newline, steps over it as well, so a two-unit newline is skipped
   as a whole.
4. The returned label (mainloop): code that advances STR_PTR by one character,
   including the trailing units of a UTF-8 / UTF-16 character when common->utf
   is set.

newlinecheck is enabled only when neither hascrorlf nor firstline is set and
the newline convention is NLTYPE_ANY, NLTYPE_ANYCRLF or a two-unit fixed
newline (common->newline > 255). */
2391 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2392 {
2393 DEFINE_COMPILER;
2394 struct sljit_label *mainloop;
2395 struct sljit_label *newlinelabel = NULL;
2396 struct sljit_jump *start;
2397 struct sljit_jump *end = NULL;
2398 struct sljit_jump *nl = NULL;
2399 #ifdef SUPPORT_UTF
2400 struct sljit_jump *singlechar;
2401 #endif
2402 jump_list *newline = NULL;
2403 BOOL newlinecheck = FALSE;
2404 BOOL readuchar = FALSE;
2405
2406 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2407 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2408 newlinecheck = TRUE;
2409
2410 if (firstline)
2411 {
2412 /* Search for the end of the first line. */
2413 SLJIT_ASSERT(common->first_line_end != 0);
2414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
2415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
2416
2417 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2418 {
/* Two-unit fixed newline: compare the units at STR_PTR - 1 and STR_PTR with
the high and low byte of common->newline; on a hit store STR_PTR - 1 unit. */
2419 mainloop = LABEL();
2420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2421 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2423 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2424 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2425 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2426 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 }
2428 else
2429 {
2430 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2431 mainloop = LABEL();
2432 /* Continual stores does not cause data dependency. */
2433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2434 read_char(common);
2435 check_newlinechar(common, common->nltype, &newline, TRUE);
2436 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2438 set_jumps(newline, LABEL());
2439 }
2440
2441 JUMPHERE(end);
2442 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2443 }
2444
2445 start = JUMP(SLJIT_JUMP);
2446
2447 if (newlinecheck)
2448 {
/* Reached from the main loop when the unit at STR_PTR equals the high byte of
common->newline. TMP1 becomes 1 when the following unit equals the low byte,
and is then added to STR_PTR (scaled to bytes in the 16-bit library). */
2449 newlinelabel = LABEL();
2450 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2451 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2452 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2454 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2455 #ifdef COMPILE_PCRE16
2456 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2457 #endif
2458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2459 nl = JUMP(SLJIT_JUMP);
2460 }
2461
2462 mainloop = LABEL();
2463
2464 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2465 #ifdef SUPPORT_UTF
2466 if (common->utf) readuchar = TRUE;
2467 #endif
2468 if (newlinecheck) readuchar = TRUE;
2469
/* The current unit is only loaded when one of the checks below needs it. */
2470 if (readuchar)
2471 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2472
2473 if (newlinecheck)
2474 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2475
2476 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2478 if (common->utf)
2479 {
/* UTF-8: for lead bytes >= 0xc0 add the extra length taken from utf8_table4. */
2480 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2481 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2482 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2483 JUMPHERE(singlechar);
2484 }
2485 #endif
2486 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2487 if (common->utf)
2488 {
/* UTF-16: when (unit & 0xfc00) == 0xd800, skip one more unit (two bytes). */
2489 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2490 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2491 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2492 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2493 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2495 JUMPHERE(singlechar);
2496 }
2497 #endif
2498 JUMPHERE(start);
2499
2500 if (newlinecheck)
2501 {
2502 JUMPHERE(end);
2503 JUMPHERE(nl);
2504 }
2505
2506 return mainloop;
2507 }
2508
2509 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2510 {
2511 DEFINE_COMPILER;
2512 struct sljit_label *start;
2513 struct sljit_jump *leave;
2514 struct sljit_jump *found;
2515 pcre_int32 chars[4];
2516 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2517 int location = 0;
2518 pcre_int32 len, c, bit, caseless;
2519 BOOL must_end;
2520
2521 #ifdef COMPILE_PCRE8
2522 union {
2523 sljit_uh ascombined;
2524 sljit_ub asuchars[2];
2525 } pair;
2526 #else
2527 union {
2528 sljit_ui ascombined;
2529 sljit_uh asuchars[2];
2530 } pair;
2531 #endif
2532
2533 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2534 return FALSE;
2535
2536 while (TRUE)
2537 {
2538 caseless = 0;
2539 must_end = TRUE;
2540 switch(*cc)
2541 {
2542 case OP_CHAR:
2543 must_end = FALSE;
2544 cc++;
2545 break;
2546
2547 case OP_CHARI:
2548 caseless = 1;
2549 must_end = FALSE;
2550 cc++;
2551 break;
2552
2553 case OP_SOD:
2554 case OP_SOM:
2555 case OP_SET_SOM:
2556 case OP_NOT_WORD_BOUNDARY:
2557 case OP_WORD_BOUNDARY:
2558 case OP_EODN:
2559 case OP_EOD:
2560 case OP_CIRC:
2561 case OP_CIRCM:
2562 case OP_DOLL:
2563 case OP_DOLLM:
2564 /* Zero width assertions. */
2565 cc++;
2566 continue;
2567
2568 case OP_PLUS:
2569 case OP_MINPLUS:
2570 case OP_POSPLUS:
2571 cc++;
2572 break;
2573
2574 case OP_EXACT:
2575 cc += 1 + IMM2_SIZE;
2576 break;
2577
2578 case OP_PLUSI:
2579 case OP_MINPLUSI:
2580 case OP_POSPLUSI:
2581 caseless = 1;
2582 cc++;
2583 break;
2584
2585 case OP_EXACTI:
2586 caseless = 1;
2587 cc += 1 + IMM2_SIZE;
2588 break;
2589
2590 default:
2591 return FALSE;
2592 }
2593
2594 len = 1;
2595 #ifdef SUPPORT_UTF
2596 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2597 #endif
2598
2599 if (caseless && char_has_othercase(common, cc))
2600 {
2601 caseless = char_get_othercase_bit(common, cc);
2602 if (caseless == 0)
2603 return FALSE;
2604 #ifdef COMPILE_PCRE8
2605 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2606 #else
2607 if ((caseless & 0x100) != 0)
2608 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2609 else
2610 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2611 #endif
2612 }
2613 else
2614 caseless = 0;
2615
2616 while (len > 0 && location < 2 * 2)
2617 {
2618 c = *cc;
2619 bit = 0;
2620 if (len == (caseless & 0xff))
2621 {
2622 bit = caseless >> 8;
2623 c |= bit;
2624 }
2625
2626 chars[location] = c;
2627 chars[location + 1] = bit;
2628
2629 len--;
2630 location += 2;
2631 cc++;
2632 }
2633
2634 if (location == 2 * 2)
2635 break;
2636 else if (must_end)
2637 return FALSE;
2638 }
2639
2640 if (firstline)
2641 {
2642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2643 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2644 }
2645 else
2646 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2647
2648 start = LABEL();
2649 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2650 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2651 #ifdef COMPILE_PCRE8
2652 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2653 #else /* COMPILE_PCRE8 */
2654 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2655 #endif
2656
2657 #else /* SLJIT_UNALIGNED */
2658
2659 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2661 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2662 #else /* SLJIT_BIG_ENDIAN */
2663 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2664 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2665 #endif /* SLJIT_BIG_ENDIAN */
2666
2667 #ifdef COMPILE_PCRE8
2668 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2669 #else /* COMPILE_PCRE8 */
2670 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2671 #endif
2672 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2673
2674 #endif
2675
2676 if (chars[1] != 0 || chars[3] != 0)
2677 {
2678 pair.asuchars[0] = chars[1];
2679 pair.asuchars[1] = chars[3];
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2681 }
2682
2683 pair.asuchars[0] = chars[0];
2684 pair.asuchars[1] = chars[2];
2685 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2686
2687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2688 JUMPTO(SLJIT_JUMP, start);
2689 JUMPHERE(found);
2690 JUMPHERE(leave);
2691
2692 if (firstline)
2693 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2694 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2695 return TRUE;
2696 }
2697
2698 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2699 {
2700 DEFINE_COMPILER;
2701 struct sljit_label *start;
2702 struct sljit_jump *leave;
2703 struct sljit_jump *found;
2704 pcre_uchar oc, bit;
2705
2706 if (firstline)
2707 {
2708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2709 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2710 }
2711
2712 start = LABEL();
2713 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2714 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2715
2716 oc = first_char;
2717 if (caseless)
2718 {
2719 oc = TABLE_GET(first_char, common->fcc, first_char);
2720 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2721 if (first_char > 127 && common->utf)
2722 oc = UCD_OTHERCASE(first_char);
2723 #endif
2724 }
2725 if (first_char == oc)
2726 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2727 else
2728 {
2729 bit = first_char ^ oc;
2730 if (ispowerof2(bit))
2731 {
2732 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2733 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2734 }
2735 else
2736 {
2737 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2738 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2739 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2740 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2741 found = JUMP(SLJIT_C_NOT_ZERO);
2742 }
2743 }
2744
2745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2746 JUMPTO(SLJIT_JUMP, start);
2747 JUMPHERE(found);
2748 JUMPHERE(leave);
2749
2750 if (firstline)
2751 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2752 }
2753
/* Emits code that moves STR_PTR forward to the position just after the next
newline, so that matching restarts at the beginning of a line. Nothing is
skipped when STR_PTR is at or before the 'str' field of jit_arguments, or
already at STR_END. With firstline set, STR_END is replaced by the stored
first_line_end value while scanning and restored from POSSESSIVE0 afterwards.

Two variants are emitted:
- two-unit fixed newline (common->newline > 255): a loop comparing the two
  units before STR_PTR against the high and low byte of common->newline;
- every other convention: step one character back, then read characters and
  test them with check_newlinechar. For NLTYPE_ANY / NLTYPE_ANYCRLF a CR that
  is directly followed by NL is consumed as one newline. */
2754 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2755 {
2756 DEFINE_COMPILER;
2757 struct sljit_label *loop;
2758 struct sljit_jump *lastchar;
2759 struct sljit_jump *firstchar;
2760 struct sljit_jump *leave;
2761 struct sljit_jump *foundcr = NULL;
2762 struct sljit_jump *notfoundnl;
2763 jump_list *newline = NULL;
2764
2765 if (firstline)
2766 {
2767 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2768 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2769 }
2770
2771 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2772 {
2773 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2774 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2775 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2776 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2777 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2778
/* Step back one unit when at least two units lie between 'begin' and STR_PTR:
TMP2 is 1 in that case, 0 otherwise (scaled to bytes in the 16-bit library). */
2779 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2780 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2781 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2782 #ifdef COMPILE_PCRE16
2783 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2784 #endif
2785 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2786
/* Advance until the two units just before STR_PTR form the newline pair. */
2787 loop = LABEL();
2788 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2789 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2790 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2791 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2792 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2793 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2794
2795 JUMPHERE(leave);
2796 JUMPHERE(firstchar);
2797 JUMPHERE(lastchar);
2798
2799 if (firstline)
2800 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2801 return;
2802 }
2803
2804 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2805 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2806 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2807 skip_char_back(common);
2808
2809 loop = LABEL();
2810 read_char(common);
2811 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2812 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2813 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue is FALSE: the collected jumps fire for non-newline characters
and are bound to the loop label, so the scan continues. */
2814 check_newlinechar(common, common->nltype, &newline, FALSE);
2815 set_jumps(newline, loop);
2816
2817 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2818 {
2819 leave = JUMP(SLJIT_JUMP);
2820 JUMPHERE(foundcr);
/* After a CR: when the next unit is NL, step over it as well. */
2821 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2823 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2824 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2825 #ifdef COMPILE_PCRE16
2826 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2827 #endif
2828 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2829 JUMPHERE(notfoundnl);
2830 JUMPHERE(leave);
2831 }
2832 JUMPHERE(lastchar);
2833 JUMPHERE(firstchar);
2834
2835 if (firstline)
2836 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2837 }
2838
/* Emits a scan loop that advances STR_PTR until the code unit at STR_PTR has
its bit set in the bitmap located at address start_bits (byte index
unit >> 3, bit index unit & 7), or until STR_PTR reaches STR_END.
In the non-8-bit libraries every unit >= 255 is looked up as 255. When
common->utf is set, the loaded unit is kept in TMP3 so that, on a miss, the
trailing units of a multi-unit character can be skipped together with it.
With firstline set, STR_END is replaced by the stored first_line_end value
while scanning and restored from POSSESSIVE0 afterwards. */
2839 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2840 {
2841 DEFINE_COMPILER;
2842 struct sljit_label *start;
2843 struct sljit_jump *leave;
2844 struct sljit_jump *found;
2845 #ifndef COMPILE_PCRE8
2846 struct sljit_jump *jump;
2847 #endif
2848
2849 if (firstline)
2850 {
2851 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2852 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2853 }
2854
2855 start = LABEL();
2856 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2857 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2858 #ifdef SUPPORT_UTF
2859 if (common->utf)
2860 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2861 #endif
2862 #ifndef COMPILE_PCRE8
/* Clamp to the bitmap range: units of 255 and above all map to entry 255. */
2863 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2864 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2865 JUMPHERE(jump);
2866 #endif
/* TMP1 = bitmap byte, TMP2 = 1 << (unit & 7); a non-zero AND means "found". */
2867 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2868 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2869 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2870 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2871 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2872 found = JUMP(SLJIT_C_NOT_ZERO);
2873
2874 #ifdef SUPPORT_UTF
2875 if (common->utf)
2876 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2877 #endif
2878 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2879 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2880 if (common->utf)
2881 {
/* UTF-8: for lead bytes >= 0xc0 also skip the extra bytes listed in utf8_table4. */
2882 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2883 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2884 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2885 }
2886 #endif
2887 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2888 if (common->utf)
2889 {
/* UTF-16: when (unit & 0xfc00) == 0xd800, skip one more unit (two bytes). */
2890 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2891 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2892 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2893 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2894 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2895 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2896 }
2897 #endif
2898 JUMPTO(SLJIT_JUMP, start);
2899 JUMPHERE(found);
2900 JUMPHERE(leave);
2901
2902 if (firstline)
2903 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2904 }
2905
/* Emits a forward search for req_char (or its other case when caseless is
set) and returns the jump that is taken when the search reaches STR_END
without finding it; the caller must bind that jump.

The search is skipped entirely in two cases:
- toolong: STR_PTR + REQ_BYTE_MAX is still below STR_END;
- alreadyfound: STR_PTR is below the position stored in the
  common->req_char_ptr local by an earlier search.
Otherwise the scan starts at STR_PTR (one unit later when has_firstchar is
set) and, on success, the position of the hit is stored in
common->req_char_ptr. STR_PTR itself is not modified; TMP1 and TMP2 are used
as scratch registers. */
2906 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2907 {
2908 DEFINE_COMPILER;
2909 struct sljit_label *loop;
2910 struct sljit_jump *toolong;
2911 struct sljit_jump *alreadyfound;
2912 struct sljit_jump *found;
2913 struct sljit_jump *foundoc = NULL;
2914 struct sljit_jump *notfound;
2915 pcre_uchar oc, bit;
2916
2917 SLJIT_ASSERT(common->req_char_ptr != 0);
2918 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2919 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2920 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2921 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2922
2923 if (has_firstchar)
2924 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2925 else
2926 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2927
/* TMP1 is the scan position, TMP2 the unit loaded from it. */
2928 loop = LABEL();
2929 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2930
2931 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2932 oc = req_char;
2933 if (caseless)
2934 {
2935 oc = TABLE_GET(req_char, common->fcc, req_char);
2936 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2937 if (req_char > 127 && common->utf)
2938 oc = UCD_OTHERCASE(req_char);
2939 #endif
2940 }
2941 if (req_char == oc)
2942 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2943 else
2944 {
2945 bit = req_char ^ oc;
2946 if (ispowerof2(bit))
2947 {
/* The two cases differ in one bit: force it on and compare once. */
2948 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2949 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2950 }
2951 else
2952 {
2953 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2954 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2955 }
2956 }
2957 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2958 JUMPTO(SLJIT_JUMP, loop);
2959
2960 JUMPHERE(found);
2961 if (foundoc)
2962 JUMPHERE(foundoc);
/* Remember where the character was found so later calls can skip the scan. */
2963 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2964 JUMPHERE(alreadyfound);
2965 JUMPHERE(toolong);
2966 return notfound;
2967 }
2968
/* Emits the shared helper that replays the frame records stored from
STACK_TOP upwards. TMP1 walks the records, TMP3 holds the base address of the
locals (GET_LOCAL_BASE), TMP2 holds the first word of the current record.

Record formats handled by the emitted code:
- first word > frame_end (signed): the word is an offset from the locals
  base; the next two words are copied to that location and the following
  word slot, and the walk advances by three words;
- first word == frame_end: the helper returns;
- first word == frame_setstrbegin: the next word is stored in OVECTOR(0) and
  the walk advances by two words;
- first word == frame_setmark (only emitted when common->mark_ptr != 0): the
  next word is stored in the common->mark_ptr local, advance by two words;
- anything else: skipped as a two-word record. */
2969 static void do_revertframes(compiler_common *common)
2970 {
2971 DEFINE_COMPILER;
2972 struct sljit_jump *jump;
2973 struct sljit_label *mainloop;
2974
2975 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2976 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2977 GET_LOCAL_BASE(TMP3, 0, 0);
2978
2979 /* Drop frames until we reach STACK_TOP. */
2980 mainloop = LABEL();
2981 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2982 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2983 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2984 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2985 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2986 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2987 JUMPTO(SLJIT_JUMP, mainloop);
2988
2989 JUMPHERE(jump);
2990 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2991 /* End of dropping frames. */
2992 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2993
2994 JUMPHERE(jump);
2995 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2996 /* Set string begin. */
2997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2998 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2999 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3000 JUMPTO(SLJIT_JUMP, mainloop);
3001
3002 JUMPHERE(jump);
3003 if (common->mark_ptr != 0)
3004 {
/* Set mark: restore the saved mark pointer. */
3005 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3006 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3007 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3008 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3009 JUMPTO(SLJIT_JUMP, mainloop);
3010
3011 JUMPHERE(jump);
3012 }
3013
3014 /* Unknown command. */
3015 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3016 JUMPTO(SLJIT_JUMP, mainloop);
3017 }
3018
/* Emits the shared word-boundary helper. The emitted code computes a 0/1
"is word character" value for the character before STR_PTR (kept in the
LOCALS1 slot) and for the character at STR_PTR (kept in TMP2), then XORs the
two with SLJIT_SET_E, so the caller can branch on the resulting flags: the
result is zero when both sides agree, i.e. when there is no boundary.

The return address is kept in the LOCALS0 slot instead of RETURN_ADDR.
A missing character on either side (STR_PTR at or before the 'begin' field of
jit_arguments, or the check_str_end jump being taken) counts as a non-word
character.

Classification:
- common->use_ucp set: underscore, or a UCD chartype in the range
  ucp_Ll..ucp_Lu or ucp_Nd..ucp_No;
- otherwise: bit 0x10 (ctype_word) of the common->ctypes entry; characters
  above 255 are treated as non-word where such characters can occur. */
3019 static void check_wordboundary(compiler_common *common)
3020 {
3021 DEFINE_COMPILER;
3022 struct sljit_jump *skipread;
3023 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3024 struct sljit_jump *jump;
3025 #endif
3026
3027 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3028
3029 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3030 /* Get type of the previous char, and put it to LOCALS1. */
3031 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3034 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it was. */
3035 skip_char_back(common);
3036 check_start_used_ptr(common);
3037 read_char(common);
3038
3039 /* Testing char type. */
3040 #ifdef SUPPORT_UCP
3041 if (common->use_ucp)
3042 {
/* TMP2 starts as 1 so that the underscore shortcut yields "word character". */
3043 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3044 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3045 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3046 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3047 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3048 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3049 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3050 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3051 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3052 JUMPHERE(jump);
3053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3054 }
3055 else
3056 #endif
3057 {
3058 #ifndef COMPILE_PCRE8
3059 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3060 #elif defined SUPPORT_UTF
3061 /* Here LOCALS1 has already been zeroed. */
3062 jump = NULL;
3063 if (common->utf)
3064 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3065 #endif /* COMPILE_PCRE8 */
3066 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3067 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3068 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3070 #ifndef COMPILE_PCRE8
3071 JUMPHERE(jump);
3072 #elif defined SUPPORT_UTF
3073 if (jump != NULL)
3074 JUMPHERE(jump);
3075 #endif /* COMPILE_PCRE8 */
3076 }
3077 JUMPHERE(skipread);
3078
/* Now classify the character at STR_PTR; TMP2 defaults to 0 (non-word). */
3079 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3080 skipread = check_str_end(common);
3081 peek_char(common);
3082
3083 /* Testing char type. This is a code duplication. */
3084 #ifdef SUPPORT_UCP
3085 if (common->use_ucp)
3086 {
3087 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3088 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3089 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3090 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3091 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3092 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3093 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3094 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3095 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3096 JUMPHERE(jump);
3097 }
3098 else
3099 #endif
3100 {
3101 #ifndef COMPILE_PCRE8
3102 /* TMP2 may be destroyed by peek_char. */
3103 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3104 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3105 #elif defined SUPPORT_UTF
3106 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3107 jump = NULL;
3108 if (common->utf)
3109 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3110 #endif
3111 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3112 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3113 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3114 #ifndef COMPILE_PCRE8
3115 JUMPHERE(jump);
3116 #elif defined SUPPORT_UTF
3117 if (jump != NULL)
3118 JUMPHERE(jump);
3119 #endif /* COMPILE_PCRE8 */
3120 }
3121 JUMPHERE(skipread);
3122
/* Flags reflect (current is word) XOR (previous is word). */
3123 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3124 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3125 }
3126
/* Emits the shared helper that tests the character in TMP1 against the "any
newline" set. The 0/1 result is accumulated in TMP2 and the final OR is
emitted with SLJIT_SET_E, so the caller can branch on the flags.
Characters accepted by the emitted code:
- 0x0a..0x0d (one unsigned range compare after subtracting 0x0a);
- 0x85;
- 0x2028 and 0x2029, only in the 16-bit library or when common->utf is set
  in the 8-bit library (bit 0 is forced on so a single compare covers both).
TMP1 is modified by the emitted code (0x0a is subtracted, and bit 0 may be
set). */
3127 static void check_anynewline(compiler_common *common)
3128 {
3129 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3130 DEFINE_COMPILER;
3131
3132 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3133
3134 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3135 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3136 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3137 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3138 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3139 #ifdef COMPILE_PCRE8
3140 if (common->utf)
3141 {
3142 #endif
3143 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3144 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3145 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3146 #ifdef COMPILE_PCRE8
3147 }
3148 #endif
3149 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Folds the result of whichever compare was emitted last into TMP2. */
3150 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3151 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3152 }
3153
/* Emits the shared helper that tests the character in TMP1 against the
horizontal-space set. The 0/1 result is accumulated in TMP2 and the final OR
is emitted with SLJIT_SET_E, so the caller can branch on the flags.
Characters accepted by the emitted code:
- 0x09, 0x20 and 0xa0;
- 0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f and 0x3000, only in the
  16-bit library or when common->utf is set in the 8-bit library.
In that wide-character case TMP1 is modified (0x2000 is subtracted). */
3154 static void check_hspace(compiler_common *common)
3155 {
3156 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3157 DEFINE_COMPILER;
3158
3159 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3160
3161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3162 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3163 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3164 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3165 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3166 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3167 #ifdef COMPILE_PCRE8
3168 if (common->utf)
3169 {
3170 #endif
3171 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3172 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3173 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3175 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000 so the 0x2000..0x200A range needs one unsigned compare. */
3176 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3177 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3178 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3179 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3180 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3181 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3182 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3183 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3184 #ifdef COMPILE_PCRE8
3185 }
3186 #endif
3187 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Folds the result of whichever compare was emitted last into TMP2. */
3188 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3189
3190 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3191 }
3192
/* Emits the shared "vertical space" helper routine (fast enter / fast return
through RETURN_ADDR). It tests the same code points as check_anynewline. */
3193 static void check_vspace(compiler_common *common)
3194 {
3195 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The code points tested are 0x0a..0x0d and 0x85, and on the UTF / 16 bit path
also 0x2028 and 0x2029. On return TMP2 holds the 0/1 result and the last
COND_VALUE also sets the equality flags. TMP1 is modified as well (biased by
0x0a; lowest bit set on the UTF / 16 bit path). */
3196 DEFINE_COMPILER;
3197
3198 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3199
/* Bias by 0x0a so that 0x0a..0x0d becomes one unsigned range test (0..3). */
3200 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3201 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3202 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3203 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3204 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3205 #ifdef COMPILE_PCRE8
3206 if (common->utf)
3207 {
3208 #endif
3209 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (both biased by 0x0a), so a
single equality test covers the two code points. */
3210 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3211 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3212 #ifdef COMPILE_PCRE8
3213 }
3214 #endif
3215 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Fold in the last comparison and set the flags for the caller. */
3216 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3217
3218 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3219 }
3220
/* The comparison helpers below borrow STR_END and STACK_TOP as character
registers; both are saved on entry and restored before returning. */
3221 #define CHAR1 STR_END
3222 #define CHAR2 STACK_TOP
3223
/* Emits the shared case-sensitive comparison helper (fast enter / fast return
through RETURN_ADDR).

Register use, as read from the emitted code: TMP1 points to the first string,
STR_PTR is first moved back by TMP2, and TMP2 is the remaining length, which is
decremented by IN_UCHARS(1) per compared unit. TMP2 is therefore zero when the
whole sequence matched and non-zero when the loop stopped at a mismatch.
NOTE(review): how the caller consumes TMP2 / the flags is not visible here. */
3224 static void do_casefulcmp(compiler_common *common)
3225 {
3226 DEFINE_COMPILER;
3227 struct sljit_jump *jump;
3228 struct sljit_label *label;
3229
3230 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3231 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the two borrowed registers. */
3232 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one unit back because the loads below address
[pointer + IN_UCHARS(1)]. Presumably MOVU_UCHAR is a load-with-update that
also advances the base register -- nothing else in the loop moves the
pointers. TODO confirm against the macro definition. */
3234 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3235 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3236
3237 label = LABEL();
3238 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3239 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first mismatch, before TMP2 is decremented. */
3240 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3241 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3242 JUMPTO(SLJIT_C_NOT_ZERO, label);
3243
3244 JUMPHERE(jump);
/* Undo the initial one unit bias of STR_PTR and restore the saved registers. */
3245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3246 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3247 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3248 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3249 }
3250
/* STACK_LIMIT is borrowed to hold the address of the lower casing table; it is
saved in TMP3 on entry and restored before returning. */
3251 #define LCC_TABLE STACK_LIMIT
3252
/* Emits the shared caseless comparison helper (fast enter / fast return
through RETURN_ADDR). It has the same structure as do_casefulcmp, except that
both characters are mapped through the common->lcc table before they are
compared. In builds other than COMPILE_PCRE8, characters above 255 skip the
table lookup and are compared unchanged.

TMP2 is the remaining length: zero when the whole sequence matched, non-zero
when the loop stopped at a mismatch. NOTE(review): how the caller consumes
TMP2 / the flags is not visible here. */
3253 static void do_caselesscmp(compiler_common *common)
3254 {
3255 DEFINE_COMPILER;
3256 struct sljit_jump *jump;
3257 struct sljit_label *label;
3258
3259 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3260 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3261
/* Save the three borrowed registers, then load the table address. */
3262 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3265 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one unit back because the loads below address
[pointer + IN_UCHARS(1)]. Presumably MOVU_UCHAR is a load-with-update that
also advances the base register. TODO confirm against the macro definition. */
3266 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3267 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3268
3269 label = LABEL();
3270 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3271 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map CHAR1 through the table; values above 255 bypass the lookup. */
3272 #ifndef COMPILE_PCRE8
3273 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3274 #endif
3275 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
/* Same for CHAR2. */
3276 #ifndef COMPILE_PCRE8
3277 JUMPHERE(jump);
3278 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3279 #endif
3280 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3281 #ifndef COMPILE_PCRE8
3282 JUMPHERE(jump);
3283 #endif
/* Leave the loop at the first mismatch, before TMP2 is decremented. */
3284 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3285 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3286 JUMPTO(SLJIT_C_NOT_ZERO, label);
3287
3288 JUMPHERE(jump);
/* Undo the initial one unit bias of STR_PTR and restore the saved registers. */
3289 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3290 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3291 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3292 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3293 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3294 }
3295
3296 #undef LCC_TABLE
3297 #undef CHAR1
3298 #undef CHAR2
3299
3300 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3301
/* Caseless comparison of two character sequences, written in C rather than
emitted as machine code.

Arguments:
  src1   start of the first sequence
  args   supplies the second sequence: it starts at args->uchar_ptr and is
         bounded by args->end
  end1   end of the first sequence

Returns:
  (pcre_uchar*)1  the second sequence ran out before the first one was consumed
  NULL            a character pair differs, i.e. c1 is neither c2 nor
                  UCD_OTHERCASE(c2)
  otherwise       the position in the second sequence just after the last
                  compared character */
3302 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3303 {
3304 /* This function would be ineffective to do in JIT level. */
3305 int c1, c2;
3306 const pcre_uchar *src2 = args->uchar_ptr;
3307 const pcre_uchar *end2 = args->end;
3308
3309 while (src1 < end1)
3310 {
/* The bound of the second sequence is checked once per character, before it
is read. */
3311 if (src2 >= end2)
3312 return (pcre_uchar*)1;
/* GETCHARINC reads one character and advances the pointer passed to it. */
3313 GETCHARINC(c1, src1);
3314 GETCHARINC(c2, src2);
3315 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
3316 }
3317 return src2;
3318 }
3319
3320 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3321
/* Emits the comparison of one pattern character (all of its code units when in
UTF mode) against the subject, as part of a longer fixed sequence described by
context.

Arguments:
  common      compiler state
  caseless    TRUE for a caseless comparison
  cc          points to the pattern character
  context     running state of the sequence: length is the number of bytes
              still to be compared (subject data is addressed as
              STR_PTR - length), sourcereg is the register that receives the
              next load (-1 before the first call), and ucharptr / c / oc
              collect the units of the chunk being assembled when unaligned
              reads are available
  backtracks  receives the jumps taken on mismatch

Returns: cc advanced past the units that were processed.

A caseless character is handled with a single comparison: the bit reported by
char_get_othercase_bit() is ORed into the subject unit, which is then compared
with the pattern unit that has the same bit set. */
3322 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3323 compare_context* context, jump_list **backtracks)
3324 {
3325 DEFINE_COMPILER;
3326 unsigned int othercasebit = 0;
3327 pcre_uchar *othercasechar = NULL;
3328 #ifdef SUPPORT_UTF
3329 int utflength;
3330 #endif
3331
3332 if (caseless && char_has_othercase(common, cc))
3333 {
3334 othercasebit = char_get_othercase_bit(common, cc);
3335 SLJIT_ASSERT(othercasebit);
3336 /* Extracting bit difference info. The upper part of the value selects the
code unit that carries the differing bit (othercasechar), the low 8 bits are
the bit itself; in 16 bit mode bit 0x100 says that the bit belongs to the
upper byte of the unit. */
3337 #ifdef COMPILE_PCRE8
3338 othercasechar = cc + (othercasebit >> 8);
3339 othercasebit &= 0xff;
3340 #else
3341 #ifdef COMPILE_PCRE16
3342 othercasechar = cc + (othercasebit >> 9);
3343 if ((othercasebit & 0x100) != 0)
3344 othercasebit = (othercasebit & 0xff) << 8;
3345 else
3346 othercasebit &= 0xff;
3347 #endif
3348 #endif
3349 }
3350
/* First call for this sequence: load the first chunk into TMP1 and make TMP2
the target of the next load. From here on the two registers alternate. */
3351 if (context->sourcereg == -1)
3352 {
3353 #ifdef COMPILE_PCRE8
3354 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3355 if (context->length >= 4)
3356 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3357 else if (context->length >= 2)
3358 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3359 else
3360 #endif
3361 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3362 #else
3363 #ifdef COMPILE_PCRE16
3364 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3365 if (context->length >= 4)
3366 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3367 else
3368 #endif
3369 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3370 #endif
3371 #endif /* COMPILE_PCRE8 */
3372 context->sourcereg = TMP2;
3373 }
3374
/* In UTF mode the loop below runs once per code unit of the character. */
3375 #ifdef SUPPORT_UTF
3376 utflength = 1;
3377 if (common->utf && HAS_EXTRALEN(*cc))
3378 utflength += GET_EXTRALEN(*cc);
3379
3380 do
3381 {
3382 #endif
3383
3384 context->length -= IN_UCHARS(1);
3385 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3386
3387 /* Unaligned read is supported. Units are collected in context->c (expected
value) and context->oc (case bit mask) and compared as one 4, 2 or 1 byte
chunk. */
3388 if (othercasebit != 0 && othercasechar == cc)
3389 {
3390 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3391 context->oc.asuchars[context->ucharptr] = othercasebit;
3392 }
3393 else
3394 {
3395 context->c.asuchars[context->ucharptr] = *cc;
3396 context->oc.asuchars[context->ucharptr] = 0;
3397 }
3398 context->ucharptr++;
3399
/* Flush when the collected units fill a chunk or the sequence ends. */
3400 #ifdef COMPILE_PCRE8
3401 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3402 #else
3403 if (context->ucharptr >= 2 || context->length == 0)
3404 #endif
3405 {
/* Load the following chunk (if any) into sourcereg, then switch to the other
register, which holds the chunk that is compared now. */
3406 if (context->length >= 4)
3407 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3408 #ifdef COMPILE_PCRE8
3409 else if (context->length >= 2)
3410 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3411 else if (context->length >= 1)
3412 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3413 #else
3414 else if (context->length >= 2)
3415 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3416 #endif
3417 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3418
/* The case labels are counts of collected units, hence the division by the
unit size. */
3419 switch(context->ucharptr)
3420 {
3421 case 4 / sizeof(pcre_uchar):
3422 if (context->oc.asint != 0)
3423 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3424 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3425 break;
3426
3427 case 2 / sizeof(pcre_uchar):
3428 if (context->oc.asushort != 0)
3429 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3430 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3431 break;
3432
3433 #ifdef COMPILE_PCRE8
3434 case 1:
3435 if (context->oc.asbyte != 0)
3436 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3437 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3438 break;
3439 #endif
3440
3441 default:
3442 SLJIT_ASSERT_STOP();
3443 break;
3444 }
3445 context->ucharptr = 0;
3446 }
3447
3448 #else
3449
3450 /* Unaligned read is unsupported. One unit is loaded and compared at a time:
the next unit is loaded into sourcereg, then the other register (holding the
current unit) is compared. */
3451 #ifdef COMPILE_PCRE8
3452 if (context->length > 0)
3453 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3454 #else
3455 if (context->length > 0)
3456 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3457 #endif
3458 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3459
3460 if (othercasebit != 0 && othercasechar == cc)
3461 {
3462 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3463 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3464 }
3465 else
3466 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3467
3468 #endif
3469
3470 cc++;
3471 #ifdef SUPPORT_UTF
3472 utflength--;
3473 }
3474 while (utflength > 0);
3475 #endif
3476
3477 return cc;
3478 }
3479
3480 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3481
/* The two macros below keep the run-time register and the compile-time offset
in step: the register always holds (real value - offset). They emit one
SUB or ADD when the requested offset differs from the current one and then
record the new offset. Each expands to more than one statement and relies on
the local variables typereg / typeoffset and charoffset. */
3482 #define SET_TYPE_OFFSET(value) \
3483 if ((value) != typeoffset) \
3484 { \
3485 if ((value) > typeoffset) \
3486 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3487 else \
3488 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3489 } \
3490 typeoffset = (value);
3491
3492 #define SET_CHAR_OFFSET(value) \
3493 if ((value) != charoffset) \
3494 { \
3495 if ((value) > charoffset) \
3496 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3497 else \
3498 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3499 } \
3500 charoffset = (value);
3501
/* Emits the matching code for an extended character class.

Arguments:
  common      compiler state
  cc          points to the class data: a flag unit (XCL_NOT, XCL_MAP), an
              optional 32 byte bitmap, then XCL_SINGLE / XCL_RANGE / XCL_PROP /
              XCL_NOTPROP items terminated by XCL_END
  backtracks  receives the jumps taken when the character is not accepted

The function works in two passes over the item list: the first counts the
items and records which data (character, type, script) they need, the second
emits the comparisons. For a non-negated class a matching item jumps to the
local "found" list, which is bound to the end of the emitted code; for a
negated class (XCL_NOT) a matching item jumps to backtracks. */
3502 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3503 {
3504 DEFINE_COMPILER;
3505 jump_list *found = NULL;
3506 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3507 unsigned int c;
3508 int compares;
3509 struct sljit_jump *jump = NULL;
3510 pcre_uchar *ccbegin;
3511 #ifdef SUPPORT_UCP
3512 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3513 BOOL charsaved = FALSE;
3514 int typereg = TMP1, scriptreg = TMP1;
3515 unsigned int typeoffset;
3516 #endif
3517 int invertcmp, numberofcmps;
3518 unsigned int charoffset;
3519
3520 /* Although SUPPORT_UTF must be defined, we are not necessarily in UTF mode. */
3521 detect_partial_match(common, backtracks);
3522 read_char(common);
3523
/* Optional bitmap: characters up to 255 are looked up in the 32 byte map that
follows the flag unit. The character is kept in TMP3 meanwhile and copied
back to TMP1 afterwards. */
3524 if ((*cc++ & XCL_MAP) != 0)
3525 {
3526 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3527 #ifndef COMPILE_PCRE8
3528 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3529 #elif defined SUPPORT_UTF
3530 if (common->utf)
3531 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3532 #endif
3533
/* TMP2 = 1 << (c & 7), TMP1 = map[c >> 3]; the character is in the map when
the AND of the two is non-zero. */
3534 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3535 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3536 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3537 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3538 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3539 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3540
3541 #ifndef COMPILE_PCRE8
3542 JUMPHERE(jump);
3543 #elif defined SUPPORT_UTF
3544 if (common->utf)
3545 JUMPHERE(jump);
3546 #endif
3547 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3548 #ifdef SUPPORT_UCP
3549 charsaved = TRUE;
3550 #endif
3551 cc += 32 / sizeof(pcre_uchar);
3552 }
3553
3554 /* Scanning the necessary info. This pass only counts the items and records
whether the character itself, its type and/or its script are needed. */
3555 ccbegin = cc;
3556 compares = 0;
3557 while (*cc != XCL_END)
3558 {
3559 compares++;
3560 if (*cc == XCL_SINGLE)
3561 {
3562 cc += 2;
3563 #ifdef SUPPORT_UTF
3564 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3565 #endif
3566 #ifdef SUPPORT_UCP
3567 needschar = TRUE;
3568 #endif
3569 }
3570 else if (*cc == XCL_RANGE)
3571 {
3572 cc += 2;
3573 #ifdef SUPPORT_UTF
3574 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3575 #endif
3576 cc++;
3577 #ifdef SUPPORT_UTF
3578 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3579 #endif
3580 #ifdef SUPPORT_UCP
3581 needschar = TRUE;
3582 #endif
3583 }
3584 #ifdef SUPPORT_UCP
3585 else
3586 {
3587 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3588 cc++;
3589 switch(*cc)
3590 {
3591 case PT_ANY:
3592 break;
3593
3594 case PT_LAMP:
3595 case PT_GC:
3596 case PT_PC:
3597 case PT_ALNUM:
3598 needstype = TRUE;
3599 break;
3600
3601 case PT_SC:
3602 needsscript = TRUE;
3603 break;
3604
3605 case PT_SPACE:
3606 case PT_PXSPACE:
3607 case PT_WORD:
3608 needstype = TRUE;
3609 needschar = TRUE;
3610 break;
3611
3612 default:
3613 SLJIT_ASSERT_STOP();
3614 break;
3615 }
3616 cc += 2;
3617 }
3618 #endif
3619 }
3620
3621 #ifdef SUPPORT_UCP
3622 /* Simple register allocation. TMP1 is preferred if possible. */
3623 if (needstype || needsscript)
3624 {
/* The getucd helper is called with the character in TMP1, so the character is
kept in TMP3 when it is needed later (the bitmap code may have saved it
already). */
3625 if (needschar && !charsaved)
3626 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3627 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3628 if (needschar)
3629 {
3630 if (needstype)
3631 {
3632 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3633 typereg = RETURN_ADDR;
3634 }
3635
3636 if (needsscript)
3637 scriptreg = TMP3;
3638 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3639 }
3640 else if (needstype && needsscript)
3641 scriptreg = TMP3;
3642 /* In all other cases only one of them was specified, and that one can go to TMP1. */
3643
/* Load the script byte of the ucd record. Both variants scale TMP2 by 8 (shift
by 3); presumably TMP2 is the record index left by the getucd helper --
TODO confirm against that helper. */
3644 if (needsscript)
3645 {
3646 if (scriptreg == TMP1)
3647 {
3648 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3649 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3650 }
3651 else
3652 {
3653 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3654 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3655 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3656 }
3657 }
3658 }
3659 #endif
3660
3661 /* Generating code. */
3662 cc = ccbegin;
3663 charoffset = 0;
3664 numberofcmps = 0;
3665 #ifdef SUPPORT_UCP
3666 typeoffset = 0;
3667 #endif
3668
3669 while (*cc != XCL_END)
3670 {
/* compares counts the items still to come. For the last item of a non-negated
class the condition is inverted: its jump goes to backtracks on mismatch and a
match falls through to the end of the emitted code (see the add_jump at the
bottom of the loop). */
3671 compares--;
3672 invertcmp = (compares == 0 && list != backtracks);
3673 jump = NULL;
3674
3675 if (*cc == XCL_SINGLE)
3676 {
3677 cc ++;
3678 #ifdef SUPPORT_UTF
3679 if (common->utf)
3680 {
3681 GETCHARINC(c, cc);
3682 }
3683 else
3684 #endif
3685 c = *cc++;
3686
/* Up to four consecutive single / range tests are accumulated in TMP2 and
share one conditional jump; numberofcmps counts the tests collected so far. */
3687 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3688 {
3689 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3690 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3691 numberofcmps++;
3692 }
3693 else if (numberofcmps > 0)
3694 {
3695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3696 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3697 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3698 numberofcmps = 0;
3699 }
3700 else
3701 {
3702 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3703 numberofcmps = 0;
3704 }
3705 }
3706 else if (*cc == XCL_RANGE)
3707 {
3708 cc ++;
3709 #ifdef SUPPORT_UTF
3710 if (common->utf)
3711 {
3712 GETCHARINC(c, cc);
3713 }
3714 else
3715 #endif
3716 c = *cc++;
/* Rebase TMP1 to the start of the range so that one unsigned "less or equal"
test against (end - start) covers the whole range. */
3717 SET_CHAR_OFFSET(c);
3718 #ifdef SUPPORT_UTF
3719 if (common->utf)
3720 {
3721 GETCHARINC(c, cc);
3722 }
3723 else
3724 #endif
3725 c = *cc++;
3726 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3727 {
3728 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3729 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3730 numberofcmps++;
3731 }
3732 else if (numberofcmps > 0)
3733 {
3734 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3735 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3736 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3737 numberofcmps = 0;
3738 }
3739 else
3740 {
3741 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3742 numberofcmps = 0;
3743 }
3744 }
3745 #ifdef SUPPORT_UCP
3746 else
3747 {
/* Property item: cc[0] is the property type and cc[1] its value (after the
cc++ below). XCL_NOTPROP flips the sense of the test. */
3748 if (*cc == XCL_NOTPROP)
3749 invertcmp ^= 0x1;
3750 cc++;
3751 switch(*cc)
3752 {
3753 case PT_ANY:
/* No test is needed here: depending on the item and its position either
nothing is emitted (continue) or an unconditional jump is. Note that the
"continue" statements skip the cc += 2 after the switch; on the next
iteration cc still points at the PT_ANY unit. NOTE(review): this relies on
that unit falling into this same branch again -- confirm the value of
PT_ANY against XCL_SINGLE / XCL_RANGE / XCL_END. */
3754 if (list != backtracks)
3755 {
3756 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3757 continue;
3758 }
3759 else if (cc[-1] == XCL_NOTPROP)
3760 continue;
3761 jump = JUMP(SLJIT_JUMP);
3762 break;
3763
3764 case PT_LAMP:
3765 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3766 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3767 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3768 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3769 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3770 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3771 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3772 break;
3773
3774 case PT_GC:
/* A general category is a range of types taken from ucp_typerange. */
3775 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3776 SET_TYPE_OFFSET(c);
3777 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3778 break;
3779
3780 case PT_PC:
3781 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3782 break;
3783
3784 case PT_SC:
3785 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3786 break;
3787
3788 case PT_SPACE:
3789 case PT_PXSPACE:
/* Characters 9..13 or types ucp_Zl..ucp_Zs. For PT_SPACE character 11 is
excluded from the 9..13 test: TMP2 is preset to 0 and the range test is
jumped over.
NOTE(review): when that jump is taken at run time, the SUB emitted by
SET_CHAR_OFFSET(9) is skipped although charoffset is updated at compile
time, so TMP1 keeps its previous bias on that path. Confirm that later
comparisons on TMP1 cannot be affected. */
3790 if (*cc == PT_SPACE)
3791 {
3792 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3793 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3794 }
3795 SET_CHAR_OFFSET(9);
3796 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3797 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3798 if (*cc == PT_SPACE)
3799 JUMPHERE(jump);
3800
3801 SET_TYPE_OFFSET(ucp_Zl);
3802 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3803 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3804 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3805 break;
3806
3807 case PT_WORD:
/* Underscore, or anything PT_ALNUM accepts. */
3808 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3809 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3810 /* ... fall through */
3811
3812 case PT_ALNUM:
/* Types ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. When entered through PT_WORD the
first result is ORed into TMP2 instead of overwriting it. */
3813 SET_TYPE_OFFSET(ucp_Ll);
3814 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3815 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3816 SET_TYPE_OFFSET(ucp_Nd);
3817 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3818 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3819 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3820 break;
3821 }
3822 cc += 2;
3823 }
3824 #endif
3825
/* The jump of the last item always goes to backtracks; earlier ones go to
list (found, or backtracks for a negated class). */
3826 if (jump != NULL)
3827 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3828 }
3829
3830 if (found != NULL)
3831 set_jumps(found, LABEL());
3832 }
3833
3834 #undef SET_TYPE_OFFSET
3835 #undef SET_CHAR_OFFSET
3836
3837 #endif
3838
3839 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3840 {
3841 DEFINE_COMPILER;
3842 int length;
3843 unsigned int c, oc, bit;
3844 compare_context context;
3845 struct sljit_jump *jump[4];
3846 #ifdef SUPPORT_UTF
3847 struct sljit_label *label;
3848 #ifdef SUPPORT_UCP
3849 pcre_uchar propdata[5];
3850 #endif
3851 #endif
3852
3853 switch(type)
3854 {
3855 case OP_SOD:
3856 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3857 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3858 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3859 return cc;
3860
3861 case OP_SOM:
3862 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3863 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3864 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3865 return cc;
3866
3867 case OP_NOT_WORD_BOUNDARY:
3868 case OP_WORD_BOUNDARY:
3869 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3870 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3871 return cc;
3872
3873 case OP_NOT_DIGIT:
3874 case OP_DIGIT:
3875 detect_partial_match(common, backtracks);
3876 read_char8_type(common);
3877 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3878 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3879 return cc;
3880
3881 case OP_NOT_WHITESPACE:
3882 case OP_WHITESPACE:
3883 detect_partial_match(common, backtracks);
3884 read_char8_type(common);
3885 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3886 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3887 return cc;
3888
3889 case OP_NOT_WORDCHAR:
3890 case OP_WORDCHAR:
3891 detect_partial_match(common, backtracks);
3892 read_char8_type(common);
3893 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3894 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3895 return cc;
3896
3897 case OP_ANY:
3898 detect_partial_match(common, backtracks);
3899 read_char(common);
3900 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3901 {
3902 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3903 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3904 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3905 else
3906 jump[1] = check_str_end(common);
3907
3908 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3909 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3910 if (jump[1] != NULL)
3911 JUMPHERE(jump[1]);
3912 JUMPHERE(jump[0]);
3913 }
3914 else
3915 check_newlinechar(common, common->nltype, backtracks, TRUE);
3916 return cc;
3917
3918 case OP_ALLANY:
3919 detect_partial_match(common, backtracks);
3920 #ifdef SUPPORT_UTF
3921 if (common->utf)
3922 {
3923 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3924 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3925 #ifdef COMPILE_PCRE8
3926 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3927 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3928 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3929 #else /* COMPILE_PCRE8 */
3930 #ifdef COMPILE_PCRE16
3931 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3932 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3933 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3934 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3935 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3937 #endif /* COMPILE_PCRE16 */
3938 #endif /* COMPILE_PCRE8 */
3939 JUMPHERE(jump[0]);
3940 return cc;
3941 }
3942 #endif
3943 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3944 return cc;
3945
3946 case OP_ANYBYTE:
3947 detect_partial_match(common, backtracks);
3948 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3949 return cc;
3950
3951 #ifdef SUPPORT_UTF
3952 #ifdef SUPPORT_UCP
3953 case OP_NOTPROP:
3954 case OP_PROP:
3955 propdata[0] = 0;
3956 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3957 propdata[2] = cc[0];
3958 propdata[3] = cc[1];
3959 propdata[4] = XCL_END;
3960 compile_xclass_trypath(common, propdata, backtracks);
3961 return cc + 2;
3962 #endif
3963 #endif
3964
3965 case OP_ANYNL:
3966 detect_partial_match(common, backtracks);
3967 read_char(common);
3968 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3969 /* We don't need to handle soft partial matching case. */
3970 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3971 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3972 else
3973 jump[1] = check_str_end(common);
3974 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3975 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3976 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3977 jump[3] = JUMP(SLJIT_JUMP);
3978 JUMPHERE(jump[0]);
3979 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
3980 JUMPHERE(jump[1]);
3981 JUMPHERE(jump[2]);
3982 JUMPHERE(jump[3]);
3983 return cc;
3984
3985 case OP_NOT_HSPACE:
3986 case OP_HSPACE:
3987 detect_partial_match(common, backtracks);
3988 read_char(common);
3989 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3990 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3991 return cc;
3992
3993 case OP_NOT_VSPACE:
3994 case OP_VSPACE:
3995 detect_partial_match(common, backtracks);
3996 read_char(common);
3997 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3998 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3999 return cc;
4000
4001 #ifdef SUPPORT_UCP
4002 case OP_EXTUNI:
4003 detect_partial_match(common, backtracks);
4004 read_char(common);
4005 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4006 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4007 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4008
4009 label = LABEL();
4010 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4011 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4012 read_char(common);
4013 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4014 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4015 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4016
4017 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4018 JUMPHERE(jump[0]);
4019 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4020 {
4021 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4022 /* Since we successfully read a char above, partial matching must occur. */
4023 check_partial(common, TRUE);
4024 JUMPHERE(jump[0]);
4025 }
4026 return cc;
4027 #endif
4028
4029 case OP_EODN:
4030 /* Requires rather complex checks. */
4031 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4032 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4033 {
4034 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4035 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4036 if (common->mode == JIT_COMPILE)
4037 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4038 else
4039 {
4040 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4041 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4042 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4043 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4044 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4045 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4046 check_partial(common, TRUE);
4047 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4048 JUMPHERE(jump[1]);
4049 }
4050 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4051 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4052 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4053 }
4054 else if (common->nltype == NLTYPE_FIXED)
4055 {
4056 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4057 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4058 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4059 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4060 }
4061 else
4062 {
4063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4064 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4065 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4066 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4067 jump[2] = JUMP(SLJIT_C_GREATER);
4068 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4069 /* Equal. */
4070 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4071 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4072 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4073
4074 JUMPHERE(jump[1]);
4075 if (common->nltype == NLTYPE_ANYCRLF)
4076 {
4077 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4078 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4079 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4080 }
4081 else
4082 {
4083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4084 read_char(common);
4085 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4086 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4087 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4088 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4089 }
4090 JUMPHERE(jump[2]);
4091 JUMPHERE(jump[3]);
4092 }
4093 JUMPHERE(jump[0]);
4094 check_partial(common, FALSE);
4095 return cc;
4096
4097 case OP_EOD:
4098 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4099 check_partial(common, FALSE);
4100 return cc;
4101
4102 case OP_CIRC:
4103 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4105 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4106 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4107 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4108 return cc;
4109
4110 case OP_CIRCM:
4111 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4112 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4113 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4114 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4115 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4116 jump[0] = JUMP(SLJIT_JUMP);
4117 JUMPHERE(jump[1]);
4118
4119 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4120 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4121 {
4122 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4123 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4124 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4125 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4126 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4127 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4128 }
4129 else
4130 {
4131 skip_char_back(common);
4132 read_char(common);
4133 check_newlinechar(common, common->nltype, backtracks, FALSE);
4134 }
4135 JUMPHERE(jump[0]);
4136 return cc;
4137
4138 case OP_DOLL:
4139 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4140 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4141 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4142
4143 if (!common->endonly)
4144 compile_char1_trypath(common, OP_EODN, cc, backtracks);
4145 else
4146 {
4147 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4148 check_partial(common, FALSE);
4149 }
4150 return cc;
4151
4152 case OP_DOLLM:
4153 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4154 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4155 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4156 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4157 check_partial(common, FALSE);
4158 jump[0] = JUMP(SLJIT_JUMP);
4159 JUMPHERE(jump[1]);
4160
4161 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4162 {
4163 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4165 if (common->mode == JIT_COMPILE)
4166 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4167 else
4168 {
4169 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4170 /* STR_PTR = STR_END - IN_UCHARS(1) */
4171 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4172 check_partial(common, TRUE);
4173 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4174 JUMPHERE(jump[1]);
4175 }
4176
4177 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4178 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4179 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4180 }
4181 else
4182 {
4183 peek_char(common);
4184 check_newlinechar(common, common->nltype, backtracks, FALSE);
4185 }
4186 JUMPHERE(jump[0]);
4187 return cc;
4188
4189 case OP_CHAR:
4190 case OP_CHARI:
4191 length = 1;
4192 #ifdef SUPPORT_UTF
4193 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4194 #endif
4195 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4196 {
4197 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4198 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4199
4200 context.length = IN_UCHARS(length);
4201 context.sourcereg = -1;
4202 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4203 context.ucharptr = 0;
4204 #endif
4205 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4206 }
4207 detect_partial_match(common, backtracks);
4208 read_char(common);
4209 #ifdef SUPPORT_UTF
4210 if (common->utf)
4211 {
4212 GETCHAR(c, cc);
4213 }
4214 else
4215 #endif
4216 c = *cc;
4217 if (type == OP_CHAR || !char_has_othercase(common, cc))
4218 {
4219 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4220 return cc + length;
4221 }
4222 oc = char_othercase(common, c);
4223 bit = c ^ oc;
4224 if (ispowerof2(bit))
4225 {
4226 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4227 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4228 return cc + length;
4229 }
4230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4231 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4232 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
4233 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4234 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4235 return cc + length;
4236
4237 case OP_NOT:
4238 case OP_NOTI:
4239 detect_partial_match(common, backtracks);
4240 length = 1;
4241 #ifdef SUPPORT_UTF
4242 if (common->utf)
4243 {
4244 #ifdef COMPILE_PCRE8
4245 c = *cc;
4246 if (c < 128)
4247 {
4248 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4249 if (type == OP_NOT || !char_has_othercase(common, cc))
4250 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4251 else
4252 {
4253 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4254 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4255 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4256 }
4257 /* Skip the variable-length character. */
4258 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4259 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4260 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4261 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4262 JUMPHERE(jump[0]);
4263 return cc + 1;
4264 }
4265 else
4266 #endif /* COMPILE_PCRE8 */
4267 {
4268 GETCHARLEN(c, cc, length);
4269 read_char(common);
4270 }
4271 }
4272 else
4273 #endif /* SUPPORT_UTF */
4274 {
4275 read_char(common);
4276 c = *cc;
4277 }
4278
4279 if (type == OP_NOT || !char_has_othercase(common, cc))
4280 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4281 else
4282 {
4283 oc = char_othercase(common, c);
4284 bit = c ^ oc;
4285 if (ispowerof2(bit))
4286 {
4287 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4288 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4289 }
4290 else
4291 {
4292 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4293 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4294 }
4295 }
4296 return cc + length;
4297
4298 case OP_CLASS:
4299 case OP_NCLASS:
4300 detect_partial_match(common, backtracks);
4301 read_char(common);
4302 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4303 jump[0] = NULL;
4304 #ifdef COMPILE_PCRE8
4305 /* This check only affects 8 bit mode. In other modes, we
4306 always need to compare the value with 255. */
4307 if (common->utf)
4308 #endif /* COMPILE_PCRE8 */
4309 {
4310 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4311 if (type == OP_CLASS)
4312 {
4313 add_jump(compiler, backtracks, jump[0]);
4314 jump[0] = NULL;
4315 }
4316 }
4317 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4318 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4319 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4320 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4321 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4322 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4323 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4324 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4325 if (jump[0] != NULL)
4326 JUMPHERE(jump[0]);
4327 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4328 return cc + 32 / sizeof(pcre_uchar);
4329
4330 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4331 case OP_XCLASS:
4332 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
4333 return cc + GET(cc, 0) - 1;
4334 #endif
4335
4336 case OP_REVERSE:
4337 length = GET(cc, 0);
4338 if (length == 0)
4339 return cc + LINK_SIZE;
4340 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4341 #ifdef SUPPORT_UTF
4342 if (common->utf)
4343 {
4344 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4345 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4346 label = LABEL();
4347 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4348 skip_char_back(common);
4349 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4350 JUMPTO(SLJIT_C_NOT_ZERO, label);
4351 }
4352 else
4353 #endif
4354 {
4355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4356 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4357 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4358 }
4359 check_start_used_ptr(common);
4360 return cc + LINK_SIZE;
4361 }
4362 SLJIT_ASSERT_STOP();
4363 return cc;
4364 }
4365
4366 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4367 {
4368 /* This function consumes at least one input character. */
4369 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4370 DEFINE_COMPILER;
4371 pcre_uchar *ccbegin = cc;
4372 compare_context context;
4373 int size;
4374
4375 context.length = 0;
4376 do
4377 {
4378 if (cc >= ccend)
4379 break;
4380
4381 if (*cc == OP_CHAR)
4382 {
4383 size = 1;
4384 #ifdef SUPPORT_UTF
4385 if (common->utf && HAS_EXTRALEN(cc[1]))
4386 size += GET_EXTRALEN(cc[1]);
4387 #endif
4388 }
4389 else if (*cc == OP_CHARI)
4390 {
4391 size = 1;
4392 #ifdef SUPPORT_UTF
4393 if (common->utf)
4394 {
4395 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4396 size = 0;
4397 else if (HAS_EXTRALEN(cc[1]))
4398 size += GET_EXTRALEN(cc[1]);
4399 }
4400 else
4401 #endif
4402 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4403 size = 0;
4404 }
4405 else
4406 size = 0;
4407
4408 cc += 1 + size;
4409 context.length += IN_UCHARS(size);
4410 }
4411 while (size > 0 && context.length <= 128);
4412
4413 cc = ccbegin;
4414 if (context.length > 0)
4415 {
4416 /* We have a fixed-length byte sequence. */
4417 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4418 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4419
4420 context.sourcereg = -1;
4421 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4422 context.ucharptr = 0;
4423 #endif
4424 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4425 return cc;
4426 }
4427
4428 /* A non-fixed length character will be checked if length == 0. */
4429 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
4430 }
4431
4432 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4433 {
4434 DEFINE_COMPILER;
4435 int offset = GET2(cc, 1) << 1;
4436
4437 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4438 if (!common->jscript_compat)
4439 {
4440 if (backtracks == NULL)
4441 {
4442 /* OVECTOR(1) contains the "string begin - 1" constant. */
4443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4444 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4445 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4446 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4447 return JUMP(SLJIT_C_NOT_ZERO);
4448 }
4449 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4450 }
4451 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4452 }
4453
4454 /* Forward definitions. */
4455 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4456 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
4457
/* Shared body of the PUSH_BACKTRACK macros.

Allocates `size` bytes with sljit_alloc_memory(), zero-fills them, records
`ccstart` in the new record and links the record on top of parent->top. The
result is stored in the local variable `backtrack`. `bailout` is the statement
executed when the compiler reports an error after the allocation.

The expansion site must provide the locals `compiler`, `parent` and
`backtrack`. Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK_WITH(size, ccstart, bailout) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      bailout; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Pushes a new backtrack record; on failure the enclosing function returns
`error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  PUSH_BACKTRACK_WITH(size, ccstart, return error)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  PUSH_BACKTRACK_WITH(size, ccstart, return)

/* Views the current `backtrack` record as a more specific backtrack type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
4485
4486 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4487 {
4488 DEFINE_COMPILER;
4489 int offset = GET2(cc, 1) << 1;
4490 struct sljit_jump *jump = NULL;
4491 struct sljit_jump *partial;
4492 struct sljit_jump *nopartial;
4493
4494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4495 /* OVECTOR(1) contains the "string begin - 1" constant. */
4496 if (withchecks && !common->jscript_compat)
4497 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4498
4499 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4500 if (common->utf && *cc == OP_REFI)
4501 {
4502 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4503 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4504 if (withchecks)
4505 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4506
4507 /* Needed to save important temporary registers. */
4508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4509 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4511 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4512 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4513 if (common->mode == JIT_COMPILE)
4514 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4515 else
4516 {
4517 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4518 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4519 check_partial(common, FALSE);
4520 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4521 JUMPHERE(nopartial);
4522 }
4523 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4524 }
4525 else
4526 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4527 {
4528 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4529 if (withchecks)
4530 jump = JUMP(SLJIT_C_ZERO);
4531
4532 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4533 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4534 if (common->mode == JIT_COMPILE)
4535 add_jump(compiler, backtracks, partial);
4536
4537 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4538 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4539
4540 if (common->mode != JIT_COMPILE)
4541 {
4542 nopartial = JUMP(SLJIT_JUMP);
4543 JUMPHERE(partial);
4544 /* TMP2 -= STR_END - STR_PTR */
4545 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4546 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4547 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4548 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4549 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4550 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4551 JUMPHERE(partial);
4552 check_partial(common, FALSE);
4553 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4554 JUMPHERE(nopartial);
4555 }
4556 }
4557
4558 if (jump != NULL)
4559 {
4560 if (emptyfail)
4561 add_jump(compiler, backtracks, jump);
4562 else
4563 JUMPHERE(jump);
4564 }
4565 return cc + 1 + IMM2_SIZE;
4566 }
4567
4568 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4569 {
4570 DEFINE_COMPILER;
4571 backtrack_common *backtrack;
4572 pcre_uchar type;
4573 struct sljit_label *label;
4574 struct sljit_jump *zerolength;
4575 struct sljit_jump *jump = NULL;
4576 pcre_uchar *ccbegin = cc;
4577 int min = 0, max = 0;
4578 BOOL minimize;
4579
4580 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4581
4582 type = cc[1 + IMM2_SIZE];
4583 minimize = (type & 0x1) != 0;
4584 switch(type)
4585 {
4586 case OP_CRSTAR:
4587 case OP_CRMINSTAR:
4588 min = 0;
4589 max = 0;
4590 cc += 1 + IMM2_SIZE + 1;
4591 break;
4592 case OP_CRPLUS:
4593 case OP_CRMINPLUS:
4594 min = 1;
4595 max = 0;
4596 cc += 1 + IMM2_SIZE + 1;
4597 break;
4598 case OP_CRQUERY:
4599 case OP_CRMINQUERY:
4600 min = 0;
4601 max = 1;
4602 cc += 1 + IMM2_SIZE + 1;
4603 break;
4604 case OP_CRRANGE:
4605 case OP_CRMINRANGE:
4606 min = GET2(cc, 1 + IMM2_SIZE + 1);
4607 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4608 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4609 break;
4610 default:
4611 SLJIT_ASSERT_STOP();
4612 break;
4613 }
4614
4615 if (!minimize)
4616 {
4617 if (min == 0)
4618 {
4619 allocate_stack(common, 2);
4620 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4622 /* Temporary release of STR_PTR. */
4623 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4624 zerolength = compile_ref_checks(common, ccbegin, NULL);
4625 /* Restore if not zero length. */
4626 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4627 }
4628 else
4629 {
4630 allocate_stack(common, 1);
4631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4632 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4633 }
4634
4635 if (min > 1 || max > 1)
4636 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4637
4638 label = LABEL();
4639 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4640
4641 if (min > 1 || max > 1)
4642 {
4643 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4644 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4646 if (min > 1)
4647 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4648 if (max > 1)
4649 {
4650 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4651 allocate_stack(common, 1);
4652 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4653 JUMPTO(SLJIT_JUMP, label);
4654 JUMPHERE(jump);
4655 }
4656 }
4657
4658 if (max == 0)
4659 {
4660 /* Includes min > 1 case as well. */
4661 allocate_stack(common, 1);
4662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4663 JUMPTO(SLJIT_JUMP, label);
4664 }
4665
4666 JUMPHERE(zerolength);
4667 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4668
4669 decrease_call_count(common);
4670 return cc;
4671 }
4672
4673 allocate_stack(common, 2);
4674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4675 if (type != OP_CRMINSTAR)
4676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4677
4678 if (min == 0)
4679 {
4680 zerolength = compile_ref_checks(common, ccbegin, NULL);
4681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4682 jump = JUMP(SLJIT_JUMP);
4683 }
4684 else
4685 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4686
4687 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4688 if (max > 0)
4689 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4690
4691 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4693
4694 if (min > 1)
4695 {
4696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4697 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4699 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4700 }
4701 else if (max > 0)
4702 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4703
4704 if (jump != NULL)
4705 JUMPHERE(jump);
4706 JUMPHERE(zerolength);
4707
4708 decrease_call_count(common);
4709 return cc;
4710 }
4711
4712 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4713 {
4714 DEFINE_COMPILER;
4715 backtrack_common *backtrack;
4716 recurse_entry *entry = common->entries;
4717 recurse_entry *prev = NULL;
4718 int start = GET(cc, 1);
4719
4720 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4721 while (entry != NULL)
4722 {
4723 if (entry->start == start)
4724 break;
4725 prev = entry;
4726 entry = entry->next;
4727 }
4728
4729 if (entry == NULL)
4730 {
4731 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4732 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4733 return NULL;
4734 entry->next = NULL;
4735 entry->entry = NULL;
4736 entry->calls = NULL;
4737 entry->start = start;
4738
4739 if (prev != NULL)
4740 prev->next = entry;
4741 else
4742 common->entries = entry;
4743 }
4744
4745 if (common->has_set_som && common->mark_ptr != 0)
4746 {
4747 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4748 allocate_stack(common, 2);
4749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4752 }
4753 else if (common->has_set_som || common->mark_ptr != 0)
4754 {
4755 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4756 allocate_stack(common, 1);
4757 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4758 }
4759
4760 if (entry->entry == NULL)
4761 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4762 else
4763 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4764 /* Leave if the match is failed. */
4765 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4766 return cc + 1 + LINK_SIZE;
4767 }
4768
/* Emits the try path of an assertion: OP_ASSERT, OP_ASSERT_NOT, OP_ASSERTBACK
or OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

Arguments:
  common       compiler state; its leave/accept labels and jump lists are
               saved on entry and restored before every return
  cc           points to the assertion opcode (or to the preceding
               OP_BRAZERO / OP_BRAMINZERO)
  backtrack    assert_backtrack record; framesize, localptr and (for the
               BRAZERO forms) trypath are filled in here
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Every alternative of the assertion is compiled with compile_trypath() followed
by compile_backtrackpath().

Returns the code pointer after the final bracket of the assertion
(cc + 1 + LINK_SIZE), or NULL when the compiler reports an error. */
4769 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4770 {
4771 DEFINE_COMPILER;
4772 int framesize;
4773 int localptr;
4774 backtrack_common altbacktrack;
4775 pcre_uchar *ccbegin;
4776 pcre_uchar opcode;
4777 pcre_uchar bra = OP_BRA;
4778 jump_list *tmp = NULL;
4779 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4780 jump_list **found;
4781 /* Saving previous accept variables. */
4782 struct sljit_label *save_leavelabel = common->leavelabel;
4783 struct sljit_label *save_acceptlabel = common->acceptlabel;
4784 jump_list *save_leave = common->leave;
4785 jump_list *save_accept = common->accept;
4786 struct sljit_jump *jump;
4787 struct sljit_jump *brajump = NULL;
4788
/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped. */
4789 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4790 {
4791 SLJIT_ASSERT(!conditional);
4792 bra = *cc;
4793 cc++;
4794 }
4795 localptr = PRIV_DATA(cc);
4796 SLJIT_ASSERT(localptr != 0);
4797 framesize = get_framesize(common, cc, FALSE);
4798 backtrack->framesize = framesize;
4799 backtrack->localptr = localptr;
4800 opcode = *cc;
4801 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions a matching alternative jumps to the local list tmp
(resolved below at "Assert is successful"); for negative assertions it jumps
directly to the failure target. */
4802 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimit the alternative that is currently being compiled. */
4803 ccbegin = cc;
4804 cc += GET(cc, 1);
4805
4806 if (bra == OP_BRAMINZERO)
4807 {
4808 /* This is a braminzero backtrack path. */
4809 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4810 free_stack(common, 1);
4811 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4812 }
4813
/* With a negative framesize only STR_PTR is pushed and the old STACK_TOP is
kept in localptr. Otherwise framesize + 2 slots are allocated: STR_PTR, the
previous localptr value, and the frame written by init_frame(). */
4814 if (framesize < 0)
4815 {
4816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4817 allocate_stack(common, 1);
4818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4819 }
4820 else
4821 {
4822 allocate_stack(common, framesize + 2);
4823 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4824 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4827 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4828 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4829 }
4830
4831 memset(&altbacktrack, 0, sizeof(backtrack_common));
/* The assertion body gets its own leave list; the accept list is reset
separately for every alternative inside the loop. */
4832 common->leavelabel = NULL;
4833 common->leave = NULL;
/* One iteration per alternative of the assertion. */
4834 while (1)
4835 {
4836 common->acceptlabel = NULL;
4837 common->accept = NULL;
4838 altbacktrack.top = NULL;
4839 altbacktrack.topbacktracks = NULL;
4840
/* Second and later alternatives restart from the saved subject position. */
4841 if (*ccbegin == OP_ALT)
4842 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4843
4844 altbacktrack.cc = ccbegin;
4845 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
4846 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4847 {
/* Restore the caller's leave/accept state before bailing out. */
4848 common->leavelabel = save_leavelabel;
4849 common->acceptlabel = save_acceptlabel;
4850 common->leave = save_leave;
4851 common->accept = save_accept;
4852 return NULL;
4853 }
/* Accept jumps recorded while compiling this alternative land here. */
4854 common->acceptlabel = LABEL();
4855 if (common->accept != NULL)
4856 set_jumps(common->accept, common->acceptlabel);
4857
4858 /* Reset stack. */
4859 if (framesize < 0)
4860 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4861 else {
4862 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4863 {
4864 /* We don't need to keep the STR_PTR, only the previous localptr. */
4865 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4866 }
4867 else
4868 {
4869 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4870 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4871 }
4872 }
4873
/* For a negative assertion a matching alternative means the assertion fails:
the state is restored right here, before the jump to the failure target. */
4874 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4875 {
4876 /* We know that STR_PTR was stored on the top of the stack. */
4877 if (conditional)
4878 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4879 else if (bra == OP_BRAZERO)
4880 {
4881 if (framesize < 0)
4882 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4883 else
4884 {
4885 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4886 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4888 }
4889 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4891 }
4892 else if (framesize >= 0)
4893 {
4894 /* For OP_BRA and OP_BRAMINZERO. */
4895 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4896 }
4897 }
/* The alternative matched: leave through the "found" list. */
4898 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4899
4900 compile_backtrackpath(common, altbacktrack.top);
4901 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4902 {
/* Restore the caller's leave/accept state before bailing out. */
4903 common->leavelabel = save_leavelabel;
4904 common->acceptlabel = save_acceptlabel;
4905 common->leave = save_leave;
4906 common->accept = save_accept;
4907 return NULL;
4908 }
4909 set_jumps(altbacktrack.topbacktracks, LABEL());
4910
4911 if (*cc != OP_ALT)
4912 break;
4913
/* Advance to the next alternative. */
4914 ccbegin = cc;
4915 cc += GET(cc, 1);
4916 }
4917 /* None of them matched. */
4918 if (common->leave != NULL)
4919 set_jumps(common->leave, LABEL());
4920
4921 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4922 {
4923 /* Assert is failed. */
4924 if (conditional || bra == OP_BRAZERO)
4925 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4926
4927 if (framesize < 0)
4928 {
4929 /* The topmost item should be 0. */
4930 if (bra == OP_BRAZERO)
4931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4932 else
4933 free_stack(common, 1);
4934 }
4935 else
4936 {
4937 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4938 /* The topmost item should be 0. */
4939 if (bra == OP_BRAZERO)
4940 {
4941 free_stack(common, framesize + 1);
4942 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4943 }
4944 else
4945 free_stack(common, framesize + 2);
4946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4947 }
/* Without OP_BRAZERO the failure goes to the target list; with OP_BRAZERO
this jump is bound to backtrack->trypath further below instead. */
4948 jump = JUMP(SLJIT_JUMP);
4949 if (bra != OP_BRAZERO)
4950 add_jump(compiler, target, jump);
4951
4952 /* Assert is successful. */
4953 set_jumps(tmp, LABEL());
4954 if (framesize < 0)
4955 {
4956 /* We know that STR_PTR was stored on the top of the stack. */
4957 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4958 /* Keep the STR_PTR on the top of the stack. */
4959 if (bra == OP_BRAZERO)
4960 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4961 else if (bra == OP_BRAMINZERO)
4962 {
4963 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4965 }
4966 }
4967 else
4968 {
4969 if (bra == OP_BRA)
4970 {
4971 /* We don't need to keep the STR_PTR, only the previous localptr. */
4972 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4973 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4974 }
4975 else
4976 {
4977 /* We don't need to keep the STR_PTR, only the previous localptr. */
4978 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4979 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4981 }
4982 }
4983
4984 if (bra == OP_BRAZERO)
4985 {
/* Both the success path and the failure jump continue at trypath. */
4986 backtrack->trypath = LABEL();
4987 sljit_set_label(jump, backtrack->trypath);
4988 }
4989 else if (bra == OP_BRAMINZERO)
4990 {
4991 JUMPTO(SLJIT_JUMP, backtrack->trypath);
4992 JUMPHERE(brajump);
4993 if (framesize >= 0)
4994 {
4995 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4996 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4998 }
4999 set_jumps(backtrack->common.topbacktracks, LABEL());
5000 }
5001 }
5002 else
5003 {
5004 /* AssertNot is successful. */
5005 if (framesize < 0)
5006 {
5007 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5008 if (bra != OP_BRA)
5009 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5010 else
5011 free_stack(common, 1);
5012 }
5013 else
5014 {
5015 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5017 /* The topmost item should be 0. */
5018 if (bra != OP_BRA)
5019 {
5020 free_stack(common, framesize + 1);
5021 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5022 }
5023 else
5024 free_stack(common, framesize + 2);
5025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5026 }
5027
5028 if (bra == OP_BRAZERO)
5029 backtrack->trypath = LABEL();
5030 else if (bra == OP_BRAMINZERO)
5031 {
5032 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5033 JUMPHERE(brajump);
5034 }
5035
/* With a BRAZERO / BRAMINZERO prefix the failure jumps collected so far are
resolved here and the list is emptied. */
5036 if (bra != OP_BRA)
5037 {
5038 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5039 set_jumps(backtrack->common.topbacktracks, LABEL());
5040 backtrack->common.topbacktracks = NULL;
5041 }
5042 }
5043
/* Restore the caller's leave/accept state. */
5044 common->leavelabel = save_leavelabel;
5045 common->acceptlabel = save_acceptlabel;
5046 common->leave = save_leave;
5047 common->accept = save_accept;
5048 return cc + 1 + LINK_SIZE;
5049 }
5050
5051 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5052 {
5053 int condition = FALSE;
5054 pcre_uchar *slotA = name_table;
5055 pcre_uchar *slotB;
5056 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5057 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5058 sljit_w no_capture;
5059 int i;
5060
5061 locals += refno & 0xff;
5062 refno >>= 8;
5063 no_capture = locals[1];
5064
5065 for (i = 0; i < name_count; i++)
5066 {
5067 if (GET2(slotA, 0) == refno) break;
5068 slotA += name_entry_size;
5069 }
5070
5071 if (i < name_count)
5072 {
5073 /* Found a name for the number - there can be only one; duplicate names
5074 for different numbers are allowed, but not vice versa. First scan down
5075 for duplicates. */
5076
5077 slotB = slotA;
5078 while (slotB > name_table)
5079 {
5080 slotB -= name_entry_size;
5081 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5082 {
5083 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5084 if (condition) break;
5085 }
5086 else break;
5087 }
5088
5089 /* Scan up for duplicates */
5090 if (!condition)
5091 {
5092 slotB = slotA;
5093 for (i++; i < name_count; i++)
5094 {
5095 slotB += name_entry_size;
5096 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5097 {
5098 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5099 if (condition) break;
5100 }
5101 else break;
5102 }
5103 }
5104 }
5105 return condition;
5106 }
5107
5108 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5109 {
5110 int condition = FALSE;
5111 pcre_uchar *slotA = name_table;
5112 pcre_uchar *slotB;
5113 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5114 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5115 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5116 int i;
5117
5118 for (i = 0; i < name_count; i++)
5119 {
5120 if (GET2(slotA, 0) == recno) break;
5121 slotA += name_entry_size;
5122 }
5123
5124 if (i < name_count)
5125 {
5126 /* Found a name for the number - there can be only one; duplicate
5127 names for different numbers are allowed, but not vice versa. First
5128 scan down for duplicates. */
5129
5130 slotB = slotA;
5131 while (slotB > name_table)
5132 {
5133 slotB -= name_entry_size;
5134 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5135 {
5136 condition = GET2(slotB, 0) == group_num;
5137 if (condition) break;
5138 }
5139 else break;
5140 }
5141
5142 /* Scan up for duplicates */
5143 if (!condition)
5144 {
5145 slotB = slotA;
5146 for (i++; i < name_count; i++)
5147 {
5148 slotB += name_entry_size;
5149 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5150 {
5151 condition = GET2(slotB, 0) == group_num;
5152 if (condition) break;
5153 }
5154 else break;
5155 }
5156 }
5157 }
5158 return condition;
5159 }
5160
5161 /*
5162 Handling bracketed expressions is probably the most complex part.
5163
5164 Stack layout naming characters:
5165 S - Push the current STR_PTR
5166 0 - Push a 0 (NULL)
5167 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5168 before the next alternative. Not pushed if there are no alternatives.
5169 M - Any values pushed by the current alternative. Can be empty, or anything.
5170 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5171 L - Push the previous local (pointed by localptr) to the stack
5172 () - optional values stored on the stack
5173 ()* - optional, can be stored multiple times
5174
5175 The following list shows the regular expression templates, their PCRE byte codes
5176 and stack layout supported by pcre-sljit.
5177
5178 (?:) OP_BRA | OP_KET A M
5179 () OP_CBRA | OP_KET C M
5180 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5181 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5182 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5183 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5184 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5185 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5186 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5187 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5188 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5189 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5190 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5191 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5192 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5193 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5194 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5195 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5196 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5197 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5198 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5199 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5200
5201
5202 Stack layout naming characters:
5203 A - Push the alternative index (starting from 0) on the stack.
5204 Not pushed if there are no alternatives.
5205 M - Any values pushed by the current alternative. Can be empty, or anything.
5206
5207 The next list shows the possible content of a bracket:
5208 (|) OP_*BRA | OP_ALT ... M A
5209 (?()|) OP_*COND | OP_ALT M A
5210 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5211 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5212 Or nothing, if trace is unnecessary
5213 */
5214
5215 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5216 {
5217 DEFINE_COMPILER;
5218 backtrack_common *backtrack;
5219 pcre_uchar opcode;
5220 int localptr = 0;
5221 int offset = 0;
5222 int stacksize;
5223 pcre_uchar *ccbegin;
5224 pcre_uchar *trypath;
5225 pcre_uchar bra = OP_BRA;
5226 pcre_uchar ket;
5227 assert_backtrack *assert;
5228 BOOL has_alternatives;
5229 struct sljit_jump *jump;
5230 struct sljit_jump *skip;
5231 struct sljit_label *rmaxlabel = NULL;
5232 struct sljit_jump *braminzerojump = NULL;
5233
5234 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5235
5236 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5237 {
5238 bra = *cc;
5239 cc++;
5240 opcode = *cc;
5241 }
5242
5243 opcode = *cc;
5244 ccbegin = cc;
5245 trypath = ccbegin + 1 + LINK_SIZE;
5246
5247 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5248 {
5249 /* Drop this bracket_backtrack. */
5250 parent->top = backtrack->prev;
5251 return bracketend(cc);
5252 }
5253
5254 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5255 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5256 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5257 cc += GET(cc, 1);
5258
5259 has_alternatives = *cc == OP_ALT;
5260 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5261 {
5262 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
5263 if (*trypath == OP_NRREF)
5264 {
5265 stacksize = GET2(trypath, 1);
5266 if (common->currententry == NULL || stacksize == RREF_ANY)
5267 has_alternatives = FALSE;
5268 else if (common->currententry->start == 0)
5269 has_alternatives = stacksize != 0;
5270 else
5271 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5272 }
5273 }
5274
5275 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5276 opcode = OP_SCOND;
5277 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5278 opcode = OP_ONCE;
5279
5280 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5281 {
5282 /* Capturing brackets has a pre-allocated space. */
5283 offset = GET2(ccbegin, 1 + LINK_SIZE);
5284 localptr = OVECTOR_PRIV(offset);
5285 offset <<= 1;
5286 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5287 trypath += IMM2_SIZE;
5288 }
5289 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5290 {
5291 /* Other brackets simply allocate the next entry. */
5292 localptr = PRIV_DATA(ccbegin);
5293 SLJIT_ASSERT(localptr != 0);
5294 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5295 if (opcode == OP_ONCE)
5296 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5297 }
5298
5299 /* Instructions before the first alternative. */
5300 stacksize = 0;
5301 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5302 stacksize++;
5303 if (bra == OP_BRAZERO)
5304 stacksize++;
5305
5306 if (stacksize > 0)
5307 allocate_stack(common, stacksize);
5308
5309 stacksize = 0;
5310 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5311 {
5312 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5313 stacksize++;
5314 }
5315
5316 if (bra == OP_BRAZERO)
5317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5318
5319 if (bra == OP_BRAMINZERO)
5320 {
5321 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5322 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5323 if (ket != OP_KETRMIN)
5324 {
5325 free_stack(common, 1);
5326 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5327 }
5328 else
5329 {
5330 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5331 {
5332 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5334 /* Nothing stored during the first run. */
5335 skip = JUMP(SLJIT_JUMP);
5336 JUMPHERE(jump);
5337 /* Checking zero-length iteration. */
5338 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5339 {
5340 /* When we come from outside, localptr contains the previous STR_PTR. */
5341 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5342 }
5343 else
5344 {
5345 /* Except when the whole stack frame must be saved. */
5346 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5347 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5348 }
5349 JUMPHERE(skip);
5350 }
5351 else
5352 {
5353 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5354 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5355 JUMPHERE(jump);
5356 }
5357 }
5358 }
5359
5360 if (ket == OP_KETRMIN)
5361 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5362
5363 if (ket == OP_KETRMAX)
5364 {
5365 rmaxlabel = LABEL();
5366 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5367 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
5368 }
5369
5370 /* Handling capturing brackets and alternatives. */
5371 if (opcode == OP_ONCE)
5372 {
5373 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5374 {
5375 /* Neither capturing brackets nor recursions are not found in the block. */
5376 if (ket == OP_KETRMIN)
5377 {
5378 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5379 allocate_stack(common, 2);
5380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5381 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5382 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5383 }
5384 else if (ket == OP_KETRMAX || has_alternatives)
5385 {
5386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5387 allocate_stack(common, 1);
5388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5389 }
5390 else
5391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5392 }
5393 else
5394 {
5395 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5396 {
5397 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5399 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5401 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5402 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5403 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5404 }
5405 else
5406 {
5407 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5409 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5412 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5413 }
5414 }
5415 }
5416 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5417 {
5418 /* Saving the previous values. */
5419 allocate_stack(common, 3);
5420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5421 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5425 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5427 }
5428 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5429 {
5430 /* Saving the previous value. */
5431 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5432 allocate_stack(common, 1);
5433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5435 }
5436 else if (has_alternatives)
5437 {
5438 /* Pushing the starting string pointer. */
5439 allocate_stack(common, 1);
5440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5441 }
5442
5443 /* Generating code for the first alternative. */
5444 if (opcode == OP_COND || opcode == OP_SCOND)
5445 {
5446 if (*trypath == OP_CREF)
5447 {
5448 SLJIT_ASSERT(has_alternatives);
5449 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5450 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5451 trypath += 1 + IMM2_SIZE;
5452 }
5453 else if (*trypath == OP_NCREF)
5454 {
5455 SLJIT_ASSERT(has_alternatives);
5456 stacksize = GET2(trypath, 1);
5457 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5458
5459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5461 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5462 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5463 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5464 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5465 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5466 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5467 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5468
5469 JUMPHERE(jump);
5470 trypath += 1 + IMM2_SIZE;
5471 }
5472 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
5473 {
5474 /* Never has other case. */
5475 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5476
5477 stacksize = GET2(trypath, 1);
5478 if (common->currententry == NULL)
5479 stacksize = 0;
5480 else if (stacksize == RREF_ANY)
5481 stacksize = 1;
5482 else if (common->currententry->start == 0)
5483 stacksize = stacksize == 0;
5484 else
5485 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5486
5487 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
5488 {
5489 SLJIT_ASSERT(!has_alternatives);
5490 if (stacksize != 0)
5491 trypath += 1 + IMM2_SIZE;
5492 else
5493 {
5494 if (*cc == OP_ALT)
5495 {
5496 trypath = cc + 1 + LINK_SIZE;
5497 cc += GET(cc, 1);
5498 }
5499 else
5500 trypath = cc;
5501 }
5502 }
5503 else
5504 {
5505 SLJIT_ASSERT(has_alternatives);
5506
5507 stacksize = GET2(trypath, 1);
5508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5510 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5512 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5513 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5514 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5515 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5516 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5517 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5518 trypath += 1 + IMM2_SIZE;
5519 }
5520 }
5521 else
5522 {
5523 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
5524 /* Similar code as PUSH_BACKTRACK macro. */
5525 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5526 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5527 return NULL;
5528 memset(assert, 0, sizeof(assert_backtrack));
5529 assert->common.cc = trypath;
5530 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5531 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
5532 }
5533 }
5534
5535 compile_trypath(common, trypath, cc, backtrack);
5536 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5537 return NULL;
5538
5539 if (opcode == OP_ONCE)
5540 {
5541 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5542 {
5543 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5544 /* TMP2 which is set here used by OP_KETRMAX below. */
5545 if (ket == OP_KETRMAX)
5546 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5547 else if (ket == OP_KETRMIN)
5548 {
5549 /* Move the STR_PTR to the localptr. */
5550 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5551 }
5552 }
5553 else
5554 {
5555 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5556 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5557 if (ket == OP_KETRMAX)
5558 {
5559 /* TMP2 which is set here used by OP_KETRMAX below. */
5560 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5561 }
5562 }
5563 }
5564
5565 stacksize = 0;
5566 if (ket != OP_KET || bra != OP_BRA)
5567 stacksize++;
5568 if (has_alternatives && opcode != OP_ONCE)
5569 stacksize++;
5570
5571 if (stacksize > 0)
5572 allocate_stack(common, stacksize);
5573
5574 stacksize = 0;
5575 if (ket != OP_KET)
5576 {
5577 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5578 stacksize++;
5579 }
5580 else if (bra != OP_BRA)
5581 {
5582 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5583 stacksize++;
5584 }
5585
5586 if (has_alternatives)
5587 {
5588 if (opcode != OP_ONCE)
5589 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5590 if (ket != OP_KETRMAX)
5591 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5592 }
5593
5594 /* Must be after the trypath label. */
5595 if (offset != 0)
5596 {
5597 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5598 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5599 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5600 }
5601
5602 if (ket == OP_KETRMAX)
5603 {
5604 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5605 {
5606 if (has_alternatives)
5607 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5608 /* Checking zero-length iteration. */
5609 if (opcode != OP_ONCE)
5610 {
5611 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5612 /* Drop STR_PTR for greedy plus quantifier. */
5613 if (bra != OP_BRAZERO)
5614 free_stack(common, 1);
5615 }
5616 else
5617 /* TMP2 must contain the starting STR_PTR. */
5618 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5619 }
5620 else
5621 JUMPTO(SLJIT_JUMP, rmaxlabel);
5622 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5623 }
5624
5625 if (bra == OP_BRAZERO)
5626 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5627
5628 if (bra == OP_BRAMINZERO)
5629 {
5630 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5631 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5632 if (braminzerojump != NULL)
5633 {
5634 JUMPHERE(braminzerojump);
5635 /* We need to release the end pointer to perform the
5636 backtrack for the zero-length iteration. When
5637 framesize is < 0, OP_ONCE will do the release itself. */
5638 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5639 {
5640 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5641 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5642 }
5643 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5644 free_stack(common, 1);
5645 }
5646 /* Continue to the normal backtrack. */
5647 }
5648
5649 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5650 decrease_call_count(common);
5651
5652 /* Skip the other alternatives. */
5653 while (*cc == OP_ALT)
5654 cc += GET(cc, 1);
5655 cc += 1 + LINK_SIZE;
5656 return cc;
5657 }
5658
5659 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5660 {
5661 DEFINE_COMPILER;
5662 backtrack_common *backtrack;
5663 pcre_uchar opcode;
5664 int localptr;
5665 int cbraprivptr = 0;
5666 int framesize;
5667 int stacksize;
5668 int offset = 0;
5669 BOOL zero = FALSE;
5670 pcre_uchar *ccbegin = NULL;
5671 int stack;
5672 struct sljit_label *loop = NULL;
5673 struct jump_list *emptymatch = NULL;
5674
5675 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5676 if (*cc == OP_BRAPOSZERO)
5677 {
5678 zero = TRUE;
5679 cc++;
5680 }
5681
5682 opcode = *cc;
5683 localptr = PRIV_DATA(cc);
5684 SLJIT_ASSERT(localptr != 0);
5685 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5686 switch(opcode)
5687 {
5688 case OP_BRAPOS:
5689 case OP_SBRAPOS:
5690 ccbegin = cc + 1 + LINK_SIZE;
5691 break;
5692
5693 case OP_CBRAPOS:
5694 case OP_SCBRAPOS:
5695 offset = GET2(cc, 1 + LINK_SIZE);
5696 cbraprivptr = OVECTOR_PRIV(offset);
5697 offset <<= 1;
5698 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5699 break;
5700
5701 default:
5702 SLJIT_ASSERT_STOP();
5703 break;
5704 }
5705
5706 framesize = get_framesize(common, cc, FALSE);
5707 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5708 if (framesize < 0)
5709 {
5710 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5711 if (!zero)
5712 stacksize++;
5713 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5714 allocate_stack(common, stacksize);
5715 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5716
5717 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5718 {
5719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5720 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5723 }
5724 else
5725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5726
5727 if (!zero)
5728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5729 }
5730 else
5731 {
5732 stacksize = framesize + 1;
5733 if (!zero)
5734 stacksize++;
5735 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5736 stacksize++;
5737 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5738 allocate_stack(common, stacksize);
5739
5740 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5741 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5743 stack = 0;
5744 if (!zero)
5745 {
5746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5747 stack++;
5748 }
5749 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5750 {
5751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5752 stack++;
5753 }
5754 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5755 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5756 }
5757
5758 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5760
5761 loop = LABEL();
5762 while (*cc != OP_KETRPOS)
5763 {
5764 backtrack->top = NULL;
5765 backtrack->topbacktracks = NULL;
5766 cc += GET(cc, 1);
5767
5768 compile_trypath(common, ccbegin, cc, backtrack);
5769 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5770 return NULL;
5771
5772 if (framesize < 0)
5773 {
5774 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5775
5776 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5777 {
5778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5779 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5782 }
5783 else
5784 {
5785 if (opcode == OP_SBRAPOS)
5786 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5787 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5788 }
5789
5790 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5791 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5792
5793 if (!zero)
5794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5795 }
5796 else
5797 {
5798 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5799 {
5800 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5802 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5803 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5805 }
5806 else
5807 {
5808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5809 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5810 if (opcode == OP_SBRAPOS)
5811 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5812 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5813 }
5814
5815 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5816 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5817
5818 if (!zero)
5819 {
5820 if (framesize < 0)
5821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5822 else
5823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5824 }
5825 }
5826 JUMPTO(SLJIT_JUMP, loop);
5827 flush_stubs(common);
5828
5829 compile_backtrackpath(common, backtrack->top);
5830 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5831 return NULL;
5832 set_jumps(backtrack->topbacktracks, LABEL());
5833
5834 if (framesize < 0)
5835 {
5836 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5837 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5838 else
5839 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5840 }
5841 else
5842 {
5843 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5844 {
5845 /* Last alternative. */
5846 if (*cc == OP_KETRPOS)
5847 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5848 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5849 }
5850 else
5851 {
5852 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5853 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5854 }
5855 }
5856
5857 if (*cc == OP_KETRPOS)
5858 break;
5859 ccbegin = cc + 1 + LINK_SIZE;
5860 }
5861
5862 backtrack->topbacktracks = NULL;
5863 if (!zero)
5864 {
5865 if (framesize < 0)
5866 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5867 else /* TMP2 is set to [localptr] above. */
5868 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5869 }
5870
5871 /* None of them matched. */
5872 set_jumps(emptymatch, LABEL());
5873 decrease_call_count(common);
5874 return cc + 1 + LINK_SIZE;
5875 }
5876
5877 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5878 {
5879 int class_len;
5880
5881 *opcode = *cc;
5882 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5883 {
5884 cc++;
5885 *type = OP_CHAR;
5886 }
5887 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5888 {
5889 cc++;
5890 *type = OP_CHARI;
5891 *opcode -= OP_STARI - OP_STAR;
5892 }
5893 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5894 {
5895 cc++;
5896 *type = OP_NOT;
5897 *opcode -= OP_NOTSTAR - OP_STAR;
5898 }
5899 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5900 {
5901 cc++;
5902 *type = OP_NOTI;
5903 *opcode -= OP_NOTSTARI - OP_STAR;
5904 }
5905 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5906 {
5907 cc++;
5908 *opcode -= OP_TYPESTAR - OP_STAR;
5909 *type = 0;
5910 }
5911 else
5912 {
5913 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5914 *type = *opcode;
5915 cc++;
5916 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5917 *opcode = cc[class_len - 1];
5918 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5919 {
5920 *opcode -= OP_CRSTAR - OP_STAR;
5921 if (end != NULL)
5922 *end = cc + class_len;
5923 }
5924 else
5925 {
5926 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5927 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5928 *arg2 = GET2(cc, class_len);
5929
5930 if (*arg2 == 0)
5931 {
5932 SLJIT_ASSERT(*arg1 != 0);
5933 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5934 }
5935 if (*arg1 == *arg2)
5936 *opcode = OP_EXACT;
5937
5938 if (end != NULL)
5939 *end = cc + class_len + 2 * IMM2_SIZE;
5940 }
5941 return cc;
5942 }
5943
5944 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5945 {
5946 *arg1 = GET2(cc, 0);
5947 cc += IMM2_SIZE;
5948 }
5949
5950 if (*type == 0)
5951 {
5952 *type = *cc;
5953 if (end != NULL)
5954 *end = next_opcode(common, cc);
5955 cc++;
5956 return cc;
5957 }
5958
5959 if (end != NULL)
5960 {
5961 *end = cc + 1;
5962 #ifdef SUPPORT_UTF
5963 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5964 #endif
5965 }
5966 return cc;
5967 }
5968
/* Emits the try-path (forward matching) code for a repeated single item; the
   item kinds that are handled are the ones listed in the first switch below.

   Steps: push an iterator_backtrack record onto parent (PUSH_BACKTRACK),
   decode the repeat with get_iterator_parameters(), then emit the loop that
   belongs to the decoded opcode.

   Arguments:
     common   compiler state
     cc       points to the iterator opcode
     parent   backtrack record that receives the new record

   Returns the `end` pointer filled in by get_iterator_parameters().
   NOTE(review): PUSH_BACKTRACK is defined outside this function; its third
   argument (NULL) is presumably the value returned on failure - confirm. */
5969 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5970 {
5971 DEFINE_COMPILER;
5972 backtrack_common *backtrack;
5973 pcre_uchar opcode;
5974 pcre_uchar type;
5975 int arg1 = -1, arg2 = -1;
5976 pcre_uchar* end;
5977 jump_list *nomatch = NULL;
5978 struct sljit_jump *jump = NULL;
5979 struct sljit_label *label;
/* Two state words are used by most cases. When PRIV_DATA(cc) is zero they
   are STACK(0)/STACK(1) relative to STACK_TOP; otherwise they are the two
   consecutive words starting at localptr, addressed via SLJIT_LOCALS_REG. */
5980 int localptr = PRIV_DATA(cc);
5981 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
5982 int offset0 = (localptr == 0) ? STACK(0) : localptr;
5983 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
5984 int tmp_base, tmp_offset;
5985
5986 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5987
5988 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5989
/* Select the scratch location (tmp_base/tmp_offset) used by the OP_EXACT and
   possessive cases: the TMP3 register for the first group of item types, the
   POSSESSIVE0 local slot for the second group.
   NOTE(review): presumably the matchers of the second group may clobber
   TMP3 - confirm against compile_char1_trypath. */
5990 switch (type)
5991 {
5992 case OP_NOT_DIGIT:
5993 case OP_DIGIT:
5994 case OP_NOT_WHITESPACE:
5995 case OP_WHITESPACE:
5996 case OP_NOT_WORDCHAR:
5997 case OP_WORDCHAR:
5998 case OP_ANY:
5999 case OP_ALLANY:
6000 case OP_ANYBYTE:
6001 case OP_ANYNL:
6002 case OP_NOT_HSPACE:
6003 case OP_HSPACE:
6004 case OP_NOT_VSPACE:
6005 case OP_VSPACE:
6006 case OP_CHAR:
6007 case OP_CHARI:
6008 case OP_NOT:
6009 case OP_NOTI:
6010 case OP_CLASS:
6011 case OP_NCLASS:
6012 tmp_base = TMP3;
6013 tmp_offset = 0;
6014 break;
6015
6016 default:
6017 SLJIT_ASSERT_STOP();
6018 /* Fall through. */
6019
6020 case OP_EXTUNI:
6021 case OP_XCLASS:
6022 case OP_NOTPROP:
6023 case OP_PROP:
6024 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6025 tmp_offset = POSSESSIVE0;
6026 break;
6027 }
6028
6029 switch(opcode)
6030 {
/* Greedy repeats. As used below, arg1 is the upper bound and arg2 (only
   meaningful for OP_CRRANGE) is the lower bound. */
6031 case OP_STAR:
6032 case OP_PLUS:
6033 case OP_UPTO:
6034 case OP_CRRANGE:
6035 if (type == OP_ANYNL || type == OP_EXTUNI)
6036 {
/* For these two item types one STR_PTR is pushed on the stack after every
   successful iteration. A zero word is pushed first; the backtrack path
   (compile_iterator_backtrackpath) pops entries until it reads that zero. */
6037 SLJIT_ASSERT(localptr == 0);
6038 if (opcode == OP_STAR || opcode == OP_UPTO)
6039 {
6040 allocate_stack(common, 2);
6041 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6043 }
6044 else
6045 {
6046 allocate_stack(common, 1);
6047 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6048 }
6049
/* POSSESSIVE0 serves as the iteration counter for the bounded forms. */
6050 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6052
6053 label = LABEL();
6054 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6055 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6056 {
/* Increment the counter. Below the lower bound: loop again without pushing
   the position. At the upper bound: leave the loop (the counter store and the
   push below are skipped). */
6057 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6058 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6059 if (opcode == OP_CRRANGE && arg2 > 0)
6060 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6061 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6062 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6063 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6064 }
6065
6066 /* We cannot use TMP3 because of this allocate_stack. */
6067 allocate_stack(common, 1);
6068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6069 JUMPTO(SLJIT_JUMP, label);
6070 if (jump != NULL)
6071 JUMPHERE(jump);
6072 }
6073 else
6074 {
/* All other item types: offset0 holds the furthest position matched so far;
   offset1 holds the starting position (OP_STAR / OP_PLUS, selected by
   opcode <= OP_PLUS) or a counter that starts at 1 (OP_UPTO / OP_CRRANGE). */
6075 if (opcode == OP_PLUS)
6076 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6077 if (localptr == 0)
6078 allocate_stack(common, 2);
6079 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6080 if (opcode <= OP_PLUS)
6081 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6082 else
6083 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6084 label = LABEL();
6085 compile_char1_trypath(common, type, cc, &nomatch);
6086 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6087 if (opcode <= OP_PLUS)
6088 JUMPTO(SLJIT_JUMP, label);
6089 else if (opcode == OP_CRRANGE && arg1 == 0)
6090 {
/* OP_CRRANGE with arg1 == 0: the counter is incremented but never compared
   with an upper bound. */
6091 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6092 JUMPTO(SLJIT_JUMP, label);
6093 }
6094 else
6095 {
6096 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6097 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6098 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6099 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6100 }
/* A failed item match lands here. For OP_CRRANGE a counter below arg2 + 1
   goes to the backtrack list; otherwise STR_PTR is reloaded from offset0. */
6101 set_jumps(nomatch, LABEL());
6102 if (opcode == OP_CRRANGE)
6103 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6104 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6105 }
6106 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6107 break;
6108
/* Minimizing star/plus: only the mandatory item of OP_MINPLUS is matched
   here; STR_PTR is saved in offset0 and the trypath label is recorded. */
6109 case OP_MINSTAR:
6110 case OP_MINPLUS:
6111 if (opcode == OP_MINPLUS)
6112 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6113 if (localptr == 0)
6114 allocate_stack(common, 1);
6115 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6116 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6117 break;
6118
/* Minimizing bounded repeats: save STR_PTR and a counter starting at 1.
   OP_CRMINRANGE additionally emits an unconditional jump that is added to
   the backtrack list before the trypath label is recorded. */
6119 case OP_MINUPTO:
6120 case OP_CRMINRANGE:
6121 if (localptr == 0)
6122 allocate_stack(common, 2);
6123 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6124 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6125 if (opcode == OP_CRMINRANGE)
6126 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6127 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6128 break;
6129
/* Optional item: STR_PTR is saved first; OP_QUERY then tries the item
   immediately, OP_MINQUERY does not. */
6130 case OP_QUERY:
6131 case OP_MINQUERY:
6132 if (localptr == 0)
6133 allocate_stack(common, 1);
6134 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6135 if (opcode == OP_QUERY)
6136 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6137 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6138 break;
6139
/* Fixed count: the scratch location counts down from arg1 to zero; any
   failed item match goes to the backtrack list. */
6140 case OP_EXACT:
6141 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6142 label = LABEL();
6143 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6144 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6145 JUMPTO(SLJIT_C_NOT_ZERO, label);
6146 break;
6147
/* Possessive repeats: the scratch location keeps the last position that
   matched and STR_PTR is restored from it when an item fails. OP_POSUPTO
   counts down from arg1 in POSSESSIVE1. No trypath label is recorded in
   these cases. */
6148 case OP_POSSTAR:
6149 case OP_POSPLUS:
6150 case OP_POSUPTO:
6151 if (opcode == OP_POSPLUS)
6152 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6153 if (opcode == OP_POSUPTO)
6154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6155 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6156 label = LABEL();
6157 compile_char1_trypath(common, type, cc, &nomatch);
6158 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6159 if (opcode != OP_POSUPTO)
6160 JUMPTO(SLJIT_JUMP, label);
6161 else
6162 {
6163 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6164 JUMPTO(SLJIT_C_NOT_ZERO, label);
6165 }
6166 set_jumps(nomatch, LABEL());
6167 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6168 break;
6169
/* Possessive optional item: try once; the scratch location holds the
   position before the item and is overwritten only if the item matched. */
6170 case OP_POSQUERY:
6171 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6172 compile_char1_trypath(common, type, cc, &nomatch);
6173 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6174 set_jumps(nomatch, LABEL());
6175 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6176 break;
6177
6178 default:
6179 SLJIT_ASSERT_STOP();
6180 break;
6181 }
6182
6183 decrease_call_count(common);
6184 return end;
6185 }
6186
6187 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6188 {
6189 DEFINE_COMPILER;
6190 backtrack_common *backtrack;
6191
6192 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6193
6194 if (*cc == OP_FAIL)
6195 {
6196 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6197 return cc + 1;
6198 }
6199
6200 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6201 {
6202 /* No need to check notempty conditions. */
6203 if (common->acceptlabel == NULL)
6204 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6205 else
6206 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6207 return cc + 1;
6208 }
6209
6210 if (common->acceptlabel == NULL)
6211 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6212 else
6213 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6214 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6215 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6216 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6217 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6218 if (common->acceptlabel == NULL)
6219 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6220 else
6221 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6222 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6223 if (common->acceptlabel == NULL)
6224 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6225 else
6226 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6227 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6228 return cc + 1;
6229 }
6230
6231 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
6232 {
6233 DEFINE_COMPILER;
6234 int offset = GET2(cc, 1);
6235
6236 /* Data will be discarded anyway... */
6237 if (common->currententry != NULL)
6238 return cc + 1 + IMM2_SIZE;
6239
6240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6241 offset <<= 1;
6242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6244 return cc + 1 + IMM2_SIZE;
6245 }
6246
/* Emits try-path code for the opcodes in the range [cc, ccend).

   Each iteration dispatches on *cc to the matching compile_*_trypath helper
   (or emits the code inline) and continues from the pointer that the helper
   returns. The function returns early when a helper returns NULL or when an
   unhandled opcode is met.

   Arguments:
     common   compiler state
     cc       first opcode to compile
     ccend    end of the opcode range
     parent   backtrack record under which new records are pushed

   The recurring expression
     parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks
   chooses the jump list that receives match failures: the nextbacktracks
   list of the most recently pushed record if one exists, otherwise the
   parent's own topbacktracks list. */
6247 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6248 {
6249 DEFINE_COMPILER;
6250 backtrack_common *backtrack;
6251
6252 while (cc < ccend)
6253 {
6254 switch(*cc)
6255 {
/* Opcodes compiled directly by compile_char1_trypath; no backtrack record
   is pushed for them here. */
6256 case OP_SOD:
6257 case OP_SOM:
6258 case OP_NOT_WORD_BOUNDARY:
6259 case OP_WORD_BOUNDARY:
6260 case OP_NOT_DIGIT:
6261 case OP_DIGIT:
6262 case OP_NOT_WHITESPACE:
6263 case OP_WHITESPACE:
6264 case OP_NOT_WORDCHAR:
6265 case OP_WORDCHAR:
6266 case OP_ANY:
6267 case OP_ALLANY:
6268 case OP_ANYBYTE:
6269 case OP_NOTPROP:
6270 case OP_PROP:
6271 case OP_ANYNL:
6272 case OP_NOT_HSPACE:
6273 case OP_HSPACE:
6274 case OP_NOT_VSPACE:
6275 case OP_VSPACE:
6276 case OP_EXTUNI:
6277 case OP_EODN:
6278 case OP_EOD:
6279 case OP_CIRC:
6280 case OP_CIRCM:
6281 case OP_DOLL:
6282 case OP_DOLLM:
6283 case OP_NOT:
6284 case OP_NOTI:
6285 case OP_REVERSE:
6286 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6287 break;
6288
/* Save the previous OVECTOR(0) value on the stack, then store STR_PTR
   into OVECTOR(0). */
6289 case OP_SET_SOM:
6290 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6292 allocate_stack(common, 1);
6293 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6295 cc++;
6296 break;
6297
/* Literal characters: compile_charn_trypath is used only when
   common->mode is JIT_COMPILE; other modes go through
   compile_char1_trypath. */
6298 case OP_CHAR:
6299 case OP_CHARI:
6300 if (common->mode == JIT_COMPILE)
6301 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6302 else
6303 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6304 break;
6305
/* Every single-item repeat opcode is handled by compile_iterator_trypath. */
6306 case OP_STAR:
6307 case OP_MINSTAR:
6308 case OP_PLUS:
6309 case OP_MINPLUS:
6310 case OP_QUERY:
6311 case OP_MINQUERY:
6312 case OP_UPTO:
6313 case OP_MINUPTO:
6314 case OP_EXACT:
6315 case OP_POSSTAR:
6316 case OP_POSPLUS:
6317 case OP_POSQUERY:
6318 case OP_POSUPTO:
6319 case OP_STARI:
6320 case OP_MINSTARI:
6321 case OP_PLUSI:
6322 case OP_MINPLUSI:
6323 case OP_QUERYI:
6324 case OP_MINQUERYI:
6325 case OP_UPTOI:
6326 case OP_MINUPTOI:
6327 case OP_EXACTI:
6328 case OP_POSSTARI:
6329 case OP_POSPLUSI:
6330 case OP_POSQUERYI:
6331 case OP_POSUPTOI:
6332 case OP_NOTSTAR:
6333 case OP_NOTMINSTAR:
6334 case OP_NOTPLUS:
6335 case OP_NOTMINPLUS:
6336 case OP_NOTQUERY:
6337 case OP_NOTMINQUERY:
6338 case OP_NOTUPTO:
6339 case OP_NOTMINUPTO:
6340 case OP_NOTEXACT:
6341 case OP_NOTPOSSTAR:
6342 case OP_NOTPOSPLUS:
6343 case OP_NOTPOSQUERY:
6344 case OP_NOTPOSUPTO:
6345 case OP_NOTSTARI:
6346 case OP_NOTMINSTARI:
6347 case OP_NOTPLUSI:
6348 case OP_NOTMINPLUSI:
6349 case OP_NOTQUERYI:
6350 case OP_NOTMINQUERYI:
6351 case OP_NOTUPTOI:
6352 case OP_NOTMINUPTOI:
6353 case OP_NOTEXACTI:
6354 case OP_NOTPOSSTARI:
6355 case OP_NOTPOSPLUSI:
6356 case OP_NOTPOSQUERYI:
6357 case OP_NOTPOSUPTOI:
6358 case OP_TYPESTAR:
6359 case OP_TYPEMINSTAR:
6360 case OP_TYPEPLUS:
6361 case OP_TYPEMINPLUS:
6362 case OP_TYPEQUERY:
6363 case OP_TYPEMINQUERY:
6364 case OP_TYPEUPTO:
6365 case OP_TYPEMINUPTO:
6366 case OP_TYPEEXACT:
6367 case OP_TYPEPOSSTAR:
6368 case OP_TYPEPOSPLUS:
6369 case OP_TYPEPOSQUERY:
6370 case OP_TYPEPOSUPTO:
6371 cc = compile_iterator_trypath(common, cc, parent);
6372 break;
6373
/* A class is treated as an iterator only if the code unit that follows its
   32-byte data is a repeat opcode in OP_CRSTAR..OP_CRMINRANGE. */
6374 case OP_CLASS:
6375 case OP_NCLASS:
6376 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6377 cc = compile_iterator_trypath(common, cc, parent);
6378 else
6379 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6380 break;
6381
6382 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* Same test for an extended class; the opcode that follows it is located
   with the value read by GET(cc, 1). */
6383 case OP_XCLASS:
6384 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6385 cc = compile_iterator_trypath(common, cc, parent);
6386 else
6387 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6388 break;
6389 #endif
6390
/* Back references, with or without a following repeat opcode. */
6391 case OP_REF:
6392 case OP_REFI:
6393 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6394 cc = compile_ref_iterator_trypath(common, cc, parent);
6395 else
6396 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6397 break;
6398
6399 case OP_RECURSE:
6400 cc = compile_recurse_trypath(common, cc, parent);
6401 break;
6402
6403 case OP_ASSERT:
6404 case OP_ASSERT_NOT:
6405 case OP_ASSERTBACK:
6406 case OP_ASSERTBACK_NOT:
6407 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6408 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6409 break;
6410
/* OP_BRAMINZERO: cc is advanced past the whole following group with
   bracketend(), so no code for the group body is emitted here. One stack
   word (STR_PTR) is saved, or two (0 and STR_PTR) when the group ends with
   OP_KETRMIN. */
6411 case OP_BRAMINZERO:
6412 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6413 cc = bracketend(cc + 1);
6414 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6415 {
6416 allocate_stack(common, 1);
6417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6418 }
6419 else
6420 {
6421 allocate_stack(common, 2);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6424 }
6425 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
6426 if (cc[1] > OP_ASSERTBACK_NOT)
6427 decrease_call_count(common);
6428 break;
6429
6430 case OP_ONCE:
6431 case OP_ONCE_NC:
6432 case OP_BRA:
6433 case OP_CBRA:
6434 case OP_COND:
6435 case OP_SBRA:
6436 case OP_SCBRA:
6437 case OP_SCOND:
6438 cc = compile_bracket_trypath(common, cc, parent);
6439 break;
6440
/* OP_BRAZERO: the opcode after it decides between the bracket compiler
   (opcode above OP_ASSERTBACK_NOT) and the assertion compiler. */
6441 case OP_BRAZERO:
6442 if (cc[1] > OP_ASSERTBACK_NOT)
6443 cc = compile_bracket_trypath(common, cc, parent);
6444 else
6445 {
6446 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6447 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6448 }
6449 break;
6450
6451 case OP_BRAPOS:
6452 case OP_CBRAPOS:
6453 case OP_SBRAPOS:
6454 case OP_SCBRAPOS:
6455 case OP_BRAPOSZERO:
6456 cc = compile_bracketpos_trypath(common, cc, parent);
6457 break;
6458
/* OP_MARK: the previous value of the mark_ptr local is saved on the stack;
   the address cc + 2 is then stored both in that local and in
   jit_arguments.mark_ptr. cc advances by 1 + 2 + cc[1]. */
6459 case OP_MARK:
6460 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6461 SLJIT_ASSERT(common->mark_ptr != 0);
6462 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6463 allocate_stack(common, 1);
6464 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6465 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6468 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6469 cc += 1 + 2 + cc[1];
6470 break;
6471
/* OP_COMMIT emits no try-path code; only a backtrack record is pushed. */
6472 case OP_COMMIT:
6473 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6474 cc += 1;
6475 break;
6476
6477 case OP_FAIL:
6478 case OP_ACCEPT:
6479 case OP_ASSERT_ACCEPT:
6480 cc = compile_fail_accept_trypath(common, cc, parent);
6481 break;
6482
6483 case OP_CLOSE:
6484 cc = compile_close_trypath(common, cc);
6485 break;
6486
/* OP_SKIPZERO: the following group is skipped entirely. */
6487 case OP_SKIPZERO:
6488 cc = bracketend(cc + 1);
6489 break;
6490
6491 default:
6492 SLJIT_ASSERT_STOP();
6493 return;
6494 }
/* A NULL result from a helper stops compilation of this range. */
6495 if (cc == NULL)
6496 return;
6497 }
6498 SLJIT_ASSERT(cc == ccend);
6499 }
6500
/* The try-path helper macros are not needed past this point. */
6501 #undef PUSH_BACKTRACK
6502 #undef PUSH_BACKTRACK_NOVALUE
6503 #undef BACKTRACK_AS
6504
/* Calls compile_backtrackpath() for `current` and returns from the enclosing
   function (without a value) if the sljit compiler reports an error. It
   expects `common` and `compiler` to be in scope at the point of use. */
6505 #define COMPILE_BACKTRACKPATH(current) \
6506 do \
6507 { \
6508 compile_backtrackpath(common, (current)); \
6509 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6510 return; \
6511 } \
6512 while (0)
6513
/* Casts the variable named `current` to a pointer to the given type. */
6514 #define CURRENT_AS(type) ((type *)current)
6515
/* Emits the backtrack-path code for a single-item iterator whose try path
   was generated by compile_iterator_trypath().

   The iterator is decoded again from current->cc with
   get_iterator_parameters() (no end pointer is requested). The state words
   base/offset0/offset1 are computed exactly as in the try path: STACK(0) and
   STACK(1) when PRIV_DATA(cc) is zero, otherwise two consecutive local words
   starting at localptr.

   Arguments:
     common    compiler state
     current   backtrack record of the iterator, accessed as
               iterator_backtrack for its trypath label */
6516 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6517 {
6518 DEFINE_COMPILER;
6519 pcre_uchar *cc = current->cc;
6520 pcre_uchar opcode;
6521 pcre_uchar type;
6522 int arg1 = -1, arg2 = -1;
6523 struct sljit_label *label = NULL;
6524 struct sljit_jump *jump = NULL;
6525 jump_list *jumplist = NULL;
6526 int localptr = PRIV_DATA(cc);
6527 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6528 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6529 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6530
6531 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6532
6533 switch(opcode)
6534 {
6535 case OP_STAR:
6536 case OP_PLUS:
6537 case OP_UPTO:
6538 case OP_CRRANGE:
6539 if (type == OP_ANYNL || type == OP_EXTUNI)
6540 {
/* Pop one saved STR_PTR; a non-zero value resumes at the trypath label,
   a zero value falls through. */
6541 SLJIT_ASSERT(localptr == 0);
6542 set_jumps(current->topbacktracks, LABEL());
6543 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6544 free_stack(common, 1);
6545 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6546 }
6547 else
6548 {
/* Give back one item per backtrack: reload STR_PTR from offset0, stop when
   the limit kept in offset1 is reached (start position for OP_STAR/OP_PLUS,
   counter <= arg2 + 1 for the bounded forms, with arg2 forced to 0 for
   OP_UPTO), otherwise step back one character and resume at trypath. */
6549 if (opcode == OP_UPTO)
6550 arg2 = 0;
6551 if (opcode <= OP_PLUS)
6552 {
6553 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6554 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
6555 }
6556 else
6557 {
6558 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6559 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6560 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
6561 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
6562 }
6563 skip_char_back(common);
6564 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6565 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
/* The try-path failure jumps are bound before the stack words are released
   for OP_CRRANGE and after the release for OP_PLUS. */
6566 if (opcode == OP_CRRANGE)
6567 set_jumps(current->topbacktracks, LABEL());
6568 JUMPHERE(jump);
6569 if (localptr == 0)
6570 free_stack(common, 2);
6571 if (opcode == OP_PLUS)
6572 set_jumps(current->topbacktracks, LABEL());
6573 }
6574 break;
6575
/* Minimizing star/plus: restore STR_PTR, try one more item; on success save
   the new position and resume at trypath, on failure release the state. */
6576 case OP_MINSTAR:
6577 case OP_MINPLUS:
6578 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6579 compile_char1_trypath(common, type, cc, &jumplist);
6580 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6581 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6582 set_jumps(jumplist, LABEL());
6583 if (localptr == 0)
6584 free_stack(common, 1);
6585 if (opcode == OP_MINPLUS)
6586 set_jumps(current->topbacktracks, LABEL());
6587 break;
6588
/* Minimizing bounded repeats. For OP_CRMINRANGE the jump emitted by the try
   path is bound to `label`, so the first item is matched here. After each
   matched item the counter in offset1 is incremented; while it is below
   arg2 + 1 the code loops back to `label`. Matching then resumes at trypath,
   unconditionally for OP_CRMINRANGE with arg1 == 0 and otherwise only while
   the counter is below arg1 + 2. */
6589 case OP_MINUPTO:
6590 case OP_CRMINRANGE:
6591 if (opcode == OP_CRMINRANGE)
6592 {
6593 label = LABEL();
6594 set_jumps(current->topbacktracks, label);
6595 }
6596 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6597 compile_char1_trypath(common, type, cc, &jumplist);
6598
6599 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6600 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6601 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6602 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6603
6604 if (opcode == OP_CRMINRANGE)
6605 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
6606
6607 if (opcode == OP_CRMINRANGE && arg1 == 0)
6608 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6609 else
6610 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
6611
6612 set_jumps(jumplist, LABEL());
6613 if (localptr == 0)
6614 free_stack(common, 2);
6615 break;
6616
/* OP_QUERY: the saved position in offset0 is consumed once (it is replaced
   by zero). A non-zero saved position resumes at trypath; the same sequence
   is emitted a second time as the target of the try-path failure jumps. */
6617 case OP_QUERY:
6618 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6619 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6620 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6621 jump = JUMP(SLJIT_JUMP);
6622 set_jumps(current->topbacktracks, LABEL());
6623 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6624 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6625 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6626 JUMPHERE(jump);
6627 if (localptr == 0)
6628 free_stack(common, 1);
6629 break;
6630
/* OP_MINQUERY: consume the saved position; if it was non-zero, try the item
   now and resume at trypath when it matches. */
6631 case OP_MINQUERY:
6632 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6633 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6634 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6635 compile_char1_trypath(common, type, cc, &jumplist);
6636 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6637 set_jumps(jumplist, LABEL());
6638 JUMPHERE(jump);
6639 if (localptr == 0)
6640 free_stack(common, 1);
6641 break;
6642
/* These two only need a landing point for their try-path failure jumps. */
6643 case OP_EXACT:
6644 case OP_POSPLUS:
6645 set_jumps(current->topbacktracks, LABEL());
6646 break;
6647
/* Nothing is emitted for the remaining possessive forms. */
6648 case OP_POSSTAR:
6649 case OP_POSQUERY:
6650 case OP_POSUPTO:
6651 break;
6652
6653 default:
6654 SLJIT_ASSERT_STOP();
6655 break;
6656 }
6657 }
6658
6659 static void compile_ref_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6660 {
6661 DEFINE_COMPILER;
6662 pcre_uchar *cc = current->cc;
6663 pcre_uchar type;
6664
6665 type = cc[1 + IMM2_SIZE];
6666 if ((type & 0x1) == 0)
6667 {
6668 set_jumps(current->topbacktracks, LABEL());
6669 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6670 free_stack(common, 1);
6671 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6672 return;
6673 }
6674
6675 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6676 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6677 set_jumps(current->topbacktracks, LABEL());
6678 free_stack(common, 2);
6679 }
6680
6681 static void compile_recurse_backtrackpath(compiler_common *common, struct backtrack_common *current)
6682 {
6683 DEFINE_COMPILER;
6684
6685 set_jumps(current->topbacktracks, LABEL());
6686
6687 if (common->has_set_som && common->mark_ptr != 0)
6688 {
6689 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6691 free_stack(common, 2);
6692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
6693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6694 }
6695 else if (common->has_set_som || common->mark_ptr != 0)
6696 {
6697 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6698 free_stack(common, 1);
6699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
6700 }
6701 }
6702
/* Emits the backtrack path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
   OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.

   Arguments:
     common    compiler state
     current   backtrack record of the assertion, accessed as
               assert_backtrack for its framesize, localptr and trypath
               fields

   When OP_BRAZERO precedes the assertion, STACK(0) holds a value that is
   loaded into STR_PTR; a non-zero value leads back to the trypath label
   after the stack word has been overwritten with zero. */
6703 static void compile_assert_backtrackpath(compiler_common *common, struct backtrack_common *current)
6704 {
6705 DEFINE_COMPILER;
6706 pcre_uchar *cc = current->cc;
6707 pcre_uchar bra = OP_BRA;
6708 struct sljit_jump *brajump = NULL;
6709
6710 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
/* Step over a leading OP_BRAZERO so that *cc is the assertion opcode. */
6711 if (*cc == OP_BRAZERO)
6712 {
6713 bra = *cc;
6714 cc++;
6715 }
6716
6717 if (bra == OP_BRAZERO)
6718 {
6719 SLJIT_ASSERT(current->topbacktracks == NULL);
6720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6721 }
6722
/* Negative framesize: there is nothing to revert. Bind the failure jumps
   and, for OP_BRAZERO, retry through trypath while the saved value is
   non-zero. */
6723 if (CURRENT_AS(assert_backtrack)->framesize < 0)
6724 {
6725 set_jumps(current->topbacktracks, LABEL());
6726
6727 if (bra == OP_BRAZERO)
6728 {
6729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6730 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6731 free_stack(common, 1);
6732 }
6733 return;
6734 }
6735
6736 if (bra == OP_BRAZERO)
6737 {
/* Negative assertions take the same retry sequence and return. For positive
   ones the stack word is released and a zero STR_PTR skips the frame revert
   below (brajump is bound at the end of the function). */
6738 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
6739 {
6740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6741 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->trypath);
6742 free_stack(common, 1);
6743 return;
6744 }
6745 free_stack(common, 1);
6746 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6747 }
6748
6749 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
6750 {
/* Positive assertion with a frame: reload STACK_TOP from the assertion's
   local slot, call the revertframes routine, then refresh the local slot
   from the stack word at framesize * sizeof(sljit_w). */
6751 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr);
6752 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_w));
6754
6755 set_jumps(current->topbacktracks, LABEL());
6756 }
6757 else
6758 set_jumps(current->topbacktracks, LABEL());
6759
6760 if (bra == OP_BRAZERO)
6761 {
6762 /* We know there is enough place on the stack. */
/* STACK_TOP is adjusted directly by one word (allocate_stack is not called)
   and the word is set to zero before the jump back to trypath. */
6763 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6764 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6765 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->trypath);
6766 JUMPHERE(brajump);
6767 }
6768 }
6769
6770 static void compile_bracket_backtrackpath(compiler_common *common, struct backtrack_common *current)
6771 {
6772 DEFINE_COMPILER;
6773 int opcode;
6774 int offset = 0;
6775 int localptr = CURRENT_AS(bracket_backtrack)->localptr;
6776 int stacksize;
6777 int count;
6778 pcre_uchar *cc = current->cc;
6779 pcre_uchar *ccbegin;
6780 pcre_uchar *ccprev;
6781 jump_list *jumplist = NULL;
6782 jump_list *jumplistitem = NULL;
6783 pcre_uchar bra = OP_BRA;
6784 pcre_uchar ket;
6785 assert_backtrack *assert;
6786 BOOL has_alternatives;
6787 struct sljit_jump *brazero = NULL;
6788 struct sljit_jump *once = NULL;
6789 struct sljit_jump *cond = NULL;
6790