/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 953 - (show annotations)
Thu Mar 29 17:41:57 2012 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 231723 byte(s)
Fixed a bug for backward assertions with REVERSE 0 in the JIT compiler
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* Configuration macros for the sljit sources. They are defined before
   sljit/sljitLir.c is included further down. Allocation is forwarded to
   PUBL(malloc) / PUBL(free); SLJIT_VERBOSE and SLJIT_DEBUG are set to 0. */
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size. */
/* NOTE(review): the unit of the two sizes below is not visible in this part
   of the file - presumably bytes; confirm at the use sites. */
69 #define LOCAL_SPACE_SIZE 32768
70
71 #define STACK_GROWTH_RATE 8192
72
73 /* Enable to check that the allocation could destroy temporaries. */
/* DESTROY_REGISTERS is only defined when SLJIT_DEBUG is defined and non-zero. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
/* Argument block for the compiled code: the jit_function type declared
   further down takes a pointer to this structure. Pointer members are
   grouped first, all other members follow.
   NOTE(review): the meaning of the individual members is not visible in this
   part of the file; the names suggest they mirror the match-time arguments
   and option flags - confirm against the code that fills the structure. */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
/* Holds one code pointer and one size per JIT compile mode (both arrays have
   JIT_NUMBER_OF_COMPILE_MODES entries), plus a callback and a user data
   pointer.
   NOTE(review): presumably userdata is what gets passed to callback - confirm
   at the call site. */
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Node of a singly linked list of sljit jumps. set_jumps() walks such a list
   and binds every jump to one label; add_jump() pushes a new node onto the
   front of a list. */
173 typedef struct jump_list {
174 struct sljit_jump *jump;
175 struct jump_list *next;
176 } jump_list;
177
/* Kinds of stubs; stack_alloc is the only kind defined here. */
178 enum stub_types { stack_alloc };
179
/* Node of a singly linked list of stubs (see compiler_common.stubs).
   NOTE(review): start / leave look like the jump into the stub and the label
   to continue at afterwards, and data a type specific argument - confirm
   against the code that creates and flushes the stubs. */
180 typedef struct stub_list {
181 enum stub_types type;
182 int data;
183 struct sljit_jump *start;
184 struct sljit_label *leave;
185 struct stub_list *next;
186 } stub_list;
187
/* Function pointer type using the SLJIT_CALL convention: takes a pointer to
   a jit_arguments block and returns an int.
   NOTE(review): presumably the type of the generated matcher - confirm. */
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_hotpath, and contains
192 the arguments for compile_fallbackpath. Must be the first member
193 of its descendants. */
194 typedef struct fallback_common {
195 /* Concatenation stack. */
196 struct fallback_common *prev;
197 jump_list *nextfallbacks;
198 /* Internal stack (for component operators). */
199 struct fallback_common *top;
200 jump_list *topfallbacks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } fallback_common;
204
/* Fallback record for assertions. Starts with a fallback_common member, as
   all descendants of fallback_common do. */
205 typedef struct assert_fallback {
206 fallback_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *hotpath;
214 } assert_fallback;
215
/* Fallback record for brackets. Starts with a fallback_common member, as
   all descendants of fallback_common do. Only one member of the union u is
   meaningful at a time; which one depends on the bracket opcode as noted on
   each member. */
216 typedef struct bracket_fallback {
217 fallback_common common;
218 /* Where to continue if an alternative is successfully matched. */
219 struct sljit_label *althotpath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivehotpath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerohotpath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_fallback *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_fallback;
235
/* Fallback record for the *POS bracket opcodes (judging by the name).
   Starts with a fallback_common member, as all descendants of
   fallback_common do. */
236 typedef struct bracketpos_fallback {
237 fallback_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_fallback;
245
/* Fallback record carrying one extra hot path label.
   NOTE(review): by its name this belongs to OP_BRAMINZERO - confirm at the
   place where it is allocated. */
246 typedef struct braminzero_fallback {
247 fallback_common common;
248 struct sljit_label *hotpath;
249 } braminzero_fallback;
250
/* Fallback record for iterators: fallback_common plus the label of the next
   iteration. */
251 typedef struct iterator_fallback {
252 fallback_common common;
253 /* Next iteration. */
254 struct sljit_label *hotpath;
255 } iterator_fallback;
256
/* Node of a singly linked list describing one recursion target (see
   compiler_common.entries / currententry). */
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is not created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
/* Fallback record with no data beyond the mandatory fallback_common member. */
267 typedef struct recurse_fallback {
268 fallback_common common;
269 } recurse_fallback;
270
/* State shared by the code generator functions; it is passed around as the
   "common" argument. Several macros in this file (DEFINE_COMPILER,
   OVECTOR_START, OVECTOR_PRIV, PRIV_DATA) expand to expressions that read
   members of a variable named common of this type.
   The int members in the first group hold offsets of words in the locals
   area. For recursive_head and mark_ptr the value 0 means "not allocated
   yet" (see get_localspace). */
271 typedef struct compiler_common {
272 struct sljit_compiler *compiler;
273 pcre_uchar *start;
274
275 /* Local stack area size and variable pointers. */
276 int localsize;
277 int *localptrs;
278 int cbraptr;
279 /* OVector starting point. Must be divisible by 2. */
280 int ovector_start;
281 /* Last known position of the requested byte. */
282 int req_char_ptr;
283 /* Head of the last recursion. */
284 int recursive_head;
285 /* First inspected character for partial matching. */
286 int start_used_ptr;
287 /* Starting pointer for partial soft matches. */
288 int hit_start;
289 /* End pointer of the first line. */
290 int first_line_end;
291 /* Points to the marked string. */
292 int mark_ptr;
293
294 /* Other */
295 const pcre_uint8 *fcc;
296 sljit_w lcc;
297 int mode;
298 int nltype;
299 int newline;
300 int bsr_nltype;
301 int endonly;
302 BOOL has_set_som;
303 sljit_w ctypes;
304 sljit_uw name_table;
305 sljit_w name_count;
306 sljit_w name_entry_size;
307
308 /* Labels and jump lists. */
309 struct sljit_label *partialmatchlabel;
310 struct sljit_label *leavelabel;
311 struct sljit_label *acceptlabel;
312 stub_list *stubs;
313 recurse_entry *entries;
314 recurse_entry *currententry;
315 jump_list *partialmatch;
316 jump_list *leave;
317 jump_list *accept;
318 jump_list *calllimit;
319 jump_list *stackalloc;
320 jump_list *revertframes;
321 jump_list *wordboundary;
322 jump_list *anynewline;
323 jump_list *hspace;
324 jump_list *vspace;
325 jump_list *casefulcmp;
326 jump_list *caselesscmp;
327 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_UTF,
   SUPPORT_UCP and COMPILE_PCRE8 settings. */
328 #ifdef SUPPORT_UTF
329 BOOL utf;
330 #ifdef SUPPORT_UCP
331 BOOL use_ucp;
332 #endif
333 jump_list *utfreadchar;
334 #ifdef COMPILE_PCRE8
335 jump_list *utfreadtype8;
336 #endif
337 #endif /* SUPPORT_UTF */
338 #ifdef SUPPORT_UCP
339 jump_list *getucd;
340 #endif
341 } compiler_common;
342
343 /* For byte_sequence_compare. */
/* ucharptr and the two unions c and oc exist only when SLJIT_UNALIGNED is
   defined and non-zero. Each union overlays an int, an unsigned short and an
   array of character units: 4 entries in 8-bit builds (COMPILE_PCRE8), 2
   entries in 16-bit builds (COMPILE_PCRE16). */
344
345 typedef struct compare_context {
346 int length;
347 int sourcereg;
348 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
349 int ucharptr;
350 union {
351 sljit_i asint;
352 sljit_uh asushort;
353 #ifdef COMPILE_PCRE8
354 sljit_ub asbyte;
355 sljit_ub asuchars[4];
356 #else
357 #ifdef COMPILE_PCRE16
358 sljit_uh asuchars[2];
359 #endif
360 #endif
361 } c;
362 union {
363 sljit_i asint;
364 sljit_uh asushort;
365 #ifdef COMPILE_PCRE8
366 sljit_ub asbyte;
367 sljit_ub asuchars[4];
368 #else
369 #ifdef COMPILE_PCRE16
370 sljit_uh asuchars[2];
371 #endif
372 #endif
373 } oc;
374 #endif
375 } compare_context;
376
/* Tag words written into saved stack frames by init_frame():
   frame_end         - written as the last word of a frame;
   frame_setstrbegin - followed by the saved value of OVECTOR(0);
   frame_setmark     - followed by the saved value of the mark_ptr local.
   Capturing bracket records use the (positive) OVECTOR offset as their tag
   instead. */
377 enum {
378 frame_end = 0,
379 frame_setstrbegin = -1,
380 frame_setmark = -2
381 };
382
383 /* Undefine sljit macros. */
/* CMP is redefined by the shortcut macros further down. */
384 #undef CMP
385
386 /* Used for accessing the elements of the stack. */
/* STACK(i) is a byte offset: STACK(0) is -sizeof(sljit_w) and every further
   index moves one more word in the negative direction. */
387 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
388
/* Symbolic names for the sljit registers used by the code generator. */
389 #define TMP1 SLJIT_TEMPORARY_REG1
390 #define TMP2 SLJIT_TEMPORARY_REG3
391 #define TMP3 SLJIT_TEMPORARY_EREG2
392 #define STR_PTR SLJIT_SAVED_REG1
393 #define STR_END SLJIT_SAVED_REG2
394 #define STACK_TOP SLJIT_TEMPORARY_REG2
395 #define STACK_LIMIT SLJIT_SAVED_REG3
396 #define ARGUMENTS SLJIT_SAVED_EREG1
397 #define CALL_COUNT SLJIT_SAVED_EREG2
398 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
399
400 /* Locals layout. */
/* All values below are byte offsets, expressed in multiples of
   sizeof(sljit_w). */
401 /* These two locals can be used by the current opcode. */
402 #define LOCALS0 (0 * sizeof(sljit_w))
403 #define LOCALS1 (1 * sizeof(sljit_w))
404 /* Two local variables for possessive quantifiers (char1 cannot use them). */
405 #define POSSESSIVE0 (2 * sizeof(sljit_w))
406 #define POSSESSIVE1 (3 * sizeof(sljit_w))
407 /* Max limit of recursions. */
408 #define CALL_LIMIT (4 * sizeof(sljit_w))
409 /* The output vector is stored on the stack, and contains pointers
410 to characters. The vector data is divided into two groups: the first
411 group contains the start / end character pointers, and the second is
412 the start pointers when the end of the capturing group has not yet reached. */
/* The four macros below require a variable named common (a compiler_common
   pointer) in scope. PRIV_DATA(cc) indexes common->localptrs by the distance
   of cc from common->start. */
413 #define OVECTOR_START (common->ovector_start)
414 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
415 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
416 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
417
/* Selects the sljit move opcodes used for one pcre_uchar: the _UB variants
   in 8-bit builds (COMPILE_PCRE8), the _UH variants in 16-bit builds
   (COMPILE_PCRE16). Any other configuration is a compile error. */
418 #ifdef COMPILE_PCRE8
419 #define MOV_UCHAR SLJIT_MOV_UB
420 #define MOVU_UCHAR SLJIT_MOVU_UB
421 #else
422 #ifdef COMPILE_PCRE16
423 #define MOV_UCHAR SLJIT_MOV_UH
424 #define MOVU_UCHAR SLJIT_MOVU_UH
425 #else
426 #error Unsupported compiling mode
427 #endif
428 #endif
429
430 /* Shortcuts. */
/* DEFINE_COMPILER declares a local named compiler from common->compiler.
   Every other shortcut below passes that local to the corresponding sljit_*
   function, so DEFINE_COMPILER must appear in any function using them. */
431 #define DEFINE_COMPILER \
432 struct sljit_compiler *compiler = common->compiler
433 #define OP1(op, dst, dstw, src, srcw) \
434 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
435 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
436 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
437 #define LABEL() \
438 sljit_emit_label(compiler)
439 #define JUMP(type) \
440 sljit_emit_jump(compiler, (type))
/* JUMPTO emits a jump and binds it to an existing label; JUMPHERE binds an
   existing jump to a label emitted at the current position. */
441 #define JUMPTO(type, label) \
442 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
443 #define JUMPHERE(jump) \
444 sljit_set_label((jump), sljit_emit_label(compiler))
445 #define CMP(type, src1, src1w, src2, src2w) \
446 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
447 #define CMPTO(type, src1, src1w, src2, src2w, label) \
448 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
449 #define COND_VALUE(op, dst, dstw, type) \
450 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
451
452 static pcre_uchar* bracketend(pcre_uchar* cc)
453 {
454 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
455 do cc += GET(cc, 1); while (*cc == OP_ALT);
456 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
457 cc += 1 + LINK_SIZE;
458 return cc;
459 }
460
461 /* Functions which might need modification for all new supported opcodes:
462 next_opcode
463 get_localspace
464 set_localptrs
465 get_framesize
466 init_frame
467 get_localsize
468 copy_locals
469 compile_hotpath
470 compile_fallbackpath
471 */
472
473 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
474 {
475 SLJIT_UNUSED_ARG(common);
476 switch(*cc)
477 {
478 case OP_SOD:
479 case OP_SOM:
480 case OP_SET_SOM:
481 case OP_NOT_WORD_BOUNDARY:
482 case OP_WORD_BOUNDARY:
483 case OP_NOT_DIGIT:
484 case OP_DIGIT:
485 case OP_NOT_WHITESPACE:
486 case OP_WHITESPACE:
487 case OP_NOT_WORDCHAR:
488 case OP_WORDCHAR:
489 case OP_ANY:
490 case OP_ALLANY:
491 case OP_ANYNL:
492 case OP_NOT_HSPACE:
493 case OP_HSPACE:
494 case OP_NOT_VSPACE:
495 case OP_VSPACE:
496 case OP_EXTUNI:
497 case OP_EODN:
498 case OP_EOD:
499 case OP_CIRC:
500 case OP_CIRCM:
501 case OP_DOLL:
502 case OP_DOLLM:
503 case OP_TYPESTAR:
504 case OP_TYPEMINSTAR:
505 case OP_TYPEPLUS:
506 case OP_TYPEMINPLUS:
507 case OP_TYPEQUERY:
508 case OP_TYPEMINQUERY:
509 case OP_TYPEPOSSTAR:
510 case OP_TYPEPOSPLUS:
511 case OP_TYPEPOSQUERY:
512 case OP_CRSTAR:
513 case OP_CRMINSTAR:
514 case OP_CRPLUS:
515 case OP_CRMINPLUS:
516 case OP_CRQUERY:
517 case OP_CRMINQUERY:
518 case OP_DEF:
519 case OP_BRAZERO:
520 case OP_BRAMINZERO:
521 case OP_BRAPOSZERO:
522 case OP_COMMIT:
523 case OP_FAIL:
524 case OP_ACCEPT:
525 case OP_ASSERT_ACCEPT:
526 case OP_SKIPZERO:
527 return cc + 1;
528
529 case OP_ANYBYTE:
530 #ifdef SUPPORT_UTF
531 if (common->utf) return NULL;
532 #endif
533 return cc + 1;
534
535 case OP_CHAR:
536 case OP_CHARI:
537 case OP_NOT:
538 case OP_NOTI:
539 case OP_STAR:
540 case OP_MINSTAR:
541 case OP_PLUS:
542 case OP_MINPLUS:
543 case OP_QUERY:
544 case OP_MINQUERY:
545 case OP_POSSTAR:
546 case OP_POSPLUS:
547 case OP_POSQUERY:
548 case OP_STARI:
549 case OP_MINSTARI:
550 case OP_PLUSI:
551 case OP_MINPLUSI:
552 case OP_QUERYI:
553 case OP_MINQUERYI:
554 case OP_POSSTARI:
555 case OP_POSPLUSI:
556 case OP_POSQUERYI:
557 case OP_NOTSTAR:
558 case OP_NOTMINSTAR:
559 case OP_NOTPLUS:
560 case OP_NOTMINPLUS:
561 case OP_NOTQUERY:
562 case OP_NOTMINQUERY:
563 case OP_NOTPOSSTAR:
564 case OP_NOTPOSPLUS:
565 case OP_NOTPOSQUERY:
566 case OP_NOTSTARI:
567 case OP_NOTMINSTARI:
568 case OP_NOTPLUSI:
569 case OP_NOTMINPLUSI:
570 case OP_NOTQUERYI:
571 case OP_NOTMINQUERYI:
572 case OP_NOTPOSSTARI:
573 case OP_NOTPOSPLUSI:
574 case OP_NOTPOSQUERYI:
575 cc += 2;
576 #ifdef SUPPORT_UTF
577 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
578 #endif
579 return cc;
580
581 case OP_UPTO:
582 case OP_MINUPTO:
583 case OP_EXACT:
584 case OP_POSUPTO:
585 case OP_UPTOI:
586 case OP_MINUPTOI:
587 case OP_EXACTI:
588 case OP_POSUPTOI:
589 case OP_NOTUPTO:
590 case OP_NOTMINUPTO:
591 case OP_NOTEXACT:
592 case OP_NOTPOSUPTO:
593 case OP_NOTUPTOI:
594 case OP_NOTMINUPTOI:
595 case OP_NOTEXACTI:
596 case OP_NOTPOSUPTOI:
597 cc += 2 + IMM2_SIZE;
598 #ifdef SUPPORT_UTF
599 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
600 #endif
601 return cc;
602
603 case OP_NOTPROP:
604 case OP_PROP:
605 return cc + 1 + 2;
606
607 case OP_TYPEUPTO:
608 case OP_TYPEMINUPTO:
609 case OP_TYPEEXACT:
610 case OP_TYPEPOSUPTO:
611 case OP_REF:
612 case OP_REFI:
613 case OP_CREF:
614 case OP_NCREF:
615 case OP_RREF:
616 case OP_NRREF:
617 case OP_CLOSE:
618 cc += 1 + IMM2_SIZE;
619 return cc;
620
621 case OP_CRRANGE:
622 case OP_CRMINRANGE:
623 return cc + 1 + 2 * IMM2_SIZE;
624
625 case OP_CLASS:
626 case OP_NCLASS:
627 return cc + 1 + 32 / sizeof(pcre_uchar);
628
629 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
630 case OP_XCLASS:
631 return cc + GET(cc, 1);
632 #endif
633
634 case OP_RECURSE:
635 case OP_ASSERT:
636 case OP_ASSERT_NOT:
637 case OP_ASSERTBACK:
638 case OP_ASSERTBACK_NOT:
639 case OP_REVERSE:
640 case OP_ONCE:
641 case OP_ONCE_NC:
642 case OP_BRA:
643 case OP_BRAPOS:
644 case OP_COND:
645 case OP_SBRA:
646 case OP_SBRAPOS:
647 case OP_SCOND:
648 case OP_ALT:
649 case OP_KET:
650 case OP_KETRMAX:
651 case OP_KETRMIN:
652 case OP_KETRPOS:
653 return cc + 1 + LINK_SIZE;
654
655 case OP_CBRA:
656 case OP_CBRAPOS:
657 case OP_SCBRA:
658 case OP_SCBRAPOS:
659 return cc + 1 + LINK_SIZE + IMM2_SIZE;
660
661 case OP_MARK:
662 return cc + 1 + 2 + cc[1];
663
664 default:
665 return NULL;
666 }
667 }
668
669 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
670 {
671 int localspace = 0;
672 pcre_uchar *alternative;
673 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
674 while (cc < ccend)
675 {
676 switch(*cc)
677 {
678 case OP_SET_SOM:
679 common->has_set_som = TRUE;
680 cc += 1;
681 break;
682
683 case OP_ASSERT:
684 case OP_ASSERT_NOT:
685 case OP_ASSERTBACK:
686 case OP_ASSERTBACK_NOT:
687 case OP_ONCE:
688 case OP_ONCE_NC:
689 case OP_BRAPOS:
690 case OP_SBRA:
691 case OP_SBRAPOS:
692 case OP_SCOND:
693 localspace += sizeof(sljit_w);
694 cc += 1 + LINK_SIZE;
695 break;
696
697 case OP_CBRAPOS:
698 case OP_SCBRAPOS:
699 localspace += sizeof(sljit_w);
700 cc += 1 + LINK_SIZE + IMM2_SIZE;
701 break;
702
703 case OP_COND:
704 /* Might be a hidden SCOND. */
705 alternative = cc + GET(cc, 1);
706 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
707 localspace += sizeof(sljit_w);
708 cc += 1 + LINK_SIZE;
709 break;
710
711 case OP_RECURSE:
712 /* Set its value only once. */
713 if (common->recursive_head == 0)
714 {
715 common->recursive_head = common->ovector_start;
716 common->ovector_start += sizeof(sljit_w);
717 }
718 cc += 1 + LINK_SIZE;
719 break;
720
721 case OP_MARK:
722 if (common->mark_ptr == 0)
723 {
724 common->mark_ptr = common->ovector_start;
725 common->ovector_start += sizeof(sljit_w);
726 }
727 cc += 1 + 2 + cc[1];
728 break;
729
730 default:
731 cc = next_opcode(common, cc);
732 if (cc == NULL)
733 return -1;
734 break;
735 }
736 }
737 return localspace;
738 }
739
740 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
741 {
742 pcre_uchar *cc = common->start;
743 pcre_uchar *alternative;
744 while (cc < ccend)
745 {
746 switch(*cc)
747 {
748 case OP_ASSERT:
749 case OP_ASSERT_NOT:
750 case OP_ASSERTBACK:
751 case OP_ASSERTBACK_NOT:
752 case OP_ONCE:
753 case OP_ONCE_NC:
754 case OP_BRAPOS:
755 case OP_SBRA:
756 case OP_SBRAPOS:
757 case OP_SCOND:
758 common->localptrs[cc - common->start] = localptr;
759 localptr += sizeof(sljit_w);
760 cc += 1 + LINK_SIZE;
761 break;
762
763 case OP_CBRAPOS:
764 case OP_SCBRAPOS:
765 common->localptrs[cc - common->start] = localptr;
766 localptr += sizeof(sljit_w);
767 cc += 1 + LINK_SIZE + IMM2_SIZE;
768 break;
769
770 case OP_COND:
771 /* Might be a hidden SCOND. */
772 alternative = cc + GET(cc, 1);
773 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
774 {
775 common->localptrs[cc - common->start] = localptr;
776 localptr += sizeof(sljit_w);
777 }
778 cc += 1 + LINK_SIZE;
779 break;
780
781 default:
782 cc = next_opcode(common, cc);
783 SLJIT_ASSERT(cc != NULL);
784 break;
785 }
786 }
787 }
788
789 /* Returns with -1 if no need for frame. */
790 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
791 {
792 pcre_uchar *ccend = bracketend(cc);
793 int length = 0;
794 BOOL possessive = FALSE;
795 BOOL setsom_found = recursive;
796 BOOL setmark_found = recursive;
797
798 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
799 {
800 length = 3;
801 possessive = TRUE;
802 }
803
804 cc = next_opcode(common, cc);
805 SLJIT_ASSERT(cc != NULL);
806 while (cc < ccend)
807 switch(*cc)
808 {
809 case OP_SET_SOM:
810 SLJIT_ASSERT(common->has_set_som);
811 if (!setsom_found)
812 {
813 length += 2;
814 setsom_found = TRUE;
815 }
816 cc += 1;
817 break;
818
819 case OP_MARK:
820 SLJIT_ASSERT(common->mark_ptr != 0);
821 if (!setmark_found)
822 {
823 length += 2;
824 setmark_found = TRUE;
825 }
826 cc += 1 + 2 + cc[1];
827 break;
828
829 case OP_RECURSE:
830 if (common->has_set_som && !setsom_found)
831 {
832 length += 2;
833 setsom_found = TRUE;
834 }
835 if (common->mark_ptr != 0 && !setmark_found)
836 {
837 length += 2;
838 setmark_found = TRUE;
839 }
840 cc += 1 + LINK_SIZE;
841 break;
842
843 case OP_CBRA:
844 case OP_CBRAPOS:
845 case OP_SCBRA:
846 case OP_SCBRAPOS:
847 length += 3;
848 cc += 1 + LINK_SIZE + IMM2_SIZE;
849 break;
850
851 default:
852 cc = next_opcode(common, cc);
853 SLJIT_ASSERT(cc != NULL);
854 break;
855 }
856
857 /* Possessive quantifiers can use a special case. */
858 if (SLJIT_UNLIKELY(possessive) && length == 3)
859 return -1;
860
861 if (length > 0)
862 return length + 1;
863 return -1;
864 }
865
866 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
867 {
868 DEFINE_COMPILER;
869 pcre_uchar *ccend = bracketend(cc);
870 BOOL setsom_found = recursive;
871 BOOL setmark_found = recursive;
872 int offset;
873
874 /* >= 1 + shortest item size (2) */
875 SLJIT_UNUSED_ARG(stacktop);
876 SLJIT_ASSERT(stackpos >= stacktop + 2);
877
878 stackpos = STACK(stackpos);
879 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
880 cc = next_opcode(common, cc);
881 SLJIT_ASSERT(cc != NULL);
882 while (cc < ccend)
883 switch(*cc)
884 {
885 case OP_SET_SOM:
886 SLJIT_ASSERT(common->has_set_som);
887 if (!setsom_found)
888 {
889 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
891 stackpos += (int)sizeof(sljit_w);
892 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
893 stackpos += (int)sizeof(sljit_w);
894 setsom_found = TRUE;
895 }
896 cc += 1;
897 break;
898
899 case OP_MARK:
900 SLJIT_ASSERT(common->mark_ptr != 0);
901 if (!setmark_found)
902 {
903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
905 stackpos += (int)sizeof(sljit_w);
906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
907 stackpos += (int)sizeof(sljit_w);
908 setmark_found = TRUE;
909 }
910 cc += 1 + 2 + cc[1];
911 break;
912
913 case OP_RECURSE:
914 if (common->has_set_som && !setsom_found)
915 {
916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
918 stackpos += (int)sizeof(sljit_w);
919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
920 stackpos += (int)sizeof(sljit_w);
921 setsom_found = TRUE;
922 }
923 if (common->mark_ptr != 0 && !setmark_found)
924 {
925 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
926 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
927 stackpos += (int)sizeof(sljit_w);
928 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
929 stackpos += (int)sizeof(sljit_w);
930 setmark_found = TRUE;
931 }
932 cc += 1 + LINK_SIZE;
933 break;
934
935 case OP_CBRA:
936 case OP_CBRAPOS:
937 case OP_SCBRA:
938 case OP_SCBRAPOS:
939 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
940 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
941 stackpos += (int)sizeof(sljit_w);
942 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
943 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
945 stackpos += (int)sizeof(sljit_w);
946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
947 stackpos += (int)sizeof(sljit_w);
948
949 cc += 1 + LINK_SIZE + IMM2_SIZE;
950 break;
951
952 default:
953 cc = next_opcode(common, cc);
954 SLJIT_ASSERT(cc != NULL);
955 break;
956 }
957
958 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
959 SLJIT_ASSERT(stackpos == STACK(stacktop));
960 }
961
962 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
963 {
964 int localsize = 2;
965 pcre_uchar *alternative;
966 /* Calculate the sum of the local variables. */
967 while (cc < ccend)
968 {
969 switch(*cc)
970 {
971 case OP_ASSERT:
972 case OP_ASSERT_NOT:
973 case OP_ASSERTBACK:
974 case OP_ASSERTBACK_NOT:
975 case OP_ONCE:
976 case OP_ONCE_NC:
977 case OP_BRAPOS:
978 case OP_SBRA:
979 case OP_SBRAPOS:
980 case OP_SCOND:
981 localsize++;
982 cc += 1 + LINK_SIZE;
983 break;
984
985 case OP_CBRA:
986 case OP_SCBRA:
987 localsize++;
988 cc += 1 + LINK_SIZE + IMM2_SIZE;
989 break;
990
991 case OP_CBRAPOS:
992 case OP_SCBRAPOS:
993 localsize += 2;
994 cc += 1 + LINK_SIZE + IMM2_SIZE;
995 break;
996
997 case OP_COND:
998 /* Might be a hidden SCOND. */
999 alternative = cc + GET(cc, 1);
1000 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1001 localsize++;
1002 cc += 1 + LINK_SIZE;
1003 break;
1004
1005 default:
1006 cc = next_opcode(common, cc);
1007 SLJIT_ASSERT(cc != NULL);
1008 break;
1009 }
1010 }
1011 SLJIT_ASSERT(cc == ccend);
1012 return localsize;
1013 }
1014
/* Emits code that copies the local variables belonging to the opcodes in
   [cc, ccend) between the locals area and the stack.

   save == TRUE:  locals -> stack; the first word saved is
                  common->recursive_head, followed by the locals of each
                  opcode in pattern order.
   save == FALSE: stack -> locals, for the opcode locals only.

   stackptr and stacktop are stack indexes on entry and are converted to byte
   offsets with STACK(). TMP1 and TMP2 are used alternately as the
   intermediate register, so each value is held in a register until that
   register is needed again. The set of opcodes handled here matches the one
   counted by get_localsize(). */
1015 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1016 BOOL save, int stackptr, int stacktop)
1017 {
1018 DEFINE_COMPILER;
/* Offsets (in the locals area) of the words of the current opcode; at most
   two per opcode. They are consumed from the highest index down. */
1019 int srcw[2];
1020 int count;
/* tmp1next: TMP1 is the register to use next. tmpNempty: TMPn currently
   holds no value that is still waiting to be stored. */
1021 BOOL tmp1next = TRUE;
1022 BOOL tmp1empty = TRUE;
1023 BOOL tmp2empty = TRUE;
1024 pcre_uchar *alternative;
1025 enum {
1026 start,
1027 loop,
1028 end
1029 } status;
1030
/* The extra "start" step (recursive_head) only exists when saving. */
1031 status = save ? start : loop;
1032 stackptr = STACK(stackptr - 2);
1033 stacktop = STACK(stacktop - 1);
1034
1035 if (!save)
1036 {
/* Restoring: skip one word, then preload up to two values from the stack
   into TMP1 and TMP2. */
1037 stackptr += sizeof(sljit_w);
1038 if (stackptr < stacktop)
1039 {
1040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1041 stackptr += sizeof(sljit_w);
1042 tmp1empty = FALSE;
1043 }
1044 if (stackptr < stacktop)
1045 {
1046 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1047 stackptr += sizeof(sljit_w);
1048 tmp2empty = FALSE;
1049 }
1050 /* The tmp1next must be TRUE in either way. */
1051 }
1052
1053 while (status != end)
1054 {
/* Each pass first determines the locals of one item (count entries in
   srcw), then emits the moves for them. */
1055 count = 0;
1056 switch(status)
1057 {
1058 case start:
1059 SLJIT_ASSERT(save && common->recursive_head != 0);
1060 count = 1;
1061 srcw[0] = common->recursive_head;
1062 status = loop;
1063 break;
1064
1065 case loop:
1066 if (cc >= ccend)
1067 {
1068 status = end;
1069 break;
1070 }
1071
1072 switch(*cc)
1073 {
1074 case OP_ASSERT:
1075 case OP_ASSERT_NOT:
1076 case OP_ASSERTBACK:
1077 case OP_ASSERTBACK_NOT:
1078 case OP_ONCE:
1079 case OP_ONCE_NC:
1080 case OP_BRAPOS:
1081 case OP_SBRA:
1082 case OP_SBRAPOS:
1083 case OP_SCOND:
1084 count = 1;
1085 srcw[0] = PRIV_DATA(cc);
1086 SLJIT_ASSERT(srcw[0] != 0);
1087 cc += 1 + LINK_SIZE;
1088 break;
1089
1090 case OP_CBRA:
1091 case OP_SCBRA:
1092 count = 1;
1093 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1094 cc += 1 + LINK_SIZE + IMM2_SIZE;
1095 break;
1096
1097 case OP_CBRAPOS:
1098 case OP_SCBRAPOS:
/* Two words: srcw[1] (the OVECTOR_PRIV word) is moved first, then srcw[0]
   (the private data word). */
1099 count = 2;
1100 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1101 srcw[0] = PRIV_DATA(cc);
1102 SLJIT_ASSERT(srcw[0] != 0);
1103 cc += 1 + LINK_SIZE + IMM2_SIZE;
1104 break;
1105
1106 case OP_COND:
1107 /* Might be a hidden SCOND. */
1108 alternative = cc + GET(cc, 1);
1109 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1110 {
1111 count = 1;
1112 srcw[0] = PRIV_DATA(cc);
1113 SLJIT_ASSERT(srcw[0] != 0);
1114 }
1115 cc += 1 + LINK_SIZE;
1116 break;
1117
1118 default:
1119 cc = next_opcode(common, cc);
1120 SLJIT_ASSERT(cc != NULL);
1121 break;
1122 }
1123 break;
1124
1125 case end:
1126 SLJIT_ASSERT_STOP();
1127 break;
1128 }
1129
1130 while (count > 0)
1131 {
1132 count--;
1133 if (save)
1134 {
/* Saving: before a register is reloaded, its previous value (if any) is
   written to the stack; then the next local is loaded into it. */
1135 if (tmp1next)
1136 {
1137 if (!tmp1empty)
1138 {
1139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1140 stackptr += sizeof(sljit_w);
1141 }
1142 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1143 tmp1empty = FALSE;
1144 tmp1next = FALSE;
1145 }
1146 else
1147 {
1148 if (!tmp2empty)
1149 {
1150 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1151 stackptr += sizeof(sljit_w);
1152 }
1153 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1154 tmp2empty = FALSE;
1155 tmp1next = TRUE;
1156 }
1157 }
1158 else
1159 {
/* Restoring: the register already holds the value for this local; store
   it, then refill the register from the stack if anything is left. */
1160 if (tmp1next)
1161 {
1162 SLJIT_ASSERT(!tmp1empty);
1163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1164 tmp1empty = stackptr >= stacktop;
1165 if (!tmp1empty)
1166 {
1167 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1168 stackptr += sizeof(sljit_w);
1169 }
1170 tmp1next = FALSE;
1171 }
1172 else
1173 {
1174 SLJIT_ASSERT(!tmp2empty);
1175 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1176 tmp2empty = stackptr >= stacktop;
1177 if (!tmp2empty)
1178 {
1179 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1180 stackptr += sizeof(sljit_w);
1181 }
1182 tmp1next = TRUE;
1183 }
1184 }
1185 }
1186 }
1187
1188 if (save)
1189 {
/* Flush the values still held in registers. The register that would be
   used next holds the older value, so it is written first. */
1190 if (tmp1next)
1191 {
1192 if (!tmp1empty)
1193 {
1194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1195 stackptr += sizeof(sljit_w);
1196 }
1197 if (!tmp2empty)
1198 {
1199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1200 stackptr += sizeof(sljit_w);
1201 }
1202 }
1203 else
1204 {
1205 if (!tmp2empty)
1206 {
1207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1208 stackptr += sizeof(sljit_w);
1209 }
1210 if (!tmp1empty)
1211 {
1212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1213 stackptr += sizeof(sljit_w);
1214 }
1215 }
1216 }
/* All opcodes consumed, the whole stack area used, and (when restoring)
   nothing left in the registers. */
1217 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1218 }
1219
1220 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1221 {
1222 return (value & (value - 1)) == 0;
1223 }
1224
1225 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1226 {
1227 while (list)
1228 {
1229 /* sljit_set_label is clever enough to do nothing
1230 if either the jump or the label is NULL */
1231 sljit_set_label(list->jump, label);
1232 list = list->next;
1233 }
1234 }
1235
1236 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1237 {
1238 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1239 if (list_item)
1240 {
1241 list_item->next = *list;
1242 list_item->jump = jump;
1243 *list = list_item;
1244 }
1245 }
1246
1247 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1248 {
1249 DEFINE_COMPILER;
1250 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1251
1252 if (list_item)
1253 {
1254 list_item->type = type;
1255 list_item->data = data;
1256 list_item->start = start;
1257 list_item->leave = LABEL();
1258 list_item->next = common->stubs;
1259 common->stubs = list_item;
1260 }
1261 }
1262
1263 static void flush_stubs(compiler_common *common)
1264 {
1265 DEFINE_COMPILER;
1266 stub_list* list_item = common->stubs;
1267
1268 while (list_item)
1269 {
1270 JUMPHERE(list_item->start);
1271 switch(list_item->type)
1272 {
1273 case stack_alloc:
1274 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1275 break;
1276 }
1277 JUMPTO(SLJIT_JUMP, list_item->leave);
1278 list_item = list_item->next;
1279 }
1280 common->stubs = NULL;
1281 }
1282
1283 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1284 {
1285 DEFINE_COMPILER;
1286
1287 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1288 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1289 }
1290
1291 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1292 {
1293 /* May destroy all locals and registers except TMP2. */
1294 DEFINE_COMPILER;
1295
1296 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1297 #ifdef DESTROY_REGISTERS
1298 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1299 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1300 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1301 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1303 #endif
1304 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1305 }
1306
1307 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1308 {
1309 DEFINE_COMPILER;
1310 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1311 }
1312
1313 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1314 {
1315 DEFINE_COMPILER;
1316 struct sljit_label *loop;
1317 int i;
1318 /* At this point we can freely use all temporary registers. */
1319 /* TMP1 returns with begin - 1. */
1320 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1321 if (length < 8)
1322 {
1323 for (i = 0; i < length; i++)
1324 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1325 }
1326 else
1327 {
1328 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1329 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1330 loop = LABEL();
1331 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1332 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1333 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1334 }
1335 }
1336
1337 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1338 {
1339 DEFINE_COMPILER;
1340 struct sljit_label *loop;
1341 struct sljit_jump *earlyexit;
1342
1343 /* At this point we can freely use all registers. */
1344 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1346
1347 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1348 if (common->mark_ptr != 0)
1349 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1350 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1351 if (common->mark_ptr != 0)
1352 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1353 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1354 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1355 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1356 /* Unlikely, but possible */
1357 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1358 loop = LABEL();
1359 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1360 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1361 /* Copy the integer value to the output buffer */
1362 #ifdef COMPILE_PCRE16
1363 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1364 #endif
1365 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1366 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1367 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1368 JUMPHERE(earlyexit);
1369
1370 /* Calculate the return value, which is the maximum ovector value. */
1371 if (topbracket > 1)
1372 {
1373 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1374 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1375
1376 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1377 loop = LABEL();
1378 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1379 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1380 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1381 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1382 }
1383 else
1384 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1385 }
1386
1387 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1388 {
1389 DEFINE_COMPILER;
1390
1391 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1392 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1393
1394 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1395 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1396 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1397 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1398
1399 /* Store match begin and end. */
1400 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1401 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1402 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1403 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1404 #ifdef COMPILE_PCRE16
1405 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1406 #endif
1407 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1408
1409 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1410 #ifdef COMPILE_PCRE16
1411 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1412 #endif
1413 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1414
1415 JUMPTO(SLJIT_JUMP, leave);
1416 }
1417
1418 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1419 {
1420 /* May destroy TMP1. */
1421 DEFINE_COMPILER;
1422 struct sljit_jump *jump;
1423
1424 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1425 {
1426 /* The value of -1 must be kept for start_used_ptr! */
1427 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1428 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1429 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1430 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1432 JUMPHERE(jump);
1433 }
1434 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1435 {
1436 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1438 JUMPHERE(jump);
1439 }
1440 }
1441
1442 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1443 {
1444 /* Detects if the character has an othercase. */
1445 unsigned int c;
1446
1447 #ifdef SUPPORT_UTF
1448 if (common->utf)
1449 {
1450 GETCHAR(c, cc);
1451 if (c > 127)
1452 {
1453 #ifdef SUPPORT_UCP
1454 return c != UCD_OTHERCASE(c);
1455 #else
1456 return FALSE;
1457 #endif
1458 }
1459 #ifndef COMPILE_PCRE8
1460 return common->fcc[c] != c;
1461 #endif
1462 }
1463 else
1464 #endif
1465 c = *cc;
1466 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1467 }
1468
1469 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1470 {
1471 /* Returns with the othercase. */
1472 #ifdef SUPPORT_UTF
1473 if (common->utf && c > 127)
1474 {
1475 #ifdef SUPPORT_UCP
1476 return UCD_OTHERCASE(c);
1477 #else
1478 return c;
1479 #endif
1480 }
1481 #endif
1482 return TABLE_GET(c, common->fcc, c);
1483 }
1484
1485 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1486 {
1487 /* Detects if the character and its othercase has only 1 bit difference. */
1488 unsigned int c, oc, bit;
1489 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1490 int n;
1491 #endif
1492
1493 #ifdef SUPPORT_UTF
1494 if (common->utf)
1495 {
1496 GETCHAR(c, cc);
1497 if (c <= 127)
1498 oc = common->fcc[c];
1499 else
1500 {
1501 #ifdef SUPPORT_UCP
1502 oc = UCD_OTHERCASE(c);
1503 #else
1504 oc = c;
1505 #endif
1506 }
1507 }
1508 else
1509 {
1510 c = *cc;
1511 oc = TABLE_GET(c, common->fcc, c);
1512 }
1513 #else
1514 c = *cc;
1515 oc = TABLE_GET(c, common->fcc, c);
1516 #endif
1517
1518 SLJIT_ASSERT(c != oc);
1519
1520 bit = c ^ oc;
1521 /* Optimized for English alphabet. */
1522 if (c <= 127 && bit == 0x20)
1523 return (0 << 8) | 0x20;
1524
1525 /* Since c != oc, they must have at least 1 bit difference. */
1526 if (!ispowerof2(bit))
1527 return 0;
1528
1529 #ifdef COMPILE_PCRE8
1530
1531 #ifdef SUPPORT_UTF
1532 if (common->utf && c > 127)
1533 {
1534 n = GET_EXTRALEN(*cc);
1535 while ((bit & 0x3f) == 0)
1536 {
1537 n--;
1538 bit >>= 6;
1539 }
1540 return (n << 8) | bit;
1541 }
1542 #endif /* SUPPORT_UTF */
1543 return (0 << 8) | bit;
1544
1545 #else /* COMPILE_PCRE8 */
1546
1547 #ifdef COMPILE_PCRE16
1548 #ifdef SUPPORT_UTF
1549 if (common->utf && c > 65535)
1550 {
1551 if (bit >= (1 << 10))
1552 bit >>= 10;
1553 else
1554 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1555 }
1556 #endif /* SUPPORT_UTF */
1557 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1558 #endif /* COMPILE_PCRE16 */
1559
1560 #endif /* COMPILE_PCRE8 */
1561 }
1562
1563 static void check_partial(compiler_common *common, BOOL force)
1564 {
1565 /* Checks whether a partial matching is occured. Does not modify registers. */
1566 DEFINE_COMPILER;
1567 struct sljit_jump *jump = NULL;
1568
1569 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1570
1571 if (common->mode == JIT_COMPILE)
1572 return;
1573
1574 if (!force)
1575 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1576 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1577 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1578
1579 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1581 else
1582 {
1583 if (common->partialmatchlabel != NULL)
1584 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1585 else
1586 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1587 }
1588
1589 if (jump != NULL)
1590 JUMPHERE(jump);
1591 }
1592
1593 static struct sljit_jump *check_str_end(compiler_common *common)
1594 {
1595 /* Does not affect registers. Usually used in a tight spot. */
1596 DEFINE_COMPILER;
1597 struct sljit_jump *jump;
1598 struct sljit_jump *nohit;
1599 struct sljit_jump *return_value;
1600
1601 if (common->mode == JIT_COMPILE)
1602 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1603
1604 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1605 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1606 {
1607 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1609 JUMPHERE(nohit);
1610 return_value = JUMP(SLJIT_JUMP);
1611 }
1612 else
1613 {
1614 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1615 if (common->partialmatchlabel != NULL)
1616 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1617 else
1618 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1619 }
1620 JUMPHERE(jump);
1621 return return_value;
1622 }
1623
1624 static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1625 {
1626 DEFINE_COMPILER;
1627 struct sljit_jump *jump;
1628
1629 if (common->mode == JIT_COMPILE)
1630 {
1631 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1632 return;
1633 }
1634
1635 /* Partial matching mode. */
1636 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1637 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1638 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1639 {
1640 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1641 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1642 }
1643 else
1644 {
1645 if (common->partialmatchlabel != NULL)
1646 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1647 else
1648 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1649 }
1650 JUMPHERE(jump);
1651 }
1652
1653 static void read_char(compiler_common *common)
1654 {
1655 /* Reads the character into TMP1, updates STR_PTR.
1656 Does not check STR_END. TMP2 Destroyed. */
1657 DEFINE_COMPILER;
1658 #ifdef SUPPORT_UTF
1659 struct sljit_jump *jump;
1660 #endif
1661
1662 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1663 #ifdef SUPPORT_UTF
1664 if (common->utf)
1665 {
1666 #ifdef COMPILE_PCRE8
1667 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1668 #else
1669 #ifdef COMPILE_PCRE16
1670 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1671 #endif
1672 #endif /* COMPILE_PCRE8 */
1673 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1674 JUMPHERE(jump);
1675 }
1676 #endif
1677 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1678 }
1679
1680 static void peek_char(compiler_common *common)
1681 {
1682 /* Reads the character into TMP1, keeps STR_PTR.
1683 Does not check STR_END. TMP2 Destroyed. */
1684 DEFINE_COMPILER;
1685 #ifdef SUPPORT_UTF
1686 struct sljit_jump *jump;
1687 #endif
1688
1689 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1690 #ifdef SUPPORT_UTF
1691 if (common->utf)
1692 {
1693 #ifdef COMPILE_PCRE8
1694 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1695 #else
1696 #ifdef COMPILE_PCRE16
1697 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1698 #endif
1699 #endif /* COMPILE_PCRE8 */
1700 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1701 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1702 JUMPHERE(jump);
1703 }
1704 #endif
1705 }
1706
1707 static void read_char8_type(compiler_common *common)
1708 {
1709 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1710 DEFINE_COMPILER;
1711 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1712 struct sljit_jump *jump;
1713 #endif
1714
1715 #ifdef SUPPORT_UTF
1716 if (common->utf)
1717 {
1718 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1719 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1720 #ifdef COMPILE_PCRE8
1721 /* This can be an extra read in some situations, but hopefully
1722 it is needed in most cases. */
1723 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1724 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1725 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1726 JUMPHERE(jump);
1727 #else
1728 #ifdef COMPILE_PCRE16
1729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1730 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1731 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1732 JUMPHERE(jump);
1733 /* Skip low surrogate if necessary. */
1734 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1735 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1736 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1737 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1738 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1739 #endif
1740 #endif /* COMPILE_PCRE8 */
1741 return;
1742 }
1743 #endif
1744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1746 #ifdef COMPILE_PCRE16
1747 /* The ctypes array contains only 256 values. */
1748 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1749 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1750 #endif
1751 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1752 #ifdef COMPILE_PCRE16
1753 JUMPHERE(jump);
1754 #endif
1755 }
1756
1757 static void skip_char_back(compiler_common *common)
1758 {
1759 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1760 DEFINE_COMPILER;
1761 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1762 struct sljit_label *label;
1763
1764 if (common->utf)
1765 {
1766 label = LABEL();
1767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1768 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1769 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1770 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1771 return;
1772 }
1773 #endif
1774 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1775 if (common->utf)
1776 {
1777 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1778 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1779 /* Skip low surrogate if necessary. */
1780 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1781 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1782 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1783 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1784 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1785 return;
1786 }
1787 #endif
1788 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1789 }
1790
1791 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1792 {
1793 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1794 DEFINE_COMPILER;
1795
1796 if (nltype == NLTYPE_ANY)
1797 {
1798 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1799 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1800 }
1801 else if (nltype == NLTYPE_ANYCRLF)
1802 {
1803 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1804 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1805 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1806 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1807 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1808 }
1809 else
1810 {
1811 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1812 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1813 }
1814 }
1815
1816 #ifdef SUPPORT_UTF
1817
1818 #ifdef COMPILE_PCRE8
1819 static void do_utfreadchar(compiler_common *common)
1820 {
1821 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1822 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1823 DEFINE_COMPILER;
1824 struct sljit_jump *jump;
1825
1826 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1827 /* Searching for the first zero. */
1828 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1829 jump = JUMP(SLJIT_C_NOT_ZERO);
1830 /* Two byte sequence. */
1831 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1832 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1833 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1834 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1835 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1836 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1837 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1838 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1839 JUMPHERE(jump);
1840
1841 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1842 jump = JUMP(SLJIT_C_NOT_ZERO);
1843 /* Three byte sequence. */
1844 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1845 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1846 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1847 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1848 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1849 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1850 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1851 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1852 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1853 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1854 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1855 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1856 JUMPHERE(jump);
1857
1858 /* Four byte sequence. */
1859 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1860 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1861 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1862 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1863 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1864 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1865 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1866 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1867 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1868 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1869 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1870 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1871 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1872 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1873 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1874 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1875 }
1876
1877 static void do_utfreadtype8(compiler_common *common)
1878 {
1879 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1880 of the character (>= 0xc0). Return value in TMP1. */
1881 DEFINE_COMPILER;
1882 struct sljit_jump *jump;
1883 struct sljit_jump *compare;
1884
1885 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1886
1887 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1888 jump = JUMP(SLJIT_C_NOT_ZERO);
1889 /* Two byte sequence. */
1890 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1891 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1892 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1893 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1894 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1895 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1896 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1897 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1898 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1899
1900 JUMPHERE(compare);
1901 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1902 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1903 JUMPHERE(jump);
1904
1905 /* We only have types for characters less than 256. */
1906 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1907 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1908 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1909 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1910 }
1911
1912 #else /* COMPILE_PCRE8 */
1913
1914 #ifdef COMPILE_PCRE16
1915 static void do_utfreadchar(compiler_common *common)
1916 {
1917 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1918 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1919 DEFINE_COMPILER;
1920 struct sljit_jump *jump;
1921
1922 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1923 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1924 /* Do nothing, only return. */
1925 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1926
1927 JUMPHERE(jump);
1928 /* Combine two 16 bit characters. */
1929 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1930 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1931 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1932 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1933 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1934 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1936 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1937 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1938 }
1939 #endif /* COMPILE_PCRE16 */
1940
1941 #endif /* COMPILE_PCRE8 */
1942
1943 #endif /* SUPPORT_UTF */
1944
1945 #ifdef SUPPORT_UCP
1946
1947 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1948 #define UCD_BLOCK_MASK 127
1949 #define UCD_BLOCK_SHIFT 7
1950
1951 static void do_getucd(compiler_common *common)
1952 {
1953 /* Search the UCD record for the character comes in TMP1.
1954 Returns chartype in TMP1 and UCD offset in TMP2. */
1955 DEFINE_COMPILER;
1956
1957 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1958
1959 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1960 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1961 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1962 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1963 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1964 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1966 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1967 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1968 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1969 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1970 }
1971 #endif
1972
1973 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1974 {
1975 DEFINE_COMPILER;
1976 struct sljit_label *mainloop;
1977 struct sljit_label *newlinelabel = NULL;
1978 struct sljit_jump *start;
1979 struct sljit_jump *end = NULL;
1980 struct sljit_jump *nl = NULL;
1981 #ifdef SUPPORT_UTF
1982 struct sljit_jump *singlechar;
1983 #endif
1984 jump_list *newline = NULL;
1985 BOOL newlinecheck = FALSE;
1986 BOOL readuchar = FALSE;
1987
1988 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1989 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1990 newlinecheck = TRUE;
1991
1992 if (firstline)
1993 {
1994 /* Search for the end of the first line. */
1995 SLJIT_ASSERT(common->first_line_end != 0);
1996 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
1998
1999 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2000 {
2001 mainloop = LABEL();
2002 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2003 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2004 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2005 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2006 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2007 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2008 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2009 }
2010 else
2011 {
2012 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2013 mainloop = LABEL();
2014 /* Continual stores does not cause data dependency. */
2015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2016 read_char(common);
2017 check_newlinechar(common, common->nltype, &newline, TRUE);
2018 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2020 set_jumps(newline, LABEL());
2021 }
2022
2023 JUMPHERE(end);
2024 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2025 }
2026
2027 start = JUMP(SLJIT_JUMP);
2028
2029 if (newlinecheck)
2030 {
2031 newlinelabel = LABEL();
2032 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2033 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2034 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2035 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2036 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2037 #ifdef COMPILE_PCRE16
2038 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2039 #endif
2040 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2041 nl = JUMP(SLJIT_JUMP);
2042 }
2043
2044 mainloop = LABEL();
2045
2046 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2047 #ifdef SUPPORT_UTF
2048 if (common->utf) readuchar = TRUE;
2049 #endif
2050 if (newlinecheck) readuchar = TRUE;
2051
2052 if (readuchar)
2053 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2054
2055 if (newlinecheck)
2056 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2057
2058 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2059 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2060 if (common->utf)
2061 {
2062 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2063 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2064 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2065 JUMPHERE(singlechar);
2066 }
2067 #endif
2068 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2069 if (common->utf)
2070 {
2071 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2072 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2073 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2074 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2075 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2076 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2077 JUMPHERE(singlechar);
2078 }
2079 #endif
2080 JUMPHERE(start);
2081
2082 if (newlinecheck)
2083 {
2084 JUMPHERE(end);
2085 JUMPHERE(nl);
2086 }
2087
2088 return mainloop;
2089 }
2090
2091 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2092 {
2093 DEFINE_COMPILER;
2094 struct sljit_label *start;
2095 struct sljit_jump *leave;
2096 struct sljit_jump *found;
2097 pcre_uchar oc, bit;
2098
2099 if (firstline)
2100 {
2101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2102 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2103 }
2104
2105 start = LABEL();
2106 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2107 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2108
2109 oc = first_char;
2110 if (caseless)
2111 {
2112 oc = TABLE_GET(first_char, common->fcc, first_char);
2113 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2114 if (first_char > 127 && common->utf)
2115 oc = UCD_OTHERCASE(first_char);
2116 #endif
2117 }
2118 if (first_char == oc)
2119 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2120 else
2121 {
2122 bit = first_char ^ oc;
2123 if (ispowerof2(bit))
2124 {
2125 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2126 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2127 }
2128 else
2129 {
2130 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2131 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2132 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2133 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2134 found = JUMP(SLJIT_C_NOT_ZERO);
2135 }
2136 }
2137
2138 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2139 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2140 if (common->utf)
2141 {
2142 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2143 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2144 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2145 }
2146 #endif
2147 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2148 if (common->utf)
2149 {
2150 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2151 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2152 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2153 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2154 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2155 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2156 }
2157 #endif
2158 JUMPTO(SLJIT_JUMP, start);
2159 JUMPHERE(found);
2160 JUMPHERE(leave);
2161
2162 if (firstline)
2163 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2164 }
2165
2166 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2167 {
2168 DEFINE_COMPILER;
2169 struct sljit_label *loop;
2170 struct sljit_jump *lastchar;
2171 struct sljit_jump *firstchar;
2172 struct sljit_jump *leave;
2173 struct sljit_jump *foundcr = NULL;
2174 struct sljit_jump *notfoundnl;
2175 jump_list *newline = NULL;
2176
2177 if (firstline)
2178 {
2179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2180 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2181 }
2182
2183 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2184 {
2185 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2186 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2187 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2189 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2190
2191 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2193 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2194 #ifdef COMPILE_PCRE16
2195 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2196 #endif
2197 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2198
2199 loop = LABEL();
2200 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2201 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2202 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2203 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2204 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2205 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2206
2207 JUMPHERE(leave);
2208 JUMPHERE(firstchar);
2209 JUMPHERE(lastchar);
2210
2211 if (firstline)
2212 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2213 return;
2214 }
2215
2216 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2218 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2219 skip_char_back(common);
2220
2221 loop = LABEL();
2222 read_char(common);
2223 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2224 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2225 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2226 check_newlinechar(common, common->nltype, &newline, FALSE);
2227 set_jumps(newline, loop);
2228
2229 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2230 {
2231 leave = JUMP(SLJIT_JUMP);
2232 JUMPHERE(foundcr);
2233 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2234 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2235 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2236 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2237 #ifdef COMPILE_PCRE16
2238 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2239 #endif
2240 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2241 JUMPHERE(notfoundnl);
2242 JUMPHERE(leave);
2243 }
2244 JUMPHERE(lastchar);
2245 JUMPHERE(firstchar);
2246
2247 if (firstline)
2248 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2249 }
2250
2251 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2252 {
2253 DEFINE_COMPILER;
2254 struct sljit_label *start;
2255 struct sljit_jump *leave;
2256 struct sljit_jump *found;
2257 #ifndef COMPILE_PCRE8
2258 struct sljit_jump *jump;
2259 #endif
2260
2261 if (firstline)
2262 {
2263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2264 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2265 }
2266
2267 start = LABEL();
2268 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2269 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2270 #ifdef SUPPORT_UTF
2271 if (common->utf)
2272 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2273 #endif
2274 #ifndef COMPILE_PCRE8
2275 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2277 JUMPHERE(jump);
2278 #endif
2279 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2280 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2281 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2282 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2283 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2284 found = JUMP(SLJIT_C_NOT_ZERO);
2285
2286 #ifdef SUPPORT_UTF
2287 if (common->utf)
2288 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2289 #endif
2290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2291 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2292 if (common->utf)
2293 {
2294 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2295 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2297 }
2298 #endif
2299 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2300 if (common->utf)
2301 {
2302 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2303 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2304 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2305 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2306 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2307 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2308 }
2309 #endif
2310 JUMPTO(SLJIT_JUMP, start);
2311 JUMPHERE(found);
2312 JUMPHERE(leave);
2313
2314 if (firstline)
2315 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2316 }
2317
2318 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2319 {
2320 DEFINE_COMPILER;
2321 struct sljit_label *loop;
2322 struct sljit_jump *toolong;
2323 struct sljit_jump *alreadyfound;
2324 struct sljit_jump *found;
2325 struct sljit_jump *foundoc = NULL;
2326 struct sljit_jump *notfound;
2327 pcre_uchar oc, bit;
2328
2329 SLJIT_ASSERT(common->req_char_ptr != 0);
2330 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2331 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2332 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2333 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2334
2335 if (has_firstchar)
2336 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2337 else
2338 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2339
2340 loop = LABEL();
2341 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2342
2343 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2344 oc = req_char;
2345 if (caseless)
2346 {
2347 oc = TABLE_GET(req_char, common->fcc, req_char);
2348 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2349 if (req_char > 127 && common->utf)
2350 oc = UCD_OTHERCASE(req_char);
2351 #endif
2352 }
2353 if (req_char == oc)
2354 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2355 else
2356 {
2357 bit = req_char ^ oc;
2358 if (ispowerof2(bit))
2359 {
2360 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2361 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2362 }
2363 else
2364 {
2365 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2366 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2367 }
2368 }
2369 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2370 JUMPTO(SLJIT_JUMP, loop);
2371
2372 JUMPHERE(found);
2373 if (foundoc)
2374 JUMPHERE(foundoc);
2375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2376 JUMPHERE(alreadyfound);
2377 JUMPHERE(toolong);
2378 return notfound;
2379 }
2380
2381 static void do_revertframes(compiler_common *common)
2382 {
2383 DEFINE_COMPILER;
2384 struct sljit_jump *jump;
2385 struct sljit_label *mainloop;
2386
2387 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2388 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2389
2390 /* Drop frames until we reach STACK_TOP. */
2391 mainloop = LABEL();
2392 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2393 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2394 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2395 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2396 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2397 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2398 JUMPTO(SLJIT_JUMP, mainloop);
2399
2400 JUMPHERE(jump);
2401 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2402 /* End of dropping frames. */
2403 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2404
2405 JUMPHERE(jump);
2406 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2407 /* Set string begin. */
2408 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2409 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2411 JUMPTO(SLJIT_JUMP, mainloop);
2412
2413 JUMPHERE(jump);
2414 if (common->mark_ptr != 0)
2415 {
2416 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
2417 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2418 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
2420 JUMPTO(SLJIT_JUMP, mainloop);
2421
2422 JUMPHERE(jump);
2423 }
2424
2425 /* Unknown command. */
2426 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2427 JUMPTO(SLJIT_JUMP, mainloop);
2428 }
2429
2430 static void check_wordboundary(compiler_common *common)
2431 {
2432 DEFINE_COMPILER;
2433 struct sljit_jump *skipread;
2434 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2435 struct sljit_jump *jump;
2436 #endif
2437
2438 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2439
2440 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2441 /* Get type of the previous char, and put it to LOCALS1. */
2442 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2445 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2446 skip_char_back(common);
2447 check_start_used_ptr(common);
2448 read_char(common);
2449
2450 /* Testing char type. */
2451 #ifdef SUPPORT_UCP
2452 if (common->use_ucp)
2453 {
2454 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2455 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2456 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2457 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2458 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2459 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2460 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2461 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2462 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2463 JUMPHERE(jump);
2464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2465 }
2466 else
2467 #endif
2468 {
2469 #ifndef COMPILE_PCRE8
2470 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2471 #elif defined SUPPORT_UTF
2472 /* Here LOCALS1 has already been zeroed. */
2473 jump = NULL;
2474 if (common->utf)
2475 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2476 #endif /* COMPILE_PCRE8 */
2477 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2478 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2479 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2481 #ifndef COMPILE_PCRE8
2482 JUMPHERE(jump);
2483 #elif defined SUPPORT_UTF
2484 if (jump != NULL)
2485 JUMPHERE(jump);
2486 #endif /* COMPILE_PCRE8 */
2487 }
2488 JUMPHERE(skipread);
2489
2490 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2491 skipread = check_str_end(common);
2492 peek_char(common);
2493
2494 /* Testing char type. This is a code duplication. */
2495 #ifdef SUPPORT_UCP
2496 if (common->use_ucp)
2497 {
2498 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2499 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2500 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2501 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2502 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2503 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2504 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2505 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2506 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2507 JUMPHERE(jump);
2508 }
2509 else
2510 #endif
2511 {
2512 #ifndef COMPILE_PCRE8
2513 /* TMP2 may be destroyed by peek_char. */
2514 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2515 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2516 #elif defined SUPPORT_UTF
2517 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2518 jump = NULL;
2519 if (common->utf)
2520 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2521 #endif
2522 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2523 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2524 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2525 #ifndef COMPILE_PCRE8
2526 JUMPHERE(jump);
2527 #elif defined SUPPORT_UTF
2528 if (jump != NULL)
2529 JUMPHERE(jump);
2530 #endif /* COMPILE_PCRE8 */
2531 }
2532 JUMPHERE(skipread);
2533
2534 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2535 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2536 }
2537
2538 static void check_anynewline(compiler_common *common)
2539 {
2540 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2541 DEFINE_COMPILER;
2542
2543 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2544
2545 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2546 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2547 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2548 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2549 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2550 #ifdef COMPILE_PCRE8
2551 if (common->utf)
2552 {
2553 #endif
2554 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2555 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2556 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2557 #ifdef COMPILE_PCRE8
2558 }
2559 #endif
2560 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2561 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2562 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2563 }
2564
2565 static void check_hspace(compiler_common *common)
2566 {
2567 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2568 DEFINE_COMPILER;
2569
2570 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2571
2572 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2573 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2574 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2575 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2576 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2577 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2578 #ifdef COMPILE_PCRE8
2579 if (common->utf)
2580 {
2581 #endif
2582 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2583 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2584 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2585 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2586 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2587 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2588 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2589 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2590 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2591 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2592 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2593 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2594 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2595 #ifdef COMPILE_PCRE8
2596 }
2597 #endif
2598 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2599 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2600
2601 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2602 }
2603
2604 static void check_vspace(compiler_common *common)
2605 {
2606 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2607 DEFINE_COMPILER;
2608
2609 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2610
2611 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2612 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2613 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2615 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2616 #ifdef COMPILE_PCRE8
2617 if (common->utf)
2618 {
2619 #endif
2620 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2621 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2622 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2623 #ifdef COMPILE_PCRE8
2624 }
2625 #endif
2626 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2627 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2628
2629 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2630 }
2631
2632 #define CHAR1 STR_END
2633 #define CHAR2 STACK_TOP
2634
2635 static void do_casefulcmp(compiler_common *common)
2636 {
2637 DEFINE_COMPILER;
2638 struct sljit_jump *jump;
2639 struct sljit_label *label;
2640
2641 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2642 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2643 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2645 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2646 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2647
2648 label = LABEL();
2649 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2650 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2652 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2653 JUMPTO(SLJIT_C_NOT_ZERO, label);
2654
2655 JUMPHERE(jump);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2657 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2658 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2659 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2660 }
2661
2662 #define LCC_TABLE STACK_LIMIT
2663
2664 static void do_caselesscmp(compiler_common *common)
2665 {
2666 DEFINE_COMPILER;
2667 struct sljit_jump *jump;
2668 struct sljit_label *label;
2669
2670 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2671 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2672
2673 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2676 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2677 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2678 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2679
2680 label = LABEL();
2681 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2682 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2683 #ifndef COMPILE_PCRE8
2684 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2685 #endif
2686 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2687 #ifndef COMPILE_PCRE8
2688 JUMPHERE(jump);
2689 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2690 #endif
2691 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2692 #ifndef COMPILE_PCRE8
2693 JUMPHERE(jump);
2694 #endif
2695 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2696 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2697 JUMPTO(SLJIT_C_NOT_ZERO, label);
2698
2699 JUMPHERE(jump);
2700 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2701 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2702 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2703 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2704 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2705 }
2706
2707 #undef LCC_TABLE
2708 #undef CHAR1
2709 #undef CHAR2
2710
2711 #if defined SUPPORT_UTF && defined SUPPORT_UCP
2712
2713 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2714 {
2715 /* This function would be ineffective to do in JIT level. */
2716 int c1, c2;
2717 const pcre_uchar *src2 = args->uchar_ptr;
2718 const pcre_uchar *end2 = args->end;
2719
2720 while (src1 < end1)
2721 {
2722 if (src2 >= end2)
2723 return (pcre_uchar*)1;
2724 GETCHARINC(c1, src1);
2725 GETCHARINC(c2, src2);
2726 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2727 }
2728 return src2;
2729 }
2730
2731 #endif /* SUPPORT_UTF && SUPPORT_UCP */
2732
2733 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2734 compare_context* context, jump_list **fallbacks)
2735 {
2736 DEFINE_COMPILER;
2737 unsigned int othercasebit = 0;
2738 pcre_uchar *othercasechar = NULL;
2739 #ifdef SUPPORT_UTF
2740 int utflength;
2741 #endif
2742
2743 if (caseless && char_has_othercase(common, cc))
2744 {
2745 othercasebit = char_get_othercase_bit(common, cc);
2746 SLJIT_ASSERT(othercasebit);
2747 /* Extracting bit difference info. */
2748 #ifdef COMPILE_PCRE8
2749 othercasechar = cc + (othercasebit >> 8);
2750 othercasebit &= 0xff;
2751 #else
2752 #ifdef COMPILE_PCRE16
2753 othercasechar = cc + (othercasebit >> 9);
2754 if ((othercasebit & 0x100) != 0)
2755 othercasebit = (othercasebit & 0xff) << 8;
2756 else
2757 othercasebit &= 0xff;
2758 #endif
2759 #endif
2760 }
2761
2762 if (context->sourcereg == -1)
2763 {
2764 #ifdef COMPILE_PCRE8
2765 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2766 if (context->length >= 4)
2767 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2768 else if (context->length >= 2)
2769 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2770 else
2771 #endif
2772 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2773 #else
2774 #ifdef COMPILE_PCRE16
2775 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2776 if (context->length >= 4)
2777 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2778 else
2779 #endif
2780 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2781 #endif
2782 #endif /* COMPILE_PCRE8 */
2783 context->sourcereg = TMP2;
2784 }
2785
2786 #ifdef SUPPORT_UTF
2787 utflength = 1;
2788 if (common->utf && HAS_EXTRALEN(*cc))
2789 utflength += GET_EXTRALEN(*cc);
2790
2791 do
2792 {
2793 #endif
2794
2795 context->length -= IN_UCHARS(1);
2796 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2797
2798 /* Unaligned read is supported. */
2799 if (othercasebit != 0 && othercasechar == cc)
2800 {
2801 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2802 context->oc.asuchars[context->ucharptr] = othercasebit;
2803 }
2804 else
2805 {
2806 context->c.asuchars[context->ucharptr] = *cc;
2807 context->oc.asuchars[context->ucharptr] = 0;
2808 }
2809 context->ucharptr++;
2810
2811 #ifdef COMPILE_PCRE8
2812 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2813 #else
2814 if (context->ucharptr >= 2 || context->length == 0)
2815 #endif
2816 {
2817 if (context->length >= 4)
2818 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2819 #ifdef COMPILE_PCRE8
2820 else if (context->length >= 2)
2821 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2822 else if (context->length >= 1)
2823 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2824 #else
2825 else if (context->length >= 2)
2826 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2827 #endif
2828 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2829
2830 switch(context->ucharptr)
2831 {
2832 case 4 / sizeof(pcre_uchar):
2833 if (context->oc.asint != 0)
2834 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2835 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2836 break;
2837
2838 case 2 / sizeof(pcre_uchar):
2839 if (context->oc.asushort != 0)
2840 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2841 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2842 break;
2843
2844 #ifdef COMPILE_PCRE8
2845 case 1:
2846 if (context->oc.asbyte != 0)
2847 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2848 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2849 break;
2850 #endif
2851
2852 default:
2853 SLJIT_ASSERT_STOP();
2854 break;
2855 }
2856 context->ucharptr = 0;
2857 }
2858
2859 #else
2860
2861 /* Unaligned read is unsupported. */
2862 #ifdef COMPILE_PCRE8
2863 if (context->length > 0)
2864 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2865 #else
2866 if (context->length > 0)
2867 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2868 #endif
2869 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2870
2871 if (othercasebit != 0 && othercasechar == cc)
2872 {
2873 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2874 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2875 }
2876 else
2877 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2878
2879 #endif
2880
2881 cc++;
2882 #ifdef SUPPORT_UTF
2883 utflength--;
2884 }
2885 while (utflength > 0);
2886 #endif
2887
2888 return cc;
2889 }
2890
2891 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2892
/* Rebases the value held in typereg: emits a SUB or ADD of the difference
between the requested offset and the current typeoffset (nothing is emitted
when they are equal), then records the new offset in typeoffset. Expects
typereg, typeoffset and compiler to be in scope. The expansion is two
statements (an if block followed by an assignment), so it must only be used
where a plain statement sequence is valid. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) > typeoffset) \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    else \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    } \
  typeoffset = (value);
2902
/* Rebases the character held in TMP1: emits a SUB or ADD of the difference
between the requested offset and the current charoffset (nothing is emitted
when they are equal), then records the new offset in charoffset. Expects
charoffset and compiler to be in scope. The expansion is two statements (an if
block followed by an assignment), so it must only be used where a plain
statement sequence is valid. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) > charoffset) \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
    else \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
    } \
  charoffset = (value);
2912
2913 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2914 {
2915 DEFINE_COMPILER;
2916 jump_list *found = NULL;
2917 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2918 unsigned int c;
2919 int compares;
2920 struct sljit_jump *jump = NULL;
2921 pcre_uchar *ccbegin;
2922 #ifdef SUPPORT_UCP
2923 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2924 BOOL charsaved = FALSE;
2925 int typereg = TMP1, scriptreg = TMP1;
2926 unsigned int typeoffset;
2927 #endif
2928 int invertcmp, numberofcmps;
2929 unsigned int charoffset;
2930
2931 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2932 fallback_at_str_end(common, fallbacks);
2933 read_char(common);
2934
2935 if ((*cc++ & XCL_MAP) != 0)
2936 {
2937 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2938 #ifndef COMPILE_PCRE8
2939 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2940 #elif defined SUPPORT_UTF
2941 if (common->utf)
2942 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2943 #endif
2944
2945 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2946 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2947 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2948 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2949 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2950 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2951
2952 #ifndef COMPILE_PCRE8
2953 JUMPHERE(jump);
2954 #elif defined SUPPORT_UTF
2955 if (common->utf)
2956 JUMPHERE(jump);
2957 #endif
2958 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2959 #ifdef SUPPORT_UCP
2960 charsaved = TRUE;
2961 #endif
2962 cc += 32 / sizeof(pcre_uchar);
2963 }
2964
2965 /* Scanning the necessary info. */
2966 ccbegin = cc;
2967 compares = 0;
2968 while (*cc != XCL_END)
2969 {
2970 compares++;
2971 if (*cc == XCL_SINGLE)
2972 {
2973 cc += 2;
2974 #ifdef SUPPORT_UTF
2975 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2976 #endif
2977 #ifdef SUPPORT_UCP
2978 needschar = TRUE;
2979 #endif
2980 }
2981 else if (*cc == XCL_RANGE)
2982 {
2983 cc += 2;
2984 #ifdef SUPPORT_UTF
2985 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2986 #endif
2987 cc++;
2988 #ifdef SUPPORT_UTF
2989 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2990 #endif
2991 #ifdef SUPPORT_UCP
2992 needschar = TRUE;
2993 #endif
2994 }
2995 #ifdef SUPPORT_UCP
2996 else
2997 {
2998 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2999 cc++;
3000 switch(*cc)
3001 {
3002 case PT_ANY:
3003 break;
3004
3005 case PT_LAMP:
3006 case PT_GC:
3007 case PT_PC:
3008 case PT_ALNUM:
3009 needstype = TRUE;
3010 break;
3011
3012 case PT_SC:
3013 needsscript = TRUE;
3014 break;
3015
3016 case PT_SPACE:
3017 case PT_PXSPACE:
3018 case PT_WORD:
3019 needstype = TRUE;
3020 needschar = TRUE;
3021 break;
3022
3023 default:
3024 SLJIT_ASSERT_STOP();
3025 break;
3026 }
3027 cc += 2;
3028 }
3029 #endif
3030 }
3031
3032 #ifdef SUPPORT_UCP
3033 /* Simple register allocation. TMP1 is preferred if possible. */
3034 if (needstype || needsscript)
3035 {
3036 if (needschar && !charsaved)
3037 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3038 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3039 if (needschar)
3040 {
3041 if (needstype)
3042 {
3043 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3044 typereg = RETURN_ADDR;
3045 }
3046
3047 if (needsscript)
3048 scriptreg = TMP3;
3049 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3050 }
3051 else if (needstype && needsscript)
3052 scriptreg = TMP3;
3053 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3054
3055 if (needsscript)
3056 {
3057 if (scriptreg == TMP1)
3058 {
3059 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3060 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3061 }
3062 else
3063 {
3064 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3065 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3066 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3067 }
3068 }
3069 }
3070 #endif
3071
3072 /* Generating code. */
3073 cc = ccbegin;
3074 charoffset = 0;
3075 numberofcmps = 0;
3076 #ifdef SUPPORT_UCP
3077 typeoffset = 0;
3078 #endif
3079
3080 while (*cc != XCL_END)
3081 {
3082 compares--;
3083 invertcmp = (compares == 0 && list != fallbacks);
3084 jump = NULL;
3085
3086 if (*cc == XCL_SINGLE)
3087 {
3088 cc ++;
3089 #ifdef SUPPORT_UTF
3090 if (common->utf)
3091 {
3092 GETCHARINC(c, cc);
3093 }
3094 else
3095 #endif
3096 c = *cc++;
3097
3098 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3099 {
3100 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3101 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3102 numberofcmps++;
3103 }
3104 else if (numberofcmps > 0)
3105 {
3106 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3107 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3108 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3109 numberofcmps = 0;
3110 }
3111 else
3112 {
3113 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3114 numberofcmps = 0;
3115 }
3116 }
3117 else if (*cc == XCL_RANGE)
3118 {
3119 cc ++;
3120 #ifdef SUPPORT_UTF
3121 if (common->utf)
3122 {
3123 GETCHARINC(c, cc);
3124 }
3125 else
3126 #endif
3127 c = *cc++;
3128 SET_CHAR_OFFSET(c);
3129 #ifdef SUPPORT_UTF
3130 if (common->utf)
3131 {
3132 GETCHARINC(c, cc);
3133 }
3134 else
3135 #endif
3136 c = *cc++;
3137 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3138 {
3139 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3140 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3141 numberofcmps++;
3142 }
3143 else if (numberofcmps > 0)
3144 {
3145 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3146 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3147 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3148 numberofcmps = 0;
3149 }
3150 else
3151 {
3152 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3153 numberofcmps = 0;
3154 }
3155 }
3156 #ifdef SUPPORT_UCP
3157 else
3158 {
3159 if (*cc == XCL_NOTPROP)
3160 invertcmp ^= 0x1;
3161 cc++;
3162 switch(*cc)
3163 {
3164 case PT_ANY:
3165 if (list != fallbacks)
3166 {
3167 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3168 continue;
3169 }
3170 else if (cc[-1] == XCL_NOTPROP)
3171 continue;
3172 jump = JUMP(SLJIT_JUMP);
3173 break;
3174
3175 case PT_LAMP:
3176 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3177 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3179 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3181 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3182 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3183 break;
3184
3185 case PT_GC:
3186 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3187 SET_TYPE_OFFSET(c);
3188 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3189 break;
3190
3191 case PT_PC:
3192 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3193 break;
3194
3195 case PT_SC:
3196 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3197 break;
3198
3199 case PT_SPACE:
3200 case PT_PXSPACE:
3201 if (*cc == PT_SPACE)
3202 {
3203 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3204 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3205 }
3206 SET_CHAR_OFFSET(9);
3207 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3208 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3209 if (*cc == PT_SPACE)
3210 JUMPHERE(jump);
3211
3212 SET_TYPE_OFFSET(ucp_Zl);
3213 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3214 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3215 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3216 break;
3217
3218 case PT_WORD:
3219 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3220 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3221 /* ... fall through */
3222
3223 case PT_ALNUM:
3224 SET_TYPE_OFFSET(ucp_Ll);
3225 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3226 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3227 SET_TYPE_OFFSET(ucp_Nd);
3228 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3229 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3230 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3231 break;
3232 }
3233 cc += 2;
3234 }
3235 #endif
3236
3237 if (jump != NULL)
3238 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3239 }
3240
3241 if (found != NULL)
3242 set_jumps(found, LABEL());
3243 }
3244
3245 #undef SET_TYPE_OFFSET
3246 #undef SET_CHAR_OFFSET
3247
3248 #endif
3249
/* Generates the hot path code for one single-item opcode.

Arguments:
  common      the compiler state
  type        the opcode to compile
  cc          points to the data that follows the opcode
  fallbacks   list collecting the jumps taken when the item does not match

Returns a pointer just past the operand data consumed by the opcode (cc itself
for opcodes without operands). An opcode that is not handled by the switch
reaches SLJIT_ASSERT_STOP(). */
3250 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3251 {
3252 DEFINE_COMPILER;
3253 int length;
3254 unsigned int c, oc, bit;
3255 compare_context context;
3256 struct sljit_jump *jump[4];
3257 #ifdef SUPPORT_UTF
3258 struct sljit_label *label;
3259 #ifdef SUPPORT_UCP
3260 pcre_uchar propdata[5];
3261 #endif
3262 #endif
3263
3264 switch(type)
3265 {
/* Fails unless STR_PTR equals the "begin" field of the arguments. */
3266 case OP_SOD:
3267 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3268 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3269 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3270 return cc;
3271
/* Fails unless STR_PTR equals the "str" field of the arguments. */
3272 case OP_SOM:
3273 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3275 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3276 return cc;
3277
/* The shared wordboundary routine is called, and the flags it leaves behind
decide the outcome. */
3278 case OP_NOT_WORD_BOUNDARY:
3279 case OP_WORD_BOUNDARY:
3280 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3281 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3282 return cc;
3283
/* The next three groups test one ctype bit of the value that
read_char8_type() leaves in TMP1. */
3284 case OP_NOT_DIGIT:
3285 case OP_DIGIT:
3286 fallback_at_str_end(common, fallbacks);
3287 read_char8_type(common);
3288 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3289 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3290 return cc;
3291
3292 case OP_NOT_WHITESPACE:
3293 case OP_WHITESPACE:
3294 fallback_at_str_end(common, fallbacks);
3295 read_char8_type(common);
3296 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3297 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3298 return cc;
3299
3300 case OP_NOT_WORDCHAR:
3301 case OP_WORDCHAR:
3302 fallback_at_str_end(common, fallbacks);
3303 read_char8_type(common);
3304 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3305 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3306 return cc;
3307
/* Any character except a newline. A fixed newline above 255 is treated as a
two unit sequence: the item fails only when both units are present. */
3308 case OP_ANY:
3309 fallback_at_str_end(common, fallbacks);
3310 read_char(common);
3311 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3312 {
3313 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3314 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3315 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3316 else
3317 jump[1] = check_str_end(common);
3318
3319 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3320 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3321 if (jump[1] != NULL)
3322 JUMPHERE(jump[1]);
3323 JUMPHERE(jump[0]);
3324 }
3325 else
3326 check_newlinechar(common, common->nltype, fallbacks, TRUE);
3327 return cc;
3328
/* Any character at all. In UTF mode the additional code units of a multi-unit
character are skipped as well. */
3329 case OP_ALLANY:
3330 fallback_at_str_end(common, fallbacks);
3331 #ifdef SUPPORT_UTF
3332 if (common->utf)
3333 {
3334 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3335 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3336 #ifdef COMPILE_PCRE8
3337 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3338 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3339 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3340 #else /* COMPILE_PCRE8 */
3341 #ifdef COMPILE_PCRE16
3342 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3343 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3344 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3345 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3346 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3347 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3348 #endif /* COMPILE_PCRE16 */
3349 #endif /* COMPILE_PCRE8 */
3350 JUMPHERE(jump[0]);
3351 return cc;
3352 }
3353 #endif
3354 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3355 return cc;
3356
/* Exactly one code unit, regardless of the UTF setting. */
3357 case OP_ANYBYTE:
3358 fallback_at_str_end(common, fallbacks);
3359 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3360 return cc;
3361
/* A single property test is wrapped into a one-item XCLASS description
(flags, item type, the two operand units, XCL_END) so that the xclass
generator can be reused. */
3362 #ifdef SUPPORT_UTF
3363 #ifdef SUPPORT_UCP
3364 case OP_NOTPROP:
3365 case OP_PROP:
3366 propdata[0] = 0;
3367 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3368 propdata[2] = cc[0];
3369 propdata[3] = cc[1];
3370 propdata[4] = XCL_END;
3371 compile_xclass_hotpath(common, propdata, fallbacks);
3372 return cc + 2;
3373 #endif
3374 #endif
3375
/* CR, optionally followed by NL, or else any newline accepted by
check_newlinechar() for common->bsr_nltype. */
3376 case OP_ANYNL:
3377 fallback_at_str_end(common, fallbacks);
3378 read_char(common);
3379 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3380 /* We don't need to handle soft partial matching case. */
3381 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3382 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3383 else
3384 jump[1] = check_str_end(common);
3385 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3386 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3387 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3388 jump[3] = JUMP(SLJIT_JUMP);
3389 JUMPHERE(jump[0]);
3390 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3391 JUMPHERE(jump[1]);
3392 JUMPHERE(jump[2]);
3393 JUMPHERE(jump[3]);
3394 return cc;
3395
/* The shared hspace / vspace routines are called after the character is read,
and the flags they leave behind decide the outcome. */
3396 case OP_NOT_HSPACE:
3397 case OP_HSPACE:
3398 fallback_at_str_end(common, fallbacks);
3399 read_char(common);
3400 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3401 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3402 return cc;
3403
3404 case OP_NOT_VSPACE:
3405 case OP_VSPACE:
3406 fallback_at_str_end(common, fallbacks);
3407 read_char(common);
3408 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3409 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3410 return cc;
3411
/* The first character must not have a type in the ucp_Mc..ucp_Mn range. The
loop then consumes following characters while their type is in that range;
STR_PTR is restored (from TMP3) to the start of the first character that is
not. */
3412 #ifdef SUPPORT_UCP
3413 case OP_EXTUNI:
3414 fallback_at_str_end(common, fallbacks);
3415 read_char(common);
3416 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3417 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3418 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3419
3420 label = LABEL();
3421 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3422 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3423 read_char(common);
3424 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3425 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3426 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3427
3428 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3429 JUMPHERE(jump[0]);
3430 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3431 {
3432 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3433 /* Since we successfully read a char above, partial matching must occur. */
3434 check_partial(common, TRUE);
3435 JUMPHERE(jump[0]);
3436 }
3437 return cc;
3438 #endif
3439
/* Succeeds at the very end of the subject (jump[0]), or when exactly one
newline sequence remains before the end. */
3440 case OP_EODN:
3441 /* Requires rather complex checks. */
3442 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3443 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3444 {
3445 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3446 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3447 if (common->mode == JIT_COMPILE)
3448 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3449 else
3450 {
3451 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3452 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3453 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3454 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3455 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3456 add_jump(compiler, fallbacks, JUMP(SLJIT_C_NOT_EQUAL));
3457 check_partial(common, TRUE);
3458 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3459 JUMPHERE(jump[1]);
3460 }
3461 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3462 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3463 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3464 }
3465 else if (common->nltype == NLTYPE_FIXED)
3466 {
3467 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3468 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3469 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3470 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3471 }
3472 else
3473 {
3474 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3475 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3476 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3477 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3478 jump[2] = JUMP(SLJIT_C_GREATER);
3479 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3480 /* Equal. */
3481 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3482 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3483 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3484
3485 JUMPHERE(jump[1]);
3486 if (common->nltype == NLTYPE_ANYCRLF)
3487 {
3488 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3489 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3490 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3491 }
3492 else
3493 {
/* STR_PTR is saved in LOCALS1 because read_char() advances it; it is
restored once the newline test has passed. */
3494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3495 read_char(common);
3496 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3497 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3498 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3499 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3500 }
3501 JUMPHERE(jump[2]);
3502 JUMPHERE(jump[3]);
3503 }
3504 JUMPHERE(jump[0]);
3505 check_partial(common, FALSE);
3506 return cc;
3507
/* Fails while any input is left. */
3508 case OP_EOD:
3509 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3510 check_partial(common, FALSE);
3511 return cc;
3512
/* Fails when STR_PTR is past the "begin" field of the arguments, or when the
"notbol" field is non-zero. */
3513 case OP_CIRC:
3514 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3515 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3516 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3517 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3518 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3519 return cc;
3520
/* Like OP_CIRC at "begin"; elsewhere the item succeeds only if input remains
and the preceding character(s) form a newline. */
3521 case OP_CIRCM:
3522 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3523 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3524 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3525 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3526 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3527 jump[0] = JUMP(SLJIT_JUMP);
3528 JUMPHERE(jump[1]);
3529
3530 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3531 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3532 {
3533 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3534 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3535 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3536 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3537 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3538 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3539 }
3540 else
3541 {
3542 skip_char_back(common);
3543 read_char(common);
3544 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3545 }
3546 JUMPHERE(jump[0]);
3547 return cc;
3548
/* Fails when the "noteol" field is set; otherwise behaves as OP_EODN, or as
a plain end-of-subject test when common->endonly is set. */
3549 case OP_DOLL:
3550 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3551 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3552 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3553
3554 if (!common->endonly)
3555 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3556 else
3557 {
3558 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3559 check_partial(common, FALSE);
3560 }
3561 return cc;
3562
/* At the end of the subject the "noteol" field decides; elsewhere a newline
must start at STR_PTR. */
3563 case OP_DOLLM:
3564 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3565 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3566 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3567 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3568 check_partial(common, FALSE);
3569 jump[0] = JUMP(SLJIT_JUMP);
3570 JUMPHERE(jump[1]);
3571
3572 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3573 {
3574 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3575 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3576 if (common->mode == JIT_COMPILE)
3577 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3578 else
3579 {
3580 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3581 /* STR_PTR = STR_END - IN_UCHARS(1) */
3582 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3583 check_partial(common, TRUE);
3584 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3585 JUMPHERE(jump[1]);
3586 }
3587
3588 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3589 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3590 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3591 }
3592 else
3593 {
3594 peek_char(common);
3595 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3596 }
3597 JUMPHERE(jump[0]);
3598 return cc;
3599
/* A literal character. In normal (JIT_COMPILE) mode, a character that is
caseful, has no other case, or whose cases differ in a single bit is compared
by byte_sequence_compare(). Otherwise the character is read and compared
against c and its other case. */
3600 case OP_CHAR:
3601 case OP_CHARI:
3602 length = 1;
3603 #ifdef SUPPORT_UTF
3604 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3605 #endif
3606 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3607 {
3608 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3609 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3610
3611 context.length = IN_UCHARS(length);
3612 context.sourcereg = -1;
3613 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3614 context.ucharptr = 0;
3615 #endif
3616 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3617 }
3618 fallback_at_str_end(common, fallbacks);
3619 read_char(common);
3620 #ifdef SUPPORT_UTF
3621 if (common->utf)
3622 {
3623 GETCHAR(c, cc);
3624 }
3625 else
3626 #endif
3627 c = *cc;
3628 if (type == OP_CHAR || !char_has_othercase(common, cc))
3629 {
3630 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3631 return cc + length;
3632 }
3633 oc = char_othercase(common, c);
3634 bit = c ^ oc;
/* When the two cases differ in exactly one bit, forcing that bit on in the
subject character allows a single comparison. */
3635 if (ispowerof2(bit))
3636 {
3637 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3638 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3639 return cc + length;
3640 }
3641 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3642 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3643 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3644 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3645 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3646 return cc + length;
3647
/* Any character except the given one (and, for OP_NOTI, its other case). */
3648 case OP_NOT:
3649 case OP_NOTI:
3650 fallback_at_str_end(common, fallbacks);
3651 length = 1;
3652 #ifdef SUPPORT_UTF
3653 if (common->utf)
3654 {
3655 #ifdef COMPILE_PCRE8
3656 c = *cc;
3657 if (c < 128)
3658 {
3659 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3660 if (type == OP_NOT || !char_has_othercase(common, cc))
3661 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3662 else
3663 {
3664 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3665 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3666 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3667 }
3668 /* Skip the variable-length character. */
3669 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3670 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3671 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3672 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3673 JUMPHERE(jump[0]);
3674 return cc + 1;
3675 }
3676 else
3677 #endif /* COMPILE_PCRE8 */
3678 {
3679 GETCHARLEN(c, cc, length);
3680 read_char(common);
3681 }
3682 }
3683 else
3684 #endif /* SUPPORT_UTF */
3685 {
3686 read_char(common);
3687 c = *cc;
3688 }
3689
3690 if (type == OP_NOT || !char_has_othercase(common, cc))
3691 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3692 else
3693 {
3694 oc = char_othercase(common, c);
3695 bit = c ^ oc;
3696 if (ispowerof2(bit))
3697 {
3698 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3699 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3700 }
3701 else
3702 {
3703 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3704 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3705 }
3706 }
3707 return cc + length;
3708
/* Bitmap class: bit (c & 7) of byte (c >> 3) of the map at cc is tested.
Where characters above 255 are possible, they fail for OP_CLASS and skip the
bitmap test (and so pass) for OP_NCLASS. */
3709 case OP_CLASS:
3710 case OP_NCLASS:
3711 fallback_at_str_end(common, fallbacks);
3712 read_char(common);
3713 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3714 jump[0] = NULL;
3715 #ifdef COMPILE_PCRE8
3716 /* This check only affects 8 bit mode. In other modes, we
3717 always need to compare the value with 255. */
3718 if (common->utf)
3719 #endif /* COMPILE_PCRE8 */
3720 {
3721 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3722 if (type == OP_CLASS)
3723 {
3724 add_jump(compiler, fallbacks, jump[0]);
3725 jump[0] = NULL;
3726 }
3727 }
3728 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3729 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3730 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3731 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3732 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3733 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3734 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3735 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3736 if (jump[0] != NULL)
3737 JUMPHERE(jump[0]);
3738 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3739 return cc + 32 / sizeof(pcre_uchar);
3740
/* Extended class: the class data starts after the LINK_SIZE length field,
whose value is used to compute the return pointer. */
3741 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3742 case OP_XCLASS:
3743 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3744 return cc + GET(cc, 0) - 1;
3745 #endif
3746
/* Moves STR_PTR back by "length" characters and fails if that would go past
the "begin" field of the arguments. A zero length emits nothing. */
3747 case OP_REVERSE:
3748 length = GET(cc, 0);
3749 if (length == 0)
3750 return cc + LINK_SIZE;
3751 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3752 #ifdef SUPPORT_UTF
3753 if (common->utf)
3754 {
3755 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3756 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3757 label = LABEL();
3758 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3759 skip_char_back(common);
3760 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3761 JUMPTO(SLJIT_C_NOT_ZERO, label);
3762 }
3763 else
3764 #endif
3765 {
3766 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3767 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3768 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3769 }
3770 check_start_used_ptr(common);
3771 return cc + LINK_SIZE;
3772 }
/* Reached only for an opcode that has no case above. */
3773 SLJIT_ASSERT_STOP();
3774 return cc;
3775 }
3776
3777 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3778 {
3779 /* This function consumes at least one input character. */
3780 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3781 DEFINE_COMPILER;
3782 pcre_uchar *ccbegin = cc;
3783 compare_context context;
3784 int size;
3785
3786 context.length = 0;
3787 do
3788 {
3789 if (cc >= ccend)
3790 break;
3791
3792 if (*cc == OP_CHAR)
3793 {
3794 size = 1;
3795 #ifdef SUPPORT_UTF
3796 if (common->utf && HAS_EXTRALEN(cc[1]))
3797 size += GET_EXTRALEN(cc[1]);
3798 #endif
3799 }
3800 else if (*cc == OP_CHARI)
3801 {
3802 size = 1;
3803 #ifdef SUPPORT_UTF
3804 if (common->utf)
3805 {
3806 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3807 size = 0;
3808 else if (HAS_EXTRALEN(cc[1]))
3809 size += GET_EXTRALEN(cc[1]);
3810 }
3811 else
3812 #endif
3813 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3814 size = 0;
3815 }
3816 else
3817 size = 0;
3818
3819 cc += 1 + size;
3820 context.length += IN_UCHARS(size);
3821 }
3822 while (size > 0 && context.length <= 128);
3823
3824 cc = ccbegin;
3825 if (context.length > 0)
3826 {
3827 /* We have a fixed-length byte sequence. */
3828 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3829 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3830
3831 context.sourcereg = -1;
3832 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3833 context.ucharptr = 0;
3834 #endif
3835 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3836 return cc;
3837 }
3838
3839 /* A non-fixed length character will be checked if length == 0. */
3840 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3841 }
3842
3843 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3844 {
3845 DEFINE_COMPILER;
3846 int offset = GET2(cc, 1) << 1;
3847
3848 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3849 if (!common->jscript_compat)
3850 {
3851 if (fallbacks == NULL)
3852 {
3853 /* OVECTOR(1) contains the "string begin - 1" constant. */
3854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3855 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3857 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3858 return JUMP(SLJIT_C_NOT_ZERO);
3859 }
3860 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3861 }
3862 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3863 }
3864
3865 /* Forward declarations. */
3866 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3867 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3868
/* Allocates a zero-filled fallback record of "size" bytes with
sljit_alloc_memory() and pushes it onto the list headed by parent->top,
recording ccstart in its cc field. If the compiler reports an error after the
allocation, the enclosing function returns "error". The names "fallback",
"parent" and "compiler" must be in scope at the expansion site. Note that
"size" is evaluated twice. */
3869 #define PUSH_FALLBACK(size, ccstart, error) \
3870 do \
3871 { \
3872 fallback = sljit_alloc_memory(compiler, (size)); \
3873 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3874 return error; \
3875 memset(fallback, 0, size); \
3876 fallback->prev = parent->top; \
3877 fallback->cc = (ccstart); \
3878 parent->top = fallback; \
3879 } \
3880 while (0)
3881
/* Same as PUSH_FALLBACK, for functions returning void: allocates a
zero-filled fallback descriptor of (size) bytes with sljit_alloc_memory and
pushes it on top of the parent's fallback list.

The macro relies on the "compiler", "parent" and "fallback" names of the
calling scope. ccstart is stored in the "cc" member of the descriptor. If the
compiler is in an error state after the allocation, the calling function
returns immediately. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3894
/* Views the "fallback" variable of the calling scope as a pointer to the
given, more specific fallback structure type. */
3895 #define FALLBACK_AS(type) ((type *)fallback)
3896
/* Generates the hot path code that matches the back reference at cc (OP_REF,
or any other opcode such as OP_REFI for the caseless compare) against the
subject at STR_PTR.

  common      compiler state
  cc          points to the reference opcode; the group number follows it
  fallbacks   list collecting the jumps that are taken when the match fails
  withchecks  when TRUE, also emit the comparison of the capture start
              against OVECTOR(1) (omitted in jscript_compat mode; presumably
              this detects an unset group) and record a jump for an empty
              capture (start == end)
  emptyfail   decides what happens with the empty capture jump: TRUE adds it
              to fallbacks, FALSE makes it land after the generated code

Returns cc + 1 + IMM2_SIZE, the address following the reference. */
3897 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3898 {
3899 DEFINE_COMPILER;
3900 int offset = GET2(cc, 1) << 1;
3901 struct sljit_jump *jump = NULL;
3902 struct sljit_jump *partial;
3903 struct sljit_jump *nopartial;
3904
/* TMP1 = start of the captured string. */
3905 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3906 /* OVECTOR(1) contains the "string begin - 1" constant. */
3907 if (withchecks && !common->jscript_compat)
3908 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3909
3910 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3911 if (common->utf && *cc == OP_REFI)
3912 {
/* Caseless compare in UTF mode: done by the do_utf_caselesscmp helper. */
3913 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
/* TMP2 = end of the captured string. */
3914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3915 if (withchecks)
3916 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3917
3918 /* Needed to save important temporary registers. */
3919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* The second temporary register receives ARGUMENTS; the current STR_PTR is
handed over through its uchar_ptr member. */
3920 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
3922 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3923 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failures; when the mode is not JIT_COMPILE the
value 1 additionally runs check_partial before failing. Any other value
becomes the new STR_PTR. */
3924 if (common->mode == JIT_COMPILE)
3925 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3926 else
3927 {
3928 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3929 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3930 check_partial(common, FALSE);
3931 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3932 JUMPHERE(nopartial);
3933 }
3934 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3935 }
3936 else
3937 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3938 {
/* TMP2 = end - start of the captured string; the flags tell whether it is
zero. */
3939 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3940 if (withchecks)
3941 jump = JUMP(SLJIT_C_ZERO);
3942
/* Advance STR_PTR by that amount. Passing STR_END is a plain failure in
JIT_COMPILE mode; otherwise it is handled at the "partial" jump below. */
3943 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3944 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3945 if (common->mode == JIT_COMPILE)
3946 add_jump(compiler, fallbacks, partial);
3947
/* Call the shared caseful / caseless compare routine. A non-zero TMP2
afterwards is treated as a mismatch. */
3948 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3949 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3950
3951 if (common->mode != JIT_COMPILE)
3952 {
3953 nopartial = JUMP(SLJIT_JUMP);
3954 JUMPHERE(partial);
3955 /* TMP2 -= STR_PTR - STR_END, i.e. drop the part that lies beyond STR_END. */
3956 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3957 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: skip the compare routine. */
3958 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3959 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3960 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3961 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3962 JUMPHERE(partial);
/* This path always ends in the fallbacks after check_partial. */
3963 check_partial(common, FALSE);
3964 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3965 JUMPHERE(nopartial);
3966 }
3967 }
3968
/* Resolve the empty capture jump recorded when withchecks is set. */
3969 if (jump != NULL)
3970 {
3971 if (emptyfail)
3972 add_jump(compiler, fallbacks, jump);
3973 else
3974 JUMPHERE(jump);
3975 }
3976 return cc + 1 + IMM2_SIZE;
3977 }
3978
/* Generates the hot path code of a back reference that is followed by a
repeat operator (OP_CRSTAR ... OP_CRMINRANGE).

  common   compiler state
  cc       points to the reference opcode; the repeat opcode follows the
           group number
  parent   owner of the fallback list; an iterator_fallback is pushed on it

Returns the address following the repeat operator, or NULL when the
PUSH_FALLBACK allocation fails. */
3979 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3980 {
3981 DEFINE_COMPILER;
3982 fallback_common *fallback;
3983 pcre_uchar type;
3984 struct sljit_label *label;
3985 struct sljit_jump *zerolength;
3986 struct sljit_jump *jump = NULL;
3987 pcre_uchar *ccbegin = cc;
3988 int min = 0, max = 0;
3989 BOOL minimize;
3990
3991 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3992
/* The repeat opcode; its lowest bit selects the minimizing code path. */
3993 type = cc[1 + IMM2_SIZE];
3994 minimize = (type & 0x1) != 0;
/* Decode the repeat limits. A max of 0 stands for "no upper limit". */
3995 switch(type)
3996 {
3997 case OP_CRSTAR:
3998 case OP_CRMINSTAR:
3999 min = 0;
4000 max = 0;
4001 cc += 1 + IMM2_SIZE + 1;
4002 break;
4003 case OP_CRPLUS:
4004 case OP_CRMINPLUS:
4005 min = 1;
4006 max = 0;
4007 cc += 1 + IMM2_SIZE + 1;
4008 break;
4009 case OP_CRQUERY:
4010 case OP_CRMINQUERY:
4011 min = 0;
4012 max = 1;
4013 cc += 1 + IMM2_SIZE + 1;
4014 break;
4015 case OP_CRRANGE:
4016 case OP_CRMINRANGE:
4017 min = GET2(cc, 1 + IMM2_SIZE + 1);
4018 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4019 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4020 break;
4021 default:
4022 SLJIT_ASSERT_STOP();
4023 break;
4024 }
4025
/* Maximizing repeat. */
4026 if (!minimize)
4027 {
4028 if (min == 0)
4029 {
/* Two stack items: the current STR_PTR and a 0. */
4030 allocate_stack(common, 2);
4031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4032 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4033 /* Temporary release of STR_PTR. */
4034 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
/* With a NULL list the returned jump is also taken when the capture start
equals OVECTOR(1) (see compile_ref_checks). */
4035 zerolength = compile_ref_checks(common, ccbegin, NULL);
4036 /* Restore if not zero length. */
4037 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4038 }
4039 else
4040 {
4041 allocate_stack(common, 1);
4042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4043 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
4044 }
4045
/* The POSSESSIVE0 local serves as the iteration counter. */
4046 if (min > 1 || max > 1)
4047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4048
/* Start of one iteration. */
4049 label = LABEL();
4050 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
4051
4052 if (min > 1 || max > 1)
4053 {
4054 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4055 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: iterate again without pushing STR_PTR. */
4057 if (min > 1)
4058 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4059 if (max > 1)
4060 {
/* Below the maximum: push STR_PTR and iterate again. */
4061 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4062 allocate_stack(common, 1);
4063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4064 JUMPTO(SLJIT_JUMP, label);
4065 JUMPHERE(jump);
4066 }
4067 }
4068
4069 if (max == 0)
4070 {
4071 /* Includes min > 1 case as well. */
4072 allocate_stack(common, 1);
4073 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4074 JUMPTO(SLJIT_JUMP, label);
4075 }
4076
4077 JUMPHERE(zerolength);
4078 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4079
4080 decrease_call_count(common);
4081 return cc;
4082 }
4083
/* Minimizing repeat. Two stack items are reserved; STACK(0) later receives
STR_PTR and STACK(1) is used as the iteration counter. */
4084 allocate_stack(common, 2);
4085 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4086 if (type != OP_CRMINSTAR)
4087 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4088
4089 if (min == 0)
4090 {
/* Zero iterations are acceptable: store STR_PTR and skip the first match
attempt. */
4091 zerolength = compile_ref_checks(common, ccbegin, NULL);
4092 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4093 jump = JUMP(SLJIT_JUMP);
4094 }
4095 else
4096 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
4097
/* Start of one iteration; the label is recorded in the iterator_fallback. */
4098 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
/* Fail when the counter has already reached the maximum. */
4099 if (max > 0)
4100 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4101
4102 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
4103 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4104
4105 if (min > 1)
4106 {
/* Count the iteration and repeat until the minimum is reached. */
4107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4108 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4110 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
4111 }
4112 else if (max > 0)
4113 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4114
4115 if (jump != NULL)
4116 JUMPHERE(jump);
4117 JUMPHERE(zerolength);
4118
4119 decrease_call_count(common);
4120 return cc;
4121 }
4122
4123 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4124 {
4125 DEFINE_COMPILER;
4126 fallback_common *fallback;
4127 recurse_entry *entry = common->entries;
4128 recurse_entry *prev = NULL;
4129 int start = GET(cc, 1);
4130
4131 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
4132 while (entry != NULL)
4133 {
4134 if (entry->start == start)
4135 break;
4136 prev = entry;
4137 entry = entry->next;
4138 }
4139
4140 if (entry == NULL)
4141 {
4142 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4143 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4144 return NULL;
4145 entry->next = NULL;
4146 entry->entry = NULL;
4147 entry->calls = NULL;
4148 entry->start = start;
4149
4150 if (prev != NULL)
4151 prev->next = entry;
4152 else
4153 common->entries = entry;
4154 }
4155
4156 if (common->has_set_som && common->mark_ptr != 0)
4157 {
4158 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4159 allocate_stack(common, 2);
4160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4162 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4163 }
4164 else if (common->has_set_som || common->mark_ptr != 0)
4165 {
4166 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4167 allocate_stack(common, 1);
4168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4169 }
4170
4171 if (entry->entry == NULL)
4172 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4173 else
4174 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4175 /* Leave if the match is failed. */
4176 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4177 return cc + 1 + LINK_SIZE;
4178 }
4179
/* Generates the hot path code of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO or
OP_BRAMINZERO when it is not conditional.

  common       compiler state
  cc           points to the assertion opcode, or to the preceding
               OP_BRAZERO / OP_BRAMINZERO
  fallback     assert_fallback supplied by the caller; framesize and
               localptr are filled in here. Its hotpath label is set here
               for OP_BRAZERO, but is only read for OP_BRAMINZERO, so in
               that case it must already be valid on entry
  conditional  when TRUE, failure jumps are collected in fallback->condfailed
               instead of fallback->common.topfallbacks (presumably the
               assertion is then the condition of a conditional group)

The leave / accept labels and lists of common are saved on entry and restored
on every return path. Returns the address following the closing ket, or NULL
when the compiler reports an error. */
4180 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
4181 {
4182 DEFINE_COMPILER;
4183 int framesize;
4184 int localptr;
4185 fallback_common altfallback;
4186 pcre_uchar *ccbegin;
4187 pcre_uchar opcode;
4188 pcre_uchar bra = OP_BRA;
4189 jump_list *tmp = NULL;
4190 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
4191 jump_list **found;
4192 /* Saving previous accept variables. */
4193 struct sljit_label *save_leavelabel = common->leavelabel;
4194 struct sljit_label *save_acceptlabel = common->acceptlabel;
4195 jump_list *save_leave = common->leave;
4196 jump_list *save_accept = common->accept;
4197 struct sljit_jump *jump;
4198 struct sljit_jump *brajump = NULL;
4199
4200 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4201 {
4202 SLJIT_ASSERT(!conditional);
4203 bra = *cc;
4204 cc++;
4205 }
4206 localptr = PRIV_DATA(cc);
4207 SLJIT_ASSERT(localptr != 0);
4208 framesize = get_framesize(common, cc, FALSE);
4209 fallback->framesize = framesize;
4210 fallback->localptr = localptr;
4211 opcode = *cc;
4212 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* "found" collects the jumps of the alternatives that matched: a private
list for positive assertions (resolved at "Assert is successful" below), the
failure target for negative ones. */
4213 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin / cc delimit the current alternative. */
4214 ccbegin = cc;
4215 cc += GET(cc, 1);
4216
4217 if (bra == OP_BRAMINZERO)
4218 {
4219 /* This is a braminzero fallback path. */
4220 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4221 free_stack(common, 1);
4222 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4223 }
4224
4225 if (framesize < 0)
4226 {
/* No frame: remember the stack top in localptr and push STR_PTR. */
4227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4228 allocate_stack(common, 1);
4229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4230 }
4231 else
4232 {
/* Push STR_PTR, the previous localptr value and the frame built by
init_frame; localptr is redirected into the newly allocated area. */
4233 allocate_stack(common, framesize + 2);
4234 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4235 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4239 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4240 }
4241
4242 memset(&altfallback, 0, sizeof(fallback_common));
4243 common->leavelabel = NULL;
4244 common->leave = NULL;
/* One iteration per alternative of the assertion. */
4245 while (1)
4246 {
4247 common->acceptlabel = NULL;
4248 common->accept = NULL;
4249 altfallback.top = NULL;
4250 altfallback.topfallbacks = NULL;
4251
/* Every alternative after the first starts from the saved STR_PTR. */
4252 if (*ccbegin == OP_ALT)
4253 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4254
4255 altfallback.cc = ccbegin;
4256 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
4257 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4258 {
4259 common->leavelabel = save_leavelabel;
4260 common->acceptlabel = save_acceptlabel;
4261 common->leave = save_leave;
4262 common->accept = save_accept;
4263 return NULL;
4264 }
/* The jumps collected in common->accept while compiling the alternative land
here, where the alternative has matched. */
4265 common->acceptlabel = LABEL();
4266 if (common->accept != NULL)
4267 set_jumps(common->accept, common->acceptlabel);
4268
4269 /* Reset stack. */
4270 if (framesize < 0)
4271 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4272 else {
4273 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4274 {
4275 /* We don't need to keep the STR_PTR, only the previous localptr. */
4276 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4277 }
4278 else
4279 {
4280 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4281 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4282 }
4283 }
4284
/* A matching alternative makes a negative assertion fail: restore STR_PTR
(and localptr where a frame was saved) before taking the "found" jump. */
4285 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4286 {
4287 /* We know that STR_PTR was stored on the top of the stack. */
4288 if (conditional)
4289 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4290 else if (bra == OP_BRAZERO)
4291 {
4292 if (framesize < 0)
4293 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4294 else
4295 {
4296 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4297 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4299 }
4300 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4302 }
4303 else if (framesize >= 0)
4304 {
4305 /* For OP_BRA and OP_BRAMINZERO. */
4306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4307 }
4308 }
4309 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4310
/* Fallback code of this alternative; its failure jumps continue with the
next alternative, or after the loop when this was the last one. */
4311 compile_fallbackpath(common, altfallback.top);
4312 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4313 {
4314 common->leavelabel = save_leavelabel;
4315 common->acceptlabel = save_acceptlabel;
4316 common->leave = save_leave;
4317 common->accept = save_accept;
4318 return NULL;
4319 }
4320 set_jumps(altfallback.topfallbacks, LABEL());
4321
4322 if (*cc != OP_ALT)
4323 break;
4324
4325 ccbegin = cc;
4326 cc += GET(cc, 1);
4327 }
4328 /* None of them matched. */
4329 if (common->leave != NULL)
4330 set_jumps(common->leave, LABEL());
4331
4332 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4333 {
4334 /* Assert is failed. */
4335 if (conditional || bra == OP_BRAZERO)
4336 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4337
4338 if (framesize < 0)
4339 {
4340 /* The topmost item should be 0. */
4341 if (bra == OP_BRAZERO)
4342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4343 else
4344 free_stack(common, 1);
4345 }
4346 else
4347 {
4348 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4349 /* The topmost item should be 0. */
4350 if (bra == OP_BRAZERO)
4351 {
4352 free_stack(common, framesize + 1);
4353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4354 }
4355 else
4356 free_stack(common, framesize + 2);
4357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4358 }
/* Without OP_BRAZERO the failure goes to the target list; with OP_BRAZERO
the jump is bound to the hotpath label created further below. */
4359 jump = JUMP(SLJIT_JUMP);
4360 if (bra != OP_BRAZERO)
4361 add_jump(compiler, target, jump);
4362
4363 /* Assert is successful. */
4364 set_jumps(tmp, LABEL());
4365 if (framesize < 0)
4366 {
4367 /* We know that STR_PTR was stored on the top of the stack. */
4368 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4369 /* Keep the STR_PTR on the top of the stack. */
4370 if (bra == OP_BRAZERO)
4371 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4372 else if (bra == OP_BRAMINZERO)
4373 {
4374 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4376 }
4377 }
4378 else
4379 {
4380 if (bra == OP_BRA)
4381 {
4382 /* We don't need to keep the STR_PTR, only the previous localptr. */
4383 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4384 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4385 }
4386 else
4387 {
4388 /* We don't need to keep the STR_PTR, only the previous localptr. */
4389 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4390 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top item becomes STR_PTR for OP_BRAZERO and 0 for OP_BRAMINZERO. */
4391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4392 }
4393 }
4394
4395 if (bra == OP_BRAZERO)
4396 {
4397 fallback->hotpath = LABEL();
4398 sljit_set_label(jump, fallback->hotpath);
4399 }
4400 else if (bra == OP_BRAMINZERO)
4401 {
4402 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4403 JUMPHERE(brajump);
4404 if (framesize >= 0)
4405 {
4406 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4407 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4409 }
4410 set_jumps(fallback->common.topfallbacks, LABEL());
4411 }
4412 }
4413 else
4414 {
4415 /* AssertNot is successful. */
4416 if (framesize < 0)
4417 {
4418 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4419 if (bra != OP_BRA)
4420 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4421 else
4422 free_stack(common, 1);
4423 }
4424 else
4425 {
4426 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4428 /* The topmost item should be 0. */
4429 if (bra != OP_BRA)
4430 {
4431 free_stack(common, framesize + 1);
4432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4433 }
4434 else
4435 free_stack(common, framesize + 2);
4436 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4437 }
4438
4439 if (bra == OP_BRAZERO)
4440 fallback->hotpath = LABEL();
4441 else if (bra == OP_BRAMINZERO)
4442 {
4443 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4444 JUMPHERE(brajump);
4445 }
4446
/* With a zero prefix, the jumps of the matched alternatives are resolved
here and the list is emptied. */
4447 if (bra != OP_BRA)
4448 {
4449 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
4450 set_jumps(fallback->common.topfallbacks, LABEL());
4451 fallback->common.topfallbacks = NULL;
4452 }
4453 }
4454
/* Restore the leave / accept state saved on entry. */
4455 common->leavelabel = save_leavelabel;
4456 common->acceptlabel = save_acceptlabel;
4457 common->leave = save_leave;
4458 common->accept = save_accept;
4459 return cc + 1 + LINK_SIZE;
4460 }
4461
4462 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4463 {
4464 int condition = FALSE;
4465 pcre_uchar *slotA = name_table;
4466 pcre_uchar *slotB;
4467 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4468 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4469 sljit_w no_capture;
4470 int i;
4471
4472 locals += refno & 0xff;
4473 refno >>= 8;
4474 no_capture = locals[1];
4475
4476 for (i = 0; i < name_count; i++)
4477 {
4478 if (GET2(slotA, 0) == refno) break;
4479 slotA += name_entry_size;
4480 }
4481
4482 if (i < name_count)
4483 {
4484 /* Found a name for the number - there can be only one; duplicate names
4485 for different numbers are allowed, but not vice versa. First scan down
4486 for duplicates. */
4487
4488 slotB = slotA;
4489 while (slotB > name_table)
4490 {
4491 slotB -= name_entry_size;
4492 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4493 {
4494 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4495 if (condition) break;
4496 }
4497 else break;
4498 }
4499
4500 /* Scan up for duplicates */
4501 if (!condition)
4502 {
4503 slotB = slotA;
4504 for (i++; i < name_count; i++)
4505 {
4506 slotB += name_entry_size;
4507 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4508 {
4509 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4510 if (condition) break;
4511 }
4512 else break;
4513 }
4514 }
4515 }
4516 return condition;
4517 }
4518
4519 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4520 {
4521 int condition = FALSE;
4522 pcre_uchar *slotA = name_table;
4523 pcre_uchar *slotB;
4524 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4525 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4526 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4527 int i;
4528
4529 for (i = 0; i < name_count; i++)
4530 {
4531 if (GET2(slotA, 0) == recno) break;
4532 slotA += name_entry_size;
4533 }
4534
4535 if (i < name_count)
4536 {
4537 /* Found a name for the number - there can be only one; duplicate
4538 names for different numbers are allowed, but not vice versa. First
4539 scan down for duplicates. */
4540
4541 slotB = slotA;
4542 while (slotB > name_table)
4543 {
4544 slotB -= name_entry_size;
4545 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4546 {
4547 condition = GET2(slotB, 0) == group_num;
4548 if (condition) break;
4549 }
4550 else break;
4551 }
4552
4553 /* Scan up for duplicates */
4554 if (!condition)
4555 {
4556 slotB = slotA;
4557 for (i++; i < name_count; i++)
4558 {
4559 slotB += name_entry_size;
4560 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4561 {
4562 condition = GET2(slotB, 0) == group_num;
4563 if (condition) break;
4564 }
4565 else break;
4566 }
4567 }
4568 }
4569 return condition;
4570 }
4571
4572 /*
4573 Handling bracketed expressions is probably the most complex part.
4574
4575 Stack layout naming characters:
4576 S - Push the current STR_PTR
4577 0 - Push a 0 (NULL)
4578 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4579 before the next alternative. Not pushed if there are no alternatives.
4580 M - Any values pushed by the current alternative. Can be empty, or anything.
4581 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4582 L - Push the previous local (pointed by localptr) to the stack
4583 () - optional values stored on the stack
4584 ()* - optional, can be stored multiple times
4585
4586 The following list shows the regular expression templates, their PCRE byte codes
4587 and stack layout supported by pcre-sljit.
4588
4589 (?:) OP_BRA | OP_KET A M
4590 () OP_CBRA | OP_KET C M
4591 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4592 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4593 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4594 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4595 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4596 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4597 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4598 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4599 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4600 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4601 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4602 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4603 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4604 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4605 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4606 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4607 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4608 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4609 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4610 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4611
4612
4613 Stack layout naming characters:
4614 A - Push the alternative index (starting from 0) on the stack.
4615 Not pushed if there are no alternatives.
4616 M - Any values pushed by the current alternative. Can be empty, or anything.
4617
4618 The next list shows the possible content of a bracket:
4619 (|) OP_*BRA | OP_ALT ... M A
4620 (?()|) OP_*COND | OP_ALT M A
4621 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4622 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4623 Or nothing, if trace is unnecessary
4624 */
4625
4626 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4627 {
4628 DEFINE_COMPILER;
4629 fallback_common *fallback;
4630 pcre_uchar opcode;
4631 int localptr = 0;
4632 int offset = 0;
4633 int stacksize;
4634 pcre_uchar *ccbegin;
4635 pcre_uchar *hotpath;
4636 pcre_uchar bra = OP_BRA;
4637 pcre_uchar ket;
4638 assert_fallback *assert;
4639 BOOL has_alternatives;
4640 struct sljit_jump *jump;
4641 struct sljit_jump *skip;
4642 struct sljit_label *rmaxlabel = NULL;
4643 struct sljit_jump *braminzerojump = NULL;
4644
4645 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4646
4647 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4648 {
4649 bra = *cc;
4650 cc++;
4651 opcode = *cc;
4652 }
4653
4654 opcode = *cc;
4655 ccbegin = cc;
4656 hotpath = ccbegin + 1 + LINK_SIZE;
4657
4658 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4659 {
4660 /* Drop this bracket_fallback. */
4661 parent->top = fallback->prev;
4662 return bracketend(cc);
4663 }
4664
4665 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4666 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4667 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4668 cc += GET(cc, 1);
4669
4670 has_alternatives = *cc == OP_ALT;
4671 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4672 {
4673 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4674 if (*hotpath == OP_NRREF)
4675 {
4676 stacksize = GET2(hotpath, 1);
4677 if (common->currententry == NULL || stacksize == RREF_ANY)
4678 has_alternatives = FALSE;
4679 else if (common->currententry->start == 0)
4680 has_alternatives = stacksize != 0;
4681 else
4682 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4683 }
4684 }
4685
4686 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4687 opcode = OP_SCOND;
4688 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4689 opcode = OP_ONCE;
4690
4691 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4692 {
4693 /* Capturing brackets has a pre-allocated space. */
4694 offset = GET2(ccbegin, 1 + LINK_SIZE);
4695 localptr = OVECTOR_PRIV(offset);
4696 offset <<= 1;
4697 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4698 hotpath += IMM2_SIZE;
4699 }
4700 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4701 {
4702 /* Other brackets simply allocate the next entry. */
4703 localptr = PRIV_DATA(ccbegin);
4704 SLJIT_ASSERT(localptr != 0);
4705 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4706 if (opcode == OP_ONCE)
4707 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4708 }
4709
4710 /* Instructions before the first alternative. */
4711 stacksize = 0;
4712 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4713 stacksize++;
4714 if (bra == OP_BRAZERO)
4715 stacksize++;
4716
4717 if (stacksize > 0)
4718 allocate_stack(common, stacksize);
4719
4720 stacksize = 0;
4721 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4722 {
4723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4724 stacksize++;
4725 }
4726
4727 if (bra == OP_BRAZERO)
4728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4729
4730 if (bra == OP_BRAMINZERO)
4731 {
4732 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4733 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4734 if (ket != OP_KETRMIN)
4735 {
4736 free_stack(common, 1);
4737 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4738 }
4739 else
4740 {
4741 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4742 {
4743 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4744 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4745 /* Nothing stored during the first run. */
4746 skip = JUMP(SLJIT_JUMP);
4747 JUMPHERE(jump);
4748 /* Checking zero-length iteration. */
4749 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4750 {
4751 /* When we come from outside, localptr contains the previous STR_PTR. */
4752 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4753 }
4754 else
4755 {
4756 /* Except when the whole stack frame must be saved. */
4757 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4758 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4759 }
4760 JUMPHERE(skip);
4761 }
4762 else
4763 {
4764 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4765 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4766 JUMPHERE(jump);
4767 }
4768 }
4769 }
4770
4771 if (ket == OP_KETRMIN)
4772 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4773
4774 if (ket == OP_KETRMAX)
4775 {
4776 rmaxlabel = LABEL();
4777 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4778 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4779 }
4780
4781 /* Handling capturing brackets and alternatives. */
4782 if (opcode == OP_ONCE)
4783 {
4784 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4785 {
4786 /* Neither capturing brackets nor recursions are not found in the block. */
4787 if (ket == OP_KETRMIN)
4788 {
4789 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4790 allocate_stack(common, 2);
4791 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4793 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4794 }
4795 else if (ket == OP_KETRMAX || has_alternatives)
4796 {
4797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4798 allocate_stack(common, 1);
4799 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4800 }
4801 else
4802 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4803 }
4804 else
4805 {
4806 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4807 {
4808 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4809 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4810 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4811 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4812 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4813 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4814 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4815 }
4816 else
4817 {
4818 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4819 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4820 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4821 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4823 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4824 }
4825 }
4826 }
4827 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4828 {
4829 /* Saving the previous values. */
4830 allocate_stack(common, 3);
4831 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4832 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4833 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4834 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4835 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4836 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4838 }
4839 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4840 {
4841 /* Saving the previous value. */
4842 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4843 allocate_stack(common, 1);
4844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4846 }
4847 else if (has_alternatives)
4848 {
4849 /* Pushing the starting string pointer. */
4850 allocate_stack(common, 1);
4851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4852 }
4853
4854 /* Generating code for the first alternative. */
4855 if (opcode == OP_COND || opcode == OP_SCOND)
4856 {
4857 if (*hotpath == OP_CREF)
4858 {
4859 SLJIT_ASSERT(has_alternatives);
4860 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4861 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4862 hotpath += 1 + IMM2_SIZE;
4863 }
4864 else if (*hotpath == OP_NCREF)
4865 {
4866 SLJIT_ASSERT(has_alternatives);
4867 stacksize = GET2(hotpath, 1);
4868 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4869
4870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4873 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
4874 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4875 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4876 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4877 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4878 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4879
4880 JUMPHERE(jump);
4881 hotpath += 1 + IMM2_SIZE;
4882 }
4883 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4884 {
4885 /* Never has other case. */
4886 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4887
4888 stacksize = GET2(hotpath, 1);
4889 if (common->currententry == NULL)
4890 stacksize = 0;
4891 else if (stacksize == RREF_ANY)
4892 stacksize = 1;
4893 else if (common->currententry->start == 0)
4894 stacksize = stacksize == 0;
4895 else
4896 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4897
4898 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4899 {
4900 SLJIT_ASSERT(!has_alternatives);
4901 if (stacksize != 0)
4902 hotpath += 1 + IMM2_SIZE;
4903 else
4904 {
4905 if (*cc == OP_ALT)
4906 {
4907 hotpath = cc + 1 + LINK_SIZE;
4908 cc += GET(cc, 1);
4909 }
4910 else
4911 hotpath = cc;
4912 }
4913 }
4914 else
4915 {
4916 SLJIT_ASSERT(has_alternatives);
4917
4918 stacksize = GET2(hotpath, 1);
4919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4922 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4923 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4924 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4925 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4926 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4927 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4928 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4929 hotpath += 1 + IMM2_SIZE;
4930 }
4931 }
4932 else
4933 {
4934 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4935 /* Similar code as PUSH_FALLBACK macro. */
4936 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4937 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4938 return NULL;
4939 memset(assert, 0, sizeof(assert_fallback));
4940 assert->common.cc = hotpath;
4941 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4942 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4943 }
4944 }
4945
4946 compile_hotpath(common, hotpath, cc, fallback);
4947 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4948 return NULL;
4949
4950 if (opcode == OP_ONCE)
4951 {
4952 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4953 {
4954 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4955 /* TMP2 which is set here used by OP_KETRMAX below. */
4956 if (ket == OP_KETRMAX)
4957 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4958 else if (ket == OP_KETRMIN)
4959 {
4960 /* Move the STR_PTR to the localptr. */
4961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4962 }
4963 }
4964 else
4965 {
4966 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4967 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4968 if (ket == OP_KETRMAX)
4969 {
4970 /* TMP2 which is set here used by OP_KETRMAX below. */
4971 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4972 }
4973 }
4974 }
4975
4976 stacksize = 0;
4977 if (ket != OP_KET || bra != OP_BRA)
4978 stacksize++;
4979 if (has_alternatives && opcode != OP_ONCE)
4980 stacksize++;
4981
4982 if (stacksize > 0)
4983 allocate_stack(common, stacksize);
4984
4985 stacksize = 0;
4986 if (ket != OP_KET)
4987 {
4988 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4989 stacksize++;
4990 }
4991 else if (bra != OP_BRA)
4992 {
4993 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4994 stacksize++;
4995 }
4996
4997 if (has_alternatives)
4998 {
4999 if (opcode != OP_ONCE)
5000 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5001 if (ket != OP_KETRMAX)
5002 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
5003 }
5004
5005 /* Must be after the hotpath label. */
5006 if (offset != 0)
5007 {
5008 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5010 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5011 }
5012
5013 if (ket == OP_KETRMAX)
5014 {
5015 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5016 {
5017 if (has_alternatives)
5018 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
5019 /* Checking zero-length iteration. */
5020 if (opcode != OP_ONCE)
5021 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5022 else
5023 /* TMP2 must contain the starting STR_PTR. */
5024 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5025 }
5026 else
5027 JUMPTO(SLJIT_JUMP, rmaxlabel);
5028 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
5029 }
5030
5031 if (bra == OP_BRAZERO)
5032 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
5033
5034 if (bra == OP_BRAMINZERO)
5035 {
5036 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
5037 JUMPTO(SLJIT_JUMP, ((braminzero_fallback *)parent)->hotpath);
5038 if (braminzerojump != NULL)
5039 {
5040 JUMPHERE(braminzerojump);
5041 /* We need to release the end pointer to perform the
5042 fallback for the zero-length iteration. When
5043 framesize is < 0, OP_ONCE will do the release itself. */
5044 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
5045 {
5046 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5047 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5048 }
5049 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5050 free_stack(common, 1);
5051 }
5052 /* Continue to the normal fallback. */
5053 }
5054
5055 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5056 decrease_call_count(common);
5057
5058 /* Skip the other alternatives. */
5059 while (*cc == OP_ALT)
5060 cc += GET(cc, 1);
5061 cc += 1 + LINK_SIZE;
5062 return cc;
5063 }
5064
5065 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5066 {
5067 DEFINE_COMPILER;
5068 fallback_common *fallback;
5069 pcre_uchar opcode;
5070 int localptr;
5071 int cbraprivptr = 0;
5072 int framesize;
5073 int stacksize;
5074 int offset = 0;
5075 BOOL zero = FALSE;
5076 pcre_uchar *ccbegin = NULL;
5077 int stack;
5078 struct sljit_label *loop = NULL;
5079 struct jump_list *emptymatch = NULL;
5080
5081 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
5082 if (*cc == OP_BRAPOSZERO)
5083 {
5084 zero = TRUE;
5085 cc++;
5086 }
5087
5088 opcode = *cc;
5089 localptr = PRIV_DATA(cc);
5090 SLJIT_ASSERT(localptr != 0);
5091 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
5092 switch(opcode)
5093 {
5094 case OP_BRAPOS:
5095 case OP_SBRAPOS:
5096 ccbegin = cc + 1 + LINK_SIZE;
5097 break;
5098
5099 case OP_CBRAPOS:
5100 case OP_SCBRAPOS:
5101 offset = GET2(cc, 1 + LINK_SIZE);
5102 cbraprivptr = OVECTOR_PRIV(offset);
5103 offset <<= 1;
5104 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5105 break;
5106
5107 default:
5108 SLJIT_ASSERT_STOP();
5109 break;
5110 }
5111
5112 framesize = get_framesize(common, cc, FALSE);
5113 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
5114 if (framesize < 0)
5115 {
5116 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5117 if (!zero)
5118 stacksize++;
5119 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
5120 allocate_stack(common, stacksize);
5121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5122
5123 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5124 {
5125 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5126 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5127 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5128 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5129 }
5130 else
5131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5132
5133 if (!zero)
5134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5135 }
5136 else
5137 {
5138 stacksize = framesize + 1;
5139 if (!zero)
5140 stacksize++;
5141 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5142 stacksize++;
5143 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
5144 allocate_stack(common, stacksize);
5145
5146 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5147 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5148 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5149 stack = 0;
5150 if (!zero)
5151 {
5152 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5153 stack++;
5154 }
5155 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5156 {
5157 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5158 stack++;
5159 }
5160 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5161 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5162 }
5163
5164 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5165 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5166
5167 loop = LABEL();
5168 while (*cc != OP_KETRPOS)
5169 {
5170 fallback->top = NULL;
5171 fallback->topfallbacks = NULL;
5172 cc += GET(cc, 1);
5173
5174 compile_hotpath(common, ccbegin, cc, fallback);
5175 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5176 return NULL;
5177
5178 if (framesize < 0)
5179 {
5180 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5181
5182 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5183 {
5184 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5185 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5188 }
5189 else
5190 {
5191 if (opcode == OP_SBRAPOS)
5192 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5194 }
5195
5196 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5197 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5198
5199 if (!zero)
5200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5201 }
5202 else
5203 {
5204 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5205 {
5206 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5211 }
5212 else
5213 {
5214 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5215 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5216 if (opcode == OP_SBRAPOS)
5217 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5218 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5219 }
5220
5221 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5222 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5223
5224 if (!zero)
5225 {
5226 if (framesize < 0)
5227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5228 else
5229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5230 }
5231 }
5232 JUMPTO(SLJIT_JUMP, loop);
5233 flush_stubs(common);
5234
5235 compile_fallbackpath(common, fallback->top);
5236 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5237 return NULL;
5238 set_jumps(fallback->topfallbacks, LABEL());
5239
5240 if (framesize < 0)
5241 {
5242 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5243 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5244 else
5245 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5246 }
5247 else
5248 {
5249 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5250 {
5251 /* Last alternative. */
5252 if (*cc == OP_KETRPOS)
5253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5254 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5255 }
5256 else
5257 {
5258 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5259 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5260 }
5261 }
5262
5263 if (*cc == OP_KETRPOS)
5264 break;
5265 ccbegin = cc + 1 + LINK_SIZE;
5266 }
5267
5268 fallback->topfallbacks = NULL;
5269 if (!zero)
5270 {
5271 if (framesize < 0)
5272 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5273 else /* TMP2 is set to [localptr] above. */
5274 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5275 }
5276
5277 /* None of them matched. */
5278 set_jumps(emptymatch, LABEL());
5279 decrease_call_count(common);
5280 return cc + 1 + LINK_SIZE;
5281 }
5282
5283 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5284 {
5285 int class_len;
5286
5287 *opcode = *cc;
5288 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5289 {
5290 cc++;
5291 *type = OP_CHAR;
5292 }
5293 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5294 {
5295 cc++;
5296 *type = OP_CHARI;
5297 *opcode -= OP_STARI - OP_STAR;
5298 }
5299 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5300 {
5301 cc++;
5302 *type = OP_NOT;
5303 *opcode -= OP_NOTSTAR - OP_STAR;
5304 }
5305 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5306 {
5307 cc++;
5308 *type = OP_NOTI;
5309 *opcode -= OP_NOTSTARI - OP_STAR;
5310 }
5311 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5312 {
5313 cc++;
5314 *opcode -= OP_TYPESTAR - OP_STAR;
5315 *type = 0;
5316 }
5317 else
5318 {
5319 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5320 *type = *opcode;
5321 cc++;
5322 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5323 *opcode = cc[class_len - 1];
5324 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5325 {
5326 *opcode -= OP_CRSTAR - OP_STAR;
5327 if (end != NULL)
5328 *end = cc + class_len;
5329 }
5330 else
5331 {
5332 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5333 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5334 *arg2 = GET2(cc, class_len);
5335
5336 if (*arg2 == 0)
5337 {
5338 SLJIT_ASSERT(*arg1 != 0);
5339 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5340 }
5341 if (*arg1 == *arg2)
5342 *opcode = OP_EXACT;
5343
5344 if (end != NULL)
5345 *end = cc + class_len + 2 * IMM2_SIZE;
5346 }
5347 return cc;
5348 }
5349
5350 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5351 {
5352 *arg1 = GET2(cc, 0);
5353 cc += IMM2_SIZE;
5354 }
5355
5356 if (*type == 0)
5357 {
5358 *type = *cc;
5359 if (end != NULL)
5360 *end = next_opcode(common, cc);
5361 cc++;
5362 return cc;
5363 }
5364
5365 if (end != NULL)
5366 {
5367 *end = cc + 1;
5368 #ifdef SUPPORT_UTF
5369 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5370 #endif
5371 }
5372 return cc;
5373 }
5374
5375 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5376 {
5377 DEFINE_COMPILER;
5378 fallback_common *fallback;
5379 pcre_uchar opcode;
5380 pcre_uchar type;
5381 int arg1 = -1, arg2 = -1;
5382 pcre_uchar* end;
5383 jump_list *nomatch = NULL;
5384 struct sljit_jump *jump = NULL;
5385 struct sljit_label *label;
5386
5387 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
5388
5389 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5390
5391 switch(opcode)
5392 {
5393 case OP_STAR:
5394 case OP_PLUS:
5395 case OP_UPTO:
5396 case OP_CRRANGE:
5397 if (type == OP_ANYNL || type == OP_EXTUNI)
5398 {
5399 if (opcode == OP_STAR || opcode == OP_UPTO)
5400 {
5401 allocate_stack(common, 2);
5402 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5404 }
5405 else
5406 {
5407 allocate_stack(common, 1);
5408 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5409 }
5410 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5412
5413 label = LABEL();
5414 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5415 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5416 {
5417 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5418 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5419 if (opcode == OP_CRRANGE && arg2 > 0)
5420 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
5421 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5422 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5424 }
5425
5426 allocate_stack(common, 1);
5427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5428 JUMPTO(SLJIT_JUMP, label);
5429 if (jump != NULL)
5430 JUMPHERE(jump);
5431 }
5432 else
5433 {
5434 allocate_stack(common, 2);
5435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5437 label = LABEL();
5438 compile_char1_hotpath(common, type, cc, &nomatch);
5439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5440 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5441 {
5442 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5443 JUMPTO(SLJIT_JUMP, label);
5444 }
5445 else
5446 {
5447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5448 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5450 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5451 }
5452 set_jumps(nomatch, LABEL());
5453 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5454 add_jump(compiler, &fallback->topfallbacks,
5455 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
5456 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5457 }
5458 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5459 break;
5460
5461 case OP_MINSTAR:
5462 case OP_MINPLUS:
5463 allocate_stack(common, 1);
5464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5465 if (opcode == OP_MINPLUS)
5466 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5467 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5468 break;
5469
5470 case OP_MINUPTO:
5471 case OP_CRMINRANGE:
5472 allocate_stack(common, 2);
5473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5475 if (opcode == OP_CRMINRANGE)
5476 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5477 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5478 break;
5479
5480 case OP_QUERY:
5481 case OP_MINQUERY:
5482 allocate_stack(common, 1);
5483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5484 if (opcode == OP_QUERY)
5485 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5486 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5487 break;
5488
5489 case OP_EXACT:
5490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5491 label = LABEL();
5492 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5494 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5496 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5497 break;
5498
5499 case OP_POSSTAR:
5500 case OP_POSPLUS:
5501 case OP_POSUPTO:
5502 if (opcode != OP_POSSTAR)
5503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5505 label = LABEL();
5506 compile_char1_hotpath(common, type, cc, &nomatch);
5507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5508 if (opcode != OP_POSUPTO)
5509 {
5510 if (opcode == OP_POSPLUS)
5511 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5512 JUMPTO(SLJIT_JUMP, label);
5513 }
5514 else
5515 {
5516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5517 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5518 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5519 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5520 }
5521 set_jumps(nomatch, LABEL());
5522 if (opcode == OP_POSPLUS)
5523 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5524 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5525 break;
5526
5527 case OP_POSQUERY:
5528 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5529 compile_char1_hotpath(common, type, cc, &nomatch);
5530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5531 set_jumps(nomatch, LABEL());
5532 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5533 break;
5534
5535 default:
5536 SLJIT_ASSERT_STOP();
5537 break;
5538 }
5539
5540 decrease_call_count(common);
5541 return end;
5542 }
5543
5544 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5545 {
5546 DEFINE_COMPILER;
5547 fallback_common *fallback;
5548
5549 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
5550
5551 if (*cc == OP_FAIL)
5552 {
5553 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5554 return cc + 1;
5555 }
5556
5557 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5558 {
5559 /* No need to check notempty conditions. */
5560 if (common->acceptlabel == NULL)
5561 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5562 else
5563 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5564 return cc + 1;
5565 }
5566
5567 if (common->acceptlabel == NULL)
5568 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5569 else
5570 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5571 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5572 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5573 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5574 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5575 if (common->acceptlabel == NULL)
5576 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5577 else
5578 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5579 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5580 if (common->acceptlabel == NULL)
5581 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5582 else
5583 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5584 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5585 return cc + 1;
5586 }
5587
5588 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5589 {
5590 DEFINE_COMPILER;
5591 int offset = GET2(cc, 1);
5592
5593 /* Data will be discarded anyway... */
5594 if (common->currententry != NULL)
5595 return cc + 1 + IMM2_SIZE;
5596
5597 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5598 offset <<= 1;
5599 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5601 return cc + 1 + IMM2_SIZE;
5602 }
5603
5604 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5605 {
5606 DEFINE_COMPILER;
5607 fallback_common *fallback;
5608
5609 while (cc < ccend)
5610 {
5611 switch(*cc)
5612 {
5613 case OP_SOD:
5614 case OP_SOM:
5615 case OP_NOT_WORD_BOUNDARY:
5616 case OP_WORD_BOUNDARY:
5617 case OP_NOT_DIGIT:
5618 case OP_DIGIT:
5619 case OP_NOT_WHITESPACE:
5620 case OP_WHITESPACE:
5621 case OP_NOT_WORDCHAR:
5622 case OP_WORDCHAR:
5623 case OP_ANY:
5624 case OP_ALLANY:
5625 case OP_ANYBYTE:
5626 case OP_NOTPROP:
5627 case OP_PROP:
5628 case OP_ANYNL:
5629 case OP_NOT_HSPACE:
5630 case OP_HSPACE:
5631 case OP_NOT_VSPACE:
5632 case OP_VSPACE:
5633 case OP_EXTUNI:
5634 case OP_EODN:
5635 case OP_EOD:
5636 case OP_CIRC:
5637 case OP_CIRCM:
5638 case OP_DOLL:
5639 case OP_DOLLM:
5640 case OP_NOT:
5641 case OP_NOTI:
5642 case OP_REVERSE:
5643 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5644 break;
5645
5646 case OP_SET_SOM:
5647 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5648 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5649 allocate_stack(common, 1);
5650 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5652 cc++;
5653 break;
5654
5655 case OP_CHAR:
5656 case OP_CHARI:
5657 if (common->mode == JIT_COMPILE)
5658 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5659 else
5660 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5661 break;
5662
5663 case OP_STAR:
5664 case OP_MINSTAR:
5665 case OP_PLUS:
5666 case OP_MINPLUS:
5667 case OP_QUERY:
5668 case OP_MINQUERY:
5669 case OP_UPTO:
5670 case OP_MINUPTO:
5671 case OP_EXACT:
5672 case OP_POSSTAR:
5673 case OP_POSPLUS:
5674 case OP_POSQUERY:
5675 case OP_POSUPTO:
5676 case OP_STARI:
5677 case OP_MINSTARI:
5678 case OP_PLUSI:
5679 case OP_MINPLUSI:
5680 case OP_QUERYI:
5681 case OP_MINQUERYI:
5682 case OP_UPTOI:
5683 case OP_MINUPTOI:
5684 case OP_EXACTI:
5685 case OP_POSSTARI:
5686 case OP_POSPLUSI:
5687 case OP_POSQUERYI:
5688 case OP_POSUPTOI:
5689 case OP_NOTSTAR:
5690 case OP_NOTMINSTAR:
5691 case OP_NOTPLUS:
5692 case OP_NOTMINPLUS:
5693 case OP_NOTQUERY:
5694 case OP_NOTMINQUERY:
5695 case OP_NOTUPTO:
5696 case OP_NOTMINUPTO:
5697 case OP_NOTEXACT:
5698 case OP_NOTPOSSTAR:
5699 case OP_NOTPOSPLUS:
5700 case OP_NOTPOSQUERY:
5701 case OP_NOTPOSUPTO:
5702 case OP_NOTSTARI:
5703 case OP_NOTMINSTARI:
5704 case OP_NOTPLUSI:
5705 case OP_NOTMINPLUSI:
5706 case OP_NOTQUERYI:
5707 case OP_NOTMINQUERYI:
5708 case OP_NOTUPTOI:
5709 case OP_NOTMINUPTOI:
5710 case OP_NOTEXACTI:
5711 case OP_NOTPOSSTARI:
5712 case OP_NOTPOSPLUSI:
5713 case OP_NOTPOSQUERYI:
5714 case OP_NOTPOSUPTOI:
5715 case OP_TYPESTAR:
5716 case OP_TYPEMINSTAR:
5717 case OP_TYPEPLUS:
5718 case OP_TYPEMINPLUS:
5719 case OP_TYPEQUERY:
5720 case OP_TYPEMINQUERY:
5721 case OP_TYPEUPTO:
5722 case OP_TYPEMINUPTO:
5723 case OP_TYPEEXACT:
5724 case OP_TYPEPOSSTAR:
5725 case OP_TYPEPOSPLUS:
5726 case OP_TYPEPOSQUERY:
5727 case OP_TYPEPOSUPTO:
5728 cc = compile_iterator_hotpath(common, cc, parent);
5729 break;
5730
5731 case OP_CLASS:
5732 case OP_NCLASS:
5733 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5734 cc = compile_iterator_hotpath(common, cc, parent);
5735 else
5736 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5737 break;
5738
5739 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5740 case OP_XCLASS:
5741 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5742 cc = compile_iterator_hotpath(common, cc, parent);
5743 else
5744 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5745 break;
5746 #endif
5747
5748 case OP_REF:
5749 case OP_REFI:
5750 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5751 cc = compile_ref_iterator_hotpath(common, cc, parent);
5752 else
5753 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5754 break;
5755
5756 case OP_RECURSE:
5757 cc = compile_recurse_hotpath(common, cc, parent);
5758 break;
5759
5760 case OP_ASSERT:
5761 case OP_ASSERT_NOT:
5762 case OP_ASSERTBACK:
5763 case OP_ASSERTBACK_NOT:
5764 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5765 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5766 break;
5767
5768 case OP_BRAMINZERO:
5769 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5770 cc = bracketend(cc + 1);
5771 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5772 {
5773 allocate_stack(common, 1);
5774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5775 }
5776 else
5777 {
5778 allocate_stack(common, 2);
5779 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5780 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5781 }
5782 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5783 if (cc[1] > OP_ASSERTBACK_NOT)
5784 decrease_call_count(common);
5785 break;
5786
5787 case OP_ONCE:
5788 case OP_ONCE_NC:
5789 case OP_BRA:
5790 case OP_CBRA:
5791 case OP_COND:
5792 case OP_SBRA:
5793 case OP_SCBRA:
5794 case OP_SCOND:
5795 cc = compile_bracket_hotpath(common, cc, parent);
5796 break;
5797
5798 case OP_BRAZERO:
5799 if (cc[1] > OP_ASSERTBACK_NOT)
5800 cc = compile_bracket_hotpath(common, cc, parent);
5801 else
5802 {
5803 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5804 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5805 }
5806 break;
5807
5808 case OP_BRAPOS:
5809 case OP_CBRAPOS:
5810 case OP_SBRAPOS:
5811 case OP_SCBRAPOS:
5812 case OP_BRAPOSZERO:
5813 cc = compile_bracketpos_hotpath(common, cc, parent);
5814 break;
5815
5816 case OP_MARK:
5817 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5818 SLJIT_ASSERT(common->mark_ptr != 0);
5819 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5820 allocate_stack(common, 1);
5821 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5823 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
5824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
5825 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
5826 cc += 1 + 2 + cc[1];
5827 break;
5828
5829 case OP_COMMIT:
5830 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5831 cc += 1;
5832 break;
5833
5834 case OP_FAIL:
5835 case OP_ACCEPT:
5836 case OP_ASSERT_ACCEPT:
5837 cc = compile_fail_accept_hotpath(common, cc, parent);
5838 break;
5839
5840 case OP_CLOSE:
5841 cc = compile_close_hotpath(common, cc);
5842 break;
5843
5844 case OP_SKIPZERO:
5845 cc = bracketend(cc + 1);
5846 break;
5847
5848 default:
5849 SLJIT_ASSERT_STOP();
5850 return;
5851 }
5852 if (cc == NULL)
5853 return;
5854 }
5855 SLJIT_ASSERT(cc == ccend);
5856 }
5857
/* Drop the fallback helper macros; PUSH_FALLBACK_NOVALUE and FALLBACK_AS are
   used by compile_hotpath above. */
5858 #undef PUSH_FALLBACK
5859 #undef PUSH_FALLBACK_NOVALUE
5860 #undef FALLBACK_AS
5861
/* Runs compile_fallbackpath() on `current` and returns from the enclosing
   function when the sljit compiler reports an error afterwards. The expansion
   site must have `common` and `compiler` in scope, and the bare `return;`
   means it can only be used inside functions returning void. */
5862 #define COMPILE_FALLBACKPATH(current) \
5863 do \
5864 { \
5865 compile_fallbackpath(common, (current)); \
5866 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5867 return; \
5868 } \
5869 while (0)
5870
/* Casts the variable named `current` at the expansion site to `type *`. */
5871 #define CURRENT_AS(type) ((type *)current)
5872
/* Emits the fallback path of a single character iterator.

   The iterator at current->cc is decoded by get_iterator_parameters() into
   opcode, type, arg1 and arg2; the code emitted here depends on the decoded
   opcode. `current` is also accessed as an iterator_fallback to reach the
   hotpath label that the emitted code jumps back to.

   Arguments:
     common      the compiler state
     current     the fallback record of the iterator
*/
5873 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5874 {
5875 DEFINE_COMPILER;
5876 pcre_uchar *cc = current->cc;
5877 pcre_uchar opcode;
5878 pcre_uchar type;
5879 int arg1 = -1, arg2 = -1;
5880 struct sljit_label *label = NULL;
5881 struct sljit_jump *jump = NULL;
5882
5883 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5884
5885 switch(opcode)
5886 {
5887 case OP_STAR:
5888 case OP_PLUS:
5889 case OP_UPTO:
5890 case OP_CRRANGE:
5891 if (type == OP_ANYNL || type == OP_EXTUNI)
5892 {
/* Pending fallback jumps land here. Pop one saved STR_PTR from the stack and
   re-enter the hot path with it unless the popped value is 0. */
5893 set_jumps(current->topfallbacks, LABEL());
5894 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5895 free_stack(common, 1);
5896 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5897 }
5898 else
5899 {
/* arg2 is forced to 0 for OP_STAR / OP_UPTO and to 1 for OP_PLUS; for
   OP_CRRANGE the value returned by get_iterator_parameters() is kept.
   NOTE(review): arg2 presumably is the minimum repeat count - confirm. */
5900 if (opcode == OP_STAR || opcode == OP_UPTO)
5901 arg2 = 0;
5902 else if (opcode == OP_PLUS)
5903 arg2 = 1;
/* STACK(1) holds a counter and STACK(0) a saved STR_PTR. While the counter is
   above arg2 + 1: decrement it, step the saved STR_PTR back by one character
   and re-enter the hot path. Otherwise both stack slots are released. */
5904 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5905 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5906 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5907 skip_char_back(common);
5908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5909 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
/* Only OP_PLUS and OP_CRRANGE bind their pending fallback jumps here, right
   before the stack slots are released. */
5910 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5911 set_jumps(current->topfallbacks, LABEL());
5912 JUMPHERE(jump);
5913 free_stack(common, 2);
5914 }
5915 break;
5916
5917 case OP_MINSTAR:
5918 case OP_MINPLUS:
/* Reload the saved STR_PTR, match one more character, store the new position
   and re-enter the hot path. A failed character match lands after the jump,
   where the stack slot is released. */
5919 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5920 if (opcode == OP_MINPLUS)
5921 {
/* Jumps collected so far are bound here; the list is then reset so that it
   only collects the failures of the character match emitted below. */
5922 set_jumps(current->topfallbacks, LABEL());
5923 current->topfallbacks = NULL;
5924 }
5925 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5926 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5927 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5928 set_jumps(current->topfallbacks, LABEL());
5929 free_stack(common, 1);
5930 break;
5931
5932 case OP_MINUPTO:
5933 case OP_CRMINRANGE:
5934 if (opcode == OP_CRMINRANGE)
5935 {
/* Bind the jumps collected so far, reset the list, and remember this position
   as the target of the loop-back comparison emitted below. */
5936 set_jumps(current->topfallbacks, LABEL());
5937 current->topfallbacks = NULL;
5938 label = LABEL();
5939 }
5940 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5941 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5942
/* After one more character matched: store the new STR_PTR in STACK(0) and
   increment the counter kept in STACK(1) (the new value stays in TMP1). */
5943 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5945 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5947
/* OP_CRMINRANGE keeps matching (jumps back to label) while the counter is
   below arg2 + 1. */
5948 if (opcode == OP_CRMINRANGE)
5949 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5950
/* Re-enter the hot path: unconditionally for OP_CRMINRANGE with arg1 == 0,
   otherwise only while the counter is below arg1 + 2. */
5951 if (opcode == OP_CRMINRANGE && arg1 == 0)
5952 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5953 else
5954 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5955
5956 set_jumps(current->topfallbacks, LABEL());
5957 free_stack(common, 2);
5958 break;
5959
5960 case OP_QUERY:
/* Load the saved STR_PTR and overwrite its slot with 0. A non-zero value
   re-enters the hot path; otherwise control skips to the stack release. The
   pending fallback jumps are bound in between: they perform the same
   load-and-clear and then re-enter the hot path unconditionally. */
5961 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5963 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5964 jump = JUMP(SLJIT_JUMP);
5965 set_jumps(current->topfallbacks, LABEL());
5966 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5968 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5969 JUMPHERE(jump);
5970 free_stack(common, 1);
5971 break;
5972
5973 case OP_MINQUERY:
/* Load the saved STR_PTR and overwrite its slot with 0. If the loaded value
   is 0, skip straight to the stack release; otherwise match one character
   and re-enter the hot path. A failed character match also ends at the
   stack release. */
5974 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5976 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5977 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5978 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5979 set_jumps(current->topfallbacks, LABEL());
5980 JUMPHERE(jump);
5981 free_stack(common, 1);
5982 break;
5983
5984 case OP_EXACT:
5985 case OP_POSPLUS:
/* Nothing is restored here; the pending fallback jumps are simply bound. */
5986 set_jumps(current->topfallbacks, LABEL());
5987 break;
5988
5989 case OP_POSSTAR:
5990 case OP_POSQUERY:
5991 case OP_POSUPTO:
/* No fallback code is emitted for these opcodes. */
5992 break;
5993
5994 default:
5995 SLJIT_ASSERT_STOP();
5996 break;
5997 }
5998 }
5999
6000 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
6001 {
6002 DEFINE_COMPILER;
6003 pcre_uchar *cc = current->cc;
6004 pcre_uchar type;
6005
6006 type = cc[1 + IMM2_SIZE];
6007 if ((type & 0x1) == 0)
6008 {
6009 set_jumps(current->topfallbacks, LABEL());
6010 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6011 free_stack(common, 1);
6012 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
6013 return;
6014 }
6015
6016 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6017 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
6018 set_jumps(current->topfallbacks, LABEL());
6019 free_stack(common, 2);
6020 }
6021
6022 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
6023 {
6024 DEFINE_COMPILER;
6025
6026 set_jumps(current->topfallbacks, LABEL());
6027
6028 if (common->has_set_som && common->mark_ptr != 0)
6029 {
6030 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6032 free_stack(common, 2);
6033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
6034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6035 }
6036 else if (common->has_set_som || common->mark_ptr != 0)
6037 {
6038 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6039 free_stack(common, 1);
6040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
6041 }
6042 }
6043
6044 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
6045 {
6046 DEFINE_COMPILER;
6047 pcre_uchar *cc = current->cc;
6048 pcre_uchar bra = OP_BRA;
6049 struct sljit_jump *brajump = NULL;
6050
6051 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
6052 if (*cc == OP_BRAZERO)
6053 {
6054 bra = *cc;
6055 cc++;
6056 }
6057
6058 if (bra == OP_BRAZERO)
6059 {
6060 SLJIT_ASSERT(current->topfallbacks == NULL);
6061 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6062 }
6063
6064 if (CURRENT_AS(assert_fallback)->framesize < 0)
6065 {
6066 set_jumps(current->topfallbacks, LABEL());
6067
6068 if (bra == OP_BRAZERO)
6069 {
6070 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6071 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
6072 free_stack(common, 1);
6073 }
6074 return;
6075 }
6076
6077 if (bra == OP_BRAZERO)
6078 {
6079 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
6080 {
6081 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6082 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
6083 free_stack(common, 1);
6084 return;
6085 }
6086 free_stack(common, 1);
6087 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6088 }
6089
6090 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
6091 {
6092 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
6093 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
6095
6096 set_jumps(current->topfallbacks, LABEL());
6097 }
6098 else
6099 set_jumps(current->topfallbacks, LABEL());
6100
6101 if (bra == OP_BRAZERO)
6102 {
6103 /* We know there is enough place on the stack. */
6104 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6105 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6106 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
6107 JUMPHERE(brajump);
6108 }
6109 }
6110
/* Emits the fallback path of a bracket (OP_ONCE, OP_ONCE_NC, OP_BRA, OP_CBRA,
   OP_COND, OP_SBRA, OP_SCBRA, OP_SCOND), optionally prefixed by OP_BRAZERO or
   OP_BRAMINZERO.

   The function first emits code that depends on the closing ket and on the
   zero prefix, then the fallback paths recorded for the bracket content.
   When the bracket has alternatives, it also compiles the hot path and
   fallback path of every remaining alternative. Finally it restores the
   values saved for the bracket and handles the repeat (OP_KETRMAX /
   OP_KETRMIN) and OP_BRAZERO cases.

   Returns early, leaving the code incomplete, when a jump list item cannot be
   allocated or the sljit compiler reports an error.

   Arguments:
     common      the compiler state
     current     the fallback record of the bracket (a bracket_fallback)
*/
6111 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
6112 {
6113 DEFINE_COMPILER;
6114 int opcode;
6115 int offset = 0;
6116 int localptr = CURRENT_AS(bracket_fallback)->localptr;
6117 int stacksize;
6118 int count;
6119 pcre_uchar *cc = current->cc;
6120 pcre_uchar *ccbegin;
6121 pcre_uchar *ccprev;
6122 jump_list *jumplist = NULL;
6123 jump_list *jumplistitem = NULL;
6124 pcre_uchar bra = OP_BRA;
6125 pcre_uchar ket;
6126 assert_fallback *assert;
6127 BOOL has_alternatives;
6128 struct sljit_jump *brazero = NULL;
6129 struct sljit_jump *once = NULL;
6130 struct sljit_jump *cond = NULL;
6131 struct sljit_label *rminlabel = NULL;
6132
/* Remember and step over an optional zero prefix. */
6133 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6134 {
6135 bra = *cc;
6136 cc++;
6137 }
6138
/* Decode the bracket: ket is the opcode closing the whole bracket, and cc is
   advanced by the link stored after the opening opcode. For conditional
   brackets has_alternatives is recomputed from the condition type and the
   condfailed list. A non-zero offset marks a capturing bracket (OP_CBRA /
   OP_SCBRA). OP_COND closed by a repeating ket is handled as OP_SCOND, and
   OP_ONCE_NC is handled as OP_ONCE. */
6139 opcode = *cc;
6140 ccbegin = cc;
6141 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
6142 cc += GET(cc, 1);
6143 has_alternatives = *cc == OP_ALT;
6144 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6145 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
6146 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6147 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
6148 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6149 opcode = OP_SCOND;
6150 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6151 opcode = OP_ONCE;
6152
/* Entry code selected by the closing ket and the zero prefix. The brazero
   jump created here is resolved at the very end of the function. */
6153 if (ket == OP_KETRMAX)
6154 {
6155 if (bra != OP_BRAZERO)
6156 free_stack(common, 1);
6157 else
6158 {
6159 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6160 free_stack(common, 1);
6161 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6162 }
6163 }
6164 else if (ket == OP_KETRMIN)
6165 {
6166 if (bra != OP_BRAMINZERO)
6167 {
6168 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6169 if (opcode >= OP_SBRA || opcode == OP_ONCE)
6170 {
6171 /* Checking zero-length iteration. */
6172 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
6173 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
6174 else
6175 {
6176 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6177 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
6178 }
6179 if (opcode != OP_ONCE)
6180 free_stack(common, 1);
6181 }
6182 else
6183 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
6184 }
/* Target of the conditional jump emitted in the OP_KETRMIN code at the end of
   this function. */
6185 rminlabel = LABEL();
6186 }
6187 else if (bra == OP_BRAZERO)
6188 {
6189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6190 free_stack(common, 1);
6191 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6192 }
6193
/* Opcode specific entry code. For OP_ONCE an unconditional jump (once) is
   emitted and resolved near the end. For conditionals and alternations a
   value is popped into TMP1 and compared against 1, 2, ...; each comparison
   jump is stored in jumplist and resolved in the alternatives loop below. */
6194 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
6195 {
6196 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6197 {
6198 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6199 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6200 }
6201 once = JUMP(SLJIT_JUMP);
6202 }
6203 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6204 {
6205 if (has_alternatives)
6206 {
6207 /* Always exactly one alternative. */
6208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6209 free_stack(common, 1);
6210
6211 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6212 if (SLJIT_UNLIKELY(!jumplistitem))
6213 return;
6214 jumplist = jumplistitem;
6215 jumplistitem->next = NULL;
6216 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
6217 }
6218 }
6219 else if (*cc == OP_ALT)
6220 {
6221 /* Build a jump list. Get the last successfully matched branch index. */
6222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6223 free_stack(common, 1);
6224 count = 1;
6225 do
6226 {
6227 /* Append as the last item. */
6228 if (jumplist != NULL)
6229 {
6230 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6231 jumplistitem = jumplistitem->next;
6232 }
6233 else
6234 {
6235 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6236 jumplist = jumplistitem;
6237 }
6238
6239 if (SLJIT_UNLIKELY(!jumplistitem))
6240 return;
6241
6242 jumplistitem->next = NULL;
6243 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6244 cc += GET(cc, 1);
6245 }
6246 while (*cc == OP_ALT);
6247
/* Rewind cc to the end of the first alternative for the loop below. */
6248 cc = ccbegin + GET(ccbegin, 1);
6249 }
6250
/* Emit the fallback paths chained from current->top, then bind the pending
   fallback jumps of the bracket itself. */
6251 COMPILE_FALLBACKPATH(current->top);
6252 if (current->topfallbacks)
6253 set_jumps(current->topfallbacks, LABEL());
6254
6255 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6256 {
6257 /* Conditional block always has at most one alternative. */
6258 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6259 {
6260 SLJIT_ASSERT(has_alternatives);
6261 assert = CURRENT_AS(bracket_fallback)->u.assert;
6262 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6263 {
6264 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6265 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6267 }
/* The cond jump skips the code of the other alternative; it is resolved after
   the alternatives loop. Failed-condition jumps land right behind it. */
6268 cond = JUMP(SLJIT_JUMP);
6269 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
6270 }
6271 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
6272 {
6273 SLJIT_ASSERT(has_alternatives);
6274 cond = JUMP(SLJIT_JUMP);
6275 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
6276 }
6277 else
6278 SLJIT_ASSERT(!has_alternatives);
6279 }
6280
6281 if (has_alternatives)
6282 {
/* One iteration per remaining alternative: compile its hot path, emit the
   code that runs after it matched, resolve the matching jumplist entry and
   finally emit the fallback paths of the alternative. */
6283 count = 1;
6284 do
6285 {
6286 current->top = NULL;
6287 current->topfallbacks = NULL;
6288 current->nextfallbacks = NULL;
6289 if (*cc == OP_ALT)
6290 {
6291 ccprev = cc + 1 + LINK_SIZE;
6292 cc += GET(cc, 1);
6293 if (opcode != OP_COND && opcode != OP_SCOND)
6294 {
/* Reload the STR_PTR the alternative starts from: from the local at localptr
   when one exists and the bracket is not OP_ONCE, otherwise from the stack. */
6295 if (localptr != 0 && opcode != OP_ONCE)
6296 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6297 else
6298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6299 }
6300 compile_hotpath(common, ccprev, cc, current);
6301 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6302 return;
6303 }
6304
6305 /* Instructions after the current alternative is successfully matched. */
6306 /* There is a similar code in compile_bracket_hotpath. */
6307 if (opcode == OP_ONCE)
6308 {
6309 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
6310 {
6311 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6312 /* TMP2 which is set here used by OP_KETRMAX below. */
6313 if (ket == OP_KETRMAX)
6314 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6315 else if (ket == OP_KETRMIN)
6316 {
6317 /* Move the STR_PTR to the localptr. */
6318 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6319 }
6320 }
6321 else
6322 {
6323 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
6324 if (ket == OP_KETRMAX)
6325 {
6326 /* TMP2 which is set here used by OP_KETRMAX below. */
6327 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6328 }
6329 }
6330 }
6331
/* Count the stack slots needed: one for the alternative index (not for
   OP_ONCE) and one more when the bracket repeats or has a zero prefix. */
6332 stacksize = 0;
6333 if (opcode != OP_ONCE)
6334 stacksize++;
6335 if (ket != OP_KET || bra != OP_BRA)
6336 stacksize++;
6337
6338 if (stacksize > 0) {
6339 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6340 allocate_stack(common, stacksize);
6341 else
6342 {
6343 /* We know we have place at least for one item on the top of the stack. */
6344 SLJIT_ASSERT(stacksize == 1);
6345 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6346 }
6347 }
6348
/* Fill the slots: STR_PTR for a repeating ket, 0 for a plain OP_KET with a
   zero prefix, followed by the index of this alternative. */
6349 stacksize = 0;
6350 if (ket != OP_KET || bra != OP_BRA)
6351 {
6352 if (ket != OP_KET)
6353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6354 else
6355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6356 stacksize++;
6357 }
6358
6359 if (opcode != OP_ONCE)
6360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6361
/* Capturing bracket: store the value at localptr into OVECTOR(offset) and the
   current STR_PTR into OVECTOR(offset + 1). */
6362 if (offset != 0)
6363 {
6364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6365 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6366 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6367 }
6368
6369 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
6370
/* Resolve the comparison jump that belongs to this alternative. */
6371 if (opcode != OP_ONCE)
6372 {
6373 SLJIT_ASSERT(jumplist);
6374 JUMPHERE(jumplist->jump);
6375 jumplist = jumplist->next;
6376 }
6377
6378 COMPILE_FALLBACKPATH(current->top);
6379 if (current->topfallbacks)
6380 set_jumps(current->topfallbacks, LABEL());
6381 SLJIT_ASSERT(!current->nextfallbacks);
6382 }
6383 while (*cc == OP_ALT);
6384 SLJIT_ASSERT(!jumplist);
6385
6386 if (cond != NULL)
6387 {
6388 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
/* assert is only dereferenced when the condition is a negative assertion;
   the framesize test is evaluated last in the condition below. */
6389 assert = CURRENT_AS(bracket_fallback)->u.assert;
6390 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6391
6392 {
6393 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6394 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6396 }
6397 JUMPHERE(cond);
6398 }
6399
6400 /* Free the STR_PTR. */
6401 if (localptr == 0)
6402 free_stack(common, 1);
6403 }
6404
/* Restore the values saved for the bracket, depending on its kind. */
6405 if (offset != 0)
6406 {
6407 /* Using both tmp register is better for instruction scheduling. */
6408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6412 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6413 free_stack(common, 3);
6414 }
6415 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6416 {
6417 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6418 free_stack(common, 1);
6419 }
6420 else if (opcode == OP_ONCE)
6421 {
6422 cc = ccbegin + GET(ccbegin, 1);
6423 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6424 {
6425 /* Reset head and drop saved frame. */
6426 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6427 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
6428 }
6429 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6430 {
6431 /* The STR_PTR must be released. */
6432 free_stack(common, 1);
6433 }
6434
/* The unconditional jump emitted in the OP_ONCE entry code lands here. */
6435 JUMPHERE(once);
6436 /* Restore previous localptr */
6437 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
6439 else if (ket == OP_KETRMIN)
6440 {
6441 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6442 /* See the comment below. */
6443 free_stack(common, 2);
6444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6445 }
6446 }
6447
/* Closing code selected by the ket type; the brazero jump created in the
   entry code is resolved here. */
6448 if (ket == OP_KETRMAX)
6449 {
6450 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6451 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
6452 if (bra == OP_BRAZERO)
6453 {
6454 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6455 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6456 JUMPHERE(brazero);
6457 }
6458 free_stack(common, 1);
6459 }
6460 else if (ket == OP_KETRMIN)
6461 {
6462 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6463
6464 /* OP_ONCE removes everything in case of a fallback, so we don't
6465 need to explicitly release the STR_PTR. The extra release would
6466 affect badly the free_stack(2) above. */
6467 if (opcode != OP_ONCE)
6468 free_stack(common, 1);
6469 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6470 if (opcode == OP_ONCE)
6471 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6472 else if (bra == OP_BRAMINZERO)
6473 free_stack(common, 1);
6474 }
6475 else if (bra == OP_BRAZERO)
6476 {
6477 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6478 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6479 JUMPHERE(brazero);
6480 }
6481 }
6482
6483 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
6484 {
6485 DEFINE_COMPILER;
6486 int offset;
6487 struct sljit_jump *jump;
6488
6489 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
6490 {
6491 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6492 {
6493 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6498 }
6499 set_jumps(current->topfallbacks, LABEL());
6500 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6501 return;
6502 }
6503
6504 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
6505 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6506
6507 if (current->topfallbacks)
6508 {
6509 jump = JUMP(SLJIT_JUMP);
6510 set_jumps(current->topfallbacks, LABEL());
6511 /* Drop the stack frame. */
6512 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6513 JUMPHERE(jump);
6514 }
6515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
6516 }
6517
6518 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
6519 {
6520 assert_fallback fallback;
6521
6522 current->top = NULL;
6523 current->topfallbacks = NULL;
6524 current->nextfallbacks = NULL;
6525 if (current->cc[1] > OP_ASSERTBACK_NOT)
6526 {
6527 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
6528 compile_bracket_hotpath(common, current->cc, current);
6529 compile_bracket_fallbackpath(common, current->top);
6530 }
6531 else
6532 {
6533 memset(&fallback, 0, sizeof(fallback));
6534 fallback.common.cc = current->cc;
6535 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
6536 /* Manual call of compile_assert_hotpath. */
6537 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
6538 }
6539 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
6540 }
6541
6542 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
6543 {
6544 DEFINE_COMPILER;
6545
6546 while (current)
6547 {
6548 if (current->nextfallbacks != NULL)
6549 set_jumps(current->nextfallbacks, LABEL());
6550 switch(*current->cc)
6551 {
6552 case OP_SET_SOM:
6553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6554 free_stack(common, 1);
6555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6556 break;
6557
6558 case OP_STAR:
6559 case OP_MINSTAR:
6560 case OP_PLUS:
6561 case OP_MINPLUS:
6562 case OP_QUERY:
6563 case OP_MINQUERY:
6564 case OP_UPTO:
6565 case OP_MINUPTO:
6566 case OP_EXACT:
6567 case OP_POSSTAR:
6568 case OP_POSPLUS:
6569 case OP_POSQUERY:
6570 case OP_POSUPTO:
6571 case OP_STARI:
6572 case OP_MINSTARI:
6573 case OP_PLUSI:
6574 case OP_MINPLUSI:
6575 case OP_QUERYI:
6576 case OP_MINQUERYI:
6577 case OP_UPTOI:
6578 case OP_MINUPTOI:
6579 case OP_EXACTI:
6580 case OP_POSSTARI:
6581 case OP_POSPLUSI:
6582 case OP_POSQUERYI:
6583 case OP_POSUPTOI:
6584 case OP_NOTSTAR:
6585 case OP_NOTMINSTAR:
6586 case OP_NOTPLUS:
6587 case OP_NOTMINPLUS:
6588 case OP_NOTQUERY:
6589 case OP_NOTMINQUERY:
6590 case OP_NOTUPTO:
6591 case OP_NOTMINUPTO:
6592 case OP_NOTEXACT:
6593 case OP_NOTPOSSTAR:
6594 case OP_NOTPOSPLUS:
6595 case OP_NOTPOSQUERY:
6596 case OP_NOTPOSUPTO:
6597 case OP_NOTSTARI:
6598 case OP_NOTMINSTARI:
6599 case OP_NOTPLUSI:
6600 case OP_NOTMINPLUSI:
6601 case OP_NOTQUERYI:
6602 case OP_NOTMINQUERYI:
6603 case OP_NOTUPTOI:
6604 case OP_NOTMINUPTOI:
6605 case OP_NOTEXACTI:
6606 case OP_NOTPOSSTARI:
6607 case OP_NOTPOSPLUSI:
6608 case OP_NOTPOSQUERYI:
6609 case OP_NOTPOSUPTOI:
6610 case OP_TYPESTAR:
6611 case OP_TYPEMINSTAR:
6612 case OP_TYPEPLUS:
6613 case OP_TYPEMINPLUS:
6614 case OP_TYPEQUERY:
6615 case OP_TYPEMINQUERY:
6616 case OP_TYPEUPTO:
6617 case OP_TYPEMINUPTO:
6618 case OP_TYPEEXACT:
6619 case OP_TYPEPOSSTAR:
6620 case OP_TYPEPOSPLUS:
6621 case OP_TYPEPOSQUERY:
6622 case OP_TYPEPOSUPTO:
6623 case OP_CLASS:
6624 case OP_NCLASS:
6625 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6626 case OP_XCLASS:
6627 #endif
6628 compile_iterator_fallbackpath(common, current);
6629 break;
6630
6631 case OP_REF:
6632 case OP_REFI:
6633 compile_ref_iterator_fallbackpath(common, current);
6634 break;
6635
6636 case OP_RECURSE:
6637 compile_recurse_fallbackpath(common, current);
6638 break;
6639
6640 case OP_ASSERT:
6641 case OP_ASSERT_NOT:
6642 case OP_ASSERTBACK:
6643 case OP_ASSERTBACK_NOT:
6644 compile_assert_fallbackpath(common, current);
6645 break;
6646
6647 case OP_ONCE:
6648 case OP_ONCE_NC:
6649 case OP_BRA:
6650 case OP_CBRA:
6651 case OP_COND:
6652 case OP_SBRA:
6653 case OP_SCBRA:
6654 case OP_SCOND:
6655 compile_bracket_fallbackpath(common, current);
6656 break;
6657
6658 case OP_BRAZERO:
6659 if (current->cc[1] > OP_ASSERTBACK_NOT)
6660 compile_bracket_fallbackpath(common, current);
6661 else
6662 compile_assert_fallbackpath(common, current);
6663 break;
6664
6665 case OP_BRAPOS:
6666 case OP_CBRAPOS:
6667 case OP_SBRAPOS:
6668 case OP_SCBRAPOS:
6669 case OP_BRAPOSZERO:
6670 compile_bracketpos_fallbackpath(common, current);
6671 break;
6672
6673 case OP_BRAMINZERO:
6674 compile_braminzero_fallbackpath(common, current);
6675 break;
6676
6677 case OP_MARK:
6678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6679 free_stack(common, 1);
6680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
6681 break;
6682
6683 case OP_COMMIT:
6684 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6685 if (common->leavelabel == NULL)
6686 add_jump(compiler, &common->leave, JUMP(SLJIT_JUMP));
6687 else
6688 JUMPTO(SLJIT_JUMP, common->leavelabel);
6689 break;
6690
6691 case OP_FAIL:
6692 case OP_ACCEPT:
6693 case OP_ASSERT_ACCEPT:
6694 set_jumps(current->topfallbacks, LABEL());
6695 break;
6696
6697 default:
6698 SLJIT_ASSERT_STOP();
6699 break;
6700 }
6701 current = current->prev;
6702 }
6703 }
6704
6705 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6706 {
6707 DEFINE_COMPILER;
6708 pcre_uchar *cc = common->start + common->currententry->start;
6709 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6710 pcre_uchar *ccend = bracketend(cc);
6711 int localsize = get_localsize(common, ccbegin, ccend);
6712 int framesize = get_framesize(common, cc, TRUE);
6713 int alternativesize;
6714 BOOL needsframe;
6715 fallback_common altfallback;
6716 struct sljit_label *save_leavelabel = common->leavelabel;
6717 jump_list *save_leave = common->leave;
6718 struct sljit_jump *jump;
6719
6720 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6721 needsframe = framesize >= 0;
6722 if (!needsframe)
6723 framesize = 0;
6724 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6725
6726 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0);
6727 common->currententry->entry = LABEL();
6728 set_jumps(common->currententry->calls, common->currententry->entry);
6729
6730 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6731 allocate_stack(common, localsize + framesize + alternativesize);
6732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6733 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6734 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0);
6735 if (needsframe)
6736 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE);
6737
6738 if (alternativesize > 0)
6739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_