/[pcre]/code/branches/pcre16/pcre_jit_compile.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 782 - (show annotations)
Sat Dec 3 23:58:37 2011 UTC (7 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 210314 byte(s)
Start working on UTF-16. Updating macros and adding new ones.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
/* All-in-one build: the JIT compiler is used only from this file, so its
   source is simply included (see the #include right after these macros).
   This way the build system files do not need to be touched. */

/* Memory management of the included compiler goes through pcre_malloc and
   pcre_free. */
#define SLJIT_MALLOC(size)    (pcre_malloc)(size)
#define SLJIT_FREE(ptr)       (pcre_free)(ptr)

/* Configuration switches consumed by the included compiler sources; their
   exact meaning is defined there, not in this file. */
#define SLJIT_CONFIG_AUTO     1
#define SLJIT_CONFIG_STATIC   1

/* Verbose output and debug checks are switched off. SLJIT_DEBUG also decides
   whether DESTROY_REGISTERS gets defined later in this file. */
#define SLJIT_VERBOSE         0
#define SLJIT_DEBUG           0
61
62 #include "sljit/sljitLir.c"
63
/* Refuse to build when the included compiler flags the target architecture
   as unsupported. */
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
#error Unsupported architecture
#endif

/* Allocate memory on the stack. Fast, but limited size. */
#define LOCAL_SPACE_SIZE      32768

#define STACK_GROWTH_RATE     8192

/* Debug aid: when enabled, allocate_stack() overwrites the temporaries
   (TMP1, TMP3, RETURN_ADDR, LOCALS0, LOCALS1) with a dummy value, so code
   that wrongly relies on them surviving an allocation is exposed. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
79 Short summary of the backtracking mechanism employed by the JIT code generator:
80
81 The code generator follows the recursive nature of Perl-compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) or
85 vertical (sub-expression). (See struct fallback_common for more details.)
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive on all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
105 suppose we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
165 typedef struct executable_function {
166 void *executable_func;
167 pcre_jit_callback callback;
168 void *userdata;
169 } executable_function;
170
/* Singly linked list of pending jumps. Items are prepended by add_jump()
   and all of them are bound to one label by set_jumps(). */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
175
/* Kinds of out-of-line stubs. Only stack allocation exists so far. */
enum stub_types {
  stack_alloc
};

/* A deferred stub, recorded by add_stub() and emitted by flush_stubs(). */
typedef struct stub_list {
  enum stub_types type;
  int data;
  /* Jump that enters the stub. */
  struct sljit_jump *start;
  /* Label where execution continues after the stub has run. */
  struct sljit_label *leave;
  struct stub_list *next;
} stub_list;
185
186 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
187
188 /* The following structure is the key data type for the recursive
189 code generator. It is allocated by compile_hotpath, and contains
190 the aguments for compile_fallbackpath. Must be the first member
191 of its descendants. */
192 typedef struct fallback_common {
193 /* Concatenation stack. */
194 struct fallback_common *prev;
195 jump_list *nextfallbacks;
196 /* Internal stack (for component operators). */
197 struct fallback_common *top;
198 jump_list *topfallbacks;
199 /* Opcode pointer. */
200 pcre_uchar *cc;
201 } fallback_common;
202
203 typedef struct assert_fallback {
204 fallback_common common;
205 jump_list *condfailed;
206 /* Less than 0 (-1) if a frame is not needed. */
207 int framesize;
208 /* Points to our private memory word on the stack. */
209 int localptr;
210 /* For iterators. */
211 struct sljit_label *hotpath;
212 } assert_fallback;
213
214 typedef struct bracket_fallback {
215 fallback_common common;
216 /* Where to coninue if an alternative is successfully matched. */
217 struct sljit_label *althotpath;
218 /* For rmin and rmax iterators. */
219 struct sljit_label *recursivehotpath;
220 /* For greedy ? operator. */
221 struct sljit_label *zerohotpath;
222 /* Contains the branches of a failed condition. */
223 union {
224 /* Both for OP_COND, OP_SCOND. */
225 jump_list *condfailed;
226 assert_fallback *assert;
227 /* For OP_ONCE. -1 if not needed. */
228 int framesize;
229 } u;
230 /* Points to our private memory word on the stack. */
231 int localptr;
232 } bracket_fallback;
233
234 typedef struct bracketpos_fallback {
235 fallback_common common;
236 /* Points to our private memory word on the stack. */
237 int localptr;
238 /* Reverting stack is needed. */
239 int framesize;
240 /* Allocated stack size. */
241 int stacksize;
242 } bracketpos_fallback;
243
244 typedef struct braminzero_fallback {
245 fallback_common common;
246 struct sljit_label *hotpath;
247 } braminzero_fallback;
248
249 typedef struct iterator_fallback {
250 fallback_common common;
251 /* Next iteration. */
252 struct sljit_label *hotpath;
253 } iterator_fallback;
254
255 typedef struct recurse_entry {
256 struct recurse_entry *next;
257 /* Contains the function entry. */
258 struct sljit_label *entry;
259 /* Collects the calls until the function is not created. */
260 jump_list *calls;
261 /* Points to the starting opcode. */
262 int start;
263 } recurse_entry;
264
265 typedef struct recurse_fallback {
266 fallback_common common;
267 } recurse_fallback;
268
269 typedef struct compiler_common {
270 struct sljit_compiler *compiler;
271 pcre_uchar *start;
272 int localsize;
273 int *localptrs;
274 const pcre_uint8 *fcc;
275 sljit_w lcc;
276 int cbraptr;
277 int nltype;
278 int newline;
279 int bsr_nltype;
280 int endonly;
281 sljit_w ctypes;
282 sljit_uw name_table;
283 sljit_w name_count;
284 sljit_w name_entry_size;
285 struct sljit_label *acceptlabel;
286 stub_list *stubs;
287 recurse_entry *entries;
288 recurse_entry *currententry;
289 jump_list *accept;
290 jump_list *calllimit;
291 jump_list *stackalloc;
292 jump_list *revertframes;
293 jump_list *wordboundary;
294 jump_list *anynewline;
295 jump_list *hspace;
296 jump_list *vspace;
297 jump_list *casefulcmp;
298 jump_list *caselesscmp;
299 BOOL jscript_compat;
300 #ifdef SUPPORT_UTF8
301 BOOL utf;
302 #ifdef SUPPORT_UCP
303 BOOL useucp;
304 #endif
305 jump_list *utfreadchar;
306 #ifdef COMPILE_PCRE8
307 jump_list *utfreadtype8;
308 #endif
309 #endif /* SUPPORT_UTF8 */
310 #ifdef SUPPORT_UCP
311 jump_list *getucd;
312 #endif
313 } compiler_common;
314
/* For byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  /* The members below exist only when SLJIT_UNALIGNED is set. Both unions
     overlay the same storage as an int, a short, or an array of
     character units. */
  int ucharptr;
  union {
    sljit_i asint;
    sljit_h asshort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_h asshort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
348
/* Marker values stored in saved stack frames by init_frame(). */
enum {
  /* Precedes the saved OVECTOR(0) value. */
  frame_setstrbegin = -1,
  /* Terminates the frame. */
  frame_end = 0
};
353
/* Used for accessing the elements of the stack: element i lives at a
   negative byte offset, STACK(0) being one machine word below the base. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))

/* Register assignment used throughout the generated code. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_GENERAL_REG1
#define STR_END       SLJIT_GENERAL_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_GENERAL_REG3
#define ARGUMENTS     SLJIT_GENERAL_EREG1
#define CALL_COUNT    SLJIT_GENERAL_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
367
/* Locals layout. Every offset is a multiple of the machine word size. */

/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Head of the last recursion. */
#define RECURSIVE_HEAD   (4 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT       (5 * sizeof(sljit_w))
/* Last known position of the requested byte. */
#define REQ_CHAR_PTR     (6 * sizeof(sljit_w))
/* End pointer of the first line. */
#define FIRSTLINE_END    (7 * sizeof(sljit_w))

/* The output vector is stored on the stack, and contains pointers to
   characters. The vector data is divided into two groups: the first group
   contains the start / end character pointers, and the second holds the
   start pointers while the end of the capturing group has not yet been
   reached. */
#define OVECTOR_START    (8 * sizeof(sljit_w))
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))

/* Private data offset of the opcode at cc. Needs a 'common' variable in
   scope, just like OVECTOR_PRIV. */
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
391
392 #ifdef COMPILE_PCRE8
393 #define MOV_UCHAR SLJIT_MOV_UB
394 #else
395 #ifdef COMPILE_PCRE16
396 #define MOV_UCHAR SLJIT_MOV_UH
397 #else
398 #error Unsupported compiling mode
399 #endif
400 #endif
401
/* Shortcuts. All of them except DEFINE_COMPILER expect a local variable
   called 'compiler'; DEFINE_COMPILER declares it from 'common'. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Emit a one / two operand instruction. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Emit a label at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)

/* Emit a jump: unbound, bound to an existing label, or (JUMPHERE) bind an
   earlier jump to a label emitted right here. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))

/* Emit a compare-and-jump: unbound, or bound to an existing label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
423
424 static pcre_uchar* bracketend(pcre_uchar* cc)
425 {
426 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
427 do cc += GET(cc, 1); while (*cc == OP_ALT);
428 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
429 cc += 1 + LINK_SIZE;
430 return cc;
431 }
432
433 /* Functions that might need modification whenever a new opcode is supported:
434 next_opcode
435 get_localspace
436 set_localptrs
437 get_framesize
438 init_frame
439 get_localsize
440 copy_locals
441 compile_hotpath
442 compile_fallbackpath
443 */
444
445 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
446 {
447 SLJIT_UNUSED_ARG(common);
448 switch(*cc)
449 {
450 case OP_SOD:
451 case OP_SOM:
452 case OP_SET_SOM:
453 case OP_NOT_WORD_BOUNDARY:
454 case OP_WORD_BOUNDARY:
455 case OP_NOT_DIGIT:
456 case OP_DIGIT:
457 case OP_NOT_WHITESPACE:
458 case OP_WHITESPACE:
459 case OP_NOT_WORDCHAR:
460 case OP_WORDCHAR:
461 case OP_ANY:
462 case OP_ALLANY:
463 case OP_ANYNL:
464 case OP_NOT_HSPACE:
465 case OP_HSPACE:
466 case OP_NOT_VSPACE:
467 case OP_VSPACE:
468 case OP_EXTUNI:
469 case OP_EODN:
470 case OP_EOD:
471 case OP_CIRC:
472 case OP_CIRCM:
473 case OP_DOLL:
474 case OP_DOLLM:
475 case OP_TYPESTAR:
476 case OP_TYPEMINSTAR:
477 case OP_TYPEPLUS:
478 case OP_TYPEMINPLUS:
479 case OP_TYPEQUERY:
480 case OP_TYPEMINQUERY:
481 case OP_TYPEPOSSTAR:
482 case OP_TYPEPOSPLUS:
483 case OP_TYPEPOSQUERY:
484 case OP_CRSTAR:
485 case OP_CRMINSTAR:
486 case OP_CRPLUS:
487 case OP_CRMINPLUS:
488 case OP_CRQUERY:
489 case OP_CRMINQUERY:
490 case OP_DEF:
491 case OP_BRAZERO:
492 case OP_BRAMINZERO:
493 case OP_BRAPOSZERO:
494 case OP_FAIL:
495 case OP_ACCEPT:
496 case OP_ASSERT_ACCEPT:
497 case OP_SKIPZERO:
498 return cc + 1;
499
500 case OP_ANYBYTE:
501 #ifdef SUPPORT_UTF8
502 if (common->utf) return NULL;
503 #endif
504 return cc + 1;
505
506 case OP_CHAR:
507 case OP_CHARI:
508 case OP_NOT:
509 case OP_NOTI:
510
511 case OP_STAR:
512 case OP_MINSTAR:
513 case OP_PLUS:
514 case OP_MINPLUS:
515 case OP_QUERY:
516 case OP_MINQUERY:
517 case OP_POSSTAR:
518 case OP_POSPLUS:
519 case OP_POSQUERY:
520 case OP_STARI:
521 case OP_MINSTARI:
522 case OP_PLUSI:
523 case OP_MINPLUSI:
524 case OP_QUERYI:
525 case OP_MINQUERYI:
526 case OP_POSSTARI:
527 case OP_POSPLUSI:
528 case OP_POSQUERYI:
529 case OP_NOTSTAR:
530 case OP_NOTMINSTAR:
531 case OP_NOTPLUS:
532 case OP_NOTMINPLUS:
533 case OP_NOTQUERY:
534 case OP_NOTMINQUERY:
535 case OP_NOTPOSSTAR:
536 case OP_NOTPOSPLUS:
537 case OP_NOTPOSQUERY:
538 case OP_NOTSTARI:
539 case OP_NOTMINSTARI:
540 case OP_NOTPLUSI:
541 case OP_NOTMINPLUSI:
542 case OP_NOTQUERYI:
543 case OP_NOTMINQUERYI:
544 case OP_NOTPOSSTARI:
545 case OP_NOTPOSPLUSI:
546 case OP_NOTPOSQUERYI:
547 cc += 2;
548 #ifdef SUPPORT_UTF
549 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
550 #endif
551 return cc;
552
553 case OP_UPTO:
554 case OP_MINUPTO:
555 case OP_EXACT:
556 case OP_POSUPTO:
557 case OP_UPTOI:
558 case OP_MINUPTOI:
559 case OP_EXACTI:
560 case OP_POSUPTOI:
561 case OP_NOTUPTO:
562 case OP_NOTMINUPTO:
563 case OP_NOTEXACT:
564 case OP_NOTPOSUPTO:
565 case OP_NOTUPTOI:
566 case OP_NOTMINUPTOI:
567 case OP_NOTEXACTI:
568 case OP_NOTPOSUPTOI:
569 cc += 2 + IMM2_SIZE;
570 #ifdef SUPPORT_UTF
571 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
572 #endif
573 return cc;
574
575 case OP_NOTPROP:
576 case OP_PROP:
577 case OP_TYPEUPTO:
578 case OP_TYPEMINUPTO:
579 case OP_TYPEEXACT:
580 case OP_TYPEPOSUPTO:
581 case OP_REF:
582 case OP_REFI:
583 case OP_CREF:
584 case OP_NCREF:
585 case OP_RREF:
586 case OP_NRREF:
587 case OP_CLOSE:
588 cc += 1 + IMM2_SIZE;
589 return cc;
590
591 case OP_CRRANGE:
592 case OP_CRMINRANGE:
593 return cc + 1 + 2 * IMM2_SIZE;
594
595 case OP_CLASS:
596 case OP_NCLASS:
597 return cc + 1 + 32 / sizeof(pcre_uchar);
598
599 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
600 case OP_XCLASS:
601 return cc + GET(cc, 1);
602 #endif
603
604 case OP_RECURSE:
605 case OP_ASSERT:
606 case OP_ASSERT_NOT:
607 case OP_ASSERTBACK:
608 case OP_ASSERTBACK_NOT:
609 case OP_REVERSE:
610 case OP_ONCE:
611 case OP_ONCE_NC:
612 case OP_BRA:
613 case OP_BRAPOS:
614 case OP_COND:
615 case OP_SBRA:
616 case OP_SBRAPOS:
617 case OP_SCOND:
618 case OP_ALT:
619 case OP_KET:
620 case OP_KETRMAX:
621 case OP_KETRMIN:
622 case OP_KETRPOS:
623 return cc + 1 + LINK_SIZE;
624
625 case OP_CBRA:
626 case OP_CBRAPOS:
627 case OP_SCBRA:
628 case OP_SCBRAPOS:
629 return cc + 1 + LINK_SIZE + IMM2_SIZE;
630
631 default:
632 return NULL;
633 }
634 }
635
636 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
637 {
638 int localspace = 0;
639 pcre_uchar *alternative;
640 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
641 while (cc < ccend)
642 {
643 switch(*cc)
644 {
645 case OP_ASSERT:
646 case OP_ASSERT_NOT:
647 case OP_ASSERTBACK:
648 case OP_ASSERTBACK_NOT:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRAPOS:
652 case OP_SBRA:
653 case OP_SBRAPOS:
654 case OP_SCOND:
655 localspace += sizeof(sljit_w);
656 cc += 1 + LINK_SIZE;
657 break;
658
659 case OP_CBRAPOS:
660 case OP_SCBRAPOS:
661 localspace += sizeof(sljit_w);
662 cc += 1 + LINK_SIZE + IMM2_SIZE;
663 break;
664
665 case OP_COND:
666 /* Might be a hidden SCOND. */
667 alternative = cc + GET(cc, 1);
668 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
669 localspace += sizeof(sljit_w);
670 cc += 1 + LINK_SIZE;
671 break;
672
673 default:
674 cc = next_opcode(common, cc);
675 if (cc == NULL)
676 return -1;
677 break;
678 }
679 }
680 return localspace;
681 }
682
683 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
684 {
685 pcre_uchar *cc = common->start;
686 pcre_uchar *alternative;
687 while (cc < ccend)
688 {
689 switch(*cc)
690 {
691 case OP_ASSERT:
692 case OP_ASSERT_NOT:
693 case OP_ASSERTBACK:
694 case OP_ASSERTBACK_NOT:
695 case OP_ONCE:
696 case OP_ONCE_NC:
697 case OP_BRAPOS:
698 case OP_SBRA:
699 case OP_SBRAPOS:
700 case OP_SCOND:
701 common->localptrs[cc - common->start] = localptr;
702 localptr += sizeof(sljit_w);
703 cc += 1 + LINK_SIZE;
704 break;
705
706 case OP_CBRAPOS:
707 case OP_SCBRAPOS:
708 common->localptrs[cc - common->start] = localptr;
709 localptr += sizeof(sljit_w);
710 cc += 1 + LINK_SIZE + IMM2_SIZE;
711 break;
712
713 case OP_COND:
714 /* Might be a hidden SCOND. */
715 alternative = cc + GET(cc, 1);
716 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
717 {
718 common->localptrs[cc - common->start] = localptr;
719 localptr += sizeof(sljit_w);
720 }
721 cc += 1 + LINK_SIZE;
722 break;
723
724 default:
725 cc = next_opcode(common, cc);
726 SLJIT_ASSERT(cc != NULL);
727 break;
728 }
729 }
730 }
731
732 /* Returns with -1 if no need for frame. */
733 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
734 {
735 pcre_uchar *ccend = bracketend(cc);
736 int length = 0;
737 BOOL possessive = FALSE;
738 BOOL setsom_found = FALSE;
739
740 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
741 {
742 length = 3;
743 possessive = TRUE;
744 }
745
746 cc = next_opcode(common, cc);
747 SLJIT_ASSERT(cc != NULL);
748 while (cc < ccend)
749 switch(*cc)
750 {
751 case OP_SET_SOM:
752 case OP_RECURSE:
753 if (!setsom_found)
754 {
755 length += 2;
756 setsom_found = TRUE;
757 }
758 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
759 break;
760
761 case OP_CBRA:
762 case OP_CBRAPOS:
763 case OP_SCBRA:
764 case OP_SCBRAPOS:
765 length += 3;
766 cc += 1 + LINK_SIZE + IMM2_SIZE;
767 break;
768
769 default:
770 cc = next_opcode(common, cc);
771 SLJIT_ASSERT(cc != NULL);
772 break;
773 }
774
775 /* Possessive quantifiers can use a special case. */
776 if (SLJIT_UNLIKELY(possessive) && length == 3)
777 return -1;
778
779 if (length > 0)
780 return length + 1;
781 return -1;
782 }
783
784 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
785 {
786 DEFINE_COMPILER;
787 pcre_uchar *ccend = bracketend(cc);
788 BOOL setsom_found = FALSE;
789 int offset;
790
791 /* >= 1 + shortest item size (2) */
792 SLJIT_ASSERT(stackpos >= stacktop + 2);
793
794 stackpos = STACK(stackpos);
795 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
796 cc = next_opcode(common, cc);
797 SLJIT_ASSERT(cc != NULL);
798 while (cc < ccend)
799 switch(*cc)
800 {
801 case OP_SET_SOM:
802 case OP_RECURSE:
803 if (!setsom_found)
804 {
805 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
807 stackpos += (int)sizeof(sljit_w);
808 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
809 stackpos += (int)sizeof(sljit_w);
810 setsom_found = TRUE;
811 }
812 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
813 break;
814
815 case OP_CBRA:
816 case OP_CBRAPOS:
817 case OP_SCBRA:
818 case OP_SCBRAPOS:
819 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
820 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
821 stackpos += (int)sizeof(sljit_w);
822 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
823 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
825 stackpos += (int)sizeof(sljit_w);
826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
827 stackpos += (int)sizeof(sljit_w);
828
829 cc += 1 + LINK_SIZE + IMM2_SIZE;
830 break;
831
832 default:
833 cc = next_opcode(common, cc);
834 SLJIT_ASSERT(cc != NULL);
835 break;
836 }
837
838 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
839 SLJIT_ASSERT(stackpos == STACK(stacktop));
840 }
841
842 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
843 {
844 int localsize = 2;
845 pcre_uchar *alternative;
846 /* Calculate the sum of the local variables. */
847 while (cc < ccend)
848 {
849 switch(*cc)
850 {
851 case OP_ASSERT:
852 case OP_ASSERT_NOT:
853 case OP_ASSERTBACK:
854 case OP_ASSERTBACK_NOT:
855 case OP_ONCE:
856 case OP_ONCE_NC:
857 case OP_BRAPOS:
858 case OP_SBRA:
859 case OP_SBRAPOS:
860 case OP_SCOND:
861 localsize++;
862 cc += 1 + LINK_SIZE;
863 break;
864
865 case OP_CBRA:
866 case OP_SCBRA:
867 localsize++;
868 cc += 1 + LINK_SIZE + IMM2_SIZE;
869 break;
870
871 case OP_CBRAPOS:
872 case OP_SCBRAPOS:
873 localsize += 2;
874 cc += 1 + LINK_SIZE + IMM2_SIZE;
875 break;
876
877 case OP_COND:
878 /* Might be a hidden SCOND. */
879 alternative = cc + GET(cc, 1);
880 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
881 localsize++;
882 cc += 1 + LINK_SIZE;
883 break;
884
885 default:
886 cc = next_opcode(common, cc);
887 SLJIT_ASSERT(cc != NULL);
888 break;
889 }
890 }
891 SLJIT_ASSERT(cc == ccend);
892 return localsize;
893 }
894
895 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
896 BOOL save, int stackptr, int stacktop)
897 {
898 DEFINE_COMPILER;
899 int srcw[2];
900 int count;
901 BOOL tmp1next = TRUE;
902 BOOL tmp1empty = TRUE;
903 BOOL tmp2empty = TRUE;
904 pcre_uchar *alternative;
905 enum {
906 start,
907 loop,
908 end
909 } status;
910
911 status = save ? start : loop;
912 stackptr = STACK(stackptr - 2);
913 stacktop = STACK(stacktop - 1);
914
915 if (!save)
916 {
917 stackptr += sizeof(sljit_w);
918 if (stackptr < stacktop)
919 {
920 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
921 stackptr += sizeof(sljit_w);
922 tmp1empty = FALSE;
923 }
924 if (stackptr < stacktop)
925 {
926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
927 stackptr += sizeof(sljit_w);
928 tmp2empty = FALSE;
929 }
930 /* The tmp1next must be TRUE in either way. */
931 }
932
933 while (status != end)
934 {
935 count = 0;
936 switch(status)
937 {
938 case start:
939 SLJIT_ASSERT(save);
940 count = 1;
941 srcw[0] = RECURSIVE_HEAD;
942 status = loop;
943 break;
944
945 case loop:
946 if (cc >= ccend)
947 {
948 status = end;
949 break;
950 }
951
952 switch(*cc)
953 {
954 case OP_ASSERT:
955 case OP_ASSERT_NOT:
956 case OP_ASSERTBACK:
957 case OP_ASSERTBACK_NOT:
958 case OP_ONCE:
959 case OP_ONCE_NC:
960 case OP_BRAPOS:
961 case OP_SBRA:
962 case OP_SBRAPOS:
963 case OP_SCOND:
964 count = 1;
965 srcw[0] = PRIV_DATA(cc);
966 SLJIT_ASSERT(srcw[0] != 0);
967 cc += 1 + LINK_SIZE;
968 break;
969
970 case OP_CBRA:
971 case OP_SCBRA:
972 count = 1;
973 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
974 cc += 1 + LINK_SIZE + IMM2_SIZE;
975 break;
976
977 case OP_CBRAPOS:
978 case OP_SCBRAPOS:
979 count = 2;
980 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
981 srcw[0] = PRIV_DATA(cc);
982 SLJIT_ASSERT(srcw[0] != 0);
983 cc += 1 + LINK_SIZE + IMM2_SIZE;
984 break;
985
986 case OP_COND:
987 /* Might be a hidden SCOND. */
988 alternative = cc + GET(cc, 1);
989 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
990 {
991 count = 1;
992 srcw[0] = PRIV_DATA(cc);
993 SLJIT_ASSERT(srcw[0] != 0);
994 }
995 cc += 1 + LINK_SIZE;
996 break;
997
998 default:
999 cc = next_opcode(common, cc);
1000 SLJIT_ASSERT(cc != NULL);
1001 break;
1002 }
1003 break;
1004
1005 case end:
1006 SLJIT_ASSERT_STOP();
1007 break;
1008 }
1009
1010 while (count > 0)
1011 {
1012 count--;
1013 if (save)
1014 {
1015 if (tmp1next)
1016 {
1017 if (!tmp1empty)
1018 {
1019 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1020 stackptr += sizeof(sljit_w);
1021 }
1022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1023 tmp1empty = FALSE;
1024 tmp1next = FALSE;
1025 }
1026 else
1027 {
1028 if (!tmp2empty)
1029 {
1030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1031 stackptr += sizeof(sljit_w);
1032 }
1033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1034 tmp2empty = FALSE;
1035 tmp1next = TRUE;
1036 }
1037 }
1038 else
1039 {
1040 if (tmp1next)
1041 {
1042 SLJIT_ASSERT(!tmp1empty);
1043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1044 tmp1empty = stackptr >= stacktop;
1045 if (!tmp1empty)
1046 {
1047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1048 stackptr += sizeof(sljit_w);
1049 }
1050 tmp1next = FALSE;
1051 }
1052 else
1053 {
1054 SLJIT_ASSERT(!tmp2empty);
1055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1056 tmp2empty = stackptr >= stacktop;
1057 if (!tmp2empty)
1058 {
1059 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1060 stackptr += sizeof(sljit_w);
1061 }
1062 tmp1next = TRUE;
1063 }
1064 }
1065 }
1066 }
1067
1068 if (save)
1069 {
1070 if (tmp1next)
1071 {
1072 if (!tmp1empty)
1073 {
1074 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1075 stackptr += sizeof(sljit_w);
1076 }
1077 if (!tmp2empty)
1078 {
1079 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1080 stackptr += sizeof(sljit_w);
1081 }
1082 }
1083 else
1084 {
1085 if (!tmp2empty)
1086 {
1087 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1088 stackptr += sizeof(sljit_w);
1089 }
1090 if (!tmp1empty)
1091 {
1092 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1093 stackptr += sizeof(sljit_w);
1094 }
1095 }
1096 }
1097 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1098 }
1099
1100 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1101 {
1102 return (value & (value - 1)) == 0;
1103 }
1104
1105 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1106 {
1107 while (list)
1108 {
1109 /* sljit_set_label is clever enough to do nothing
1110 if either the jump or the label is NULL */
1111 sljit_set_label(list->jump, label);
1112 list = list->next;
1113 }
1114 }
1115
1116 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1117 {
1118 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1119 if (list_item)
1120 {
1121 list_item->next = *list;
1122 list_item->jump = jump;
1123 *list = list_item;
1124 }
1125 }
1126
1127 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1128 {
1129 DEFINE_COMPILER;
1130 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1131
1132 if (list_item)
1133 {
1134 list_item->type = type;
1135 list_item->data = data;
1136 list_item->start = start;
1137 list_item->leave = LABEL();
1138 list_item->next = common->stubs;
1139 common->stubs = list_item;
1140 }
1141 }
1142
1143 static void flush_stubs(compiler_common *common)
1144 {
1145 DEFINE_COMPILER;
1146 stub_list* list_item = common->stubs;
1147
1148 while (list_item)
1149 {
1150 JUMPHERE(list_item->start);
1151 switch(list_item->type)
1152 {
1153 case stack_alloc:
1154 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1155 break;
1156 }
1157 JUMPTO(SLJIT_JUMP, list_item->leave);
1158 list_item = list_item->next;
1159 }
1160 common->stubs = NULL;
1161 }
1162
1163 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1164 {
1165 DEFINE_COMPILER;
1166
1167 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1168 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1169 }
1170
1171 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1172 {
1173 /* May destroy all locals and registers except TMP2. */
1174 DEFINE_COMPILER;
1175
1176 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1177 #ifdef DESTROY_REGISTERS
1178 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1179 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1180 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1181 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1182 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1183 #endif
1184 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1185 }
1186
/* Emits code which moves STACK_TOP back by size machine words; the
counterpart of allocate_stack. No limit check is generated. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
}
1192
1193 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1194 {
1195 DEFINE_COMPILER;
1196 struct sljit_label *loop;
1197 int i;
1198 /* At this point we can freely use all temporary registers. */
1199 /* TMP1 returns with begin - 1. */
1200 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1201 if (length < 8)
1202 {
1203 for (i = 0; i < length; i++)
1204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1205 }
1206 else
1207 {
1208 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1209 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1210 loop = LABEL();
1211 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1212 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1213 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1214 }
1215 }
1216
/* Emits the code which copies the internal ovector (machine words in the
local frame) into the int array at jit_arguments.offsets, storing each
entry as its distance from jit_arguments.begin, and which leaves the
return value in SLJIT_RETURN_REG. jit_arguments.offsetcount entries are
copied. topbracket only influences the return value: when it is greater
than 1 the generated code scans the ovector backwards, otherwise the
return value is the constant 1. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *earlyexit;

/* At this point we can freely use all registers. */
/* Keep the old content of OVECTOR(1) in SLJIT_GENERAL_REG3 (the scan at
the end compares against it), then store the current position there. */
OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);

/* Register roles for the copy loop:
SLJIT_TEMPORARY_REG2 - offsetcount, used as the loop counter
SLJIT_TEMPORARY_REG3 - offsets - sizeof(int), the destination cursor
SLJIT_TEMPORARY_REG1 - begin, subtracted from every entry
SLJIT_GENERAL_REG1 - address of the first ovector slot, the source cursor */
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
/* Unlikely, but possible */
earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
loop = LABEL();
OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
/* Copy the integer value to the output buffer */
#ifdef COMPILE_PCRE16
/* Halve the difference (presumably converts a byte distance into a
count of 16 bit units - confirm). */
OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
#endif
/* Updating store: the destination cursor was biased by -sizeof(int)
above, so this form is expected to advance it before each write. */
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
JUMPHERE(earlyexit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Start just above pair number topbracket with a counter of
  topbracket + 1, then step down one pair (two words) per iteration
  while the loaded slot still equals the saved OVECTOR(1) value. */
  OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
  OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
  OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
1262
1263 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1264 {
1265 /* Detects if the character has an othercase. */
1266 unsigned int c;
1267
1268 #ifdef SUPPORT_UTF8
1269 if (common->utf)
1270 {
1271 GETCHAR(c, cc);
1272 if (c > 127)
1273 {
1274 #ifdef SUPPORT_UCP
1275 return c != UCD_OTHERCASE(c);
1276 #else
1277 return FALSE;
1278 #endif
1279 }
1280 }
1281 else
1282 #endif
1283 c = *cc;
1284 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1285 }
1286
1287 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1288 {
1289 /* Returns with the othercase. */
1290 #ifdef SUPPORT_UTF
1291 if (common->utf && c > 127)
1292 {
1293 #ifdef SUPPORT_UCP
1294 return UCD_OTHERCASE(c);
1295 #else
1296 return c;
1297 #endif
1298 }
1299 #endif
1300 return TABLE_GET(c, common->fcc, c);
1301 }
1302
/* Detects if the character and its othercase has only 1 bit difference.
The character at cc must have an othercase (see the assert below).
Returns 0 when the two forms differ in more than one bit. Otherwise
the result is (index << 8) | mask, where mask is the differing bit
moved into the range of a single byte and index tells which part of
the encoded character holds it (presumably a byte index inside the
encoded character - confirm against the users of this value). */
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
int n;
#endif

/* Fetch the character and its othercase. */
#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!ispowerof2(bit))
  return 0;

#ifdef COMPILE_PCRE8

#ifdef SUPPORT_UTF8
if (common->utf && c > 127)
  {
  /* Start from the number of extra bytes and move towards the front by
  one position for every 6 bit group which does not contain the bit. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF8 */
return (0 << 8) | bit;

#else /* COMPILE_PCRE8 */

#ifdef COMPILE_PCRE16
#ifdef SUPPORT_UTF16
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down and handled by the
  common return below (index 0 or 1); a lower one is reported with
  index 2 or 3. */
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit <= 255) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UTF16 */
return (bit <= 255) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
#endif /* COMPILE_PCRE16 */

#endif /* COMPILE_PCRE8 */
}
1380
1381 static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1382 {
1383 DEFINE_COMPILER;
1384 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1385 }
1386
/* Emits code which reads one character and steps over it. */
static void read_char(compiler_common *common)
{
/* Reads the character into TMP1, updates STR_PTR.
Does not check STR_END. TMP2 Destroyed. */
DEFINE_COMPILER;
#ifdef SUPPORT_UTF
struct sljit_jump *jump;
#endif

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
if (common->utf)
  {
  /* A unit below the threshold is a complete character; anything else
  goes through the shared utfreadchar routine. */
#ifdef COMPILE_PCRE8
  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
#else
#ifdef COMPILE_PCRE16
  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
#endif
#endif /* COMPILE_PCRE8 */
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#endif
/* Executed on both paths: steps over the only (or the last) unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
1413
1414 static void peek_char(compiler_common *common)
1415 {
1416 /* Reads the character into TMP1, keeps STR_PTR.
1417 Does not check STR_END. TMP2 Destroyed. */
1418 DEFINE_COMPILER;
1419 #ifdef SUPPORT_UTF
1420 struct sljit_jump *jump;
1421 #endif
1422
1423 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1424 #ifdef SUPPORT_UTF
1425 if (common->utf)
1426 {
1427 #ifdef COMPILE_PCRE8
1428 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1429 #else
1430 #ifdef COMPILE_PCRE16
1431 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1432 #endif
1433 #endif /* COMPILE_PCRE8 */
1434 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1435 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1436 JUMPHERE(jump);
1437 }
1438 #endif
1439 }
1440
/* Emits code which fetches the ctypes entry of the next character and
steps over the character. Characters above 255 get type 0. TMP2 is used
as scratch. */
static void read_char8_type(compiler_common *common)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || defined COMPILE_PCRE16
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef COMPILE_PCRE8
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  /* Lead byte of a multi byte sequence: the shared routine produces the
  type and consumes the remaining bytes. */
  add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
#else
#ifdef COMPILE_PCRE16
  /* Type 0 unless the unit is at most 0xff and can index ctypes. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xff);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  JUMPHERE(jump);
  /* Skip low surrogate if necessary. */
  /* TMP2 becomes 1 when the unit just read lies in 0xd800-0xdbff and 0
  otherwise; doubled, it is the number of extra bytes to step over. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
#endif
#endif /* COMPILE_PCRE8 */
  return;
  }
#endif
/* Non-UTF mode: every character is a single unit. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef COMPILE_PCRE16
/* The ctypes array contains only 256 values (indexes 0 to 255). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xff);
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#ifdef COMPILE_PCRE16
JUMPHERE(jump);
#endif
}
1490
/* Emits code which moves STR_PTR to the start of the previous character.
TMP1 is used as scratch in the UTF variants. */
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Only affects STR_PTR. Does not check begin. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one byte at a time for as long as the byte just passed
  has the form 10xxxxxx. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* TMP1 becomes 1 when the unit just passed lies in 0xdc00-0xdfff and
  0 otherwise; doubled, it is the number of extra bytes to step back. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif
/* Non-UTF mode: one unit is one character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
1524
1525 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1526 {
1527 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1528 DEFINE_COMPILER;
1529
1530 if (nltype == NLTYPE_ANY)
1531 {
1532 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1533 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1534 }
1535 else if (nltype == NLTYPE_ANYCRLF)
1536 {
1537 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1538 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1539 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1540 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1541 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1542 }
1543 else
1544 {
1545 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);
1546 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1547 }
1548 }
1549
1550 #ifdef SUPPORT_UTF
1551
1552 #ifdef COMPILE_PCRE8
/* Emits the shared routine which decodes a multi byte UTF-8 character.
It is entered by a fast call; on entry STR_PTR still points to the
first byte. On return STR_PTR has been advanced by the value left in
TMP2, so it points to the last byte of the character. */
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Searching for the first zero. */
/* Bit 0x20 clear: the first byte has the form 110xxxxx. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_C_NOT_ZERO);
/* Two byte sequence. */
/* Result: 5 bits from the first byte, 6 bits from the second. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);

/* Bit 0x10 clear: the first byte has the form 1110xxxx. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
jump = JUMP(SLJIT_C_NOT_ZERO);
/* Three byte sequence. */
/* Result: 4 bits from the first byte, 6 bits from each of the others. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);

/* Four byte sequence. */
/* Every remaining first byte is handled here without further checks.
Result: 3 bits from the first byte, 6 bits from each of the others. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
1610
/* Emits the shared routine which produces the ctypes entry of a multi
byte UTF-8 character and steps over its remaining bytes. It is entered
by a fast call; the memory reads below use offset 0, i.e. STR_PTR is
expected to point just after the first byte already. */
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);

/* Bit 0x20 clear: the first byte has the form 110xxxxx. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_C_NOT_ZERO);
/* Two byte sequence. */
/* Decode the character into TMP2; it may still be below 256. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte character above 255: type 0. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);

/* We only have types for characters less than 256. */
/* Longer sequence: advance STR_PTR by the utf8_table4 entry selected by
the first byte (presumably the number of bytes which follow it -
confirm) and return type 0. */
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
1645
1646 #else /* COMPILE_PCRE8 */
1647
1648 #ifdef COMPILE_PCRE16
1649 static void do_utfreadchar(compiler_common *common)
1650 {
1651 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1652 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1653 DEFINE_COMPILER;
1654 struct sljit_jump *jump;
1655
1656 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1657 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1658 /* Do nothing, only return. */
1659 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1660
1661 JUMPHERE(jump);
1662 /* Combine two 16 bit characters. */
1663 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1665 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1666 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1667 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1668 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1669 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1670 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1671 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1672 }
1673 #endif /* COMPILE_PCRE16 */
1674
1675 #endif /* COMPILE_PCRE8 */
1676
1677 #endif /* SUPPORT_UTF */
1678
1679 #ifdef SUPPORT_UCP
1680
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift which split a character into the index of its block
and the offset inside that block. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

/* Emits the shared routine which performs the two stage Unicode property
table lookup and returns the chartype field of the record found. It is
entered by a fast call. */
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2. */
DEFINE_COMPILER;

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* TMP2 = ucd_stage1[c >> UCD_BLOCK_SHIFT] */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* TMP1 = (c & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT) */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = 16 bit entry of ucd_stage2 selected by TMP1; the final
argument of the load is expected to scale the index by 2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype of the record selected by TMP2; the index is expected
to be scaled by 8, the record size asserted above. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
1705 #endif
1706
1707 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1708 {
1709 DEFINE_COMPILER;
1710 struct sljit_label *mainloop;
1711 struct sljit_label *newlinelabel = NULL;
1712 struct sljit_jump *start;
1713 struct sljit_jump *end = NULL;
1714 struct sljit_jump *nl = NULL;
1715 #ifdef SUPPORT_UTF
1716 struct sljit_jump *singlechar;
1717 #endif
1718 jump_list *newline = NULL;
1719 BOOL newlinecheck = FALSE;
1720 BOOL readuchar = FALSE;
1721
1722 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1723 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1724 newlinecheck = TRUE;
1725
1726 if (firstline)
1727 {
1728 /* Search for the end of the first line. */
1729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1731
1732 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1733 {
1734 mainloop = LABEL();
1735 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1736 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1737 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1738 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1739 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1740 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1741 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1742 }
1743 else
1744 {
1745 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1746 mainloop = LABEL();
1747 /* Continual stores does not cause data dependency. */
1748 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1749 read_char(common);
1750 check_newlinechar(common, common->nltype, &newline, TRUE);
1751 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1752 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1753 set_jumps(newline, LABEL());
1754 }
1755
1756 JUMPHERE(end);
1757 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1758 }
1759
1760 start = JUMP(SLJIT_JUMP);
1761
1762 if (newlinecheck)
1763 {
1764 newlinelabel = LABEL();
1765 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1766 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1767 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1768 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1769 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1771 nl = JUMP(SLJIT_JUMP);
1772 }
1773
1774 mainloop = LABEL();
1775
1776 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1777 #ifdef SUPPORT_UTF8
1778 if (common->utf) readuchar = TRUE;
1779 #endif
1780 if (newlinecheck) readuchar = TRUE;
1781
1782 if (readuchar)
1783 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1784
1785 if (newlinecheck)
1786 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1787
1788 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1789 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1790 if (common->utf)
1791 {
1792 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1793 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1794 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1795 JUMPHERE(singlechar);
1796 }
1797 #endif
1798 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1799 if (common->utf)
1800 {
1801 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1802 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1803 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1804 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1805 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1806 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1807 JUMPHERE(singlechar);
1808 }
1809 #endif
1810 JUMPHERE(start);
1811
1812 if (newlinecheck)
1813 {
1814 JUMPHERE(end);
1815 JUMPHERE(nl);
1816 }
1817
1818 return mainloop;
1819 }
1820
/* Emits a loop which advances STR_PTR until the unit at STR_PTR equals
firstchar (or, when caseless is set, its othercase taken from
common->fcc), or until STR_END is reached. When firstline is set, the
search is limited by FIRSTLINE_END: STR_END is saved in the POSSESSIVE0
local for the duration of the loop and restored afterwards.
TMP1 and TMP2 may be destroyed. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar firstchar, BOOL caseless, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *leave;
struct sljit_jump *found;
pcre_uchar oc, bit;

if (firstline)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
  }

start = LABEL();
leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

oc = firstchar;
if (caseless)
  oc = TABLE_GET(firstchar, common->fcc, firstchar);
if (firstchar == oc)
  found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstchar);
else
  {
  bit = firstchar ^ oc;
  if (ispowerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on and
    compare once. */
    OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstchar | bit);
    }
  else
    {
    /* TMP2 = (TMP1 == firstchar) | (TMP1 == oc) */
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstchar);
    COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
    COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
    found = JUMP(SLJIT_C_NOT_ZERO);
    }
  }

/* No match at this position: step over the whole character. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  /* Step over one more unit when the unit is in 0xd800-0xdbff. */
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif
JUMPTO(SLJIT_JUMP, start);
JUMPHERE(found);
JUMPHERE(leave);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
}
1889
1890 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1891 {
1892 DEFINE_COMPILER;
1893 struct sljit_label *loop;
1894 struct sljit_jump *lastchar;
1895 struct sljit_jump *firstchar;
1896 struct sljit_jump *leave;
1897 struct sljit_jump *foundcr = NULL;
1898 struct sljit_jump *notfoundnl;
1899 jump_list *newline = NULL;
1900
1901 if (firstline)
1902 {
1903 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1904 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1905 }
1906
1907 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1908 {
1909 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1910 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1912 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1913 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1914
1915 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
1916 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1917 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1918 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1919
1920 loop = LABEL();
1921 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1922 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1923 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
1924 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
1925 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1926 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1927
1928 JUMPHERE(leave);
1929 JUMPHERE(firstchar);
1930 JUMPHERE(lastchar);
1931
1932 if (firstline)
1933 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1934 return;
1935 }
1936
1937 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1938 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1939 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1940 skip_char_back(common);
1941
1942 loop = LABEL();
1943 read_char(common);
1944 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1945 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1946 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1947 check_newlinechar(common, common->nltype, &newline, FALSE);
1948 set_jumps(newline, loop);
1949
1950 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1951 {
1952 leave = JUMP(SLJIT_JUMP);
1953 JUMPHERE(foundcr);
1954 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1955 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1956 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1957 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1958 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1959 JUMPHERE(notfoundnl);
1960 JUMPHERE(leave);
1961 }
1962 JUMPHERE(lastchar);
1963 JUMPHERE(firstchar);
1964
1965 if (firstline)
1966 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1967 }
1968
/* Emits a loop which advances STR_PTR until the unit at STR_PTR selects
a set bit in the bit map at address start_bits, or until STR_END is
reached. When firstline is set, the search is limited by FIRSTLINE_END:
STR_END is saved in the POSSESSIVE0 local for the duration of the loop
and restored afterwards.
TMP1 and TMP2 may be destroyed, and TMP3 as well in UTF mode. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *leave;
struct sljit_jump *found;

if (firstline)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
  }

start = LABEL();
leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
/* Keep the unit in TMP3: TMP1 is overwritten by the bit test, but the
UTF skipping code below needs the original value. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif
#ifndef COMPILE_PCRE8
/* NOTE(review): the mask makes units above 255 share the map entries of
their low 8 bits - confirm that this matches the way start_bits is
built for wider units. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xff);
#endif
/* Test bit (c & 7) of the byte at start_bits + (c >> 3). */
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
found = JUMP(SLJIT_C_NOT_ZERO);

#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
/* No match at this position: step over the whole character. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  /* Step over one more unit when the unit is in 0xd800-0xdbff. */
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif
JUMPTO(SLJIT_JUMP, start);
JUMPHERE(found);
JUMPHERE(leave);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
}
2030
2031 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar reqchar, BOOL caseless, BOOL has_firstchar)
2032 {
2033 DEFINE_COMPILER;
2034 struct sljit_label *loop;
2035 struct sljit_jump *toolong;
2036 struct sljit_jump *alreadyfound;
2037 struct sljit_jump *found;
2038 struct sljit_jump *foundoc = NULL;
2039 struct sljit_jump *notfound;
2040 pcre_uchar oc, bit;
2041
2042 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
2043 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2044 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2045 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2046
2047 if (has_firstchar)
2048 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
2049 else
2050 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2051
2052 loop = LABEL();
2053 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2054
2055 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2056 oc = reqchar;
2057 if (caseless)
2058 oc = TABLE_GET(reqchar, common->fcc, reqchar);
2059 if (reqchar == oc)
2060 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
2061 else
2062 {
2063 bit = reqchar ^ oc;
2064 if (ispowerof2(bit))
2065 {
2066 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2067 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar | bit);
2068 }
2069 else
2070 {
2071 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
2072 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2073 }
2074 }
2075 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2076 JUMPTO(SLJIT_JUMP, loop);
2077
2078 JUMPHERE(found);
2079 if (foundoc)
2080 JUMPHERE(foundoc);
2081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
2082 JUMPHERE(alreadyfound);
2083 JUMPHERE(toolong);
2084 return notfound;
2085 }
2086
2087 static void do_revertframes(compiler_common *common)
2088 {
2089 DEFINE_COMPILER;
2090 struct sljit_jump *jump;
2091 struct sljit_label *mainloop;
2092
2093 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2094 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2095
2096 /* Drop frames until we reach STACK_TOP. */
2097 mainloop = LABEL();
2098 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2099 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2100 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2101 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2102 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2103 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2104 JUMPTO(SLJIT_JUMP, mainloop);
2105
2106 JUMPHERE(jump);
2107 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2108 /* End of dropping frames. */
2109 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2110
2111 JUMPHERE(jump);
2112 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2113 /* Set string begin. */
2114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2115 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2117 JUMPTO(SLJIT_JUMP, mainloop);
2118
2119 JUMPHERE(jump);
2120 /* Unknown command. */
2121 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2122 JUMPTO(SLJIT_JUMP, mainloop);
2123 }
2124
2125 static void check_wordboundary(compiler_common *common)
2126 {
2127 DEFINE_COMPILER;
2128 struct sljit_jump *beginend;
2129 #ifdef SUPPORT_UTF8
2130 struct sljit_jump *jump;
2131 #endif
2132
2133 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2134
2135 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2136 /* Get type of the previous char, and put it to LOCALS1. */
2137 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2140 beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2141 skip_char_back(common);
2142 read_char(common);
2143
2144 /* Testing char type. */
2145 #ifdef SUPPORT_UCP
2146 if (common->useucp)
2147 {
2148 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2149 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2150 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2151 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2152 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2153 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2154 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2155 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2156 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2157 JUMPHERE(jump);
2158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2159 }
2160 else
2161 #endif
2162 {
2163 #ifdef SUPPORT_UTF8
2164 /* Here LOCALS1 has already been zeroed. */
2165 jump = NULL;
2166 if (common->utf)
2167 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2168 #endif
2169 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2170 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2171 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2173 #ifdef SUPPORT_UTF8
2174 if (jump != NULL)
2175 JUMPHERE(jump);
2176 #endif
2177 }
2178 JUMPHERE(beginend);
2179
2180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2181 beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2182 peek_char(common);
2183
2184 /* Testing char type. This is a code duplication. */
2185 #ifdef SUPPORT_UCP
2186 if (common->useucp)
2187 {
2188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2189 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2190 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2191 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2193 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2194 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2195 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2196 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2197 JUMPHERE(jump);
2198 }
2199 else
2200 #endif
2201 {
2202 #ifdef SUPPORT_UTF8
2203 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2204 jump = NULL;
2205 if (common->utf)
2206 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2207 #endif
2208 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2209 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2210 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2211 #ifdef SUPPORT_UTF8
2212 if (jump != NULL)
2213 JUMPHERE(jump);
2214 #endif
2215 }
2216 JUMPHERE(beginend);
2217
2218 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2219 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2220 }
2221
2222 static void check_anynewline(compiler_common *common)
2223 {
2224 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2225 DEFINE_COMPILER;
2226
2227 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2228
2229 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2230 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2231 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2232 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2233 #ifdef SUPPORT_UTF8
2234 if (common->utf)
2235 {
2236 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2237 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2238 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2239 }
2240 #endif
2241 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2242 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2243 }
2244
2245 static void check_hspace(compiler_common *common)
2246 {
2247 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2248 DEFINE_COMPILER;
2249
2250 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2251
2252 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2253 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2254 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2255 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2256 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2257 #ifdef SUPPORT_UTF8
2258 if (common->utf)
2259 {
2260 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2261 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2262 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2263 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2264 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2265 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2266 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2267 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2268 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2269 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2270 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2271 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2272 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2273 }
2274 #endif
2275 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2276
2277 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2278 }
2279
2280 static void check_vspace(compiler_common *common)
2281 {
2282 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2283 DEFINE_COMPILER;
2284
2285 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2286
2287 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2288 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2289 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2290 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2291 #ifdef SUPPORT_UTF8
2292 if (common->utf)
2293 {
2294 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2295 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2296 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2297 }
2298 #endif
2299 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2300
2301 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2302 }
2303
2304 #define CHAR1 STR_END
2305 #define CHAR2 STACK_TOP
2306
2307 static void do_casefulcmp(compiler_common *common)
2308 {
2309 DEFINE_COMPILER;
2310 struct sljit_jump *jump;
2311 struct sljit_label *label;
2312
2313 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2314 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2315 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2317 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2318 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2319
2320 label = LABEL();
2321 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2322 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2323 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2324 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2325 JUMPTO(SLJIT_C_NOT_ZERO, label);
2326
2327 JUMPHERE(jump);
2328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2329 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2330 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2331 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2332 }
2333
2334 #define LCC_TABLE STACK_LIMIT
2335
2336 static void do_caselesscmp(compiler_common *common)
2337 {
2338 DEFINE_COMPILER;
2339 struct sljit_jump *jump;
2340 struct sljit_label *label;
2341
2342 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2343 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2344
2345 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2347 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2348 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2349 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2350 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2351
2352 label = LABEL();
2353 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2354 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2355 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2356 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2357 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2358 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2359 JUMPTO(SLJIT_C_NOT_ZERO, label);
2360
2361 JUMPHERE(jump);
2362 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2363 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2364 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2365 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2366 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2367 }
2368
2369 #undef LCC_TABLE
2370 #undef CHAR1
2371 #undef CHAR2
2372
#if defined SUPPORT_UTF8 && defined SUPPORT_UCP

/* Caseless comparison of two character sequences, done in C because it
would be ineffective to do at JIT level.

The characters of [src1, end1) are compared with the characters starting at
args->ptr and limited by args->end. Two characters match when they are equal
or when the first one equals UCD_OTHERCASE() of the second one.

Arguments:
  src1   start of the first sequence
  args   supplies the start (ptr) and the limit (end) of the second sequence
  end1   end of the first sequence

Returns: the position in the second sequence after the last compared
  character if all of [src1, end1) matched, otherwise NULL (also when the
  second sequence runs out first). */
static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *src2 = args->ptr;
const pcre_uchar *end2 = (pcre_uchar *)args->end;
int c1, c2;

while (src1 < end1)
  {
  if (src2 >= end2)
    return NULL;
  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  if (c1 == c2)
    continue;
  if (c1 != UCD_OTHERCASE(c2))
    return NULL;
  }
return src2;
}

#endif /* SUPPORT_UTF8 && SUPPORT_UCP */
2396
2397 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2398 compare_context* context, jump_list **fallbacks)
2399 {
2400 DEFINE_COMPILER;
2401 unsigned int othercasebit = 0;
2402 pcre_uchar *othercasechar = NULL;
2403 #ifdef SUPPORT_UTF8
2404 int utflength;
2405 #endif
2406
2407 if (caseless && char_has_othercase(common, cc))
2408 {
2409 othercasebit = char_get_othercase_bit(common, cc);
2410 SLJIT_ASSERT(othercasebit);
2411 /* Extracting bit difference info. */
2412 #ifdef COMPILE_PCRE8
2413 othercasechar = cc + (othercasebit >> 8);
2414 othercasebit &= 0xff;
2415 #else
2416 #ifdef COMPILE_PCRE16
2417 othercasechar = cc + (othercasebit >> 9);
2418 if ((othercasebit & 0x100) != 0)
2419 othercasebit = (othercasebit & 0xff) << 8;
2420 else
2421 othercasebit &= 0xff;
2422 #endif
2423 #endif
2424 }
2425
2426 if (context->sourcereg == -1)
2427 {
2428 #ifdef COMPILE_PCRE8
2429 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2430 if (context->length >= 4)
2431 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2432 else if (context->length >= 2)
2433 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2434 else
2435 #endif
2436 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2437 #else
2438 #ifdef COMPILE_PCRE16
2439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2440 if (context->length >= 4)
2441 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2442 else
2443 #endif
2444 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2445 #endif
2446 #endif /* COMPILE_PCRE8 */
2447 context->sourcereg = TMP2;
2448 }
2449
2450 #ifdef SUPPORT_UTF
2451 utflength = 1;
2452 if (common->utf && HAS_EXTRALEN(*cc))
2453 utflength += GET_EXTRALEN(*cc);
2454
2455 do
2456 {
2457 #endif
2458
2459 context->length -= IN_UCHARS(1);
2460 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2461
2462 /* Unaligned read is supported. */
2463 if (othercasebit != 0 && othercasechar == cc)
2464 {
2465 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2466 context->oc.asuchars[context->ucharptr] = othercasebit;
2467 }
2468 else
2469 {
2470 context->c.asuchars[context->ucharptr] = *cc;
2471 context->oc.asuchars[context->ucharptr] = 0;
2472 }
2473 context->ucharptr++;
2474
2475 #ifdef COMPILE_PCRE8
2476 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2477 #else
2478 if (context->ucharptr >= 2 || context->length == 0)
2479 #endif
2480 {
2481 if (context->length >= 4)
2482 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2483 #ifdef COMPILE_PCRE8
2484 else if (context->length >= 2)
2485 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2486 else if (context->length >= 1)
2487 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2488 #else
2489 else if (context->length >= 2)
2490 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2491 #endif
2492 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2493
2494 switch(context->ucharptr)
2495 {
2496 case 4 / sizeof(pcre_uchar):
2497 if (context->oc.asint != 0)
2498 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2499 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2500 break;
2501
2502 case 2 / sizeof(pcre_uchar):
2503 if (context->oc.asshort != 0)
2504 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
2505 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
2506 break;
2507
2508 #ifdef COMPILE_PCRE8
2509 case 1:
2510 if (context->oc.asbyte != 0)
2511 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2512 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2513 break;
2514 #endif
2515
2516 default:
2517 SLJIT_ASSERT_STOP();
2518 break;
2519 }
2520 context->ucharptr = 0;
2521 }
2522
2523 #else
2524
2525 /* Unaligned read is unsupported. */
2526 #ifdef COMPILE_PCRE8
2527 if (context->length > 0)
2528 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2529 #else
2530 if (context->length > 0)
2531 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2532 #endif
2533 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2534
2535 if (othercasebit != 0 && othercasechar == cc)
2536 {
2537 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2538 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2539 }
2540 else
2541 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2542
2543 #endif
2544
2545 cc++;
2546 #ifdef SUPPORT_UTF8
2547 utflength--;
2548 }
2549 while (utflength > 0);
2550 #endif
2551
2552 return cc;
2553 }
2554
2555 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2556
2557 #define SET_TYPE_OFFSET(value) \
2558 if ((value) != typeoffset) \
2559 { \
2560 if ((value) > typeoffset) \
2561 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2562 else \
2563 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2564 } \
2565 typeoffset = (value);
2566
2567 #define SET_CHAR_OFFSET(value) \
2568 if ((value) != charoffset) \
2569 { \
2570 if ((value) > charoffset) \
2571 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2572 else \
2573 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2574 } \
2575 charoffset = (value);
2576
2577 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2578 {
2579 DEFINE_COMPILER;
2580 jump_list *found = NULL;
2581 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2582 unsigned int c;
2583 int compares;
2584 struct sljit_jump *jump = NULL;
2585 pcre_uchar *ccbegin;
2586 #ifdef SUPPORT_UCP
2587 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2588 BOOL charsaved = FALSE;
2589 int typereg = TMP1, scriptreg = TMP1;
2590 unsigned int typeoffset;
2591 #endif
2592 int invertcmp, numberofcmps;
2593 unsigned int charoffset;
2594
2595 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2596 check_input_end(common, fallbacks);
2597 read_char(common);
2598
2599 if ((*cc++ & XCL_MAP) != 0)
2600 {
2601 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2602 #ifndef COMPILE_PCRE8
2603 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2604 #elif defined SUPPORT_UTF8
2605 if (common->utf)
2606 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2607 #endif
2608
2609 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2610 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2611 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2612 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2613 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2614 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2615
2616 #ifndef COMPILE_PCRE8
2617 JUMPHERE(jump);
2618 #elif defined SUPPORT_UTF8
2619 if (common->utf)
2620 JUMPHERE(jump);
2621 #endif
2622 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2623 #ifdef SUPPORT_UCP
2624 charsaved = TRUE;
2625 #endif
2626 cc += 32 / sizeof(pcre_uchar);
2627 }
2628
2629 /* Scanning the necessary info. */
2630 ccbegin = cc;
2631 compares = 0;
2632 while (*cc != XCL_END)
2633 {
2634 compares++;
2635 if (*cc == XCL_SINGLE)
2636 {
2637 cc += 2;
2638 #ifdef SUPPORT_UTF
2639 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2640 #endif
2641 #ifdef SUPPORT_UCP
2642 needschar = TRUE;
2643 #endif
2644 }
2645 else if (*cc == XCL_RANGE)
2646 {
2647 cc += 2;
2648 #ifdef SUPPORT_UTF
2649 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2650 #endif
2651 cc++;
2652 #ifdef SUPPORT_UTF
2653 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2654 #endif
2655 #ifdef SUPPORT_UCP
2656 needschar = TRUE;
2657 #endif
2658 }
2659 #ifdef SUPPORT_UCP
2660 else
2661 {
2662 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2663 cc++;
2664 switch(*cc)
2665 {
2666 case PT_ANY:
2667 break;
2668
2669 case PT_LAMP:
2670 case PT_GC:
2671 case PT_PC:
2672 case PT_ALNUM:
2673 needstype = TRUE;
2674 break;
2675
2676 case PT_SC:
2677 needsscript = TRUE;
2678 break;
2679
2680 case PT_SPACE:
2681 case PT_PXSPACE:
2682 case PT_WORD:
2683 needstype = TRUE;
2684 needschar = TRUE;
2685 break;
2686
2687 default:
2688 SLJIT_ASSERT_STOP();
2689 break;
2690 }
2691 cc += 2;
2692 }
2693 #endif
2694 }
2695
2696 #ifdef SUPPORT_UCP
2697 /* Simple register allocation. TMP1 is preferred if possible. */
2698 if (needstype || needsscript)
2699 {
2700 if (needschar && !charsaved)
2701 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2702 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2703 if (needschar)
2704 {
2705 if (needstype)
2706 {
2707 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2708 typereg = RETURN_ADDR;
2709 }
2710
2711 if (needsscript)
2712 scriptreg = TMP3;
2713 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2714 }
2715 else if (needstype && needsscript)
2716 scriptreg = TMP3;
2717 /* In all other cases only one of them was specified, and that can goes to TMP1. */
2718
2719 if (needsscript)
2720 {
2721 if (scriptreg == TMP1)
2722 {
2723 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2724 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2725 }
2726 else
2727 {
2728 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2729 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2730 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2731 }
2732 }
2733 }
2734 #endif
2735
2736 /* Generating code. */
2737 cc = ccbegin;
2738 charoffset = 0;
2739 numberofcmps = 0;
2740 #ifdef SUPPORT_UCP
2741 typeoffset = 0;
2742 #endif
2743
2744 while (*cc != XCL_END)
2745 {
2746 compares--;
2747 invertcmp = (compares == 0 && list != fallbacks);
2748 jump = NULL;
2749
2750 if (*cc == XCL_SINGLE)
2751 {
2752 cc ++;
2753 #ifdef SUPPORT_UTF8
2754 if (common->utf)
2755 {
2756 GETCHARINC(c, cc);
2757 }
2758 else
2759 #endif
2760 c = *cc++;
2761
2762 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2763 {
2764 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2765 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2766 numberofcmps++;
2767 }
2768 else if (numberofcmps > 0)
2769 {
2770 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2771 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2772 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2773 numberofcmps = 0;
2774 }
2775 else
2776 {
2777 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2778 numberofcmps = 0;
2779 }
2780 }
2781 else if (*cc == XCL_RANGE)
2782 {
2783 cc ++;
2784 #ifdef SUPPORT_UTF8
2785 if (common->utf)
2786 {
2787 GETCHARINC(c, cc);
2788 }
2789 else
2790 #endif
2791 c = *cc++;
2792 SET_CHAR_OFFSET(c);
2793 #ifdef SUPPORT_UTF8
2794 if (common->utf)
2795 {
2796 GETCHARINC(c, cc);
2797 }
2798 else
2799 #endif
2800 c = *cc++;
2801 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2802 {
2803 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2804 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2805 numberofcmps++;
2806 }
2807 else if (numberofcmps > 0)
2808 {
2809 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2810 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2811 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2812 numberofcmps = 0;
2813 }
2814 else
2815 {
2816 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2817 numberofcmps = 0;
2818 }
2819 }
2820 #ifdef SUPPORT_UCP
2821 else
2822 {
2823 if (*cc == XCL_NOTPROP)
2824 invertcmp ^= 0x1;
2825 cc++;
2826 switch(*cc)
2827 {
2828 case PT_ANY:
2829 if (list != fallbacks)
2830 {
2831 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2832 continue;
2833 }
2834 else if (cc[-1] == XCL_NOTPROP)
2835 continue;
2836 jump = JUMP(SLJIT_JUMP);
2837 break;
2838
2839 case PT_LAMP:
2840 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2841 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2842 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2843 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2844 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2845 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2846 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2847 break;
2848
2849 case PT_GC:
2850 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
2851 SET_TYPE_OFFSET(c);
2852 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
2853 break;
2854
2855 case PT_PC:
2856 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2857 break;
2858
2859 case PT_SC:
2860 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2861 break;
2862
2863 case PT_SPACE:
2864 case PT_PXSPACE:
2865 if (*cc == PT_SPACE)
2866 {
2867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2868 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2869 }
2870 SET_CHAR_OFFSET(9);
2871 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2872 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2873 if (*cc == PT_SPACE)
2874 JUMPHERE(jump);
2875
2876 SET_TYPE_OFFSET(ucp_Zl);
2877 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2878 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2879 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2880 break;
2881
2882 case PT_WORD:
2883 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2884 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2885 /* ... fall through */
2886
2887 case PT_ALNUM:
2888 SET_TYPE_OFFSET(ucp_Ll);
2889 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2890 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2891 SET_TYPE_OFFSET(ucp_Nd);
2892 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2893 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2894 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2895 break;
2896 }
2897 cc += 2;
2898 }
2899 #endif
2900
2901 if (jump != NULL)
2902 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2903 }
2904
2905 if (found != NULL)
2906 set_jumps(found, LABEL());
2907 }
2908
2909 #undef SET_TYPE_OFFSET
2910 #undef SET_CHAR_OFFSET
2911
2912 #endif
2913
2914 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2915 {
2916 DEFINE_COMPILER;
2917 int length;
2918 unsigned int c, oc, bit;
2919 compare_context context;
2920 struct sljit_jump *jump[4];
2921 #ifdef SUPPORT_UTF8
2922 struct sljit_label *label;
2923 #ifdef SUPPORT_UCP
2924 pcre_uchar propdata[5];
2925 #endif
2926 #endif
2927
2928 switch(type)
2929 {
2930 case OP_SOD:
2931 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2933 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2934 return cc;
2935
2936 case OP_SOM:
2937 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2938 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2939 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2940 return cc;
2941
2942 case OP_NOT_WORD_BOUNDARY:
2943 case OP_WORD_BOUNDARY:
2944 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
2945 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2946 return cc;
2947
2948 case OP_NOT_DIGIT:
2949 case OP_DIGIT:
2950 check_input_end(common, fallbacks);
2951 read_char8_type(common);
2952 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
2953 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2954 return cc;
2955
2956 case OP_NOT_WHITESPACE:
2957 case OP_WHITESPACE:
2958 check_input_end(common, fallbacks);
2959 read_char8_type(common);
2960 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
2961 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2962 return cc;
2963
2964 case OP_NOT_WORDCHAR:
2965 case OP_WORDCHAR:
2966 check_input_end(common, fallbacks);
2967 read_char8_type(common);
2968 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
2969 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2970 return cc;
2971
2972 case OP_ANY:
2973 check_input_end(common, fallbacks);
2974 read_char(common);
2975 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2976 {
2977 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
2978 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2979 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2980 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
2981 JUMPHERE(jump[1]);
2982 JUMPHERE(jump[0]);
2983 }
2984 else
2985 check_newlinechar(common, common->nltype, fallbacks, TRUE);
2986 return cc;
2987
2988 case OP_ALLANY:
2989 check_input_end(common, fallbacks);
2990 #ifdef SUPPORT_UTF
2991 if (common->utf)
2992 {
2993 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2994 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2995 #ifdef COMPILE_PCRE8
2996 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2997 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2999 #else /* COMPILE_PCRE8 */
3000 #ifdef COMPILE_PCRE16
3001 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3002 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3003 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3004 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3005 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3007 #endif /* COMPILE_PCRE16 */
3008 #endif /* COMPILE_PCRE8 */
3009 JUMPHERE(jump[0]);
3010 return cc;
3011 }
3012 #endif
3013 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3014 return cc;
3015
3016 case OP_ANYBYTE:
3017 check_input_end(common, fallbacks);
3018 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3019 return cc;
3020
3021 #ifdef SUPPORT_UTF8
3022 #ifdef SUPPORT_UCP
3023 case OP_NOTPROP:
3024 case OP_PROP:
3025 propdata[0] = 0;
3026 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3027 propdata[2] = cc[0];
3028 propdata[3] = cc[1];
3029 propdata[4] = XCL_END;
3030 compile_xclass_hotpath(common, propdata, fallbacks);
3031 return cc + 2;
3032 #endif
3033 #endif
3034
3035 case OP_ANYNL:
3036 check_input_end(common, fallbacks);
3037 read_char(common);
3038 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3039 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3040 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3041 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3042 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3043 jump[3] = JUMP(SLJIT_JUMP);
3044 JUMPHERE(jump[0]);
3045 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3046 JUMPHERE(jump[1]);
3047 JUMPHERE(jump[2]);
3048 JUMPHERE(jump[3]);
3049 return cc;
3050
3051 case OP_NOT_HSPACE:
3052 case OP_HSPACE:
3053 check_input_end(common, fallbacks);
3054 read_char(common);
3055 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3056 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3057 return cc;
3058
3059 case OP_NOT_VSPACE:
3060 case OP_VSPACE:
3061 check_input_end(common, fallbacks);
3062 read_char(common);
3063 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3064 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3065 return cc;
3066
3067 #ifdef SUPPORT_UCP
3068 case OP_EXTUNI:
3069 check_input_end(common, fallbacks);
3070 read_char(common);
3071 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3072 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3073 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3074
3075 label = LABEL();
3076 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3077 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3078 read_char(common);
3079 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3080 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3081 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3082
3083 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3084 JUMPHERE(jump[0]);
3085 return cc;
3086 #endif
3087
3088 case OP_EODN:
3089 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3090 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3091 {
3092 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3093 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3094 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3095 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
3096 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3097 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3098 }
3099 else if (common->nltype == NLTYPE_FIXED)
3100 {
3101 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
3102 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3103 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3104 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3105 }
3106 else
3107 {
3108 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3109 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3110 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3111 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3112 jump[2] = JUMP(SLJIT_C_GREATER);
3113 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3114 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 1);
3115 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3116 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3117
3118 JUMPHERE(jump[1]);
3119 if (common->nltype == NLTYPE_ANYCRLF)
3120 {
3121 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
3122 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3123 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3124 }
3125 else
3126 {
3127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3128 read_char(common);
3129 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3130 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3131 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3132 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3133 }
3134 JUMPHERE(jump[2]);
3135 JUMPHERE(jump[3]);
3136 }
3137 JUMPHERE(jump[0]);
3138 return cc;
3139
3140 case OP_EOD:
3141 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3142 return cc;
3143
3144 case OP_CIRC:
3145 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3146 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3147 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3148 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3149 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3150 return cc;
3151
3152 case OP_CIRCM:
3153 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3154 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3155 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3156 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3157 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3158 jump[0] = JUMP(SLJIT_JUMP);
3159 JUMPHERE(jump[1]);
3160
3161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, end));
3162 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, STR_PTR, 0));
3163
3164 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3165 {
3166 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3167 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3168 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
3169 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
3170 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3171 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3172 }
3173 else
3174 {
3175 skip_char_back(common);
3176 read_char(common);
3177 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3178 }
3179 JUMPHERE(jump[0]);
3180 return cc;
3181
3182 case OP_DOLL:
3183 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3184 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3185 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3186
3187 if (!common->endonly)
3188 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3189 else
3190 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3191 return cc;
3192
3193 case OP_DOLLM:
3194 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3195 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3196 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3197 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3198 jump[0] = JUMP(SLJIT_JUMP);
3199 JUMPHERE(jump[1]);
3200
3201 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3202 {
3203 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3204 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3205 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3206 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
3207 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3208 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3209 }
3210 else
3211 {
3212 peek_char(common);
3213 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3214 }
3215 JUMPHERE(jump[0]);
3216 return cc;
3217
3218 case OP_CHAR:
3219 case OP_CHARI:
3220 length = 1;
3221 #ifdef SUPPORT_UTF
3222 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3223 #endif
3224 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3225 {
3226 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3227 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3228
3229 context.length = IN_UCHARS(length);
3230 context.sourcereg = -1;
3231 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3232 context.ucharptr = 0;
3233 #endif
3234 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3235 }
3236 check_input_end(common, fallbacks);
3237 read_char(common);
3238 #ifdef SUPPORT_UTF8
3239 if (common->utf)
3240 {
3241 GETCHAR(c, cc);
3242 }
3243 else
3244 #endif
3245 c = *cc;
3246 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3247 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3248 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3249 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3250 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3251 return cc + length;
3252
3253 case OP_NOT:
3254 case OP_NOTI:
3255 #ifdef SUPPORT_UTF
3256 if (common->utf)
3257 {
3258 length = 1;
3259 if (HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3260
3261 check_input_end(common, fallbacks);
3262 GETCHAR(c, cc);
3263
3264 if (c <= 127)
3265 {
3266 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3267 if (type == OP_NOT || !char_has_othercase(common, cc))
3268 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3269 else
3270 {
3271 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3272 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3273 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3274 }
3275 /* Skip the variable-length character. */
3276 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3277 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3278 #ifdef COMPILE_PCRE8
3279 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3280 #endif
3281 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3282 JUMPHERE(jump[0]);
3283 return cc + length;
3284 }
3285 else
3286 read_char(common);
3287 }
3288 else
3289 #endif
3290 {
3291 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3292 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3293 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3294 c = *cc;
3295 }
3296
3297 if (type == OP_NOT || !char_has_othercase(common, cc))
3298 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3299 else
3300 {
3301 oc = char_othercase(common, c);
3302 bit = c ^ oc;
3303 if (ispowerof2(bit))
3304 {
3305 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3306 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3307 }
3308 else
3309 {
3310 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3311 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3312 }
3313 }
3314 return cc + 1;
3315
3316 case OP_CLASS:
3317 case OP_NCLASS:
3318 check_input_end(common, fallbacks);
3319 read_char(common);
3320 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3321 jump[0] = NULL;
3322 #ifdef SUPPORT_UTF8
3323 /* This check can only be skipped in pure 8 bit mode. */
3324 if (common->utf)
3325 #endif
3326 {
3327 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3328 if (type == OP_CLASS)
3329 {
3330 add_jump(compiler, fallbacks, jump[0]);
3331 jump[0] = NULL;
3332 }
3333 }
3334 #endif
3335 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3336 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3337 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3338 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3339 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3340 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3341 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3342 if (jump[0] != NULL)
3343 JUMPHERE(jump[0]);
3344 #endif
3345 return cc + 32 / sizeof(pcre_uchar);
3346
3347 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3348 case OP_XCLASS:
3349 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3350 return cc + GET(cc, 0) - 1;
3351 #endif
3352
3353 case OP_REVERSE:
3354 length = GET(cc, 0);
3355 SLJIT_ASSERT(length > 0);
3356 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3357 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3358 #ifdef SUPPORT_UTF8
3359 if (common->utf)
3360 {
3361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3362 label = LABEL();
3363 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0));
3364 skip_char_back(common);
3365 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3366 JUMPTO(SLJIT_C_NOT_ZERO, label);
3367 return cc + LINK_SIZE;
3368 }
3369 #endif
3370 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3371 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3372 return cc + LINK_SIZE;
3373 }
3374 SLJIT_ASSERT_STOP();
3375 return cc;
3376 }
3377
3378 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3379 {
3380 /* This function consumes at least one input character. */
3381 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3382 DEFINE_COMPILER;
3383 pcre_uchar *ccbegin = cc;
3384 compare_context context;
3385 int size;
3386
3387 context.length = 0;
3388 do
3389 {
3390 if (cc >= ccend)
3391 break;
3392
3393 if (*cc == OP_CHAR)
3394 {
3395 size = 1;
3396 #ifdef SUPPORT_UTF
3397 if (common->utf && HAS_EXTRALEN(cc[1]))
3398 size += GET_EXTRALEN(cc[1]);
3399 #endif
3400 }
3401 else if (*cc == OP_CHARI)
3402 {
3403 size = 1;
3404 #ifdef SUPPORT_UTF
3405 if (common->utf)
3406 {
3407 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3408 size = 0;
3409 else if (HAS_EXTRALEN(cc[1]))
3410 size += GET_EXTRALEN(cc[1]);
3411 }
3412 else
3413 #endif
3414 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3415 size = 0;
3416 }
3417 else
3418 size = 0;
3419
3420 cc += 1 + size;
3421 context.length += IN_UCHARS(size);
3422 }
3423 while (size > 0 && context.length <= 128);
3424
3425 cc = ccbegin;
3426 if (context.length > 0)
3427 {
3428 /* We have a fixed-length byte sequence. */
3429 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3430 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3431
3432 context.sourcereg = -1;
3433 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3434 context.ucharptr = 0;
3435 #endif
3436 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3437 return cc;
3438 }
3439
3440 /* A non-fixed length character will be checked if length == 0. */
3441 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3442 }
3443
3444 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3445 {
3446 DEFINE_COMPILER;
3447 int offset = GET2(cc, 1) << 1;
3448
3449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3450 if (!common->jscript_compat)
3451 {
3452 if (fallbacks == NULL)
3453 {
3454 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3455 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3456 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3457 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3458 return JUMP(SLJIT_C_NOT_ZERO);
3459 }
3460 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3461 }
3462 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3463 }
3464
3465 /* Forward definitions. */
3466 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3467 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3468
/* Shared body of PUSH_FALLBACK and PUSH_FALLBACK_NOVALUE. It allocates a
zero-filled fallback record of (size) bytes from the compiler's memory pool,
links it in front of parent->top and stores (ccstart) in its cc field. If the
compiler is in an error state after the allocation, on_error (a complete
return statement without the semicolon) is executed before anything is
written through the pointer.

The macro relies on three names being in scope at the point of use:
compiler, parent and fallback. */
#define PUSH_FALLBACK_IMPL(size, ccstart, on_error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      on_error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)

/* For functions returning a value: returns (error) on allocation failure. */
#define PUSH_FALLBACK(size, ccstart, error) \
  PUSH_FALLBACK_IMPL(size, ccstart, return error)

/* For functions returning void. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  PUSH_FALLBACK_IMPL(size, ccstart, return)

/* Views the current fallback record as its concrete type. */
#define FALLBACK_AS(type) ((type*)fallback)
3496
3497 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3498 {
3499 DEFINE_COMPILER;
3500 int offset = GET2(cc, 1) << 1;
3501 struct sljit_jump *jump = NULL;
3502
3503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3504 if (withchecks && !common->jscript_compat)
3505 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3506
3507 #ifdef SUPPORT_UTF8
3508 #ifdef SUPPORT_UCP
3509 if (common->utf && *cc == OP_REFI)
3510 {
3511 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3512 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3513 if (withchecks)
3514 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3515
3516 /* Needed to save important temporary registers. */
3517 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3518 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3520 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3521 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3522 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3523 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3524 }
3525 else
3526 #endif
3527 #endif
3528 {
3529 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3530 if (withchecks)
3531 jump = JUMP(SLJIT_C_ZERO);
3532 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3533
3534 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3535 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3536 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3537 }
3538
3539 if (jump != NULL)
3540 {
3541 if (emptyfail)
3542 add_jump(compiler, fallbacks, jump);
3543 else
3544 JUMPHERE(jump);
3545 }
3546 return cc + 1 + IMM2_SIZE;
3547 }
3548
3549 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3550 {
3551 DEFINE_COMPILER;
3552 fallback_common *fallback;
3553 pcre_uchar type;
3554 struct sljit_label *label;
3555 struct sljit_jump *zerolength;
3556 struct sljit_jump *jump = NULL;
3557 pcre_uchar *ccbegin = cc;
3558 int min = 0, max = 0;
3559 BOOL minimize;
3560
3561 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3562
3563 type = cc[1 + IMM2_SIZE];
3564 minimize = (type & 0x1) != 0;
3565 switch(type)
3566 {
3567 case OP_CRSTAR:
3568 case OP_CRMINSTAR:
3569 min = 0;
3570 max = 0;
3571 cc += 1 + IMM2_SIZE + 1;
3572 break;
3573 case OP_CRPLUS:
3574 case OP_CRMINPLUS:
3575 min = 1;
3576 max = 0;
3577 cc += 1 + IMM2_SIZE + 1;
3578 break;
3579 case OP_CRQUERY:
3580 case OP_CRMINQUERY:
3581 min = 0;
3582 max = 1;
3583 cc += 1 + IMM2_SIZE + 1;
3584 break;
3585 case OP_CRRANGE:
3586 case OP_CRMINRANGE:
3587 min = GET2(cc, 1 + IMM2_SIZE + 1);
3588 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3589 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3590 break;
3591 default:
3592 SLJIT_ASSERT_STOP();
3593 break;
3594 }
3595
3596 if (!minimize)
3597 {
3598 if (min == 0)
3599 {
3600 allocate_stack(common, 2);
3601 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3603 /* Temporary release of STR_PTR. */
3604 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3605 zerolength = compile_ref_checks(common, ccbegin, NULL);
3606 /* Restore if not zero length. */
3607 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3608 }
3609 else
3610 {
3611 allocate_stack(common, 1);
3612 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3613 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3614 }
3615
3616 if (min > 1 || max > 1)
3617 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3618
3619 label = LABEL();
3620 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3621
3622 if (min > 1 || max > 1)
3623 {
3624 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3625 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
3627 if (min > 1)
3628 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3629 if (max > 1)
3630 {
3631 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3632 allocate_stack(common, 1);
3633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3634 JUMPTO(SLJIT_JUMP, label);
3635 JUMPHERE(jump);
3636 }
3637 }
3638
3639 if (max == 0)
3640 {
3641 /* Includes min > 1 case as well. */
3642 allocate_stack(common, 1);
3643 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3644 JUMPTO(SLJIT_JUMP, label);
3645 }
3646
3647 JUMPHERE(zerolength);
3648 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3649
3650 decrease_call_count(common);
3651 return cc;
3652 }
3653
3654 allocate_stack(common, 2);
3655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3656 if (type != OP_CRMINSTAR)
3657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3658
3659 if (min == 0)
3660 {
3661 zerolength = compile_ref_checks(common, ccbegin, NULL);
3662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3663 jump = JUMP(SLJIT_JUMP);
3664 }
3665 else
3666 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3667
3668 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3669 if (max > 0)
3670 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3671
3672 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3674
3675 if (min > 1)
3676 {
3677 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3678 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3680 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3681 }
3682 else if (max > 0)
3683 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3684
3685 if (jump != NULL)
3686 JUMPHERE(jump);
3687 JUMPHERE(zerolength);
3688
3689 decrease_call_count(common);
3690 return cc;
3691 }
3692
3693 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3694 {
3695 DEFINE_COMPILER;
3696 fallback_common *fallback;
3697 recurse_entry *entry = common->entries;
3698 recurse_entry *prev = NULL;
3699 int start = GET(cc, 1);
3700
3701 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3702 while (entry != NULL)
3703 {
3704 if (entry->start == start)
3705 break;
3706 prev = entry;
3707 entry = entry->next;
3708 }
3709
3710 if (entry == NULL)
3711 {
3712 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3713 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3714 return NULL;
3715 entry->next = NULL;
3716 entry->entry = NULL;
3717 entry->calls = NULL;
3718 entry->start = start;
3719
3720 if (prev != NULL)
3721 prev->next = entry;
3722 else
3723 common->entries = entry;
3724 }
3725
3726 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3727 allocate_stack(common, 1);
3728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3729
3730 if (entry->entry == NULL)
3731 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3732 else
3733 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3734 /* Leave if the match is failed. */
3735 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3736 return cc + 1 + LINK_SIZE;
3737 }
3738
3739 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3740 {
3741 DEFINE_COMPILER;
3742 int framesize;
3743 int localptr;
3744 fallback_common altfallback;
3745 pcre_uchar *ccbegin;
3746 pcre_uchar opcode;
3747 pcre_uchar bra = OP_BRA;
3748 jump_list *tmp = NULL;
3749 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3750 jump_list **found;
3751 /* Saving previous accept variables. */
3752 struct sljit_label *save_acceptlabel = common->acceptlabel;
3753 struct sljit_jump *jump;
3754 struct sljit_jump *brajump = NULL;
3755 jump_list *save_accept = common->accept;
3756
3757 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3758 {
3759 SLJIT_ASSERT(!conditional);
3760 bra = *cc;
3761 cc++;
3762 }
3763 localptr = PRIV_DATA(cc);
3764 SLJIT_ASSERT(localptr != 0);
3765 framesize = get_framesize(common, cc, FALSE);
3766 fallback->framesize = framesize;
3767 fallback->localptr = localptr;
3768 opcode = *cc;
3769 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
3770 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
3771 ccbegin = cc;
3772 cc += GET(cc, 1);
3773
3774 if (bra == OP_BRAMINZERO)
3775 {
3776 /* This is a braminzero fallback path. */
3777 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3778 free_stack(common, 1);
3779 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3780 }
3781
3782 if (framesize < 0)
3783 {
3784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3785 allocate_stack(common, 1);
3786 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3787 }
3788 else
3789 {
3790 allocate_stack(common, framesize + 2);
3791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3792 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3793 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3795 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3796 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3797 }
3798
3799 memset(&altfallback, 0, sizeof(fallback_common));
3800 while (1)
3801 {
3802 common->acceptlabel = NULL;
3803 common->accept = NULL;
3804 altfallback.top = NULL;
3805 altfallback.topfallbacks = NULL;
3806
3807 if (*ccbegin == OP_ALT)
3808 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3809
3810 altfallback.cc = ccbegin;
3811 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3812 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3813 {
3814 common->acceptlabel = save_acceptlabel;
3815 common->accept = save_accept;
3816 return NULL;
3817 }
3818 common->acceptlabel = LABEL();
3819 if (common->accept != NULL)
3820 set_jumps(common->accept, common->acceptlabel);
3821
3822 /* Reset stack. */
3823 if (framesize < 0)
3824 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3825 else {
3826 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3827 {
3828 /* We don't need to keep the STR_PTR, only the previous localptr. */
3829 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3830 }
3831 else
3832 {
3833 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3834 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3835 }
3836 }
3837
3838 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3839 {
3840 /* We know that STR_PTR was stored on the top of the stack. */
3841 if (conditional)
3842 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3843 else if (bra == OP_BRAZERO)
3844 {
3845 if (framesize < 0)
3846 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3847 else
3848 {
3849 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3850 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3851 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3852 }
3853 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3855 }
3856 else if (framesize >= 0)
3857 {
3858 /* For OP_BRA and OP_BRAMINZERO. */
3859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3860 }
3861 }
3862 add_jump(compiler, found, JUMP(SLJIT_JUMP));
3863
3864 compile_fallbackpath(common, altfallback.top);
3865 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3866 {
3867 common->acceptlabel = save_acceptlabel;
3868 common->accept = save_accept;
3869 return NULL;
3870 }
3871 set_jumps(altfallback.topfallbacks, LABEL());
3872
3873 if (*cc != OP_ALT)
3874 break;
3875
3876 ccbegin = cc;
3877 cc += GET(cc, 1);
3878 }
3879 /* None of them matched. */
3880
3881 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3882 {
3883 /* Assert is failed. */
3884 if (conditional || bra == OP_BRAZERO)
3885 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3886
3887 if (framesize < 0)
3888 {
3889 /* The topmost item should be 0. */
3890 if (bra == OP_BRAZERO)
3891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3892 else
3893 free_stack(common, 1);
3894 }
3895 else
3896 {
3897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3898 /* The topmost item should be 0. */
3899 if (bra == OP_BRAZERO)
3900 {
3901 free_stack(common, framesize + 1);
3902 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3903 }
3904 else
3905 free_stack(common, framesize + 2);
3906 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3907 }
3908 jump = JUMP(SLJIT_JUMP);
3909 if (bra != OP_BRAZERO)
3910 add_jump(compiler, target, jump);
3911
3912 /* Assert is successful. */
3913 set_jumps(tmp, LABEL());
3914 if (framesize < 0)
3915 {
3916 /* We know that STR_PTR was stored on the top of the stack. */
3917 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3918 /* Keep the STR_PTR on the top of the stack. */
3919 if (bra == OP_BRAZERO)
3920 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3921 else if (bra == OP_BRAMINZERO)
3922 {
3923 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3925 }
3926 }
3927 else
3928 {
3929 if (bra == OP_BRA)
3930 {
3931 /* We don't need to keep the STR_PTR, only the previous localptr. */
3932 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3933 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3934 }
3935 else
3936 {
3937 /* We don't need to keep the STR_PTR, only the previous localptr. */
3938 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
3939 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3940 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
3941 }
3942 }
3943
3944 if (bra == OP_BRAZERO)
3945 {
3946 fallback->hotpath = LABEL();
3947 sljit_set_label(jump, fallback->hotpath);
3948 }
3949 else if (bra == OP_BRAMINZERO)
3950 {
3951 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3952 JUMPHERE(brajump);
3953 if (framesize >= 0)
3954 {
3955 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3956 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3958 }
3959 set_jumps(fallback->common.topfallbacks, LABEL());
3960 }
3961 }
3962 else
3963 {
3964 /* AssertNot is successful. */
3965 if (framesize < 0)
3966 {
3967 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3968 if (bra != OP_BRA)
3969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3970 else
3971 free_stack(common, 1);
3972 }
3973 else
3974 {
3975 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3976 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3977 /* The topmost item should be 0. */
3978 if (bra != OP_BRA)
3979 {
3980 free_stack(common, framesize + 1);
3981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3982 }
3983 else
3984 free_stack(common, framesize + 2);
3985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3986 }
3987
3988 if (bra == OP_BRAZERO)
3989 fallback->hotpath = LABEL();
3990 else if (bra == OP_BRAMINZERO)
3991 {
3992 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3993 JUMPHERE(brajump);
3994 }
3995
3996 if (bra != OP_BRA)
3997 {
3998 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
3999 set_jumps(fallback->common.topfallbacks, LABEL());
4000 fallback->common.topfallbacks = NULL;
4001 }
4002 }
4003
4004 common->acceptlabel = save_acceptlabel;
4005 common->accept = save_accept;
4006 return cc + 1 + LINK_SIZE;
4007 }
4008
4009 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4010 {
4011 int condition = FALSE;
4012 pcre_uchar *slotA = name_table;
4013 pcre_uchar *slotB;
4014 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4015 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4016 sljit_w no_capture;
4017 int i;
4018
4019 locals += OVECTOR_START / sizeof(sljit_w);
4020 no_capture = locals[1];
4021
4022 for (i = 0; i < name_count; i++)
4023 {
4024 if (GET2(slotA, 0) == refno) break;
4025 slotA += name_entry_size;
4026 }
4027
4028 if (i < name_count)
4029 {
4030 /* Found a name for the number - there can be only one; duplicate names
4031 for different numbers are allowed, but not vice versa. First scan down
4032 for duplicates. */
4033
4034 slotB = slotA;
4035 while (slotB > name_table)
4036 {
4037 slotB -= name_entry_size;
4038 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4039 {
4040 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4041 if (condition) break;
4042 }
4043 else break;
4044 }
4045
4046 /* Scan up for duplicates */
4047 if (!condition)
4048 {
4049 slotB = slotA;
4050 for (i++; i < name_count; i++)
4051 {
4052 slotB += name_entry_size;
4053 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4054 {
4055 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4056 if (condition) break;
4057 }
4058 else break;
4059 }
4060 }
4061 }
4062 return condition;
4063 }
4064
4065 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4066 {
4067 int condition = FALSE;
4068 pcre_uchar *slotA = name_table;
4069 pcre_uchar *slotB;
4070 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4071 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4072 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4073 int i;
4074
4075 for (i = 0; i < name_count; i++)
4076 {
4077 if (GET2(slotA, 0) == recno) break;
4078 slotA += name_entry_size;
4079 }
4080
4081 if (i < name_count)
4082 {
4083 /* Found a name for the number - there can be only one; duplicate
4084 names for different numbers are allowed, but not vice versa. First
4085 scan down for duplicates. */
4086
4087 slotB = slotA;
4088 while (slotB > name_table)
4089 {
4090 slotB -= name_entry_size;
4091 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4092 {
4093 condition = GET2(slotB, 0) == group_num;
4094 if (condition) break;
4095 }
4096 else break;
4097 }
4098
4099 /* Scan up for duplicates */
4100 if (!condition)
4101 {
4102 slotB = slotA;
4103 for (i++; i < name_count; i++)
4104 {
4105 slotB += name_entry_size;
4106 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4107 {
4108 condition = GET2(slotB, 0) == group_num;
4109 if (condition) break;
4110 }
4111 else break;
4112 }
4113 }
4114 }
4115 return condition;
4116 }
4117
4118 /*
4119 Handling bracketed expressions is probably the most complex part.
4120
4121 Stack layout naming characters:
4122 S - Push the current STR_PTR
4123 0 - Push a 0 (NULL)
4124 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4125 before the next alternative. Not pushed if there are no alternatives.
4126 M - Any values pushed by the current alternative. Can be empty, or anything.
4127 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4128 L - Push the previous local (pointed by localptr) to the stack
4129 () - optional values stored on the stack
4130 ()* - optional, can be stored multiple times
4131
4132 The following list shows the regular expression templates, their PCRE byte codes
4133 and stack layout supported by pcre-sljit.
4134
4135 (?:) OP_BRA | OP_KET A M
4136 () OP_CBRA | OP_KET C M
4137 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4138 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4139 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4140 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4141 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4142 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4143 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4144 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4145 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4146 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4147 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4148 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4149 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4150 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4151 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4152 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4153 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4154 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4155 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4156 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4157
4158
4159 Stack layout naming characters:
4160 A - Push the alternative index (starting from 0) on the stack.
4161 Not pushed if there are no alternatives.
4162 M - Any values pushed by the current alternative. Can be empty, or anything.
4163
4164 The next list shows the possible content of a bracket:
4165 (|) OP_*BRA | OP_ALT ... M A
4166 (?()|) OP_*COND | OP_ALT M A
4167 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4168 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4169 Or nothing, if trace is unnecessary
4170 */
4171
/* Emits the hot path for one bracketed group.

cc points at the bracket opcode, or at an OP_BRAZERO / OP_BRAMINZERO prefix
that precedes it. The opcodes dealt with below are OP_BRA, OP_CBRA, OP_SCBRA,
OP_SBRA, OP_ONCE, OP_ONCE_NC (handled as OP_ONCE), OP_COND and OP_SCOND, and
the group must be closed by OP_KET, OP_KETRMAX or OP_KETRMIN.

Only the code between the opening opcode and the first OP_ALT (or the
closing ket) is compiled here by compile_hotpath(); the remaining
alternatives are skipped at the end of the function (presumably they are
generated by the matching fallback path - confirm in the fallback code).

Returns the address following the closing ket, or NULL when the sljit
compiler reports an error. A conditional group whose condition is OP_DEF
generates no code at all: its fallback record is unlinked and the end of
the bracket is returned. */
4172 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4173 {
4174 DEFINE_COMPILER;
4175 fallback_common *fallback;
4176 pcre_uchar opcode;
4177 int localptr = 0;
4178 int offset = 0;
4179 int stacksize;
4180 pcre_uchar *ccbegin;
4181 pcre_uchar *hotpath;
4182 pcre_uchar bra = OP_BRA;
4183 pcre_uchar ket;
4184 assert_fallback *assert;
4185 BOOL has_alternatives;
4186 struct sljit_jump *jump;
4187 struct sljit_jump *skip;
4188 struct sljit_label *rmaxlabel = NULL;
4189 struct sljit_jump *braminzerojump = NULL;
4190
4191 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4192
/* Remember an optional zero-repeat prefix in bra and step over it. */
4193 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4194 {
4195 bra = *cc;
4196 cc++;
/* NOTE(review): this store is redundant, opcode is assigned again
unconditionally right after this block. */
4197 opcode = *cc;
4198 }
4199
4200 opcode = *cc;
4201 ccbegin = cc;
/* hotpath is the first opcode inside the bracket (it is advanced further
below for capturing brackets and for conditions). */
4202 hotpath = ccbegin + 1 + LINK_SIZE;
4203
4204 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4205 {
4206 /* Drop this bracket_fallback. */
4207 parent->top = fallback->prev;
4208 return bracketend(cc);
4209 }
4210
4211 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4212 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4213 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* From here on cc points to the first OP_ALT or to the closing ket. */
4214 cc += GET(cc, 1);
4215
4216 has_alternatives = *cc == OP_ALT;
4217 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4218 {
4219 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4220 if (*hotpath == OP_NRREF)
4221 {
/* stacksize is used as a scratch variable for the reference number. */
4222 stacksize = GET2(hotpath, 1);
4223 if (common->currententry == NULL || stacksize == RREF_ANY)
4224 has_alternatives = FALSE;
4225 else if (common->currententry->start == 0)
4226 has_alternatives = stacksize != 0;
4227 else
4228 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4229 }
4230 }
4231
/* A repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
4232 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4233 opcode = OP_SCOND;
4234 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4235 opcode = OP_ONCE;
4236
4237 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4238 {
4239 /* Capturing brackets have a pre-allocated space. */
4240 offset = GET2(ccbegin, 1 + LINK_SIZE);
4241 localptr = OVECTOR_PRIV(offset);
4242 offset <<= 1;
4243 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4244 hotpath += IMM2_SIZE;
4245 }
4246 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4247 {
4248 /* Other brackets simply allocate the next entry. */
4249 localptr = PRIV_DATA(ccbegin);
4250 SLJIT_ASSERT(localptr != 0);
4251 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4252 if (opcode == OP_ONCE)
4253 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4254 }
4255
4256 /* Instructions before the first alternative. */
/* First count the stack slots needed (a 0 marker for repeated groups and
the current STR_PTR for OP_BRAZERO), then allocate and fill them. */
4257 stacksize = 0;
4258 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4259 stacksize++;
4260 if (bra == OP_BRAZERO)
4261 stacksize++;
4262
4263 if (stacksize > 0)
4264 allocate_stack(common, stacksize);
4265
4266 stacksize = 0;
4267 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4268 {
4269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4270 stacksize++;
4271 }
4272
4273 if (bra == OP_BRAZERO)
4274 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4275
4276 if (bra == OP_BRAMINZERO)
4277 {
4278 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4279 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4280 if (ket != OP_KETRMIN)
4281 {
4282 free_stack(common, 1);
4283 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4284 }
4285 else
4286 {
4287 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4288 {
4289 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4290 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4291 /* Nothing stored during the first run. */
4292 skip = JUMP(SLJIT_JUMP);
4293 JUMPHERE(jump);
4294 /* Checking zero-length iteration. */
4295 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4296 {
4297 /* When we come from outside, localptr contains the previous STR_PTR. */
4298 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4299 }
4300 else
4301 {
4302 /* Except when the whole stack frame must be saved. */
4303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4304 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4305 }
4306 JUMPHERE(skip);
4307 }
4308 else
4309 {
4310 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4311 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4312 JUMPHERE(jump);
4313 }
4314 }
4315 }
4316
4317 if (ket == OP_KETRMIN)
4318 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4319
4320 if (ket == OP_KETRMAX)
4321 {
/* rmaxlabel is the target of the loop-back jump emitted after the body. */
4322 rmaxlabel = LABEL();
4323 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4324 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4325 }
4326
4327 /* Handling capturing brackets and alternatives. */
4328 if (opcode == OP_ONCE)
4329 {
4330 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4331 {
4332 /* Neither capturing brackets nor recursions are found in the block. */
4333 if (ket == OP_KETRMIN)
4334 {
4335 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4336 allocate_stack(common, 2);
4337 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4339 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4340 }
4341 else if (ket == OP_KETRMAX || has_alternatives)
4342 {
4343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4344 allocate_stack(common, 1);
4345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4346 }
4347 else
4348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4349 }
4350 else
4351 {
/* A frame is required: the previous localptr value (and, in the first
case, STR_PTR as well) is pushed together with the frame, and localptr
is redirected to the new stack area. */
4352 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4353 {
4354 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4356 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4357 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4358 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4359 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4360 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4361 }
4362 else
4363 {
4364 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4365 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4366 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4368 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4369 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4370 }
4371 }
4372 }
4373 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4374 {
4375 /* Saving the previous values. */
4376 allocate_stack(common, 3);
4377 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4378 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4379 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4381 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4384 }
4385 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4386 {
4387 /* Saving the previous value. */
4388 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4389 allocate_stack(common, 1);
4390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4392 }
4393 else if (has_alternatives)
4394 {
4395 /* Pushing the starting string pointer. */
4396 allocate_stack(common, 1);
4397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4398 }
4399
4400 /* Generating code for the first alternative. */
/* For conditional groups the condition is evaluated first; a failing
condition jumps to the u.condfailed list of the fallback record. */
4401 if (opcode == OP_COND || opcode == OP_SCOND)
4402 {
4403 if (*hotpath == OP_CREF)
4404 {
4405 SLJIT_ASSERT(has_alternatives);
4406 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4407 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4408 hotpath += 1 + IMM2_SIZE;
4409 }
4410 else if (*hotpath == OP_NCREF)
4411 {
4412 SLJIT_ASSERT(has_alternatives);
4413 stacksize = GET2(hotpath, 1);
/* If the referenced group itself is set the helper call is skipped. */
4414 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4415
/* STACK_TOP is saved in the POSSESSIVE1 slot around the call and the
arguments of do_searchovector() are passed in the LOCALS0 / LOCALS1
slots and in the three temporary registers. */
4416 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4417 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4418 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4419 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4420 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4421 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4422 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4423 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4424 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4425
4426 JUMPHERE(jump);
4427 hotpath += 1 + IMM2_SIZE;
4428 }
4429 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4430 {
4431 /* Never has other case. */
4432 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4433
/* stacksize is reused as a boolean: non-zero when the condition is known
to be true at compile time. */
4434 stacksize = GET2(hotpath, 1);
4435 if (common->currententry == NULL)
4436 stacksize = 0;
4437 else if (stacksize == RREF_ANY)
4438 stacksize = 1;
4439 else if (common->currententry->start == 0)
4440 stacksize = stacksize == 0;
4441 else
4442 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4443
4444 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4445 {
4446 SLJIT_ASSERT(!has_alternatives);
4447 if (stacksize != 0)
4448 hotpath += 1 + IMM2_SIZE;
4449 else
4450 {
/* The condition is false: compile the second alternative if there is
one, otherwise compile nothing (hotpath == cc). */
4451 if (*cc == OP_ALT)
4452 {
4453 hotpath = cc + 1 + LINK_SIZE;
4454 cc += GET(cc, 1);
4455 }
4456 else
4457 hotpath = cc;
4458 }
4459 }
4460 else
4461 {
4462 SLJIT_ASSERT(has_alternatives);
4463
4464 stacksize = GET2(hotpath, 1);
4465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4469 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4470 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4471 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4472 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4473 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4474 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4475 hotpath += 1 + IMM2_SIZE;
4476 }
4477 }
4478 else
4479 {
4480 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4481 /* Similar code as PUSH_FALLBACK macro. */
4482 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4483 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4484 return NULL;
4485 memset(assert, 0, sizeof(assert_fallback));
4486 assert->common.cc = hotpath;
4487 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4488 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4489 }
4490 }
4491
/* Compile the body of the selected alternative: [hotpath, cc). */
4492 compile_hotpath(common, hotpath, cc, fallback);
4493 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4494 return NULL;
4495
4496 if (opcode == OP_ONCE)
4497 {
4498 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4499 {
4500 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4501 /* TMP2 which is set here used by OP_KETRMAX below. */
4502 if (ket == OP_KETRMAX)
4503 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4504 else if (ket == OP_KETRMIN)
4505 {
4506 /* Move the STR_PTR to the localptr. */
4507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4508 }
4509 }
4510 else
4511 {
4512 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4513 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4514 if (ket == OP_KETRMAX)
4515 {
4516 /* TMP2 which is set here used by OP_KETRMAX below. */
4517 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4518 }
4519 }
4520 }
4521
/* Slots pushed after the alternative: the current STR_PTR for repeated
groups (or a 0 for a non-repeated group with a zero prefix) and a 0 for
groups with alternatives other than OP_ONCE. */
4522 stacksize = 0;
4523 if (ket != OP_KET || bra != OP_BRA)
4524 stacksize++;
4525 if (has_alternatives && opcode != OP_ONCE)
4526 stacksize++;
4527
4528 if (stacksize > 0)
4529 allocate_stack(common, stacksize);
4530
4531 stacksize = 0;
4532 if (ket != OP_KET)
4533 {
4534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4535 stacksize++;
4536 }
4537 else if (bra != OP_BRA)
4538 {
4539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4540 stacksize++;
4541 }
4542
4543 if (has_alternatives)
4544 {
4545 if (opcode != OP_ONCE)
4546 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4547 if (ket != OP_KETRMAX)
4548 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4549 }
4550
4551 /* Must be after the hotpath label. */
/* offset is non-zero only for capturing brackets: store the start saved
in localptr and the current STR_PTR into the ovector pair. */
4552 if (offset != 0)
4553 {
4554 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4557 }
4558
4559 if (ket == OP_KETRMAX)
4560 {
4561 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4562 {
4563 if (has_alternatives)
4564 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4565 /* Checking zero-length iteration. */
4566 if (opcode != OP_ONCE)
4567 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4568 else
4569 /* TMP2 must contain the starting STR_PTR. */
4570 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4571 }
4572 else
4573 JUMPTO(SLJIT_JUMP, rmaxlabel);
4574 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4575 }
4576
4577 if (bra == OP_BRAZERO)
4578 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4579
4580 if (bra == OP_BRAMINZERO)
4581 {
4582 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
/* NOTE(review): parent is cast to braminzero_fallback here, so the caller
is expected to pass such a record for OP_BRAMINZERO - confirm at the
call site. */
4583 JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath);
4584 if (braminzerojump != NULL)
4585 {
4586 JUMPHERE(braminzerojump);
4587 /* We need to release the end pointer to perform the
4588 fallback for the zero-length iteration. When
4589 framesize is < 0, OP_ONCE will do the release itself. */
4590 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4591 {
4592 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4593 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4594 }
4595 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4596 free_stack(common, 1);
4597 }
4598 /* Continue to the normal fallback. */
4599 }
4600
4601 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4602 decrease_call_count(common);
4603
4604 /* Skip the other alternatives. */
4605 while (*cc == OP_ALT)
4606 cc += GET(cc, 1);
4607 cc += 1 + LINK_SIZE;
4608 return cc;
4609 }
4610
4611 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4612 {
4613 DEFINE_COMPILER;
4614 fallback_common *fallback;
4615 pcre_uchar opcode;
4616 int localptr;
4617 int cbraprivptr = 0;
4618 int framesize;
4619 int stacksize;
4620 int offset = 0;
4621 BOOL zero = FALSE;
4622 pcre_uchar *ccbegin = NULL;
4623 int stack;
4624 struct sljit_label *loop = NULL;
4625 struct jump_list *emptymatch = NULL;
4626
4627 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
4628 if (*cc == OP_BRAPOSZERO)
4629 {
4630 zero = TRUE;
4631 cc++;
4632 }
4633
4634 opcode = *cc;
4635 localptr = PRIV_DATA(cc);
4636 SLJIT_ASSERT(localptr != 0);
4637 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
4638 switch(opcode)
4639 {
4640 case OP_BRAPOS:
4641 case OP_SBRAPOS:
4642 ccbegin = cc + 1 + LINK_SIZE;
4643 break;
4644
4645 case OP_CBRAPOS:
4646 case OP_SCBRAPOS:
4647 offset = GET2(cc, 1 + LINK_SIZE);
4648 cbraprivptr = OVECTOR_PRIV(offset);
4649 offset <<= 1;
4650 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4651 break;
4652
4653 default:
4654 SLJIT_ASSERT_STOP();
4655 break;
4656 }
4657
4658 framesize = get_framesize(common, cc, FALSE);
4659 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4660 if (framesize < 0)
4661 {
4662 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4663 if (!zero)
4664 stacksize++;
4665 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4666 allocate_stack(common, stacksize);
4667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4668
4669 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4670 {
4671 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4672 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4675 }
4676 else
4677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4678
4679 if (!zero)
4680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4681 }
4682 else
4683 {
4684 stacksize = framesize + 1;
4685 if (!zero)
4686 stacksize++;
4687 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4688 stacksize++;
4689 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4690 allocate_stack(common, stacksize);
4691
4692 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4693 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4694 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4695 stack = 0;
4696 if (!zero)
4697 {
4698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4699 stack++;
4700 }
4701 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4702 {
4703 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4704 stack++;
4705 }
4706 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4707 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4708 }
4709
4710 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4711 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4712
4713 loop = LABEL();
4714 while (*cc != OP_KETRPOS)
4715 {
4716 fallback->top = NULL;
4717 fallback->topfallbacks = NULL;
4718 cc += GET(cc, 1);
4719
4720 compile_hotpath(common, ccbegin, cc, fallback);
4721 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4722 return NULL;
4723
4724 if (framesize < 0)
4725 {
4726 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4727
4728 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4729 {
4730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4732 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4734 }
4735 else
4736 {
4737 if (opcode == OP_SBRAPOS)
4738 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4740 }
4741
4742 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4743 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4744
4745 if (!zero)
4746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4747 }
4748 else
4749 {
4750 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4751 {
4752 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
4753 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4757 }
4758 else
4759 {
4760 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4761 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
4762 if (opcode == OP_SBRAPOS)
4763 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4764 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
4765 }
4766
4767 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4768 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4769
4770 if (!zero)
4771 {
4772 if (framesize < 0)
4773 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4774 else
4775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4776 }
4777 }
4778 JUMPTO(SLJIT_JUMP, loop);
4779 flush_stubs(common);
4780
4781 compile_fallbackpath(common, fallback->top);
4782 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4783 return NULL;
4784 set_jumps(fallback->topfallbacks, LABEL());
4785
4786 if (framesize < 0)
4787 {
4788 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4789 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4790 else
4791 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4792 }
4793 else
4794 {
4795 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4796 {
4797 /* Last alternative. */
4798 if (*cc == OP_KETRPOS)
4799 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4800 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4801 }
4802 else
4803 {
4804 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4805 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4806 }
4807 }
4808
4809 if (*cc == OP_KETRPOS)
4810 break;
4811 ccbegin = cc + 1 + LINK_SIZE;
4812 }
4813
4814 fallback->topfallbacks = NULL;
4815 if (!zero)
4816 {
4817 if (framesize < 0)
4818 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
4819 else /* TMP2 is set to [localptr] above. */
4820 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
4821 }
4822
4823 /* None of them matched. */
4824 set_jumps(emptymatch, LABEL());
4825 decrease_call_count(common);
4826 return cc + 1 + LINK_SIZE;
4827 }
4828
4829 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
4830 {
4831 int class_len;
4832
4833 *opcode = *cc;
4834 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
4835 {
4836 cc++;
4837 *type = OP_CHAR;
4838 }
4839 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
4840 {
4841 cc++;
4842 *type = OP_CHARI;
4843 *opcode -= OP_STARI - OP_STAR;
4844 }
4845 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
4846 {
4847 cc++;
4848 *type = OP_NOT;
4849 *opcode -= OP_NOTSTAR - OP_STAR;
4850 }
4851 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
4852 {
4853 cc++;
4854 *type = OP_NOTI;
4855 *opcode -= OP_NOTSTARI - OP_STAR;
4856 }
4857 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
4858 {
4859 cc++;
4860 *opcode -= OP_TYPESTAR - OP_STAR;
4861 *type = 0;
4862 }
4863 else
4864 {
4865 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
4866 *type = *opcode;
4867 cc++;
4868 class_len = (*type < OP_XCLASS) ? (1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
4869 *opcode = cc[class_len - 1];
4870 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
4871 {
4872 *opcode -= OP_CRSTAR - OP_STAR;
4873 if (end != NULL)
4874 *end = cc + class_len;
4875 }
4876 else
4877 {
4878 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
4879 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
4880 *arg2 = GET2(cc, class_len);
4881
4882 if (*arg2 == 0)
4883 {
4884 SLJIT_ASSERT(*arg1 != 0);
4885 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
4886 }
4887 if (*arg1 == *arg2)
4888 *opcode = OP_EXACT;
4889
4890 if (end != NULL)
4891 *end = cc + class_len + 2 * IMM2_SIZE;
4892 }
4893 return cc;
4894 }
4895
4896 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
4897 {
4898 *arg1 = GET2(cc, 0);
4899 cc += IMM2_SIZE;
4900 }
4901
4902 if (*type == 0)
4903 {
4904 *type = *cc;
4905 if (end != NULL)
4906 *end = next_opcode(common, cc);
4907 cc++;
4908 return cc;
4909 }
4910
4911 if (end != NULL)
4912 {
4913 *end = cc + 1;
4914 #ifdef SUPPORT_UTF
4915 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
4916 #endif
4917 }
4918 return cc;
4919 }
4920
4921 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4922 {
4923 DEFINE_COMPILER;
4924 fallback_common *fallback;
4925 pcre_uchar opcode;
4926 pcre_uchar type;
4927 int arg1 = -1, arg2 = -1;
4928 pcre_uchar* end;
4929 jump_list *nomatch = NULL;
4930 struct sljit_jump *jump = NULL;
4931 struct sljit_label *label;
4932
4933 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
4934
4935 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
4936
4937 switch(opcode)
4938 {
4939 case OP_STAR:
4940 case OP_PLUS:
4941 case OP_UPTO:
4942 case OP_CRRANGE:
4943 if (type == OP_ANYNL || type == OP_EXTUNI)
4944 {
4945 if (opcode == OP_STAR || opcode == OP_UPTO)
4946 {
4947 allocate_stack(common, 2);
4948 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4950 }
4951 else
4952 {
4953 allocate_stack(common, 1);
4954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4955 }
4956 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4958
4959 label = LABEL();
4960 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4961 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4962 {
4963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4964 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4965 if (opcode == OP_CRRANGE && arg2 > 0)
4966 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
4967 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
4968 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
4969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4970 }
4971
4972 allocate_stack(common, 1);
4973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4974 JUMPTO(SLJIT_JUMP, label);
4975 if (jump != NULL)
4976 JUMPHERE(jump);
4977 }
4978 else
4979 {
4980 allocate_stack(common, 2);
4981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4983 label = LABEL();
4984 compile_char1_hotpath(common, type, cc, &nomatch);
4985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4986 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
4987 {
4988 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4989 JUMPTO(SLJIT_JUMP, label);
4990 }
4991 else
4992 {
4993 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4994 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4996 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4997 }
4998 set_jumps(nomatch, LABEL());
4999 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5000 add_jump(compiler, &fallback->topfallbacks,
5001 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
5002 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5003 }
5004 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5005 break;
5006
5007 case OP_MINSTAR:
5008 case OP_MINPLUS:
5009 allocate_stack(common, 1);
5010 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5011 if (opcode == OP_MINPLUS)
5012 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5013 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5014 break;
5015
5016 case OP_MINUPTO:
5017 case OP_CRMINRANGE:
5018 allocate_stack(common, 2);
5019 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5020 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5021 if (opcode == OP_CRMINRANGE)
5022 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5023 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5024 break;
5025
5026 case OP_QUERY:
5027 case OP_MINQUERY:
5028 allocate_stack(common, 1);
5029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5030 if (opcode == OP_QUERY)
5031 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5032 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5033 break;
5034
5035 case OP_EXACT:
5036 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5037 label = LABEL();
5038 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5040 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5042 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5043 break;
5044
5045 case OP_POSSTAR:
5046 case OP_POSPLUS:
5047 case OP_POSUPTO:
5048 if (opcode != OP_POSSTAR)
5049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5051 label = LABEL();
5052 compile_char1_hotpath(common, type, cc, &nomatch);
5053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5054 if (opcode != OP_POSUPTO)
5055 {
5056 if (opcode == OP_POSPLUS)
5057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5058 JUMPTO(SLJIT_JUMP, label);
5059 }
5060 else
5061 {
5062 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5063 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5065 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5066 }
5067 set_jumps(nomatch, LABEL());
5068 if (opcode == OP_POSPLUS)
5069 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5070 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5071 break;
5072
5073 case OP_POSQUERY:
5074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5075 compile_char1_hotpath(common, type, cc, &nomatch);
5076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5077 set_jumps(nomatch, LABEL());
5078 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5079 break;
5080
5081 default:
5082 SLJIT_ASSERT_STOP();
5083 break;
5084 }
5085
5086 decrease_call_count(common);
5087 return end;
5088 }
5089
5090 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5091 {
5092 DEFINE_COMPILER;
5093 fallback_common *fallback;
5094
5095 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
5096
5097 if (*cc == OP_FAIL)
5098 {
5099 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5100 return cc + 1;
5101 }
5102
5103 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5104 {
5105 /* No need to check notempty conditions. */
5106 if (common->acceptlabel == NULL)
5107 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5108 else
5109 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5110 return cc + 1;
5111 }
5112
5113 if (common->acceptlabel == NULL)
5114 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5115 else
5116 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5117 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5118 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5119 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5120 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5121 if (common->acceptlabel == NULL)
5122 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5123 else
5124 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5125 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5126 if (common->acceptlabel == NULL)
5127 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5128 else
5129 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5130 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5131 return cc + 1;
5132 }
5133
5134 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5135 {
5136 DEFINE_COMPILER;
5137 int offset = GET2(cc, 1);
5138
5139 /* Data will be discarded anyway... */
5140 if (common->currententry != NULL)
5141 return cc + 1 + IMM2_SIZE;
5142
5143 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5144 offset <<= 1;
5145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5147 return cc + 1 + IMM2_SIZE;
5148 }
5149
5150 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5151 {
5152 DEFINE_COMPILER;
5153 fallback_common *fallback;
5154
5155 while (cc < ccend)
5156 {
5157 switch(*cc)
5158 {
5159 case OP_SOD:
5160 case OP_SOM:
5161 case OP_NOT_WORD_BOUNDARY:
5162 case OP_WORD_BOUNDARY:
5163 case OP_NOT_DIGIT:
5164 case OP_DIGIT:
5165 case OP_NOT_WHITESPACE:
5166 case OP_WHITESPACE:
5167 case OP_NOT_WORDCHAR:
5168 case OP_WORDCHAR:
5169 case OP_ANY:
5170 case OP_ALLANY:
5171 case OP_ANYBYTE:
5172 case OP_NOTPROP:
5173 case OP_PROP:
5174 case OP_ANYNL:
5175 case OP_NOT_HSPACE:
5176 case OP_HSPACE:
5177 case OP_NOT_VSPACE:
5178 case OP_VSPACE:
5179 case OP_EXTUNI:
5180 case OP_EODN:
5181 case OP_EOD:
5182 case OP_CIRC:
5183 case OP_CIRCM:
5184 case OP_DOLL:
5185 case OP_DOLLM:
5186 case OP_NOT:
5187 case OP_NOTI:
5188 case OP_REVERSE:
5189 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5190 break;
5191
5192 case OP_SET_SOM:
5193 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5194 allocate_stack(common, 1);
5195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5196 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5198 cc++;
5199 break;
5200
5201 case OP_CHAR:
5202 case OP_CHARI:
5203 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5204 break;
5205
5206 case OP_STAR:
5207 case OP_MINSTAR:
5208 case OP_PLUS:
5209 case OP_MINPLUS:
5210 case OP_QUERY:
5211 case OP_MINQUERY:
5212 case OP_UPTO:
5213 case OP_MINUPTO:
5214 case OP_EXACT:
5215 case OP_POSSTAR:
5216 case OP_POSPLUS:
5217 case OP_POSQUERY:
5218 case OP_POSUPTO:
5219 case OP_STARI:
5220 case OP_MINSTARI:
5221 case OP_PLUSI:
5222 case OP_MINPLUSI:
5223 case OP_QUERYI:
5224 case OP_MINQUERYI:
5225 case OP_UPTOI:
5226 case OP_MINUPTOI:
5227 case OP_EXACTI:
5228 case OP_POSSTARI:
5229 case OP_POSPLUSI:
5230 case OP_POSQUERYI:
5231 case OP_POSUPTOI:
5232 case OP_NOTSTAR:
5233 case OP_NOTMINSTAR:
5234 case OP_NOTPLUS:
5235 case OP_NOTMINPLUS:
5236 case OP_NOTQUERY:
5237 case OP_NOTMINQUERY:
5238 case OP_NOTUPTO:
5239 case OP_NOTMINUPTO:
5240 case OP_NOTEXACT:
5241 case OP_NOTPOSSTAR:
5242 case OP_NOTPOSPLUS:
5243 case OP_NOTPOSQUERY:
5244 case OP_NOTPOSUPTO:
5245 case OP_NOTSTARI:
5246 case OP_NOTMINSTARI:
5247 case OP_NOTPLUSI:
5248 case OP_NOTMINPLUSI:
5249 case OP_NOTQUERYI:
5250 case OP_NOTMINQUERYI:
5251 case OP_NOTUPTOI:
5252 case OP_NOTMINUPTOI:
5253 case OP_NOTEXACTI:
5254 case OP_NOTPOSSTARI:
5255 case OP_NOTPOSPLUSI:
5256 case OP_NOTPOSQUERYI:
5257 case OP_NOTPOSUPTOI:
5258 case OP_TYPESTAR:
5259 case OP_TYPEMINSTAR:
5260 case OP_TYPEPLUS:
5261 case OP_TYPEMINPLUS:
5262 case OP_TYPEQUERY:
5263 case OP_TYPEMINQUERY:
5264 case OP_TYPEUPTO:
5265 case OP_TYPEMINUPTO:
5266 case OP_TYPEEXACT:
5267 case OP_TYPEPOSSTAR:
5268 case OP_TYPEPOSPLUS:
5269 case OP_TYPEPOSQUERY:
5270 case OP_TYPEPOSUPTO:
5271 cc = compile_iterator_hotpath(common, cc, parent);
5272 break;
5273
5274 case OP_CLASS:
5275 case OP_NCLASS:
5276 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5277 cc = compile_iterator_hotpath(common, cc, parent);
5278 else
5279 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5280 break;
5281
5282 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5283 case OP_XCLASS:
5284 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5285 cc = compile_iterator_hotpath(common, cc, parent);
5286 else
5287 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5288 break;
5289 #endif
5290
5291 case OP_REF:
5292 case OP_REFI:
5293 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5294 cc = compile_ref_iterator_hotpath(common, cc, parent);
5295 else
5296 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5297 break;
5298
5299 case OP_RECURSE:
5300 cc = compile_recurse_hotpath(common, cc, parent);
5301 break;
5302
5303 case OP_ASSERT:
5304 case OP_ASSERT_NOT:
5305 case OP_ASSERTBACK:
5306 case OP_ASSERTBACK_NOT:
5307 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5308 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5309 break;
5310
5311 case OP_BRAMINZERO:
5312 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5313 cc = bracketend(cc + 1);
5314 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5315 {
5316 allocate_stack(common, 1);
5317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5318 }
5319 else
5320 {
5321 allocate_stack(common, 2);
5322 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5324 }
5325 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5326 if (cc[1] > OP_ASSERTBACK_NOT)
5327 decrease_call_count(common);
5328 break;
5329
5330 case OP_ONCE:
5331 case OP_ONCE_NC:
5332 case OP_BRA:
5333 case OP_CBRA:
5334 case OP_COND:
5335 case OP_SBRA:
5336 case OP_SCBRA:
5337 case OP_SCOND:
5338 cc = compile_bracket_hotpath(common, cc, parent);
5339 break;
5340
5341 case OP_BRAZERO:
5342 if (cc[1] > OP_ASSERTBACK_NOT)
5343 cc = compile_bracket_hotpath(common, cc, parent);
5344 else
5345 {
5346 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5347 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5348 }
5349 break;
5350
5351 case OP_BRAPOS:
5352 case OP_CBRAPOS:
5353 case OP_SBRAPOS:
5354 case OP_SCBRAPOS:
5355 case OP_BRAPOSZERO:
5356 cc = compile_bracketpos_hotpath(common, cc, parent);
5357 break;
5358
5359 case OP_FAIL:
5360 case OP_ACCEPT:
5361 case OP_ASSERT_ACCEPT:
5362 cc = compile_fail_accept_hotpath(common, cc, parent);
5363 break;
5364
5365 case OP_CLOSE:
5366 cc = compile_close_hotpath(common, cc);
5367 break;
5368
5369 case OP_SKIPZERO:
5370 cc = bracketend(cc + 1);
5371 break;
5372
5373 default:
5374 SLJIT_ASSERT_STOP();
5375 return;
5376 }
5377 if (cc == NULL)
5378 return;
5379 }
5380 SLJIT_ASSERT(cc == ccend);
5381 }
5382
/* The fallback pushing helpers are undefined here; the code below uses
the macros defined next instead. */
#undef FALLBACK_AS
#undef PUSH_FALLBACK_NOVALUE
#undef PUSH_FALLBACK

/* Compiles the fallback path of the given item and leaves the calling
(void) function if the compiler reports an error. The local variables
"common" and "compiler" must be available at the place of use. */
#define COMPILE_FALLBACKPATH(current) \
  do \
    { \
    compile_fallbackpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the local "current" fallback descriptor as a more specific type. */
#define CURRENT_AS(type) ((type*)current)
5397
5398 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5399 {
5400 DEFINE_COMPILER;
5401 pcre_uchar *cc = current->cc;
5402 pcre_uchar opcode;
5403 pcre_uchar type;
5404 int arg1 = -1, arg2 = -1;
5405 struct sljit_label *label = NULL;
5406 struct sljit_jump *jump = NULL;
5407
5408 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5409
5410 switch(opcode)
5411 {
5412 case OP_STAR:
5413 case OP_PLUS:
5414 case OP_UPTO:
5415 case OP_CRRANGE:
5416 if (type == OP_ANYNL || type == OP_EXTUNI)
5417 {
5418 set_jumps(current->topfallbacks, LABEL());
5419 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5420 free_stack(common, 1);
5421 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5422 }
5423 else
5424 {
5425 if (opcode == OP_STAR || opcode == OP_UPTO)
5426 arg2 = 0;
5427 else if (opcode == OP_PLUS)
5428 arg2 = 1;
5429 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5430 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5431 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5432 skip_char_back(common);
5433 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5434 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5435 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5436 set_jumps(current->topfallbacks, LABEL());
5437 JUMPHERE(jump);
5438 free_stack(common, 2);
5439 }
5440 break;
5441
5442 case OP_MINSTAR:
5443 case OP_MINPLUS:
5444 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5445 if (opcode == OP_MINPLUS)
5446 {
5447 set_jumps(current->topfallbacks, LABEL());
5448 current->topfallbacks = NULL;
5449 }
5450 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5452 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5453 set_jumps(current->topfallbacks, LABEL());
5454 free_stack(common, 1);
5455 break;
5456
5457 case OP_MINUPTO:
5458 case OP_CRMINRANGE:
5459 if (opcode == OP_CRMINRANGE)
5460 {
5461 set_jumps(current->topfallbacks, LABEL());
5462 current->topfallbacks = NULL;
5463 label = LABEL();
5464 }
5465 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5466 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5467
5468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5470 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5472
5473 if (opcode == OP_CRMINRANGE)
5474 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5475
5476 if (opcode == OP_CRMINRANGE && arg1 == 0)
5477 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5478 else
5479 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5480
5481 set_jumps(current->topfallbacks, LABEL());
5482 free_stack(common, 2);
5483 break;
5484
5485 case OP_QUERY:
5486 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5488 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5489 jump = JUMP(SLJIT_JUMP);
5490 set_jumps(current->topfallbacks, LABEL());
5491 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5493 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5494 JUMPHERE(jump);
5495 free_stack(common, 1);
5496 break;
5497
5498 case OP_MINQUERY:
5499 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5500 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5501 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5502 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5503 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5504 set_jumps(current->topfallbacks, LABEL());
5505 JUMPHERE(jump);
5506 free_stack(common, 1);
5507 break;
5508
5509 case OP_EXACT:
5510 case OP_POSPLUS:
5511 set_jumps(current->topfallbacks, LABEL());
5512 break;
5513
5514 case OP_POSSTAR:
5515 case OP_POSQUERY:
5516 case OP_POSUPTO:
5517 break;
5518
5519 default:
5520 SLJIT_ASSERT_STOP();
5521 break;
5522 }
5523 }
5524
5525 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5526 {
5527 DEFINE_COMPILER;
5528 pcre_uchar *cc = current->cc;
5529 pcre_uchar type;
5530
5531 type = cc[1 + IMM2_SIZE];
5532 if ((type & 0x1) == 0)
5533 {
5534 set_jumps(current->topfallbacks, LABEL());
5535 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5536 free_stack(common, 1);
5537 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5538 return;
5539 }
5540
5541 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5542 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5543 set_jumps(current->topfallbacks, LABEL());
5544 free_stack(common, 2);
5545 }
5546
/* Emits the fallback path of a recursion item: the top stack item is popped
and written back to OVECTOR(0). NOTE(review): the value is presumably the
OVECTOR(0) saved by compile_recurse_hotpath(), which is not visible here. */
5547 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5548 {
5549 DEFINE_COMPILER;
5550
/* Every failure collected for this item continues from here. */
5551 set_jumps(current->topfallbacks, LABEL());
/* TMP2 carries the popped value across the stack release. */
5552 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5553 free_stack(common, 1);
5554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5555 }
5556
5557 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5558 {
5559 DEFINE_COMPILER;
5560 pcre_uchar *cc = current->cc;
5561 pcre_uchar bra = OP_BRA;
5562 struct sljit_jump *brajump = NULL;
5563
5564 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5565 if (*cc == OP_BRAZERO)
5566 {
5567 bra = *cc;
5568 cc++;
5569 }
5570
5571 if (bra == OP_BRAZERO)
5572 {
5573 SLJIT_ASSERT(current->topfallbacks == NULL);
5574 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5575 }
5576
5577 if (CURRENT_AS(assert_fallback)->framesize < 0)
5578 {
5579 set_jumps(current->topfallbacks, LABEL());
5580
5581 if (bra == OP_BRAZERO)
5582 {
5583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5584 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5585 free_stack(common, 1);
5586 }
5587 return;
5588 }
5589
5590 if (bra == OP_BRAZERO)
5591 {
5592 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5593 {
5594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5595 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5596 free_stack(common, 1);
5597 return;
5598 }
5599 free_stack(common, 1);
5600 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5601 }
5602
5603 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5604 {
5605 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5606 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5608
5609 set_jumps(current->topfallbacks, LABEL());
5610 }
5611 else
5612 set_jumps(current->topfallbacks, LABEL());
5613
5614 if (bra == OP_BRAZERO)
5615 {
5616 /* We know there is enough place on the stack. */
5617 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5619 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5620 JUMPHERE(brajump);
5621 }
5622 }
5623
5624 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5625 {
5626 DEFINE_COMPILER;
5627 int opcode;
5628 int offset = 0;
5629 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5630 int stacksize;
5631 int count;
5632 pcre_uchar *cc = current->cc;
5633 pcre_uchar *ccbegin;
5634 pcre_uchar *ccprev;
5635 jump_list *jumplist = NULL;
5636 jump_list *jumplistitem = NULL;
5637 pcre_uchar bra = OP_BRA;
5638 pcre_uchar ket;
5639 assert_fallback *assert;
5640 BOOL has_alternatives;
5641 struct sljit_jump *brazero = NULL;
5642 struct sljit_jump *once = NULL;
5643 struct sljit_jump *cond = NULL;
5644 struct sljit_label *rminlabel = NULL;
5645
5646 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5647 {
5648 bra = *cc;
5649 cc++;
5650 }
5651
5652 opcode = *cc;
5653 ccbegin = cc;
5654 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5655 cc += GET(cc, 1);
5656 has_alternatives = *cc == OP_ALT;
5657 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5658 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
5659 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5660 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
5661 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5662 opcode = OP_SCOND;
5663 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5664 opcode = OP_ONCE;
5665
5666 if (ket == OP_KETRMAX)
5667 {
5668 if (bra != OP_BRAZERO)
5669 free_stack(common, 1);
5670 else
5671 {
5672 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5673 free_stack(common, 1);
5674 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5675 }
5676 }
5677 else if (ket == OP_KETRMIN)
5678 {
5679 if (bra != OP_BRAMINZERO)
5680 {
5681 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5682 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5683 {
5684 /* Checking zero-length iteration. */
5685 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5686 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5687 else
5688 {
5689 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5690 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5691 }
5692 if (opcode != OP_ONCE)
5693 free_stack(common, 1);
5694 }
5695 else
5696 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5697 }
5698 rminlabel = LABEL();
5699 }
5700 else if (bra == OP_BRAZERO)
5701 {
5702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5703 free_stack(common, 1);
5704 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5705 }
5706
5707 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5708 {
5709 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5710 {
5711 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5712 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5713 }
5714 once = JUMP(SLJIT_JUMP);
5715 }
5716 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5717 {
5718 if (has_alternatives)
5719 {
5720 /* Always exactly one alternative. */
5721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5722 free_stack(common, 1);
5723
5724 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5725 if (SLJIT_UNLIKELY(!jumplistitem))
5726 return;
5727 jumplist = jumplistitem;
5728 jumplistitem->next = NULL;
5729 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5730 }
5731 }
5732 else if (*cc == OP_ALT)
5733 {
5734 /* Build a jump list. Get the last successfully matched branch index. */
5735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5736 free_stack(common, 1);
5737 count = 1;
5738 do
5739 {
5740 /* Append as the last item. */
5741 if (jumplist != NULL)
5742 {
5743 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
5744 jumplistitem = jumplistitem->next;
5745 }
5746 else
5747 {
5748 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5749 jumplist = jumplistitem;
5750 }
5751
5752 if (SLJIT_UNLIKELY(!jumplistitem))
5753 return;
5754
5755 jumplistitem->next = NULL;
5756 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
5757 cc += GET(cc, 1);
5758 }
5759 while (*cc == OP_ALT);
5760
5761 cc = ccbegin + GET(ccbegin, 1);
5762 }
5763
5764 COMPILE_FALLBACKPATH(current->top);
5765 if (current->topfallbacks)
5766 set_jumps(current->topfallbacks, LABEL());
5767
5768 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5769 {
5770 /* Conditional block always has at most one alternative. */
5771 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
5772 {
5773 SLJIT_ASSERT(has_alternatives);
5774 assert = CURRENT_AS(bracket_fallback)->u.assert;
5775 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
5776 {
5777 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5778 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5779 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5780 }
5781 cond = JUMP(SLJIT_JUMP);
5782 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
5783 }
5784 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
5785 {
5786 SLJIT_ASSERT(has_alternatives);
5787 cond = JUMP(SLJIT_JUMP);
5788 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
5789 }
5790 else
5791 SLJIT_ASSERT(!has_alternatives);
5792 }
5793
5794 if (has_alternatives)
5795 {
5796 count = 1;
5797 do
5798 {
5799 current->top = NULL;
5800 current->topfallbacks = NULL;
5801 current->nextfallbacks = NULL;
5802 if (*cc == OP_ALT)
5803 {
5804 ccprev = cc + 1 + LINK_SIZE;
5805 cc += GET(cc, 1);
5806 if (opcode != OP_COND && opcode != OP_SCOND)
5807 {
5808 if (localptr != 0 && opcode != OP_ONCE)
5809 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5810 else
5811 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5812 }
5813 compile_hotpath(common, ccprev, cc, current);
5814 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5815 return;
5816 }
5817
5818 /* Instructions after the current alternative is succesfully matched. */
5819 /* There is a similar code in compile_bracket_hotpath. */
5820 if (opcode == OP_ONCE)
5821 {
5822 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
5823 {
5824 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5825 /* TMP2 which is set here used by OP_KETRMAX below. */
5826 if (ket == OP_KETRMAX)
5827 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5828 else if (ket == OP_KETRMIN)
5829 {
5830 /* Move the STR_PTR to the localptr. */
5831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5832 }
5833 }
5834 else
5835 {
5836 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
5837 if (ket == OP_KETRMAX)
5838 {
5839 /* TMP2 which is set here used by OP_KETRMAX below. */
5840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5841 }
5842 }
5843 }
5844
5845 stacksize = 0;
5846 if (opcode != OP_ONCE)
5847 stacksize++;
5848 if (ket != OP_KET || bra != OP_BRA)
5849 stacksize++;
5850
5851 if (stacksize > 0) {
5852 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5853 allocate_stack(common, stacksize);
5854 else
5855 {
5856 /* We know we have place at least for one item on the top of the stack. */
5857 SLJIT_ASSERT(stacksize == 1);
5858 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5859 }
5860 }
5861
5862 stacksize = 0;
5863 if (ket != OP_KET || bra != OP_BRA)
5864 {
5865 if (ket != OP_KET)
5866 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5867 else
5868 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5869 stacksize++;
5870 }
5871
5872 if (opcode != OP_ONCE)
5873 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
5874
5875 if (offset != 0)
5876 {
5877 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5878 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5879 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5880 }
5881
5882 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
5883
5884 if (opcode != OP_ONCE)
5885 {
5886 SLJIT_ASSERT(jumplist);
5887 JUMPHERE(jumplist->jump);
5888 jumplist = jumplist->next;
5889 }
5890
5891 COMPILE_FALLBACKPATH(current->top);
5892 if (current->topfallbacks)
5893 set_jumps(current->topfallbacks, LABEL());
5894 SLJIT_ASSERT(!current->nextfallbacks);
5895 }
5896 while (*cc == OP_ALT);
5897 SLJIT_ASSERT(!jumplist);
5898
5899 if (cond != NULL)
5900 {
5901 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
5902 assert = CURRENT_AS(bracket_fallback)->u.assert;
5903 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT))
5904 {
5905 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5906 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5908 }
5909 JUMPHERE(cond);
5910 }
5911
5912 /* Free the STR_PTR. */
5913 if (localptr == 0)
5914 free_stack(common, 1);
5915 }
5916
5917 if (offset != 0)
5918 {
5919 /* Using both tmp register is better for instruction scheduling. */
5920 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5921 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5922 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
5925 free_stack(common, 3);
5926 }
5927 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5928 {
5929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
5930 free_stack(common, 1);
5931 }
5932 else if (opcode == OP_ONCE)
5933 {
5934 cc = ccbegin + GET(ccbegin, 1);
5935 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5936 {
5937 /* Reset head and drop saved frame. */
5938 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
5939 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
5940 }
5941 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
5942 {
5943 /* The STR_PTR must be released. */
5944 free_stack(common, 1);
5945 }
5946
5947 JUMPHERE(once);
5948 /* Restore previous localptr */
5949 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
5951 else if (ket == OP_KETRMIN)
5952 {
5953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5954 /* See the comment below. */
5955 free_stack(common, 2);
5956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5957 }
5958 }
5959
5960 if (ket == OP_KETRMAX)
5961 {
5962 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5963 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
5964 if (bra == OP_BRAZERO)
5965 {
5966 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5967 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5968 JUMPHERE(brazero);
5969 }
5970 free_stack(common, 1);
5971 }
5972 else if (ket == OP_KETRMIN)
5973 {
5974 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5975
5976 /* OP_ONCE removes everything in case of a fallback, so we don't
5977 need to explicitly release the STR_PTR. The extra release would
5978 affect badly the free_stack(2) above. */
5979 if (opcode != OP_ONCE)
5980 free_stack(common, 1);
5981 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
5982 if (opcode == OP_ONCE)
5983 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
5984 else if (bra == OP_BRAMINZERO)
5985 free_stack(common, 1);
5986 }
5987 else if (bra == OP_BRAZERO)
5988 {
5989 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5990 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5991 JUMPHERE(brazero);
5992 }
5993 }
5994
5995 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
5996 {
5997 DEFINE_COMPILER;
5998 int offset;
5999 struct sljit_jump *jump;
6000
6001 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
6002 {
6003 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6004 {
6005 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6006 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6008 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6010 }
6011 set_jumps(current->topfallbacks, LABEL());
6012 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6013 return;
6014 }
6015
6016 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
6017 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6018
6019 if (current->topfallbacks)
6020 {
6021 jump = JUMP(SLJIT_JUMP);
6022 set_jumps(current->topfallbacks, LABEL());
6023 /* Drop the stack frame. */
6024 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6025 JUMPHERE(jump);
6026 }
6027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
6028 }
6029
6030 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
6031 {
6032 assert_fallback fallback;
6033
6034 current->top = NULL;
6035 current->topfallbacks = NULL;
6036 current->nextfallbacks = NULL;
6037 if (current->cc[1] > OP_ASSERTBACK_NOT)
6038 {
6039 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
6040 compile_bracket_hotpath(common, current->cc, current);
6041 compile_bracket_fallbackpath(common, current->top);
6042 }
6043 else
6044 {
6045 memset(&fallback, 0, sizeof(fallback));
6046 fallback.common.cc = current->cc;
6047 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
6048 /* Manual call of compile_assert_hotpath. */
6049 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
6050 }
6051 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
6052 }
6053
6054 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
6055 {
6056 DEFINE_COMPILER;
6057
6058 while (current)
6059 {
6060 if (current->nextfallbacks != NULL)
6061 set_jumps(current->nextfallbacks, LABEL());
6062 switch(*current->cc)
6063 {
6064 case OP_SET_SOM:
6065 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6066 free_stack(common, 1);
6067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6068 break;
6069
6070 case OP_STAR:
6071 case OP_MINSTAR:
6072 case OP_PLUS:
6073 case OP_MINPLUS:
6074 case OP_QUERY:
6075 case OP_MINQUERY:
6076 case OP_UPTO:
6077 case OP_MINUPTO:
6078 case OP_EXACT:
6079 case OP_POSSTAR:
6080 case OP_POSPLUS:
6081 case OP_POSQUERY:
6082 case OP_POSUPTO:
6083 case OP_STARI:
6084 case OP_MINSTARI:
6085 case OP_PLUSI:
6086 case OP_MINPLUSI:
6087 case OP_QUERYI:
6088 case OP_MINQUERYI:
6089 case OP_UPTOI:
6090 case OP_MINUPTOI:
6091 case OP_EXACTI:
6092 case OP_POSSTARI:
6093 case OP_POSPLUSI:
6094 case OP_POSQUERYI:
6095 case OP_POSUPTOI:
6096 case OP_NOTSTAR:
6097 case OP_NOTMINSTAR:
6098 case OP_NOTPLUS:
6099 case OP_NOTMINPLUS:
6100 case OP_NOTQUERY:
6101 case OP_NOTMINQUERY:
6102 case OP_NOTUPTO:
6103 case OP_NOTMINUPTO:
6104 case OP_NOTEXACT:
6105 case OP_NOTPOSSTAR:
6106 case OP_NOTPOSPLUS:
6107 case OP_NOTPOSQUERY:
6108 case OP_NOTPOSUPTO:
6109 case OP_NOTSTARI:
6110 case OP_NOTMINSTARI:
6111 case OP_NOTPLUSI:
6112 case OP_NOTMINPLUSI:
6113 case OP_NOTQUERYI:
6114 case OP_NOTMINQUERYI:
6115 case OP_NOTUPTOI:
6116 case OP_NOTMINUPTOI:
6117 case OP_NOTEXACTI:
6118 case OP_NOTPOSSTARI:
6119 case OP_NOTPOSPLUSI:
6120 case OP_NOTPOSQUERYI:
6121 case OP_NOTPOSUPTOI:
6122 case OP_TYPESTAR:
6123 case OP_TYPEMINSTAR:
6124 case OP_TYPEPLUS:
6125 case OP_TYPEMINPLUS:
6126 case OP_TYPEQUERY:
6127 case OP_TYPEMINQUERY:
6128 case OP_TYPEUPTO:
6129 case OP_TYPEMINUPTO:
6130 case OP_TYPEEXACT:
6131 case OP_TYPEPOSSTAR:
6132 case OP_TYPEPOSPLUS:
6133 case OP_TYPEPOSQUERY:
6134 case OP_TYPEPOSUPTO:
6135 case OP_CLASS:
6136 case OP_NCLASS:
6137 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6138 case OP_XCLASS:
6139 #endif
6140 compile_iterator_fallbackpath(common, current);
6141 break;
6142
6143 case OP_REF:
6144 case OP_REFI:
6145 compile_ref_iterator_fallbackpath(common, current);
6146 break;
6147
6148 case OP_RECURSE:
6149 compile_recurse_fallbackpath(common, current);
6150 break;
6151
6152 case OP_ASSERT:
6153 case OP_ASSERT_NOT:
6154 case OP_ASSERTBACK:
6155 case OP_ASSERTBACK_NOT:
6156 compile_assert_fallbackpath(common, current);
6157 break;
6158
6159 case OP_ONCE:
6160 case OP_ONCE_NC:
6161 case OP_BRA:
6162 case OP_CBRA:
6163 case OP_COND:
6164 case OP_SBRA:
6165 case OP_SCBRA:
6166 case OP_SCOND:
6167 compile_bracket_fallbackpath(common, current);
6168 break;
6169
6170 case OP_BRAZERO:
6171 if (current->cc[1] > OP_ASSERTBACK_NOT)
6172 compile_bracket_fallbackpath(common, current);
6173 else
6174 compile_assert_fallbackpath(common, current);
6175 break;
6176
6177 case OP_BRAPOS:
6178 case OP_CBRAPOS:
6179 case OP_SBRAPOS:
6180 case OP_SCBRAPOS:
6181 case OP_BRAPOSZERO:
6182 compile_bracketpos_fallbackpath(common, current);
6183 break;
6184
6185 case OP_BRAMINZERO:
6186 compile_braminzero_fallbackpath(common, current);
6187 break;
6188
6189 case OP_FAIL:
6190 case OP_ACCEPT:
6191 case OP_ASSERT_ACCEPT:
6192 set_jumps(current->topfallbacks, LABEL());
6193 break;
6194
6195 default:
6196 SLJIT_ASSERT_STOP();
6197 break;
6198 }
6199 current = current->prev;
6200 }
6201 }
6202
6203 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6204 {
6205 DEFINE_COMPILER;
6206 pcre_uchar *cc = common->start + common->currententry->start;
6207 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6208 pcre_uchar *ccend = bracketend(cc);
6209 int localsize = get_localsize(common, ccbegin, ccend);
6210 int framesize = get_framesize(common, cc, TRUE);
6211 int alternativesize;
6212 BOOL needsframe;
6213 fallback_common altfallback;
6214 struct sljit_jump *jump;
6215
6216 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6217 needsframe = framesize >= 0;
6218 if (!needsframe)
6219 framesize = 0;
6220 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6221
6222 SLJIT_ASSERT(common->currententry->entry == NULL);
6223 common->currententry->entry = LABEL();
6224 set_jumps(common->currententry->calls, common->currententry->entry);
6225
6226 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6227 allocate_stack(common, localsize + framesize + alternativesize);
6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6229 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6230 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6231 if (needsframe)
6232 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6233
6234 if (alternativesize > 0)
6235 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6236
6237 memset(&altfallback, 0, sizeof(fallback_common));
6238 common->acceptlabel = NULL;
6239 common->accept = NULL;
6240 altfallback.cc = ccbegin;
6241 cc += GET(cc, 1);
6242 while (1)
6243 {
6244 altfallback.top = NULL;
6245 altfallback.topfallbacks = NULL;
6246
6247 if (altfallback.cc != ccbegin)
6248 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6249
6250 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6251 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6252 return;
6253
6254 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6255
6256 compile_fallbackpath(common, altfallback.top);
6257 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6258 return;
6259 set_jumps(altfallback.topfallbacks, LABEL());
6260
6261 if (*cc != OP_ALT)
6262 break;
6263
6264 altfallback.cc = cc + 1 + LINK_SIZE;
6265 cc += GET(cc, 1);
6266 }
6267 /* None of them matched. */
6268 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6269 jump = JUMP(SLJIT_JUMP);
6270
6271 set_jumps(common->accept, LABEL());
6272 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6273 if (needsframe)
6274 {
6275 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6276 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6277 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6278 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6279 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6280 }
6281 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6282
6283 JUMPHERE(jump);
6284 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6285 free_stack(common, localsize + framesize + alternativesize);
6286 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6287 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6288 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6289 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6290 }
6291
6292 #undef COMPILE_FALLBACKPATH
6293 #undef CURRENT_AS
6294
6295 void
6296 PRIV(jit_compile)(const real_pcre *re, pcre_extra *extra)
6297 {
6298 struct sljit_compiler *compiler;
6299 fallback_common rootfallback;
6300 compiler_common common_data;
6301 compiler_common *common = &common_data;
6302 const pcre_uint8 *tables = re->tables;
6303 pcre_study_data *study;
6304 pcre_uchar *ccend;
6305 executable_function *function;
6306 void *executable_func;
6307 struct sljit_label *leave;
6308 struct sljit_label *mainloop = NULL;
6309 struct sljit_label *empty_match_found;
6310 struct sljit_label *empty_match_fallback;
6311 struct sljit_jump *alloc_error;
6312 struct sljit_jump *reqbyte_notfound = NULL;
6313 struct sljit_jump *empty_match;
6314
6315 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6316 study = extra->study_data;
6317
6318 if (!tables)
6319 tables = PRIV(default_tables);
6320
6321 memset(&rootfallback, 0, sizeof(fallback_common));
6322 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6323
6324 common->compiler = NULL;
6325 common->start = rootfallback.cc;
6326 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6327 common->fcc = tables + fcc_offset;
6328 common->lcc = (sljit_w)(tables + lcc_offset);
6329 common->nltype = NLTYPE_FIXED;
6330 switch(re->options & PCRE_NEWLINE_BITS)
6331 {
6332 case 0:
6333 /* Compile-time default */
6334 switch (NEWLINE)
6335 {
6336 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6337 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6338 default: common->newline = NEWLINE; break;
6339 }
6340 break;
6341 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6342 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6343 case PCRE_NEWLINE_CR+
6344 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6345 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6346 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6347 default: return;
6348 }
6349 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6350 common->bsr_nltype = NLTYPE_ANYCRLF;
6351 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6352 common->bsr_nltype = NLTYPE_ANY;
6353 else
6354 {
6355 #ifdef BSR_ANYCRLF
6356 common->bsr_nltype = NLTYPE_ANYCRLF;
6357 #else
6358 common->bsr_nltype = NLTYPE_ANY;
6359 #endif
6360 }
6361 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6362 common->ctypes = (sljit_w)(tables + ctypes_offset);
6363 common->name_table = (sljit_w)re + re->name_table_offset;
6364 common->name_count = re->name_count;
6365 common->name_entry_size = re->name_entry_size;
6366 common->acceptlabel = NULL;
6367 common->stubs = NULL;
6368 common->entries = NULL;
6369 common->currententry = NULL;
6370 common->accept = NULL;
6371 common->calllimit = NULL;
6372 common->stackalloc = NULL;
6373 common->revertframes = NULL;
6374 common->wordboundary = NULL;
6375 common->anynewline = NULL;
6376 common->hspace = NULL;
6377 common->vspace = NULL;
6378 common->casefulcmp = NULL;
6379 common->caselesscmp = NULL;
6380 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6381 #ifdef SUPPORT_UTF8
6382 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6383 common->utf = (re->options & PCRE_UTF8) != 0;
6384 #ifdef SUPPORT_UCP
6385 common->useucp = (re->options & PCRE_UCP) != 0;
6386 #endif
6387 common->utfreadchar = NULL;
6388 #ifdef COMPILE_PCRE8
6389 common->utfreadtype8 = NULL;
6390 #endif
6391 #endif /* SUPPORT_UTF8 */
6392 #ifdef SUPPORT_UCP
6393 common->getucd = NULL;
6394 #endif
6395 ccend = bracketend(rootfallback.cc);
6396 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6397 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6398 if (common->localsize < 0)
6399 return;
6400 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6401 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6402 return;
6403 common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6404 if (!common->localptrs)
6405 return;
6406 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6407 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6408
6409 compiler = sljit_create_compiler();
6410 if (!compiler)
6411 {
6412 SLJIT_FREE(common->localptrs);
6413 return;
6414 }
6415 common->compiler = compiler;
6416
6417 /* Main pcre_jit_exec entry. */
6418 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6419
6420 /* Register init. */
6421 reset_ovector(common, (re->top_bracket + 1) * 2);
6422 if ((re->flags & PCRE_REQCHSET) != 0)
6423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0);
6424
6425 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
6426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
6427 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6428 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6429 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6430 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6431 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6432 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6434
6435 /* Main part of the matching */
6436 if ((re->options & PCRE_ANCHORED) == 0)
6437 {
6438 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6439 /* Forward search if possible. */
6440 if ((re->flags & PCRE_FIRSTSET) != 0)
6441 fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6442 else if ((re->flags & PCRE_STARTLINE) != 0)
6443 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
6444 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6445 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6446 }
6447 if ((re->flags & PCRE_REQCHSET) != 0)
6448 reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
6449
6450 /* Store the current STR_PTR in OVECTOR(0). */
6451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6452 /* Copy the limit of allowed recursions. */
6453 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6454
6455 compile_hotpath(common, rootfallback.cc, ccend, &rootfallback);
6456 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6457 {
6458 sljit_free_compiler(compiler);
6459 SLJIT_FREE(common->localptrs);
6460 return;
6461 }
6462
6463 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6464 empty_match_found = LABEL();
6465
6466 common->acceptlabel = LABEL();
6467 if (common->accept != NULL)
6468 set_jumps(common->accept, common->acceptlabel);
6469
6470 /* This means we have a match. Update the ovector. */
6471 copy_ovector(common, re->top_bracket + 1);
6472 leave = LABEL();
6473 sljit_emit_return(compiler, SLJIT_UNUSED, 0);
6474
6475 empty_match_fallback = LABEL();
6476 compile_fallbackpath(common, rootfallback.top);
6477 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6478 {
6479 sljit_free_compiler(compiler);
6480 SLJIT_FREE(common->localptrs);
6481 return;
6482 }
6483
6484 SLJIT_ASSERT(rootfallback.prev == NULL);
6485
6486 /* Check we have remaining characters. */
6487 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6488
6489 if ((re->options & PCRE_ANCHORED) == 0)
6490 {
6491 if ((re->options & PCRE_FIRSTLINE) == 0)
6492 {
6493 if (study != NULL && study->minlength > 1)
6494 {
6495 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6496 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
6497 }
6498 else
6499 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
6500 }
6501 else
6502 {
6503 if (study != NULL && study->minlength > 1)
6504 {
6505 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6506 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
6507 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
6508 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
6509 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
6510 JUMPTO(SLJIT_C_ZERO, mainloop);
6511 }
6512 else
6513 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop);
6514 }
6515 }
6516
6517 if (reqbyte_notfound != NULL)
6518 JUMPHERE(reqbyte_notfound);
6519 /* Copy OVECTOR(1) to OVECTOR(0) */
6520 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6521 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6522 JUMPTO(SLJIT_JUMP, leave);
6523
6524 flush_stubs(common);
6525
6526 JUMPHERE(empty_match);
6527 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6528 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6529 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback);
6530 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6531 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
6532 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6533 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
6534 JUMPTO(SLJIT_JUMP, empty_match_fallback);
6535
6536 common->currententry = common->entries;
6537 while (common->currententry != NULL)
6538 {
6539 /* Might add new entries. */
6540 compile_recurse(common);
6541 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6542 {
6543 sljit_free_compiler(compiler);
6544 SLJIT_FREE(common->localptrs);
6545 return;
6546 }
6547 flush_stubs(common);
6548 common->currententry = common->currententry->next;
6549 }
6550
6551 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
6552 /* This is a (really) rare case. */
6553 set_jumps(common->stackalloc, LABEL());
6554 /* RETURN_ADDR is not a saved register. */
6555 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
6556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
6557 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6558 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6559 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
6560 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
6561
6562 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
6563 alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6564 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6565 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6566 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
6567 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
6568 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
6569 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6570
6571 /* Allocation failed. */
6572 JUMPHERE(alloc_error);
6573 /* We break the return address cache here, but this is a really rare case. */
6574 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
6575 JUMPTO(SLJIT_JUMP, leave);
6576
6577 /* Call limit reached. */
6578 set_jumps(common->calllimit, LABEL());
6579 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
6580 JUMPTO(SLJIT_JUMP, leave);
6581
6582 if (common->revertframes != NULL)
6583 {
6584 set_jumps(common->revertframes, LABEL());
6585 do_revertframes(common);
6586 }
6587 if (common->wordboundary != NULL)
6588 {
6589 set_jumps(common->wordboundary, LABEL());
6590 check_wordboundary(common);
6591 }
6592 if (common->anynewline != NULL)
6593 {
6594 set_jumps(common->anynewline, LABEL());
6595 check_anynewline(common);
6596 }
6597 if (common->hspace != NULL)
6598 {
6599 set_jumps(common->hspace, LABEL());
6600 check_hspace(common);
6601 }
6602 if (common->vspace != NULL)
6603 {
6604 set_jumps(common->vspace, LABEL());
6605 check_vspace(common);
6606 }
6607 if (common->casefulcmp != NULL)
6608 {
6609 set_jumps(common->casefulcmp, LABEL());
6610 do_casefulcmp(common);
6611 }
6612 if (common->caselesscmp != NULL)
6613 {
6614 set_jumps(common->caselesscmp, LABEL());
6615 do_caselesscmp(common);
6616 }
6617 #ifdef SUPPORT_UTF
6618 if (common->utfreadchar != NULL)
6619 {
6620 set_jumps(common->utfreadchar, LABEL());
6621 do_utfreadchar(common);
6622 }
6623 #ifdef COMPILE_PCRE8
6624 if (common->utfreadtype8 != NULL)
6625 {
6626 set_jumps(common->utfreadtype8, LABEL());
6627 do_utfreadtype8(common);
6628 }
6629 #endif
6630 #endif /* COMPILE_PCRE8 */
6631 #ifdef SUPPORT_UCP
6632 if (common->getucd != NULL)
6633 {
6634 set_jumps(common->getucd, LABEL());
6635 do_getucd(common);
6636 }
6637 #endif
6638
6639 SLJIT_FREE(common->localptrs);
6640 executable_func = sljit_generate_code(compiler);
6641 sljit_free_compiler(compiler);
6642 if (executable_func == NULL)
6643 return;
6644
6645 function = SLJIT_MALLOC(sizeof(executable_function));
6646 if (function == NULL)
6647 {
6648 /* This case is highly unlikely since we just recently
6649 freed a lot of memory. Although not impossible. */
6650 sljit_free_code(executable_func);
6651 return;
6652 }
6653
6654 function->executable_func = executable_func;
6655 function->callback = NULL;
6656 function->userdata = NULL;
6657 extra->executable_jit = function;
6658 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
6659 }
6660
6661 static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function)
6662 {
6663 union {
6664 void* executable_func;
6665 jit_function call_executable_func;
6666 } convert_executable_func;
6667 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
6668 struct sljit_stack local_stack;
6669
6670 local_stack.top = (sljit_w)&local_area;
6671 local_stack.base = local_stack.top;
6672 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
6673 local_stack.max_limit = local_stack.limit;
6674 arguments->stack = &local_stack;
6675 convert_executable_func.executable_func = function->executable_func;
6676 return convert_executable_func.call_executable_func(arguments);
6677 }
6678
6679 int
6680 PRIV(jit_exec)(const real_pcre *re, void *executable_func,
6681 const pcre_uchar *subject, int length, int start_offset, int options,
6682 int match_limit, int *offsets, int offsetcount)
6683 {
6684 executable_function *function = (executable_function*)executable_func;
6685 union {
6686 void* executable_func;
6687 jit_function call_executable_func;
6688 } convert_executable_func;
6689 jit_arguments arguments;
6690 int maxoffsetcount;
6691 int retval;
6692
6693 /* Sanity checks should be handled by pcre_exec. */
6694 arguments.stack = NULL;
6695 arguments.str = subject + start_offset;
6696 arguments.begin = subject;
6697 arguments.end = subject + length;
6698 arguments.calllimit = match_limit; /* JIT decreases this value less times. */
6699 arguments.notbol = (options & PCRE_NOTBOL) != 0;
6700 arguments.noteol = (options & PCRE_NOTEOL) != 0;
6701 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
6702 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6703 arguments.offsets = offsets;
6704
6705 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
6706 the output vector for storing captured strings, with the remainder used as
6707 workspace. We don't need the workspace here. For compatibility, we limit the
6708 number of captured strings in the same way as pcre_exec(), so that the user
6709 gets the same result with and without JIT. */
6710
6711 offsetcount = ((offsetcount - (offsetcount % 3)) * 2)/3;
6712 maxoffsetcount = (re->top_bracket + 1) * 2;
6713 if (offsetcount > maxoffsetcount)
6714 offsetcount = maxoffsetcount;
6715 arguments.offsetcount = offsetcount;
6716
6717 if (function->callback)
6718 arguments.stack = (struct sljit_stack*)function->callback(function->userdata);
6719 else
6720 arguments.stack = (struct sljit_stack*)function->userdata;
6721
6722 if (arguments.stack == NULL)
6723 retval = jit_machine_stack_exec(&arguments, function);
6724 else
6725 {
6726 convert_executable_func.executable_func = function->executable_func;
6727 retval = convert_executable_func.call_executable_func(&arguments);
6728 }
6729
6730 if (retval * 2 > offsetcount)
6731 retval = 0;
6732 return retval;
6733 }
6734
6735 void
6736 PRIV(jit_free)(void *executable_func)
6737 {
6738 executable_function *function = (executable_function*)executable_func;