/[pcre]/code/branches/pcre16/pcre_jit_compile.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 770 - (show annotations)
Mon Nov 28 20:39:30 2011 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 205934 byte(s)
Make character ranges 16 bit friendly
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (pcre_malloc)(size)
56 #define SLJIT_FREE(ptr) (pcre_free)(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size. */
69 #define LOCAL_SPACE_SIZE 32768
70
/* NOTE(review): not referenced in this part of the file; presumably the step
by which the backtracking stack is enlarged - confirm at the use site. */
71 #define STACK_GROWTH_RATE 8192
72
73 /* When enabled, allocate_stack() emits extra code which overwrites TMP1, TMP3,
RETURN_ADDR, LOCALS0 and LOCALS1 with a marker value, so that generated code
relying on them surviving a stack allocation can be detected. Only defined
when SLJIT_DEBUG is non-zero. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
/* Argument block of the compiled matcher: a jit_function receives a pointer
to one of these. The generated code reads members directly by offset (see
reset_ovector, which uses SLJIT_OFFSETOF(jit_arguments, begin)), so the
member layout is shared with the emitted machine code. */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
/* reset_ovector() initialises the ovector slots to begin minus one character. */
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
/* NOTE(review): not used in this part of the file. Judging by the member
names it bundles the generated code with an optional callback and its user
data - confirm where it is allocated and executed. */
165 typedef struct executable_function {
166 void *executable_func;
167 pcre_jit_callback callback;
168 void *userdata;
169 } executable_function;
170
/* Singly linked list of emitted jumps whose target is not known yet.
add_jump() prepends an item; set_jumps() binds every jump of a list to a
label. */
171 typedef struct jump_list {
172 struct sljit_jump *jump;
173 struct jump_list *next;
174 } jump_list;
175
/* Kinds of out-of-line code stubs. Only stack_alloc exists; for it
flush_stubs() emits a fast call which is collected in common->stackalloc. */
176 enum stub_types { stack_alloc };
177
/* One pending out-of-line stub, kept in the list headed by
compiler_common.stubs (see add_stub and flush_stubs). */
178 typedef struct stub_list {
179 enum stub_types type;
/* Stub specific value; allocate_stack() passes 0. */
180 int data;
/* Jump which enters the stub; flush_stubs() binds it to the stub code. */
181 struct sljit_jump *start;
/* Label emitted by add_stub(); the stub code jumps back to it when done. */
182 struct sljit_label *leave;
183 struct stub_list *next;
184 } stub_list;
185
/* Signature of the generated matcher: it is called with a pointer to the
argument block. NOTE(review): the meaning of the int result is not visible in
this part of the file. */
186 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
187
188 /* The following structure is the key data type for the recursive
189 code generator. It is allocated by compile_hotpath, and contains
190 the arguments for compile_fallbackpath. Must be the first member
191 of its descendants (every *_fallback structure embeds it as a
member named 'common'). */
192 typedef struct fallback_common {
193 /* Concatenation stack. */
194 struct fallback_common *prev;
195 jump_list *nextfallbacks;
196 /* Internal stack (for component operators). */
197 struct fallback_common *top;
198 jump_list *topfallbacks;
199 /* Opcode pointer. */
200 pcre_uchar *cc;
201 } fallback_common;
202
/* Fallback data of an assertion. NOTE(review): the users of this structure
are outside this part of the file; member notes are the original ones. */
203 typedef struct assert_fallback {
/* Must stay the first member (see fallback_common). */
204 fallback_common common;
205 jump_list *condfailed;
206 /* Less than 0 (-1) if a frame is not needed. */
207 int framesize;
208 /* Points to our private memory word on the stack. */
209 int localptr;
210 /* For iterators. */
211 struct sljit_label *hotpath;
212 } assert_fallback;
213
/* Fallback data of a bracket. The union 'u' holds exactly one of its members
at a time; which one is valid depends on the opcode, as noted on each member. */
214 typedef struct bracket_fallback {
/* Must stay the first member (see fallback_common). */
215 fallback_common common;
216 /* Where to continue if an alternative is successfully matched. */
217 struct sljit_label *althotpath;
218 /* For rmin and rmax iterators. */
219 struct sljit_label *recursivehotpath;
220 /* For greedy ? operator. */
221 struct sljit_label *zerohotpath;
222 /* Contains the branches of a failed condition. */
223 union {
224 /* Both for OP_COND, OP_SCOND. */
225 jump_list *condfailed;
226 assert_fallback *assert;
227 /* For OP_ONCE. -1 if not needed. */
228 int framesize;
229 } u;
230 /* Points to our private memory word on the stack. */
231 int localptr;
232 } bracket_fallback;
233
/* Fallback data of a possessive bracket. NOTE(review): presumably used for the
*BRAPOS opcodes - confirm at the use site. */
234 typedef struct bracketpos_fallback {
/* Must stay the first member (see fallback_common). */
235 fallback_common common;
236 /* Points to our private memory word on the stack. */
237 int localptr;
238 /* Reverting stack is needed. */
239 int framesize;
240 /* Allocated stack size. */
241 int stacksize;
242 } bracketpos_fallback;
243
/* NOTE(review): presumably the fallback data of OP_BRAMINZERO; 'hotpath' is a
label in the generated code - confirm its exact role at the use site. */
244 typedef struct braminzero_fallback {
/* Must stay the first member (see fallback_common). */
245 fallback_common common;
246 struct sljit_label *hotpath;
247 } braminzero_fallback;
248
/* Fallback data of an iterator (repeat) opcode. */
249 typedef struct iterator_fallback {
/* Must stay the first member (see fallback_common). */
250 fallback_common common;
251 /* Next iteration. */
252 struct sljit_label *hotpath;
253 } iterator_fallback;
254
/* Describes one recursively callable function of the generated code. The
entries form a singly linked list (see compiler_common.entries). */
255 typedef struct recurse_entry {
256 struct recurse_entry *next;
257 /* Contains the function entry. */
258 struct sljit_label *entry;
259 /* Collects the calls until the function is created. */
260 jump_list *calls;
261 /* Points to the starting opcode. */
262 int start;
263 } recurse_entry;
264
/* Fallback data of a recursion; it carries nothing beyond the common part. */
265 typedef struct recurse_fallback {
266 fallback_common common;
267 } recurse_fallback;
268
/* State shared by all code generator functions while one pattern is compiled.
Only members whose use is visible in this part of the file are commented;
NOTE(review): the remaining ones need to be confirmed at their use sites. */
269 typedef struct compiler_common {
/* The sljit compiler instance; DEFINE_COMPILER copies it into a local. */
270 struct sljit_compiler *compiler;
/* First opcode of the pattern; PRIV_DATA() indexes localptrs by (cc - start). */
271 pcre_uchar *start;
272 int localsize;
/* Offset of the private local word of an opcode, filled in by set_localptrs(). */
273 int *localptrs;
274 const pcre_uint8 *fcc;
275 sljit_w lcc;
/* Base offset used by OVECTOR_PRIV(). */
276 int cbraptr;
277 int nltype;
278 int newline;
279 int bsr_nltype;
280 int endonly;
281 sljit_w ctypes;
282 sljit_uw name_table;
283 sljit_w name_count;
284 sljit_w name_entry_size;
285 struct sljit_label *acceptlabel;
/* Pending out-of-line stubs: appended by add_stub(), emitted by flush_stubs(). */
286 stub_list *stubs;
287 recurse_entry *entries;
288 recurse_entry *currententry;
289 jump_list *accept;
/* Jumps taken when CALL_COUNT reaches zero (see decrease_call_count). */
290 jump_list *calllimit;
/* Fast calls emitted by flush_stubs() for stack_alloc stubs. */
291 jump_list *stackalloc;
292 jump_list *revertframes;
293 jump_list *wordboundary;
294 jump_list *anynewline;
295 jump_list *hspace;
296 jump_list *vspace;
297 jump_list *casefulcmp;
298 jump_list *caselesscmp;
299 BOOL jscript_compat;
300 #ifdef SUPPORT_UTF8
/* Checked by next_opcode() when computing the length of character opcodes. */
301 BOOL utf8;
302 #ifdef SUPPORT_UCP
303 BOOL useucp;
304 #endif
305 jump_list *utf8readchar;
306 jump_list *utf8readtype8;
307 #endif
308 #ifdef SUPPORT_UCP
309 jump_list *getucd;
310 #endif
311 } compiler_common;
312
313 /* For byte_sequence_compare. */
314
/* NOTE(review): byte_sequence_compare is outside this part of the file. The
two unions below overlay the same storage as an int, a short and an array of
character units (four 8 bit units or two 16 bit units); they only exist when
SLJIT_UNALIGNED is set. Presumably 'c' holds the characters and 'oc' their
other-case companions - confirm at the use site. */
315 typedef struct compare_context {
316 int length;
317 int sourcereg;
318 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
319 int ucharptr;
320 union {
321 sljit_i asint;
322 sljit_h asshort;
323 #ifdef COMPILE_PCRE8
324 sljit_ub asbyte;
325 sljit_ub asuchars[4];
326 #else
327 #ifdef COMPILE_PCRE16
328 sljit_uh asuchars[2];
329 #endif
330 #endif
331 } c;
332 union {
333 sljit_i asint;
334 sljit_h asshort;
335 #ifdef COMPILE_PCRE8
336 sljit_ub asbyte;
337 sljit_ub asuchars[4];
338 #else
339 #ifdef COMPILE_PCRE16
340 sljit_uh asuchars[2];
341 #endif
342 #endif
343 } oc;
344 #endif
345 } compare_context;
346
/* Marker values written into saved stack frames by init_frame():
frame_end terminates a frame, frame_setstrbegin is followed by a saved copy
of OVECTOR(0). */
347 enum {
348 frame_end = 0,
349 frame_setstrbegin = -1
350 };
351
352 /* Used for accessing the elements of the stack. */
/* STACK(i) is a byte offset below the stack top: STACK(0) is
-sizeof(sljit_w), STACK(1) is -2 * sizeof(sljit_w), and so on. */
353 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
354
/* Symbolic names of the sljit registers used by the generated code.
Note that reset_ovector() reads the jit_arguments block through
SLJIT_GENERAL_REG1, the register which is named STR_PTR here. */
355 #define TMP1 SLJIT_TEMPORARY_REG1
356 #define TMP2 SLJIT_TEMPORARY_REG3
357 #define TMP3 SLJIT_TEMPORARY_EREG2
358 #define STR_PTR SLJIT_GENERAL_REG1
359 #define STR_END SLJIT_GENERAL_REG2
360 #define STACK_TOP SLJIT_TEMPORARY_REG2
361 #define STACK_LIMIT SLJIT_GENERAL_REG3
362 #define ARGUMENTS SLJIT_GENERAL_EREG1
363 #define CALL_COUNT SLJIT_GENERAL_EREG2
364 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
365
366 /* Locals layout. */
/* All values below are byte offsets, used together with SLJIT_LOCALS_REG as
the base register. */
367 /* These two locals can be used by the current opcode. */
368 #define LOCALS0 (0 * sizeof(sljit_w))
369 #define LOCALS1 (1 * sizeof(sljit_w))
370 /* Two local variables for possessive quantifiers (char1 cannot use them). */
371 #define POSSESSIVE0 (2 * sizeof(sljit_w))
372 #define POSSESSIVE1 (3 * sizeof(sljit_w))
373 /* Head of the last recursion. */
374 #define RECURSIVE_HEAD (4 * sizeof(sljit_w))
375 /* Max limit of recursions. */
376 #define CALL_LIMIT (5 * sizeof(sljit_w))
377 /* Last known position of the requested byte. */
378 #define REQ_BYTE_PTR (6 * sizeof(sljit_w))
379 /* End pointer of the first line. */
380 #define FIRSTLINE_END (7 * sizeof(sljit_w))
381 /* The output vector is stored on the stack, and contains pointers
382 to characters. The vector data is divided into two groups: the first
383 group contains the start / end character pointers, and the second is
384 the start pointers when the end of the capturing group has not yet been reached. */
385 #define OVECTOR_START (8 * sizeof(sljit_w))
386 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
/* The next two macros need a compiler_common pointer named 'common' in scope.
PRIV_DATA(cc) is the private local offset recorded for the opcode at cc. */
387 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
388 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
389
/* The sljit move opcode matching the width of one character unit of the
library being built: unsigned byte for the 8 bit library, unsigned half word
for the 16 bit library. Any other configuration is a build error. */
390 #ifdef COMPILE_PCRE8
391 #define MOV_UCHAR SLJIT_MOV_UB
392 #else
393 #ifdef COMPILE_PCRE16
394 #define MOV_UCHAR SLJIT_MOV_UH
395 #else
396 #error Unsupported compiling mode
397 #endif
398 #endif
399
400 /* Shortcuts. */
/* DEFINE_COMPILER declares the local 'compiler' (taken from a variable named
'common'); all other shortcuts pass that local to the sljit emitter
functions, so they can only be used after DEFINE_COMPILER. */
401 #define DEFINE_COMPILER \
402 struct sljit_compiler *compiler = common->compiler
403 #define OP1(op, dst, dstw, src, srcw) \
404 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
405 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
406 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emits a label at the current position. */
407 #define LABEL() \
408 sljit_emit_label(compiler)
/* Emits a jump whose target is set later. */
409 #define JUMP(type) \
410 sljit_emit_jump(compiler, (type))
/* Emits a jump to an already existing label. */
411 #define JUMPTO(type, label) \
412 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Makes a previously emitted jump land at the current position. */
413 #define JUMPHERE(jump) \
414 sljit_set_label((jump), sljit_emit_label(compiler))
/* Compare and jump variants of JUMP and JUMPTO. */
415 #define CMP(type, src1, src1w, src2, src2w) \
416 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
417 #define CMPTO(type, src1, src1w, src2, src2w, label) \
418 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
419 #define COND_VALUE(op, dst, dstw, type) \
420 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
421
422 static pcre_uchar* bracketend(pcre_uchar* cc)
423 {
424 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
425 do cc += GET(cc, 1); while (*cc == OP_ALT);
426 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
427 cc += 1 + LINK_SIZE;
428 return cc;
429 }
430
431 /* Functions which might need modification for all new supported opcodes:
432 next_opcode
433 get_localspace
434 set_localptrs
435 get_framesize
436 init_frame
437 get_localsize
438 copy_locals
439 compile_hotpath
440 compile_fallbackpath
441 */
442
443 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
444 {
445 SLJIT_UNUSED_ARG(common);
446 switch(*cc)
447 {
448 case OP_SOD:
449 case OP_SOM:
450 case OP_SET_SOM:
451 case OP_NOT_WORD_BOUNDARY:
452 case OP_WORD_BOUNDARY:
453 case OP_NOT_DIGIT:
454 case OP_DIGIT:
455 case OP_NOT_WHITESPACE:
456 case OP_WHITESPACE:
457 case OP_NOT_WORDCHAR:
458 case OP_WORDCHAR:
459 case OP_ANY:
460 case OP_ALLANY:
461 case OP_ANYNL:
462 case OP_NOT_HSPACE:
463 case OP_HSPACE:
464 case OP_NOT_VSPACE:
465 case OP_VSPACE:
466 case OP_EXTUNI:
467 case OP_EODN:
468 case OP_EOD:
469 case OP_CIRC:
470 case OP_CIRCM:
471 case OP_DOLL:
472 case OP_DOLLM:
473 case OP_TYPESTAR:
474 case OP_TYPEMINSTAR:
475 case OP_TYPEPLUS:
476 case OP_TYPEMINPLUS:
477 case OP_TYPEQUERY:
478 case OP_TYPEMINQUERY:
479 case OP_TYPEPOSSTAR:
480 case OP_TYPEPOSPLUS:
481 case OP_TYPEPOSQUERY:
482 case OP_CRSTAR:
483 case OP_CRMINSTAR:
484 case OP_CRPLUS:
485 case OP_CRMINPLUS:
486 case OP_CRQUERY:
487 case OP_CRMINQUERY:
488 case OP_DEF:
489 case OP_BRAZERO:
490 case OP_BRAMINZERO:
491 case OP_BRAPOSZERO:
492 case OP_FAIL:
493 case OP_ACCEPT:
494 case OP_ASSERT_ACCEPT:
495 case OP_SKIPZERO:
496 return cc + 1;
497
498 case OP_ANYBYTE:
499 #ifdef SUPPORT_UTF8
500 if (common->utf8) return NULL;
501 #endif
502 return cc + 1;
503
504 case OP_CHAR:
505 case OP_CHARI:
506 case OP_NOT:
507 case OP_NOTI:
508
509 case OP_STAR:
510 case OP_MINSTAR:
511 case OP_PLUS:
512 case OP_MINPLUS:
513 case OP_QUERY:
514 case OP_MINQUERY:
515 case OP_POSSTAR:
516 case OP_POSPLUS:
517 case OP_POSQUERY:
518 case OP_STARI:
519 case OP_MINSTARI:
520 case OP_PLUSI:
521 case OP_MINPLUSI:
522 case OP_QUERYI:
523 case OP_MINQUERYI:
524 case OP_POSSTARI:
525 case OP_POSPLUSI:
526 case OP_POSQUERYI:
527 case OP_NOTSTAR:
528 case OP_NOTMINSTAR:
529 case OP_NOTPLUS:
530 case OP_NOTMINPLUS:
531 case OP_NOTQUERY:
532 case OP_NOTMINQUERY:
533 case OP_NOTPOSSTAR:
534 case OP_NOTPOSPLUS:
535 case OP_NOTPOSQUERY:
536 case OP_NOTSTARI:
537 case OP_NOTMINSTARI:
538 case OP_NOTPLUSI:
539 case OP_NOTMINPLUSI:
540 case OP_NOTQUERYI:
541 case OP_NOTMINQUERYI:
542 case OP_NOTPOSSTARI:
543 case OP_NOTPOSPLUSI:
544 case OP_NOTPOSQUERYI:
545 cc += 2;
546 #ifdef SUPPORT_UTF8
547 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
548 #endif
549 return cc;
550
551 case OP_UPTO:
552 case OP_MINUPTO:
553 case OP_EXACT:
554 case OP_POSUPTO:
555 case OP_UPTOI:
556 case OP_MINUPTOI:
557 case OP_EXACTI:
558 case OP_POSUPTOI:
559 case OP_NOTUPTO:
560 case OP_NOTMINUPTO:
561 case OP_NOTEXACT:
562 case OP_NOTPOSUPTO:
563 case OP_NOTUPTOI:
564 case OP_NOTMINUPTOI:
565 case OP_NOTEXACTI:
566 case OP_NOTPOSUPTOI:
567 cc += 2 + IMM2_SIZE;
568 #ifdef SUPPORT_UTF8
569 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
570 #endif
571 return cc;
572
573 case OP_NOTPROP:
574 case OP_PROP:
575 case OP_TYPEUPTO:
576 case OP_TYPEMINUPTO:
577 case OP_TYPEEXACT:
578 case OP_TYPEPOSUPTO:
579 case OP_REF:
580 case OP_REFI:
581 case OP_CREF:
582 case OP_NCREF:
583 case OP_RREF:
584 case OP_NRREF:
585 case OP_CLOSE:
586 cc += 1 + IMM2_SIZE;
587 return cc;
588
589 case OP_CRRANGE:
590 case OP_CRMINRANGE:
591 return cc + 1 + 2 * IMM2_SIZE;
592
593 case OP_CLASS:
594 case OP_NCLASS:
595 return cc + 1 + 32 / sizeof(pcre_uchar);
596
597 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
598 case OP_XCLASS:
599 return cc + GET(cc, 1);
600 #endif
601
602 case OP_RECURSE:
603 case OP_ASSERT:
604 case OP_ASSERT_NOT:
605 case OP_ASSERTBACK:
606 case OP_ASSERTBACK_NOT:
607 case OP_REVERSE:
608 case OP_ONCE:
609 case OP_ONCE_NC:
610 case OP_BRA:
611 case OP_BRAPOS:
612 case OP_COND:
613 case OP_SBRA:
614 case OP_SBRAPOS:
615 case OP_SCOND:
616 case OP_ALT:
617 case OP_KET:
618 case OP_KETRMAX:
619 case OP_KETRMIN:
620 case OP_KETRPOS:
621 return cc + 1 + LINK_SIZE;
622
623 case OP_CBRA:
624 case OP_CBRAPOS:
625 case OP_SCBRA:
626 case OP_SCBRAPOS:
627 return cc + 1 + LINK_SIZE + IMM2_SIZE;
628
629 default:
630 return NULL;
631 }
632 }
633
634 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
635 {
636 int localspace = 0;
637 pcre_uchar *alternative;
638 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
639 while (cc < ccend)
640 {
641 switch(*cc)
642 {
643 case OP_ASSERT:
644 case OP_ASSERT_NOT:
645 case OP_ASSERTBACK:
646 case OP_ASSERTBACK_NOT:
647 case OP_ONCE:
648 case OP_ONCE_NC:
649 case OP_BRAPOS:
650 case OP_SBRA:
651 case OP_SBRAPOS:
652 case OP_SCOND:
653 localspace += sizeof(sljit_w);
654 cc += 1 + LINK_SIZE;
655 break;
656
657 case OP_CBRAPOS:
658 case OP_SCBRAPOS:
659 localspace += sizeof(sljit_w);
660 cc += 1 + LINK_SIZE + IMM2_SIZE;
661 break;
662
663 case OP_COND:
664 /* Might be a hidden SCOND. */
665 alternative = cc + GET(cc, 1);
666 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
667 localspace += sizeof(sljit_w);
668 cc += 1 + LINK_SIZE;
669 break;
670
671 default:
672 cc = next_opcode(common, cc);
673 if (cc == NULL)
674 return -1;
675 break;
676 }
677 }
678 return localspace;
679 }
680
681 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
682 {
683 pcre_uchar *cc = common->start;
684 pcre_uchar *alternative;
685 while (cc < ccend)
686 {
687 switch(*cc)
688 {
689 case OP_ASSERT:
690 case OP_ASSERT_NOT:
691 case OP_ASSERTBACK:
692 case OP_ASSERTBACK_NOT:
693 case OP_ONCE:
694 case OP_ONCE_NC:
695 case OP_BRAPOS:
696 case OP_SBRA:
697 case OP_SBRAPOS:
698 case OP_SCOND:
699 common->localptrs[cc - common->start] = localptr;
700 localptr += sizeof(sljit_w);
701 cc += 1 + LINK_SIZE;
702 break;
703
704 case OP_CBRAPOS:
705 case OP_SCBRAPOS:
706 common->localptrs[cc - common->start] = localptr;
707 localptr += sizeof(sljit_w);
708 cc += 1 + LINK_SIZE + IMM2_SIZE;
709 break;
710
711 case OP_COND:
712 /* Might be a hidden SCOND. */
713 alternative = cc + GET(cc, 1);
714 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
715 {
716 common->localptrs[cc - common->start] = localptr;
717 localptr += sizeof(sljit_w);
718 }
719 cc += 1 + LINK_SIZE;
720 break;
721
722 default:
723 cc = next_opcode(common, cc);
724 SLJIT_ASSERT(cc != NULL);
725 break;
726 }
727 }
728 }
729
730 /* Returns with -1 if no need for frame. */
731 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
732 {
733 pcre_uchar *ccend = bracketend(cc);
734 int length = 0;
735 BOOL possessive = FALSE;
736 BOOL setsom_found = FALSE;
737
738 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
739 {
740 length = 3;
741 possessive = TRUE;
742 }
743
744 cc = next_opcode(common, cc);
745 SLJIT_ASSERT(cc != NULL);
746 while (cc < ccend)
747 switch(*cc)
748 {
749 case OP_SET_SOM:
750 case OP_RECURSE:
751 if (!setsom_found)
752 {
753 length += 2;
754 setsom_found = TRUE;
755 }
756 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
757 break;
758
759 case OP_CBRA:
760 case OP_CBRAPOS:
761 case OP_SCBRA:
762 case OP_SCBRAPOS:
763 length += 3;
764 cc += 1 + LINK_SIZE + IMM2_SIZE;
765 break;
766
767 default:
768 cc = next_opcode(common, cc);
769 SLJIT_ASSERT(cc != NULL);
770 break;
771 }
772
773 /* Possessive quantifiers can use a special case. */
774 if (SLJIT_UNLIKELY(possessive) && length == 3)
775 return -1;
776
777 if (length > 0)
778 return length + 1;
779 return -1;
780 }
781
782 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
783 {
784 DEFINE_COMPILER;
785 pcre_uchar *ccend = bracketend(cc);
786 BOOL setsom_found = FALSE;
787 int offset;
788
789 /* >= 1 + shortest item size (2) */
790 SLJIT_ASSERT(stackpos >= stacktop + 2);
791
792 stackpos = STACK(stackpos);
793 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
794 cc = next_opcode(common, cc);
795 SLJIT_ASSERT(cc != NULL);
796 while (cc < ccend)
797 switch(*cc)
798 {
799 case OP_SET_SOM:
800 case OP_RECURSE:
801 if (!setsom_found)
802 {
803 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
804 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
805 stackpos += (int)sizeof(sljit_w);
806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
807 stackpos += (int)sizeof(sljit_w);
808 setsom_found = TRUE;
809 }
810 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
811 break;
812
813 case OP_CBRA:
814 case OP_CBRAPOS:
815 case OP_SCBRA:
816 case OP_SCBRAPOS:
817 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
819 stackpos += (int)sizeof(sljit_w);
820 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
821 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
823 stackpos += (int)sizeof(sljit_w);
824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
825 stackpos += (int)sizeof(sljit_w);
826
827 cc += 1 + LINK_SIZE + IMM2_SIZE;
828 break;
829
830 default:
831 cc = next_opcode(common, cc);
832 SLJIT_ASSERT(cc != NULL);
833 break;
834 }
835
836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
837 SLJIT_ASSERT(stackpos == STACK(stacktop));
838 }
839
840 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
841 {
842 int localsize = 2;
843 pcre_uchar *alternative;
844 /* Calculate the sum of the local variables. */
845 while (cc < ccend)
846 {
847 switch(*cc)
848 {
849 case OP_ASSERT:
850 case OP_ASSERT_NOT:
851 case OP_ASSERTBACK:
852 case OP_ASSERTBACK_NOT:
853 case OP_ONCE:
854 case OP_ONCE_NC:
855 case OP_BRAPOS:
856 case OP_SBRA:
857 case OP_SBRAPOS:
858 case OP_SCOND:
859 localsize++;
860 cc += 1 + LINK_SIZE;
861 break;
862
863 case OP_CBRA:
864 case OP_SCBRA:
865 localsize++;
866 cc += 1 + LINK_SIZE + IMM2_SIZE;
867 break;
868
869 case OP_CBRAPOS:
870 case OP_SCBRAPOS:
871 localsize += 2;
872 cc += 1 + LINK_SIZE + IMM2_SIZE;
873 break;
874
875 case OP_COND:
876 /* Might be a hidden SCOND. */
877 alternative = cc + GET(cc, 1);
878 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
879 localsize++;
880 cc += 1 + LINK_SIZE;
881 break;
882
883 default:
884 cc = next_opcode(common, cc);
885 SLJIT_ASSERT(cc != NULL);
886 break;
887 }
888 }
889 SLJIT_ASSERT(cc == ccend);
890 return localsize;
891 }
892
/* Emits code which copies the local variables belonging to the opcodes in
[cc, ccend) between the locals area (addressed through SLJIT_LOCALS_REG) and
the stack (addressed through STACK_TOP).

save - TRUE: the locals are stored to the stack, RECURSIVE_HEAD first;
       FALSE: the values are loaded back from the stack into the locals.
stackptr, stacktop - stack positions in words. They are converted to byte
       offsets with STACK() before use, and the final assert checks that the
       transfer ended exactly at stacktop.

TMP1 and TMP2 are used alternately as staging registers (tracked by tmp1next,
tmp1empty and tmp2empty), so two values can be in flight at a time. The exact
order of the emitted loads and stores depends on this bookkeeping. */
893 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
894 BOOL save, int stackptr, int stacktop)
895 {
896 DEFINE_COMPILER;
/* Local offsets produced by the current opcode; consumed from the highest
index down to index 0. */
897 int srcw[2];
898 int count;
899 BOOL tmp1next = TRUE;
900 BOOL tmp1empty = TRUE;
901 BOOL tmp2empty = TRUE;
902 pcre_uchar *alternative;
903 enum {
904 start,
905 loop,
906 end
907 } status;
908
/* Only the save direction passes through the 'start' state, which handles
RECURSIVE_HEAD. */
909 status = save ? start : loop;
910 stackptr = STACK(stackptr - 2);
911 stacktop = STACK(stacktop - 1);
912
913 if (!save)
914 {
/* Skip one stack word. NOTE(review): presumably the slot of RECURSIVE_HEAD,
which the save direction writes first - confirm with the caller. */
915 stackptr += sizeof(sljit_w);
/* Preload up to two values so the loop below can store before it loads. */
916 if (stackptr < stacktop)
917 {
918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
919 stackptr += sizeof(sljit_w);
920 tmp1empty = FALSE;
921 }
922 if (stackptr < stacktop)
923 {
924 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
925 stackptr += sizeof(sljit_w);
926 tmp2empty = FALSE;
927 }
928 /* The tmp1next must be TRUE in either way. */
929 }
930
931 while (status != end)
932 {
/* Step 1: find out which locals (if any) the current item owns. */
933 count = 0;
934 switch(status)
935 {
936 case start:
937 SLJIT_ASSERT(save);
938 count = 1;
939 srcw[0] = RECURSIVE_HEAD;
940 status = loop;
941 break;
942
943 case loop:
944 if (cc >= ccend)
945 {
946 status = end;
947 break;
948 }
949
950 switch(*cc)
951 {
952 case OP_ASSERT:
953 case OP_ASSERT_NOT:
954 case OP_ASSERTBACK:
955 case OP_ASSERTBACK_NOT:
956 case OP_ONCE:
957 case OP_ONCE_NC:
958 case OP_BRAPOS:
959 case OP_SBRA:
960 case OP_SBRAPOS:
961 case OP_SCOND:
962 count = 1;
963 srcw[0] = PRIV_DATA(cc);
964 SLJIT_ASSERT(srcw[0] != 0);
965 cc += 1 + LINK_SIZE;
966 break;
967
968 case OP_CBRA:
969 case OP_SCBRA:
970 count = 1;
971 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
972 cc += 1 + LINK_SIZE + IMM2_SIZE;
973 break;
974
975 case OP_CBRAPOS:
976 case OP_SCBRAPOS:
/* Two locals: srcw[1] is transferred first, then srcw[0]. */
977 count = 2;
978 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
979 srcw[0] = PRIV_DATA(cc);
980 SLJIT_ASSERT(srcw[0] != 0);
981 cc += 1 + LINK_SIZE + IMM2_SIZE;
982 break;
983
984 case OP_COND:
985 /* Might be a hidden SCOND. */
986 alternative = cc + GET(cc, 1);
987 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
988 {
989 count = 1;
990 srcw[0] = PRIV_DATA(cc);
991 SLJIT_ASSERT(srcw[0] != 0);
992 }
993 cc += 1 + LINK_SIZE;
994 break;
995
996 default:
997 cc = next_opcode(common, cc);
998 SLJIT_ASSERT(cc != NULL);
999 break;
1000 }
1001 break;
1002
1003 case end:
1004 SLJIT_ASSERT_STOP();
1005 break;
1006 }
1007
/* Step 2: emit the transfer of each local found above. */
1008 while (count > 0)
1009 {
1010 count--;
1011 if (save)
1012 {
/* Save: write out the previous content of the staging register (if any),
then load the next local into it. */
1013 if (tmp1next)
1014 {
1015 if (!tmp1empty)
1016 {
1017 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1018 stackptr += sizeof(sljit_w);
1019 }
1020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1021 tmp1empty = FALSE;
1022 tmp1next = FALSE;
1023 }
1024 else
1025 {
1026 if (!tmp2empty)
1027 {
1028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1029 stackptr += sizeof(sljit_w);
1030 }
1031 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1032 tmp2empty = FALSE;
1033 tmp1next = TRUE;
1034 }
1035 }
1036 else
1037 {
/* Restore: store the preloaded value into the local, then refill the
staging register from the stack unless the end has been reached. */
1038 if (tmp1next)
1039 {
1040 SLJIT_ASSERT(!tmp1empty);
1041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1042 tmp1empty = stackptr >= stacktop;
1043 if (!tmp1empty)
1044 {
1045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1046 stackptr += sizeof(sljit_w);
1047 }
1048 tmp1next = FALSE;
1049 }
1050 else
1051 {
1052 SLJIT_ASSERT(!tmp2empty);
1053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1054 tmp2empty = stackptr >= stacktop;
1055 if (!tmp2empty)
1056 {
1057 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1058 stackptr += sizeof(sljit_w);
1059 }
1060 tmp1next = TRUE;
1061 }
1062 }
1063 }
1064 }
1065
/* Save direction: write out what is still held in the staging registers.
The register which would be reused next holds the older value, so it is
stored first. */
1066 if (save)
1067 {
1068 if (tmp1next)
1069 {
1070 if (!tmp1empty)
1071 {
1072 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1073 stackptr += sizeof(sljit_w);
1074 }
1075 if (!tmp2empty)
1076 {
1077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1078 stackptr += sizeof(sljit_w);
1079 }
1080 }
1081 else
1082 {
1083 if (!tmp2empty)
1084 {
1085 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1086 stackptr += sizeof(sljit_w);
1087 }
1088 if (!tmp1empty)
1089 {
1090 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1091 stackptr += sizeof(sljit_w);
1092 }
1093 }
1094 }
1095 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1096 }
1097
1098 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1099 {
1100 return (value & (value - 1)) == 0;
1101 }
1102
1103 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1104 {
1105 while (list)
1106 {
1107 /* sljit_set_label is clever enough to do nothing
1108 if either the jump or the label is NULL */
1109 sljit_set_label(list->jump, label);
1110 list = list->next;
1111 }
1112 }
1113
1114 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1115 {
1116 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1117 if (list_item)
1118 {
1119 list_item->next = *list;
1120 list_item->jump = jump;
1121 *list = list_item;
1122 }
1123 }
1124
1125 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1126 {
1127 DEFINE_COMPILER;
1128 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1129
1130 if (list_item)
1131 {
1132 list_item->type = type;
1133 list_item->data = data;
1134 list_item->start = start;
1135 list_item->leave = LABEL();
1136 list_item->next = common->stubs;
1137 common->stubs = list_item;
1138 }
1139 }
1140
1141 static void flush_stubs(compiler_common *common)
1142 {
1143 DEFINE_COMPILER;
1144 stub_list* list_item = common->stubs;
1145
1146 while (list_item)
1147 {
1148 JUMPHERE(list_item->start);
1149 switch(list_item->type)
1150 {
1151 case stack_alloc:
1152 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1153 break;
1154 }
1155 JUMPTO(SLJIT_JUMP, list_item->leave);
1156 list_item = list_item->next;
1157 }
1158 common->stubs = NULL;
1159 }
1160
1161 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1162 {
1163 DEFINE_COMPILER;
1164
1165 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1166 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1167 }
1168
1169 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1170 {
1171 /* May destroy all locals and registers except TMP2. */
1172 DEFINE_COMPILER;
1173
1174 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1175 #ifdef DESTROY_REGISTERS
1176 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1177 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1178 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1181 #endif
1182 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1183 }
1184
1185 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1186 {
1187 DEFINE_COMPILER;
1188 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1189 }
1190
1191 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1192 {
1193 DEFINE_COMPILER;
1194 struct sljit_label *loop;
1195 int i;
1196 /* At this point we can freely use all temporary registers. */
1197 /* TMP1 returns with begin - 1. */
1198 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1199 if (length < 8)
1200 {
1201 for (i = 0; i < length; i++)
1202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1203 }
1204 else
1205 {
1206 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1207 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1208 loop = LABEL();
1209 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1211 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1212 }
1213 }
1214
1215 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1216 {
1217 DEFINE_COMPILER;
1218 struct sljit_label *loop;
1219 struct sljit_jump *earlyexit;
1220
1221 /* At this point we can freely use all registers. */
1222 OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1224
1225 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1226 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1227 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1228 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1229 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1230 /* Unlikely, but possible */
1231 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1232 loop = LABEL();
1233 OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1234 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1235 /* Copy the integer value to the output buffer */
1236 #ifdef COMPILE_PCRE16
1237 OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
1238 #endif
1239 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
1240 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1241 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1242 JUMPHERE(earlyexit);
1243
1244 /* Calculate the return value, which is the maximum ovector value. */
1245 if (topbracket > 1)
1246 {
1247 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1248 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1249
1250 /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */
1251 loop = LABEL();
1252 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1253 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1254 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);
1255 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1256 }
1257 else
1258 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1259 }
1260
1261 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1262 {
1263 /* Detects if the character has an othercase. */
1264 unsigned int c;
1265
1266 #ifdef SUPPORT_UTF8
1267 if (common->utf8)
1268 {
1269 GETCHAR(c, cc);
1270 if (c > 127)
1271 {
1272 #ifdef SUPPORT_UCP
1273 return c != UCD_OTHERCASE(c);
1274 #else
1275 return FALSE;
1276 #endif
1277 }
1278 }
1279 else
1280 #endif
1281 c = *cc;
1282 return common->fcc[c] != c;
1283 }
1284
1285 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1286 {
1287 /* Returns with the othercase. */
1288 #ifdef SUPPORT_UTF8
1289 if (common->utf8 && c > 127)
1290 {
1291 #ifdef SUPPORT_UCP
1292 return UCD_OTHERCASE(c);
1293 #else
1294 return c;
1295 #endif
1296 }
1297 #endif
1298 return common->fcc[c];
1299 }
1300
1301 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1302 {
1303 /* Detects if the character and its othercase has only 1 bit difference. */
1304 unsigned int c, oc, bit;
1305 #ifdef SUPPORT_UTF8
1306 int n;
1307 #endif
1308
1309 #ifdef SUPPORT_UTF8
1310 if (common->utf8)
1311 {
1312 GETCHAR(c, cc);
1313 if (c <= 127)
1314 oc = common->fcc[c];
1315 else
1316 {
1317 #ifdef SUPPORT_UCP
1318 oc = UCD_OTHERCASE(c);
1319 #else
1320 oc = c;
1321 #endif
1322 }
1323 }
1324 else
1325 {
1326 c = *cc;
1327 oc = common->fcc[c];
1328 }
1329 #else
1330 c = *cc;
1331 oc = common->fcc[c];
1332 #endif
1333
1334 SLJIT_ASSERT(c != oc);
1335
1336 bit = c ^ oc;
1337 /* Optimized for English alphabet. */
1338 if (c <= 127 && bit == 0x20)
1339 return (0 << 8) | 0x20;
1340
1341 /* Since c != oc, they must have at least 1 bit difference. */
1342 if (!ispowerof2(bit))
1343 return 0;
1344
1345 #ifdef SUPPORT_UTF8
1346 if (common->utf8 && c > 127)
1347 {
1348 n = PRIV(utf8_table4)[*cc & 0x3f];
1349 while ((bit & 0x3f) == 0)
1350 {
1351 n--;
1352 bit >>= 6;
1353 }
1354 return (n << 8) | bit;
1355 }
1356 #endif
1357 return (0 << 8) | bit;
1358 }
1359
1360 static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1361 {
1362 DEFINE_COMPILER;
1363 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1364 }
1365
1366 static void read_char(compiler_common *common)
1367 {
1368 /* Reads the character into TMP1, updates STR_PTR.
1369 Does not check STR_END. TMP2 Destroyed. */
1370 DEFINE_COMPILER;
1371 #ifdef SUPPORT_UTF8
1372 struct sljit_jump *jump;
1373 #endif
1374
1375 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1376 #ifdef SUPPORT_UTF8
1377 if (common->utf8)
1378 {
1379 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1380 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1381 JUMPHERE(jump);
1382 }
1383 #endif
1384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1385 }
1386
1387 static void peek_char(compiler_common *common)
1388 {
1389 /* Reads the character into TMP1, keeps STR_PTR.
1390 Does not check STR_END. TMP2 Destroyed. */
1391 DEFINE_COMPILER;
1392 #ifdef SUPPORT_UTF8
1393 struct sljit_jump *jump;
1394 #endif
1395
1396 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1397 #ifdef SUPPORT_UTF8
1398 if (common->utf8)
1399 {
1400 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1401 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1402 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1403 JUMPHERE(jump);
1404 }
1405 #endif
1406 }
1407
1408 static void read_char8_type(compiler_common *common)
1409 {
1410 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1411 DEFINE_COMPILER;
1412 #ifdef SUPPORT_UTF8
1413 struct sljit_jump *jump;
1414 #endif
1415
1416 #ifdef SUPPORT_UTF8
1417 if (common->utf8)
1418 {
1419 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1421 /* This can be an extra read in some situations, but hopefully
1422 it is a clever early read in most cases. */
1423 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1424 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1425 add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));
1426 JUMPHERE(jump);
1427 return;
1428 }
1429 #endif
1430 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1431 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1432 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
1433 }
1434
1435 static void skip_char_back(compiler_common *common)
1436 {
1437 /* Goes one character back. Only affects STR_PTR. Does not check begin. */
1438 DEFINE_COMPILER;
1439 #ifdef SUPPORT_UTF8
1440 struct sljit_label *label;
1441
1442 if (common->utf8)
1443 {
1444 label = LABEL();
1445 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1446 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1447 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1448 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1449 return;
1450 }
1451 #endif
1452 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1453 }
1454
1455 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1456 {
1457 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1458 DEFINE_COMPILER;
1459
1460 if (nltype == NLTYPE_ANY)
1461 {
1462 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1463 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1464 }
1465 else if (nltype == NLTYPE_ANYCRLF)
1466 {
1467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1468 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1470 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1471 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1472 }
1473 else
1474 {
1475 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);
1476 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1477 }
1478 }
1479
1480 #ifdef SUPPORT_UTF8
1481 static void do_utf8readchar(compiler_common *common)
1482 {
1483 /* Fast decoding an utf8 character. TMP1 contains the first byte
1484 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1485 DEFINE_COMPILER;
1486 struct sljit_jump *jump;
1487
1488 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1489 /* Searching for the first zero. */
1490 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1491 jump = JUMP(SLJIT_C_NOT_ZERO);
1492 /* 2 byte sequence */
1493 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1495 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1496 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1497 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1499 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
1500 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1501 JUMPHERE(jump);
1502
1503 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1504 jump = JUMP(SLJIT_C_NOT_ZERO);
1505 /* 3 byte sequence */
1506 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1507 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1508 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1509 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1510 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1512 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1513 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);
1514 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1515 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);
1517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1518 JUMPHERE(jump);
1519
1520 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);
1521 jump = JUMP(SLJIT_C_NOT_ZERO);
1522 /* 4 byte sequence */
1523 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1524 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1525 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1526 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1527 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1528 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1529 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1530 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1531 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1532 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1533 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);
1535 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1536 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);
1538 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1539 JUMPHERE(jump);
1540
1541 /* 5 byte sequence */
1542 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1543 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);
1544 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);
1545 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1546 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
1547 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1548 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1549 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1550 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1551 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1552 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1553 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1554 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1555 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1556 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);
1557 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);
1558 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1559 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1560 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);
1561 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1562 }
1563
1564 static void do_utf8readtype8(compiler_common *common)
1565 {
1566 /* Fast decoding an utf8 character type. TMP2 contains the first byte
1567 of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */
1568 DEFINE_COMPILER;
1569 struct sljit_jump *jump;
1570 struct sljit_jump *compare;
1571
1572 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1573
1574 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1575 jump = JUMP(SLJIT_C_NOT_ZERO);
1576 /* 2 byte sequence */
1577 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1579 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1580 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1581 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1582 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1583 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1584 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1585 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1586
1587 JUMPHERE(compare);
1588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1589 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1590 JUMPHERE(jump);
1591
1592 /* We only have types for characters less than 256. */
1593 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1596 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1597 }
1598
1599 #endif
1600
1601 #ifdef SUPPORT_UCP
1602
1603 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1604 #define UCD_BLOCK_MASK 127
1605 #define UCD_BLOCK_SHIFT 7
1606
1607 static void do_getucd(compiler_common *common)
1608 {
1609 /* Search the UCD record for the character comes in TMP1.
1610 Returns chartype in TMP1 and UCD offset in TMP2. */
1611 DEFINE_COMPILER;
1612
1613 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1614
1615 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1616 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1617 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1618 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1619 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1620 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1622 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1623 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1624 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1625 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1626 }
1627 #endif
1628
1629 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1630 {
1631 DEFINE_COMPILER;
1632 struct sljit_label *mainloop;
1633 struct sljit_label *newlinelabel = NULL;
1634 struct sljit_jump *start;
1635 struct sljit_jump *end = NULL;
1636 struct sljit_jump *nl = NULL;
1637 #ifdef SUPPORT_UTF8
1638 struct sljit_jump *singlebyte;
1639 #endif
1640 jump_list *newline = NULL;
1641 BOOL newlinecheck = FALSE;
1642 BOOL readuchar = FALSE;
1643
1644 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1645 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1646 newlinecheck = TRUE;
1647
1648 if (firstline)
1649 {
1650 /* Search for the end of the first line. */
1651 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1652 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1653
1654 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1655 {
1656 mainloop = LABEL();
1657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1658 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1659 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1661 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1662 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1663 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1664 }
1665 else
1666 {
1667 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1668 mainloop = LABEL();
1669 /* Continual stores does not cause data dependency. */
1670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1671 read_char(common);
1672 check_newlinechar(common, common->nltype, &newline, TRUE);
1673 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1675 set_jumps(newline, LABEL());
1676 }
1677
1678 JUMPHERE(end);
1679 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1680 }
1681
1682 start = JUMP(SLJIT_JUMP);
1683
1684 if (newlinecheck)
1685 {
1686 newlinelabel = LABEL();
1687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1688 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1689 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1691 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1693 nl = JUMP(SLJIT_JUMP);
1694 }
1695
1696 mainloop = LABEL();
1697
1698 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1699 #ifdef SUPPORT_UTF8
1700 if (common->utf8) readuchar = TRUE;
1701 #endif
1702 if (newlinecheck) readuchar = TRUE;
1703
1704 if (readuchar)
1705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1706
1707 if (newlinecheck)
1708 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1709
1710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1711 #ifdef SUPPORT_UTF8
1712 if (common->utf8)
1713 {
1714 singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1717 JUMPHERE(singlebyte);
1718 }
1719 #endif
1720 JUMPHERE(start);
1721
1722 if (newlinecheck)
1723 {
1724 JUMPHERE(end);
1725 JUMPHERE(nl);
1726 }
1727
1728 return mainloop;
1729 }
1730
1731 static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)
1732 {
1733 DEFINE_COMPILER;
1734 struct sljit_label *start;
1735 struct sljit_jump *leave;
1736 struct sljit_jump *found;
1737 pcre_uint16 oc, bit;
1738
1739 if (firstline)
1740 {
1741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1742 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1743 }
1744
1745 start = LABEL();
1746 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1747 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1748
1749 if ((firstbyte & REQ_CASELESS) == 0)
1750 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);
1751 else
1752 {
1753 firstbyte &= 0xff;
1754 oc = common->fcc[firstbyte];
1755 bit = firstbyte ^ oc;
1756 if (ispowerof2(bit))
1757 {
1758 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1759 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);
1760 }
1761 else
1762 {
1763 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);
1764 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1765 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1766 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1767 found = JUMP(SLJIT_C_NOT_ZERO);
1768 }
1769 }
1770
1771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1772 #ifdef SUPPORT_UTF8
1773 if (common->utf8)
1774 {
1775 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1776 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1778 }
1779 #endif
1780 JUMPTO(SLJIT_JUMP, start);
1781 JUMPHERE(found);
1782 JUMPHERE(leave);
1783
1784 if (firstline)
1785 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1786 }
1787
1788 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1789 {
1790 DEFINE_COMPILER;
1791 struct sljit_label *loop;
1792 struct sljit_jump *lastchar;
1793 struct sljit_jump *firstchar;
1794 struct sljit_jump *leave;
1795 struct sljit_jump *foundcr = NULL;
1796 struct sljit_jump *notfoundnl;
1797 jump_list *newline = NULL;
1798
1799 if (firstline)
1800 {
1801 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1802 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1803 }
1804
1805 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1806 {
1807 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1808 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1809 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1810 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1811 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1812
1813 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
1814 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1815 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1816 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1817
1818 loop = LABEL();
1819 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1820 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1821 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
1822 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
1823 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1824 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1825
1826 JUMPHERE(leave);
1827 JUMPHERE(firstchar);
1828 JUMPHERE(lastchar);
1829
1830 if (firstline)
1831 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1832 return;
1833 }
1834
1835 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1837 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1838 skip_char_back(common);
1839
1840 loop = LABEL();
1841 read_char(common);
1842 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1843 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1844 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1845 check_newlinechar(common, common->nltype, &newline, FALSE);
1846 set_jumps(newline, loop);
1847
1848 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1849 {
1850 leave = JUMP(SLJIT_JUMP);
1851 JUMPHERE(foundcr);
1852 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1853 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1855 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1857 JUMPHERE(notfoundnl);
1858 JUMPHERE(leave);
1859 }
1860 JUMPHERE(lastchar);
1861 JUMPHERE(firstchar);
1862
1863 if (firstline)
1864 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1865 }
1866
1867 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
1868 {
1869 DEFINE_COMPILER;
1870 struct sljit_label *start;
1871 struct sljit_jump *leave;
1872 struct sljit_jump *found;
1873
1874 if (firstline)
1875 {
1876 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1877 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1878 }
1879
1880 start = LABEL();
1881 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1883 #ifdef SUPPORT_UTF
1884 if (common->utf8)
1885 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1886 #endif
1887 #ifndef COMPILE_PCRE8
1888 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xff);
1889 #endif
1890 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
1891 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
1892 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
1893 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
1894 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
1895 found = JUMP(SLJIT_C_NOT_ZERO);
1896
1897 #ifdef SUPPORT_UTF
1898 if (common->utf8)
1899 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
1900 #endif
1901 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1902 #ifdef SUPPORT_UTF8
1903 if (common->utf8)
1904 {
1905 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1906 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1907 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1908 }
1909 #endif
1910 JUMPTO(SLJIT_JUMP, start);
1911 JUMPHERE(found);
1912 JUMPHERE(leave);
1913
1914 if (firstline)
1915 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1916 }
1917
1918 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)
1919 {
1920 DEFINE_COMPILER;
1921 struct sljit_label *loop;
1922 struct sljit_jump *toolong;
1923 struct sljit_jump *alreadyfound;
1924 struct sljit_jump *found;
1925 struct sljit_jump *foundoc = NULL;
1926 struct sljit_jump *notfound;
1927 pcre_uint16 oc, bit;
1928
1929 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);
1930 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
1931 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
1932 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
1933
1934 if (has_firstbyte)
1935 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
1936 else
1937 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
1938
1939 loop = LABEL();
1940 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
1941
1942 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1943 if ((reqbyte & REQ_CASELESS) == 0)
1944 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);
1945 else
1946 {
1947 reqbyte &= 0xff;
1948 oc = common->fcc[reqbyte];
1949 bit = reqbyte ^ oc;
1950 if (ispowerof2(bit))
1951 {
1952 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
1953 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);
1954 }
1955 else
1956 {
1957 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);
1958 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
1959 }
1960 }
1961 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1962 JUMPTO(SLJIT_JUMP, loop);
1963
1964 JUMPHERE(found);
1965 if (foundoc)
1966 JUMPHERE(foundoc);
1967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);
1968 JUMPHERE(alreadyfound);
1969 JUMPHERE(toolong);
1970 return notfound;
1971 }
1972
1973 static void do_revertframes(compiler_common *common)
1974 {
1975 DEFINE_COMPILER;
1976 struct sljit_jump *jump;
1977 struct sljit_label *mainloop;
1978
1979 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1980 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
1981
1982 /* Drop frames until we reach STACK_TOP. */
1983 mainloop = LABEL();
1984 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1985 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1986 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
1987 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
1988 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
1989 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
1990 JUMPTO(SLJIT_JUMP, mainloop);
1991
1992 JUMPHERE(jump);
1993 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1994 /* End of dropping frames. */
1995 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1996
1997 JUMPHERE(jump);
1998 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
1999 /* Set string begin. */
2000 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2001 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2003 JUMPTO(SLJIT_JUMP, mainloop);
2004
2005 JUMPHERE(jump);
2006 /* Unknown command. */
2007 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2008 JUMPTO(SLJIT_JUMP, mainloop);
2009 }
2010
2011 static void check_wordboundary(compiler_common *common)
2012 {
2013 DEFINE_COMPILER;
2014 struct sljit_jump *beginend;
2015 #ifdef SUPPORT_UTF8
2016 struct sljit_jump *jump;
2017 #endif
2018
2019 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2020
2021 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2022 /* Get type of the previous char, and put it to LOCALS1. */
2023 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2024 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2026 beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2027 skip_char_back(common);
2028 read_char(common);
2029
2030 /* Testing char type. */
2031 #ifdef SUPPORT_UCP
2032 if (common->useucp)
2033 {
2034 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2035 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2036 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2037 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2038 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2039 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2040 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2041 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2042 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2043 JUMPHERE(jump);
2044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2045 }
2046 else
2047 #endif
2048 {
2049 #ifdef SUPPORT_UTF8
2050 /* Here LOCALS1 has already been zeroed. */
2051 jump = NULL;
2052 if (common->utf8)
2053 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2054 #endif
2055 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2056 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2057 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2059 #ifdef SUPPORT_UTF8
2060 if (jump != NULL)
2061 JUMPHERE(jump);
2062 #endif
2063 }
2064 JUMPHERE(beginend);
2065
2066 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2067 beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2068 peek_char(common);
2069
2070 /* Testing char type. This is a code duplication. */
2071 #ifdef SUPPORT_UCP
2072 if (common->useucp)
2073 {
2074 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2075 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2076 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2077 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2078 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2079 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2080 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2081 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2082 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2083 JUMPHERE(jump);
2084 }
2085 else
2086 #endif
2087 {
2088 #ifdef SUPPORT_UTF8
2089 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2090 jump = NULL;
2091 if (common->utf8)
2092 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2093 #endif
2094 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2095 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2096 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2097 #ifdef SUPPORT_UTF8
2098 if (jump != NULL)
2099 JUMPHERE(jump);
2100 #endif
2101 }
2102 JUMPHERE(beginend);
2103
2104 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2105 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2106 }
2107
2108 static void check_anynewline(compiler_common *common)
2109 {
2110 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2111 DEFINE_COMPILER;
2112
2113 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2114
2115 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2116 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2117 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2119 #ifdef SUPPORT_UTF8
2120 if (common->utf8)
2121 {
2122 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2123 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2124 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2125 }
2126 #endif
2127 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2128 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2129 }
2130
2131 static void check_hspace(compiler_common *common)
2132 {
2133 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2134 DEFINE_COMPILER;
2135
2136 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2137
2138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2139 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2140 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2141 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2142 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2143 #ifdef SUPPORT_UTF8
2144 if (common->utf8)
2145 {
2146 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2147 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2148 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2149 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2150 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2151 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2152 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2153 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2154 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2155 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2156 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2157 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2159 }
2160 #endif
2161 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2162
2163 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2164 }
2165
2166 static void check_vspace(compiler_common *common)
2167 {
2168 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2169 DEFINE_COMPILER;
2170
2171 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2172
2173 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2174 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2175 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2176 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2177 #ifdef SUPPORT_UTF8
2178 if (common->utf8)
2179 {
2180 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2181 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2182 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2183 }
2184 #endif
2185 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2186
2187 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2188 }
2189
2190 #define CHAR1 STR_END
2191 #define CHAR2 STACK_TOP
2192
2193 static void do_casefulcmp(compiler_common *common)
2194 {
2195 DEFINE_COMPILER;
2196 struct sljit_jump *jump;
2197 struct sljit_label *label;
2198
2199 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2200 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2201 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2203 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2205
2206 label = LABEL();
2207 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2208 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2209 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2210 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2211 JUMPTO(SLJIT_C_NOT_ZERO, label);
2212
2213 JUMPHERE(jump);
2214 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2215 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2216 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2217 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2218 }
2219
2220 #define LCC_TABLE STACK_LIMIT
2221
2222 static void do_caselesscmp(compiler_common *common)
2223 {
2224 DEFINE_COMPILER;
2225 struct sljit_jump *jump;
2226 struct sljit_label *label;
2227
2228 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2229 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2230
2231 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2234 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2235 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2236 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2237
2238 label = LABEL();
2239 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2240 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2241 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2242 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2243 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2244 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2245 JUMPTO(SLJIT_C_NOT_ZERO, label);
2246
2247 JUMPHERE(jump);
2248 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2249 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2250 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2251 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2252 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2253 }
2254
2255 #undef LCC_TABLE
2256 #undef CHAR1
2257 #undef CHAR2
2258
2259 #ifdef SUPPORT_UTF8
2260 #ifdef SUPPORT_UCP
2261
2262 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2263 {
2264 /* This function would be ineffective to do in JIT level. */
2265 int c1, c2;
2266 const pcre_uchar *src2 = args->ptr;
2267 const pcre_uchar *end2 = (pcre_uchar *)args->end;
2268
2269 while (src1 < end1)
2270 {
2271 if (src2 >= end2)
2272 return 0;
2273 GETCHARINC(c1, src1);
2274 GETCHARINC(c2, src2);
2275 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return 0;
2276 }
2277 return src2;
2278 }
2279
2280 #endif
2281 #endif
2282
/* Emits code that compares one pattern character (starting at cc) with the
subject text located context->length units before STR_PTR.

Arguments:
  common      compiler state
  caseless    TRUE when the comparison must ignore case
  cc          the pattern character to compare
  context     running state shared between consecutive calls: remaining
              length, the register that receives the next load (sourcereg)
              and, for unaligned reads, the characters collected so far
  fallbacks   jump list that receives a jump for every failed comparison

Returns:      cc advanced past the units consumed by this call

Caseless comparison is only done through a single "other case" bit: that bit
is ORed into the loaded value and into the expected constant. */
2283 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2284 compare_context* context, jump_list **fallbacks)
2285 {
2286 DEFINE_COMPILER;
2287 unsigned int othercasebit = 0;
2288 pcre_uchar *othercasechar = NULL;
2289 #ifdef SUPPORT_UTF8
2290 int utf8length;
2291 #endif
2292
2293 if (caseless && char_has_othercase(common, cc))
2294 {
2295 othercasebit = char_get_othercase_bit(common, cc);
2296 SLJIT_ASSERT(othercasebit);
2297 /* Extracting bit difference info. */
/* The upper part of the value selects the unit (relative to cc) that
contains the differing bit; the low 8 bits are the bit mask itself. In the
16-bit layout bit 0x100 says the mask belongs to the upper byte. */
2298 #ifdef COMPILE_PCRE8
2299 othercasechar = cc + (othercasebit >> 8);
2300 othercasebit &= 0xff;
2301 #else
2302 #ifdef COMPILE_PCRE16
2303 othercasechar = cc + (othercasebit >> 9);
2304 if ((othercasebit & 0x100) != 0)
2305 othercasebit = (othercasebit & 0xff) << 8;
2306 else
2307 othercasebit &= 0xff;
2308 #endif
2309 #endif
2310 }
2311
/* sourcereg == -1 marks the first call for this context: preload the first
chunk into TMP1 and direct the next load to TMP2. */
2312 if (context->sourcereg == -1)
2313 {
2314 #ifdef COMPILE_PCRE8
2315 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2316 if (context->length >= 4)
2317 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2318 else if (context->length >= 2)
2319 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2320 else
2321 #endif
2322 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2323 #else
2324 #ifdef COMPILE_PCRE16
2325 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2326 if (context->length >= 4)
2327 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2328 else
2329 #endif
2330 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2331 #endif
2332 #endif /* COMPILE_PCRE8 */
2333 context->sourcereg = TMP2;
2334 }
2335
/* In UTF-8 mode a lead byte >= 0xc0 is followed by extra bytes; the loop
below then runs once per byte of the character. */
2336 #ifdef SUPPORT_UTF8
2337 utf8length = 1;
2338 if (common->utf8 && *cc >= 0xc0)
2339 utf8length += PRIV(utf8_table4)[*cc & 0x3f];
2340
2341 do
2342 {
2343 #endif
2344
2345 context->length -= IN_UCHARS(1);
2346 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2347
2348 /* Unaligned read is supported. */
/* Collect the expected units (c) and their case masks (oc); the actual
compare is emitted only once a whole chunk has been gathered. */
2349 if (othercasebit != 0 && othercasechar == cc)
2350 {
2351 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2352 context->oc.asuchars[context->ucharptr] = othercasebit;
2353 }
2354 else
2355 {
2356 context->c.asuchars[context->ucharptr] = *cc;
2357 context->oc.asuchars[context->ucharptr] = 0;
2358 }
2359 context->ucharptr++;
2360
2361 #ifdef COMPILE_PCRE8
2362 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2363 #else
2364 if (context->ucharptr >= 2 || context->length == 0)
2365 #endif
2366 {
/* Start loading the following chunk into the current sourcereg, then switch
to the other register, which already holds the chunk compared below. */
2367 if (context->length >= 4)
2368 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2369 #ifdef COMPILE_PCRE8
2370 else if (context->length >= 2)
2371 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2372 else if (context->length >= 1)
2373 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2374 #else
2375 else if (context->length >= 2)
2376 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2377 #endif
2378 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2379
/* The case labels are unit counts: 4 and 2 bytes' worth of pcre_uchar
units, plus a single byte in the 8-bit build. */
2380 switch(context->ucharptr)
2381 {
2382 case 4 / sizeof(pcre_uchar):
2383 if (context->oc.asint != 0)
2384 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2385 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2386 break;
2387
2388 case 2 / sizeof(pcre_uchar):
2389 if (context->oc.asshort != 0)
2390 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
2391 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
2392 break;
2393
2394 #ifdef COMPILE_PCRE8
2395 case 1:
2396 if (context->oc.asbyte != 0)
2397 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2398 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2399 break;
2400 #endif
2401
2402 default:
2403 SLJIT_ASSERT_STOP();
2404 break;
2405 }
2406 context->ucharptr = 0;
2407 }
2408
2409 #else
2410
2411 /* Unaligned read is unsupported. */
/* One unit per iteration: load the next unit (if any) into sourcereg,
switch registers, and compare the unit loaded previously. */
2412 #ifdef COMPILE_PCRE8
2413 if (context->length > 0)
2414 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2415 #else
2416 if (context->length > 0)
2417 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2418 #endif
2419 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2420
2421 if (othercasebit != 0 && othercasechar == cc)
2422 {
2423 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2424 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2425 }
2426 else
2427 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2428
2429 #endif
2430
2431 cc++;
2432 #ifdef SUPPORT_UTF8
2433 utf8length--;
2434 }
2435 while (utf8length > 0);
2436 #endif
2437
2438 return cc;
2439 }
2440
2441 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2442
/* Helpers for compile_xclass_hotpath. Each one emits a single ADD or SUB so
that the register ends up holding (original value - value), and records the
new bias in typeoffset / charoffset. Nothing is emitted when the bias is
already the requested one.

Both macros rely on local variables of the expanding function (typereg and
typeoffset, or charoffset) and evaluate their argument more than once. They
are wrapped in do { } while (0) so that each use is exactly one statement,
which keeps them safe under an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
2462
/* Emits the matching code for an extended character class.

Arguments:
  common      compiler state
  cc          the class data: a flag unit (XCL_NOT, XCL_MAP), an optional
              32-byte bitmap when XCL_MAP is set, then a list of XCL_SINGLE,
              XCL_RANGE, XCL_PROP and XCL_NOTPROP items closed by XCL_END
  fallbacks   jump list taken when the character is not accepted

The function reads one character and works in two passes over the item list:
the first pass counts the items and records which data (character value,
Unicode type, script) the items need, the second pass emits the comparisons.
For a non-negated class successful comparisons jump to a local "found" list
that is bound at the end; for a negated class they jump to fallbacks. */
2463 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2464 {
2465 DEFINE_COMPILER;
2466 jump_list *found = NULL;
2467 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2468 unsigned int c;
2469 int compares;
2470 struct sljit_jump *jump = NULL;
2471 pcre_uchar *ccbegin;
2472 #ifdef SUPPORT_UCP
2473 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2474 BOOL charsaved = FALSE;
2475 int typereg = TMP1, scriptreg = TMP1;
2476 unsigned int typeoffset;
2477 #endif
2478 int invertcmp, numberofcmps;
2479 unsigned int charoffset;
2480
2481 /* Although SUPPORT_UTF8 must be defined, we are not necessarily in utf8 mode. */
2482 check_input_end(common, fallbacks);
2483 read_char(common);
2484
2485 if ((*cc++ & XCL_MAP) != 0)
2486 {
/* Bitmap test: the character is kept in TMP3, values above 255 skip the
lookup, otherwise bit (c & 7) of byte (c >> 3) of the map is tested. */
2487 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2488 #ifndef COMPILE_PCRE8
2489 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2490 #elif defined SUPPORT_UTF8
2491 if (common->utf8)
2492 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2493 #endif
2494
2495 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2496 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2497 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2498 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2499 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2500 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2501
2502 #ifndef COMPILE_PCRE8
2503 JUMPHERE(jump);
2504 #elif defined SUPPORT_UTF8
2505 if (common->utf8)
2506 JUMPHERE(jump);
2507 #endif
2508 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2509 #ifdef SUPPORT_UCP
2510 charsaved = TRUE;
2511 #endif
/* Step over the 32-byte bitmap. */
2512 cc += 32 / sizeof(pcre_uchar);
2513 }
2514
2515 /* Scanning the necessary info. */
2516 ccbegin = cc;
2517 compares = 0;
2518 while (*cc != XCL_END)
2519 {
2520 compares++;
2521 if (*cc == XCL_SINGLE)
2522 {
2523 cc += 2;
2524 #ifdef SUPPORT_UTF8
2525 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2526 #endif
2527 #ifdef SUPPORT_UCP
2528 needschar = TRUE;
2529 #endif
2530 }
2531 else if (*cc == XCL_RANGE)
2532 {
2533 cc += 2;
2534 #ifdef SUPPORT_UTF8
2535 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2536 #endif
2537 cc++;
2538 #ifdef SUPPORT_UTF8
2539 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2540 #endif
2541 #ifdef SUPPORT_UCP
2542 needschar = TRUE;
2543 #endif
2544 }
2545 #ifdef SUPPORT_UCP
2546 else
2547 {
2548 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2549 cc++;
2550 switch(*cc)
2551 {
2552 case PT_ANY:
2553 break;
2554
2555 case PT_LAMP:
2556 case PT_GC:
2557 case PT_PC:
2558 case PT_ALNUM:
2559 needstype = TRUE;
2560 break;
2561
2562 case PT_SC:
2563 needsscript = TRUE;
2564 break;
2565
2566 case PT_SPACE:
2567 case PT_PXSPACE:
2568 case PT_WORD:
2569 needstype = TRUE;
2570 needschar = TRUE;
2571 break;
2572
2573 default:
2574 SLJIT_ASSERT_STOP();
2575 break;
2576 }
2577 cc += 2;
2578 }
2579 #endif
2580 }
2581
2582 #ifdef SUPPORT_UCP
2583 /* Simple register allocation. TMP1 is preferred if possible. */
2584 if (needstype || needsscript)
2585 {
/* The getucd call overwrites TMP1, so the character is parked in TMP3 first
when it is still needed (unless the bitmap code above already did so). */
2586 if (needschar && !charsaved)
2587 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2588 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2589 if (needschar)
2590 {
2591 if (needstype)
2592 {
2593 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2594 typereg = RETURN_ADDR;
2595 }
2596
2597 if (needsscript)
2598 scriptreg = TMP3;
2599 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2600 }
2601 else if (needstype && needsscript)
2602 scriptreg = TMP3;
2603 /* In all other cases only one of them was specified, and that can go to TMP1. */
2604
2605 if (needsscript)
2606 {
/* Load the script field of the ucd record selected by TMP2.
NOTE(review): TMP2 is presumably the record index left by getucd, scaled by
8 here (shift by 3) - confirm against the getucd helper. */
2607 if (scriptreg == TMP1)
2608 {
2609 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2610 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2611 }
2612 else
2613 {
2614 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2615 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2616 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2617 }
2618 }
2619 }
2620 #endif
2621
2622 /* Generating code. */
/* charoffset / typeoffset track the bias currently subtracted from TMP1 /
typereg; numberofcmps counts comparisons accumulated in TMP2 without a jump
(at most three are merged before a jump is emitted). */
2623 cc = ccbegin;
2624 charoffset = 0;
2625 numberofcmps = 0;
2626 #ifdef SUPPORT_UCP
2627 typeoffset = 0;
2628 #endif
2629
2630 while (*cc != XCL_END)
2631 {
2632 compares--;
/* The last item of a non-negated class is inverted: its jump goes to
fallbacks on failure and success simply falls through to "found". */
2633 invertcmp = (compares == 0 && list != fallbacks);
2634 jump = NULL;
2635
2636 if (*cc == XCL_SINGLE)
2637 {
2638 cc ++;
2639 #ifdef SUPPORT_UTF8
2640 if (common->utf8)
2641 {
2642 GETCHARINC(c, cc);
2643 }
2644 else
2645 #endif
2646 c = *cc++;
2647
2648 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2649 {
2650 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2651 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2652 numberofcmps++;
2653 }
2654 else if (numberofcmps > 0)
2655 {
2656 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2657 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2658 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2659 numberofcmps = 0;
2660 }
2661 else
2662 {
2663 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2664 numberofcmps = 0;
2665 }
2666 }
2667 else if (*cc == XCL_RANGE)
2668 {
2669 cc ++;
2670 #ifdef SUPPORT_UTF8
2671 if (common->utf8)
2672 {
2673 GETCHARINC(c, cc);
2674 }
2675 else
2676 #endif
2677 c = *cc++;
/* Rebase TMP1 to the range start so the range becomes one unsigned
"less or equal" test against (end - start). */
2678 SET_CHAR_OFFSET(c);
2679 #ifdef SUPPORT_UTF8
2680 if (common->utf8)
2681 {
2682 GETCHARINC(c, cc);
2683 }
2684 else
2685 #endif
2686 c = *cc++;
2687 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2688 {
2689 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2690 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2691 numberofcmps++;
2692 }
2693 else if (numberofcmps > 0)
2694 {
2695 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2696 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2697 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2698 numberofcmps = 0;
2699 }
2700 else
2701 {
2702 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2703 numberofcmps = 0;
2704 }
2705 }
2706 #ifdef SUPPORT_UCP
2707 else
2708 {
2709 if (*cc == XCL_NOTPROP)
2710 invertcmp ^= 0x1;
2711 cc++;
2712 switch(*cc)
2713 {
2714 case PT_ANY:
/* NOTE(review): the "continue" statements below skip the "cc += 2" after
the switch while cc still points at the PT_ANY unit - confirm that the loop
advances correctly on these paths. */
2715 if (list != fallbacks)
2716 {
2717 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2718 continue;
2719 }
2720 else if (cc[-1] == XCL_NOTPROP)
2721 continue;
2722 jump = JUMP(SLJIT_JUMP);
2723 break;
2724
2725 case PT_LAMP:
2726 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2727 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2729 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2730 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2731 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2732 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2733 break;
2734
2735 case PT_GC:
/* ucp_typerange holds a (first, last) pair of type codes per category. */
2736 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
2737 SET_TYPE_OFFSET(c);
2738 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
2739 break;
2740
2741 case PT_PC:
2742 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2743 break;
2744
2745 case PT_SC:
2746 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2747 break;
2748
2749 case PT_SPACE:
2750 case PT_PXSPACE:
/* Characters 9..13 count as space; for PT_SPACE character 11 is excluded by
jumping over the range test with TMP2 preset to 0.
NOTE(review): when that jump is taken, the SUB emitted by SET_CHAR_OFFSET(9)
is skipped at run time although charoffset is updated at compile time -
confirm this cannot affect later comparisons on TMP1. */
2751 if (*cc == PT_SPACE)
2752 {
2753 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2754 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2755 }
2756 SET_CHAR_OFFSET(9);
2757 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2758 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2759 if (*cc == PT_SPACE)
2760 JUMPHERE(jump);
2761
/* ... or a type code in the range ucp_Zl..ucp_Zs. */
2762 SET_TYPE_OFFSET(ucp_Zl);
2763 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2764 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2765 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2766 break;
2767
2768 case PT_WORD:
/* Underscore, or anything PT_ALNUM accepts (TMP2 is ORed into below). */
2769 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2770 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2771 /* ... fall through */
2772
2773 case PT_ALNUM:
/* Type code in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No. */
2774 SET_TYPE_OFFSET(ucp_Ll);
2775 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2776 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2777 SET_TYPE_OFFSET(ucp_Nd);
2778 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2779 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2780 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2781 break;
2782 }
2783 cc += 2;
2784 }
2785 #endif
2786
/* Every item but the last reports through "list"; the last one always
reports to fallbacks (see invertcmp above). */
2787 if (jump != NULL)
2788 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2789 }
2790
2791 if (found != NULL)
2792 set_jumps(found, LABEL());
2793 }
2794
2795 #undef SET_TYPE_OFFSET
2796 #undef SET_CHAR_OFFSET
2797
2798 #endif
2799
2800 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2801 {
2802 DEFINE_COMPILER;
2803 int length;
2804 unsigned int c, oc, bit;
2805 compare_context context;
2806 struct sljit_jump *jump[4];
2807 #ifdef SUPPORT_UTF8
2808 struct sljit_label *label;
2809 #ifdef SUPPORT_UCP
2810 pcre_uchar propdata[5];
2811 #endif
2812 #endif
2813
2814 switch(type)
2815 {
2816 case OP_SOD:
2817 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2819 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2820 return cc;
2821
2822 case OP_SOM:
2823 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2824 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2825 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2826 return cc;
2827
2828 case OP_NOT_WORD_BOUNDARY:
2829 case OP_WORD_BOUNDARY:
2830 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
2831 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2832 return cc;
2833
2834 case OP_NOT_DIGIT:
2835 case OP_DIGIT:
2836 check_input_end(common, fallbacks);
2837 read_char8_type(common);
2838 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
2839 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2840 return cc;
2841
2842 case OP_NOT_WHITESPACE:
2843 case OP_WHITESPACE:
2844 check_input_end(common, fallbacks);
2845 read_char8_type(common);
2846 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
2847 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2848 return cc;
2849
2850 case OP_NOT_WORDCHAR:
2851 case OP_WORDCHAR:
2852 check_input_end(common, fallbacks);
2853 read_char8_type(common);
2854 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
2855 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2856 return cc;
2857
2858 case OP_ANY:
2859 check_input_end(common, fallbacks);
2860 read_char(common);
2861 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2862 {
2863 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
2864 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2865 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2866 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
2867 JUMPHERE(jump[1]);
2868 JUMPHERE(jump[0]);
2869 }
2870 else
2871 check_newlinechar(common, common->nltype, fallbacks, TRUE);
2872 return cc;
2873
2874 case OP_ALLANY:
2875 check_input_end(common, fallbacks);
2876 #ifdef SUPPORT_UTF8
2877 if (common->utf8)
2878 {
2879 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2880 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2881 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2882 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2883 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2884 JUMPHERE(jump[0]);
2885 return cc;
2886 }
2887 #endif
2888 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2889 return cc;
2890
2891 case OP_ANYBYTE:
2892 check_input_end(common, fallbacks);
2893 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2894 return cc;
2895
2896 #ifdef SUPPORT_UTF8
2897 #ifdef SUPPORT_UCP
2898 case OP_NOTPROP:
2899 case OP_PROP:
2900 propdata[0] = 0;
2901 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
2902 propdata[2] = cc[0];
2903 propdata[3] = cc[1];
2904 propdata[4] = XCL_END;
2905 compile_xclass_hotpath(common, propdata, fallbacks);
2906 return cc + 2;
2907 #endif
2908 #endif
2909
2910 case OP_ANYNL:
2911 check_input_end(common, fallbacks);
2912 read_char(common);
2913 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2914 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2915 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2916 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2917 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2918 jump[3] = JUMP(SLJIT_JUMP);
2919 JUMPHERE(jump[0]);
2920 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
2921 JUMPHERE(jump[1]);
2922 JUMPHERE(jump[2]);
2923 JUMPHERE(jump[3]);
2924 return cc;
2925
2926 case OP_NOT_HSPACE:
2927 case OP_HSPACE:
2928 check_input_end(common, fallbacks);
2929 read_char(common);
2930 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
2931 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2932 return cc;
2933
2934 case OP_NOT_VSPACE:
2935 case OP_VSPACE:
2936 check_input_end(common, fallbacks);
2937 read_char(common);
2938 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
2939 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2940 return cc;
2941
2942 #ifdef SUPPORT_UCP
2943 case OP_EXTUNI:
2944 check_input_end(common, fallbacks);
2945 read_char(common);
2946 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2947 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2948 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
2949
2950 label = LABEL();
2951 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2952 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2953 read_char(common);
2954 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2955 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2956 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
2957
2958 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2959 JUMPHERE(jump[0]);
2960 return cc;
2961 #endif
2962
2963 case OP_EODN:
2964 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2965 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2966 {
2967 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2968 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2969 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2970 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
2971 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
2972 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
2973 }
2974 else if (common->nltype == NLTYPE_FIXED)
2975 {
2976 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2977 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2978 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2979 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2980 }
2981 else
2982 {
2983 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2984 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2985 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2986 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
2987 jump[2] = JUMP(SLJIT_C_GREATER);
2988 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
2989 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 1);
2990 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2991 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
2992
2993 JUMPHERE(jump[1]);
2994 if (common->nltype == NLTYPE_ANYCRLF)
2995 {
2996 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2997 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
2998 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2999 }
3000 else
3001 {
3002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3003 read_char(common);
3004 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3005 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3006 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3007 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3008 }
3009 JUMPHERE(jump[2]);
3010 JUMPHERE(jump[3]);
3011 }
3012 JUMPHERE(jump[0]);
3013 return cc;
3014
3015 case OP_EOD:
3016 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3017 return cc;
3018
3019 case OP_CIRC:
3020 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3022 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3023 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3024 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3025 return cc;
3026
3027 case OP_CIRCM:
3028 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3029 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3030 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3031 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3032 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3033 jump[0] = JUMP(SLJIT_JUMP);
3034 JUMPHERE(jump[1]);
3035
3036 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, end));
3037 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, STR_PTR, 0));
3038
3039 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3040 {
3041 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3042 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3043 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
3044 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
3045 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3046 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3047 }
3048 else
3049 {
3050 skip_char_back(common);
3051 read_char(common);
3052 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3053 }
3054 JUMPHERE(jump[0]);
3055 return cc;
3056
3057 case OP_DOLL:
3058 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3059 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3060 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3061
3062 if (!common->endonly)
3063 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3064 else
3065 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3066 return cc;
3067
3068 case OP_DOLLM:
3069 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3070 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3071 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3072 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3073 jump[0] = JUMP(SLJIT_JUMP);
3074 JUMPHERE(jump[1]);
3075
3076 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3077 {
3078 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3079 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3080 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3081 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
3082 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3083 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3084 }
3085 else
3086 {
3087 peek_char(common);
3088 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3089 }
3090 JUMPHERE(jump[0]);
3091 return cc;
3092
3093 case OP_CHAR:
3094 case OP_CHARI:
3095 length = IN_UCHARS(1);
3096 #ifdef SUPPORT_UTF8
3097 if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
3098 #endif
3099 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3100 {
3101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3102 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3103
3104 context.length = length;
3105 context.sourcereg = -1;
3106 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3107 context.ucharptr = 0;
3108 #endif
3109 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3110 }
3111 check_input_end(common, fallbacks);
3112 read_char(common);
3113 #ifdef SUPPORT_UTF8
3114 if (common->utf8)
3115 {
3116 GETCHAR(c, cc);
3117 }
3118 else
3119 #endif
3120 c = *cc;
3121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3122 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3124 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3125 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3126 return cc + length;
3127
3128 case OP_NOT:
3129 case OP_NOTI:
3130 #ifdef SUPPORT_UTF8
3131 if (common->utf8)
3132 {
3133 length = 1;
3134 if (*cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
3135
3136 check_input_end(common, fallbacks);
3137 GETCHAR(c, cc);
3138
3139 if (c <= 127)
3140 {
3141 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3142 if (type == OP_NOT || !char_has_othercase(common, cc))
3143 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3144 else
3145 {
3146 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3147 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3148 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3149 }
3150 /* Skip the variable-length character. */
3151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3152 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3153 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3154 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3155 JUMPHERE(jump[0]);
3156 return cc + length;
3157 }
3158 else
3159 read_char(common);
3160 }
3161 else
3162 #endif
3163 {
3164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3165 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3166 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3167 c = *cc;
3168 }
3169
3170 if (type == OP_NOT || !char_has_othercase(common, cc))
3171 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3172 else
3173 {
3174 oc = char_othercase(common, c);
3175 bit = c ^ oc;
3176 if (ispowerof2(bit))
3177 {
3178 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3179 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3180 }
3181 else
3182 {
3183 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3184 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3185 }
3186 }
3187 return cc + 1;
3188
3189 case OP_CLASS:
3190 case OP_NCLASS:
3191 check_input_end(common, fallbacks);
3192 read_char(common);
3193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3194 jump[0] = NULL;
3195 #ifdef SUPPORT_UTF8
3196 /* This check can only be skipped in pure 8 bit mode. */
3197 if (common->utf8)
3198 #endif
3199 {
3200 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3201 if (type == OP_CLASS)
3202 {
3203 add_jump(compiler, fallbacks, jump[0]);
3204 jump[0] = NULL;
3205 }
3206 }
3207 #endif
3208 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3209 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3210 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3211 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3212 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3213 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3214 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3215 if (jump[0] != NULL)
3216 JUMPHERE(jump[0]);
3217 #endif
3218 return cc + 32 / sizeof(pcre_uchar);
3219
3220 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3221 case OP_XCLASS:
3222 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3223 return cc + GET(cc, 0) - 1;
3224 #endif
3225
3226 case OP_REVERSE:
3227 length = GET(cc, 0);
3228 SLJIT_ASSERT(length > 0);
3229 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3231 #ifdef SUPPORT_UTF8
3232 if (common->utf8)
3233 {
3234 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3235 label = LABEL();
3236 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0));
3237 skip_char_back(common);
3238 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3239 JUMPTO(SLJIT_C_NOT_ZERO, label);
3240 return cc + LINK_SIZE;
3241 }
3242 #endif
3243 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3244 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3245 return cc + LINK_SIZE;
3246 }
3247 SLJIT_ASSERT_STOP();
3248 return cc;
3249 }
3250
3251 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3252 {
3253 /* This function consumes at least one input character. */
3254 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3255 DEFINE_COMPILER;
3256 pcre_uchar *ccbegin = cc;
3257 compare_context context;
3258 int size;
3259
3260 context.length = 0;
3261 do
3262 {
3263 if (cc >= ccend)
3264 break;
3265
3266 if (*cc == OP_CHAR)
3267 {
3268 size = 1;
3269 #ifdef SUPPORT_UTF8
3270 if (common->utf8 && cc[1] >= 0xc0)
3271 size += PRIV(utf8_table4)[cc[1] & 0x3f];
3272 #endif
3273 }
3274 else if (*cc == OP_CHARI)
3275 {
3276 size = 1;
3277 #ifdef SUPPORT_UTF8
3278 if (common->utf8)
3279 {
3280 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3281 size = 0;
3282 else if (cc[1] >= 0xc0)
3283 size += PRIV(utf8_table4)[cc[1] & 0x3f];
3284 }
3285 else
3286 #endif
3287 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3288 size = 0;
3289 }
3290 else
3291 size = 0;
3292
3293 cc += 1 + size;
3294 context.length += IN_UCHARS(size);
3295 }
3296 while (size > 0 && context.length <= 128);
3297
3298 cc = ccbegin;
3299 if (context.length > 0)
3300 {
3301 /* We have a fixed-length byte sequence. */
3302 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3303 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3304
3305 context.sourcereg = -1;
3306 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3307 context.ucharptr = 0;
3308 #endif
3309 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3310 sljit_emit_op0(compiler, SLJIT_NOP);
3311 return cc;
3312 }
3313
3314 /* A non-fixed length character will be checked if length == 0. */
3315 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3316 }
3317
3318 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3319 {
3320 DEFINE_COMPILER;
3321 int offset = GET2(cc, 1) << 1;
3322
3323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3324 if (!common->jscript_compat)
3325 {
3326 if (fallbacks == NULL)
3327 {
3328 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3329 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3330 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3331 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3332 return JUMP(SLJIT_C_NOT_ZERO);
3333 }
3334 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3335 }
3336 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3337 }
3338
3339 /* Forward definitions. */
3340 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3341 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3342
/* Allocates a zero-filled fallback record of (size) bytes from the compiler's
memory, stores (ccstart) in it and pushes it onto parent->top. The local
variables "compiler", "fallback" and "parent" must be in scope. If the
compiler is in an error state after the allocation, the enclosing function
returns (error). */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3355
/* Variant of the fallback-record push for functions returning void: allocates
a zero-filled record of (size) bytes, stores (ccstart) in it and pushes it
onto parent->top. The local variables "compiler", "fallback" and "parent"
must be in scope. If the compiler is in an error state after the allocation,
the enclosing function returns immediately. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3368
/* Views the local "fallback" pointer as a pointer to the given record type. */
#define FALLBACK_AS(type) ((type *)fallback)
3370
3371 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3372 {
3373 DEFINE_COMPILER;
3374 int offset = GET2(cc, 1) << 1;
3375 struct sljit_jump *jump = NULL;
3376
3377 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3378 if (withchecks && !common->jscript_compat)
3379 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3380
3381 #ifdef SUPPORT_UTF8
3382 #ifdef SUPPORT_UCP
3383 if (common->utf8 && *cc == OP_REFI)
3384 {
3385 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3386 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3387 if (withchecks)
3388 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3389
3390 /* Needed to save important temporary registers. */
3391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3392 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3394 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3395 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3396 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3397 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3398 }
3399 else
3400 #endif
3401 #endif
3402 {
3403 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3404 if (withchecks)
3405 jump = JUMP(SLJIT_C_ZERO);
3406 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3407
3408 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3409 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3410 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3411 }
3412
3413 if (jump != NULL)
3414 {
3415 if (emptyfail)
3416 add_jump(compiler, fallbacks, jump);
3417 else
3418 JUMPHERE(jump);
3419 }
3420 return cc + 1 + IMM2_SIZE;
3421 }
3422
/* Generates the hot path for a back reference (the opcode at cc) that is
followed by a repeat opcode (OP_CRSTAR ... OP_CRMINRANGE). An
iterator_fallback record is pushed onto parent->top. Returns the address
following the repeat opcode and its operands, or NULL when the fallback
record cannot be allocated (see PUSH_FALLBACK). */
3423 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3424 {
3425 DEFINE_COMPILER;
3426 fallback_common *fallback;
3427 pcre_uchar type;
3428 struct sljit_label *label;
3429 struct sljit_jump *zerolength;
3430 struct sljit_jump *jump = NULL;
3431 pcre_uchar *ccbegin = cc;
3432 int min = 0, max = 0;
3433 BOOL minimize;
3434
3435 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3436
/* The repeat opcode follows the reference opcode and its IMM2_SIZE operand. */
3437 type = cc[1 + IMM2_SIZE];
/* Opcodes with the lowest bit set take the minimizing code path below. */
3438 minimize = (type & 0x1) != 0;
/* Decode the repeat limits; max == 0 is handled as "no upper limit" by the
code that follows. cc is advanced past the repeat opcode and its operands. */
3439 switch(type)
3440 {
3441 case OP_CRSTAR:
3442 case OP_CRMINSTAR:
3443 min = 0;
3444 max = 0;
3445 cc += 1 + IMM2_SIZE + 1;
3446 break;
3447 case OP_CRPLUS:
3448 case OP_CRMINPLUS:
3449 min = 1;
3450 max = 0;
3451 cc += 1 + IMM2_SIZE + 1;
3452 break;
3453 case OP_CRQUERY:
3454 case OP_CRMINQUERY:
3455 min = 0;
3456 max = 1;
3457 cc += 1 + IMM2_SIZE + 1;
3458 break;
3459 case OP_CRRANGE:
3460 case OP_CRMINRANGE:
3461 min = GET2(cc, 1 + IMM2_SIZE + 1);
3462 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3463 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3464 break;
3465 default:
3466 SLJIT_ASSERT_STOP();
3467 break;
3468 }
3469
/* Non-minimizing case: the reference is matched in a loop starting at
"label"; STR_PTR values are pushed onto the stack between iterations. */
3470 if (!minimize)
3471 {
3472 if (min == 0)
3473 {
3474 allocate_stack(common, 2);
3475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3477 /* Temporary release of STR_PTR. */
3478 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3479 zerolength = compile_ref_checks(common, ccbegin, NULL);
3480 /* Restore if not zero length. */
3481 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3482 }
3483 else
3484 {
3485 allocate_stack(common, 1);
3486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3487 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3488 }
3489
/* An iteration counter is kept in POSSESSIVE0 only when a limit above one
has to be enforced. */
3490 if (min > 1 || max > 1)
3491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3492
3493 label = LABEL();
3494 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3495
3496 if (min > 1 || max > 1)
3497 {
3498 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3499 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing STR_PTR. */
3501 if (min > 1)
3502 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3503 if (max > 1)
3504 {
/* Leave the loop once the counter reaches max; otherwise push STR_PTR and
repeat. */
3505 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3506 allocate_stack(common, 1);
3507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3508 JUMPTO(SLJIT_JUMP, label);
3509 JUMPHERE(jump);
3510 }
3511 }
3512
3513 if (max == 0)
3514 {
3515 /* Includes min > 1 case as well. */
3516 allocate_stack(common, 1);
3517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3518 JUMPTO(SLJIT_JUMP, label);
3519 }
3520
3521 JUMPHERE(zerolength);
3522 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3523
3524 decrease_call_count(common);
3525 return cc;
3526 }
3527
/* Minimizing case: two stack slots are allocated; STACK(0) receives STR_PTR
and STACK(1) is used as the iteration counter. */
3528 allocate_stack(common, 2);
3529 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3530 if (type != OP_CRMINSTAR)
3531 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3532
3533 if (min == 0)
3534 {
3535 zerolength = compile_ref_checks(common, ccbegin, NULL);
3536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass jumps over the matching code emitted below. */
3537 jump = JUMP(SLJIT_JUMP);
3538 }
3539 else
3540 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3541
3542 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
/* Fail when the counter in STACK(1) has already reached max. */
3543 if (max > 0)
3544 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3545
3546 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3547 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3548
3549 if (min > 1)
3550 {
/* Increase the counter and repeat until min iterations are done. */
3551 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3552 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3554 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3555 }
3556 else if (max > 0)
3557 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3558
3559 if (jump != NULL)
3560 JUMPHERE(jump);
3561 JUMPHERE(zerolength);
3562
3563 decrease_call_count(common);
3564 return cc;
3565 }
3566
3567 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3568 {
3569 DEFINE_COMPILER;
3570 fallback_common *fallback;
3571 recurse_entry *entry = common->entries;
3572 recurse_entry *prev = NULL;
3573 int start = GET(cc, 1);
3574
3575 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3576 while (entry != NULL)
3577 {
3578 if (entry->start == start)
3579 break;
3580 prev = entry;
3581 entry = entry->next;
3582 }
3583
3584 if (entry == NULL)
3585 {
3586 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3587 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3588 return NULL;
3589 entry->next = NULL;
3590 entry->entry = NULL;
3591 entry->calls = NULL;
3592 entry->start = start;
3593
3594 if (prev != NULL)
3595 prev->next = entry;
3596 else
3597 common->entries = entry;
3598 }
3599
3600 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3601 allocate_stack(common, 1);
3602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3603
3604 if (entry->entry == NULL)
3605 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3606 else
3607 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3608 /* Leave if the match is failed. */
3609 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3610 return cc + 1 + LINK_SIZE;
3611 }
3612
/* Generates the code of an assertion (an opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common       compiler state; acceptlabel and accept are saved on entry and
               restored before every return
  cc           the assertion opcode, or the OP_BRAZERO / OP_BRAMINZERO before it
  fallback     assert fallback record; framesize, localptr and (for the
               BRAZERO / BRAMINZERO forms) hotpath are filled or used here
  conditional  when TRUE, failure jumps are collected in fallback->condfailed
               instead of fallback->common.topfallbacks

Every alternative (separated by OP_ALT) is compiled in turn together with its
own fallback path. Returns the address following the closing opcode and its
LINK_SIZE operand, or NULL when the compiler reports an error. */
3613 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3614 {
3615 DEFINE_COMPILER;
3616 int framesize;
3617 int localptr;
3618 fallback_common altfallback;
3619 pcre_uchar *ccbegin;
3620 pcre_uchar opcode;
3621 pcre_uchar bra = OP_BRA;
3622 jump_list *tmp = NULL;
3623 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3624 jump_list **found;
3625 /* Saving previous accept variables. */
3626 struct sljit_label *save_acceptlabel = common->acceptlabel;
3627 struct sljit_jump *jump;
3628 struct sljit_jump *brajump = NULL;
3629 jump_list *save_accept = common->accept;
3630
3631 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3632 {
3633 SLJIT_ASSERT(!conditional);
3634 bra = *cc;
3635 cc++;
3636 }
3637 localptr = PRIV_DATA(cc);
3638 SLJIT_ASSERT(localptr != 0);
3639 framesize = get_framesize(common, cc, FALSE);
3640 fallback->framesize = framesize;
3641 fallback->localptr = localptr;
3642 opcode = *cc;
3643 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* "found" collects the jumps taken when an alternative matches: a private
list for positive assertions, the failure target for negative ones. */
3644 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc now delimits the first alternative. */
3645 ccbegin = cc;
3646 cc += GET(cc, 1);
3647
3648 if (bra == OP_BRAMINZERO)
3649 {
3650 /* This is a braminzero fallback path. */
3651 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3652 free_stack(common, 1);
3653 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3654 }
3655
/* Save STACK_TOP in the local slot and push STR_PTR; when a frame is needed
(framesize >= 0) the previous local value and the frame are pushed too. */
3656 if (framesize < 0)
3657 {
3658 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3659 allocate_stack(common, 1);
3660 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3661 }
3662 else
3663 {
3664 allocate_stack(common, framesize + 2);
3665 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3666 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3668 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3670 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3671 }
3672
/* One iteration per alternative. */
3673 memset(&altfallback, 0, sizeof(fallback_common));
3674 while (1)
3675 {
3676 common->acceptlabel = NULL;
3677 common->accept = NULL;
3678 altfallback.top = NULL;
3679 altfallback.topfallbacks = NULL;
3680
/* Later alternatives start again from the saved STR_PTR. */
3681 if (*ccbegin == OP_ALT)
3682 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3683
3684 altfallback.cc = ccbegin;
3685 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3686 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3687 {
3688 common->acceptlabel = save_acceptlabel;
3689 common->accept = save_accept;
3690 return NULL;
3691 }
/* Accept jumps collected while compiling this alternative land here. */
3692 common->acceptlabel = LABEL();
3693 if (common->accept != NULL)
3694 set_jumps(common->accept, common->acceptlabel);
3695
3696 /* Reset stack. */
3697 if (framesize < 0)
3698 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3699 else {
3700 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3701 {
3702 /* We don't need to keep the STR_PTR, only the previous localptr. */
3703 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3704 }
3705 else
3706 {
3707 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3708 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3709 }
3710 }
3711
3712 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3713 {
3714 /* We know that STR_PTR was stored on the top of the stack. */
3715 if (conditional)
3716 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3717 else if (bra == OP_BRAZERO)
3718 {
3719 if (framesize < 0)
3720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3721 else
3722 {
3723 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3724 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3726 }
3727 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3729 }
3730 else if (framesize >= 0)
3731 {
3732 /* For OP_BRA and OP_BRAMINZERO. */
3733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3734 }
3735 }
/* The alternative matched: jump to the "found" list. */
3736 add_jump(compiler, found, JUMP(SLJIT_JUMP));
3737
/* The fallback path of this alternative is emitted right after its hot path. */
3738 compile_fallbackpath(common, altfallback.top);
3739 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3740 {
3741 common->acceptlabel = save_acceptlabel;
3742 common->accept = save_accept;
3743 return NULL;
3744 }
3745 set_jumps(altfallback.topfallbacks, LABEL());
3746
3747 if (*cc != OP_ALT)
3748 break;
3749
3750 ccbegin = cc;
3751 cc += GET(cc, 1);
3752 }
3753 /* None of them matched. */
3754
3755 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3756 {
3757 /* Assert is failed. */
3758 if (conditional || bra == OP_BRAZERO)
3759 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3760
3761 if (framesize < 0)
3762 {
3763 /* The topmost item should be 0. */
3764 if (bra == OP_BRAZERO)
3765 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3766 else
3767 free_stack(common, 1);
3768 }
3769 else
3770 {
3771 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3772 /* The topmost item should be 0. */
3773 if (bra == OP_BRAZERO)
3774 {
3775 free_stack(common, framesize + 1);
3776 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3777 }
3778 else
3779 free_stack(common, framesize + 2);
3780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3781 }
/* For OP_BRAZERO this jump is bound to fallback->hotpath further below;
otherwise it is added to the failure target. */
3782 jump = JUMP(SLJIT_JUMP);
3783 if (bra != OP_BRAZERO)
3784 add_jump(compiler, target, jump);
3785
3786 /* Assert is successful. */
3787 set_jumps(tmp, LABEL());
3788 if (framesize < 0)
3789 {
3790 /* We know that STR_PTR was stored on the top of the stack. */
3791 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3792 /* Keep the STR_PTR on the top of the stack. */
3793 if (bra == OP_BRAZERO)
3794 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3795 else if (bra == OP_BRAMINZERO)
3796 {
3797 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3798 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3799 }
3800 }
3801 else
3802 {
3803 if (bra == OP_BRA)
3804 {
3805 /* We don't need to keep the STR_PTR, only the previous localptr. */
3806 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3807 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3808 }
3809 else
3810 {
3811 /* We don't need to keep the STR_PTR, only the previous localptr. */
3812 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
3813 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
3815 }
3816 }
3817
3818 if (bra == OP_BRAZERO)
3819 {
3820 fallback->hotpath = LABEL();
3821 sljit_set_label(jump, fallback->hotpath);
3822 }
3823 else if (bra == OP_BRAMINZERO)
3824 {
3825 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3826 JUMPHERE(brajump);
3827 if (framesize >= 0)
3828 {
3829 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3830 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3832 }
3833 set_jumps(fallback->common.topfallbacks, LABEL());
3834 }
3835 }
3836 else
3837 {
3838 /* AssertNot is successful. */
3839 if (framesize < 0)
3840 {
3841 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3842 if (bra != OP_BRA)
3843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3844 else
3845 free_stack(common, 1);
3846 }
3847 else
3848 {
3849 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3850 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3851 /* The topmost item should be 0. */
3852 if (bra != OP_BRA)
3853 {
3854 free_stack(common, framesize + 1);
3855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3856 }
3857 else
3858 free_stack(common, framesize + 2);
3859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3860 }
3861
3862 if (bra == OP_BRAZERO)
3863 fallback->hotpath = LABEL();
3864 else if (bra == OP_BRAMINZERO)
3865 {
3866 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3867 JUMPHERE(brajump);
3868 }
3869
/* With BRAZERO / BRAMINZERO the collected jumps are resolved here and the
list is cleared. */
3870 if (bra != OP_BRA)
3871 {
3872 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
3873 set_jumps(fallback->common.topfallbacks, LABEL());
3874 fallback->common.topfallbacks = NULL;
3875 }
3876 }
3877
/* Restore the accept state saved on entry. */
3878 common->acceptlabel = save_acceptlabel;
3879 common->accept = save_accept;
3880 return cc + 1 + LINK_SIZE;
3881 }
3882
3883 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
3884 {
3885 int condition = FALSE;
3886 pcre_uchar *slotA = name_table;
3887 pcre_uchar *slotB;
3888 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3889 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3890 sljit_w no_capture;
3891 int i;
3892
3893 locals += OVECTOR_START / sizeof(sljit_w);
3894 no_capture = locals[1];
3895
3896 for (i = 0; i < name_count; i++)
3897 {
3898 if (GET2(slotA, 0) == refno) break;
3899 slotA += name_entry_size;
3900 }
3901
3902 if (i < name_count)
3903 {
3904 /* Found a name for the number - there can be only one; duplicate names
3905 for different numbers are allowed, but not vice versa. First scan down
3906 for duplicates. */
3907
3908 slotB = slotA;
3909 while (slotB > name_table)
3910 {
3911 slotB -= name_entry_size;
3912 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3913 {
3914 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3915 if (condition) break;
3916 }
3917 else break;
3918 }
3919
3920 /* Scan up for duplicates */
3921 if (!condition)
3922 {
3923 slotB = slotA;
3924 for (i++; i < name_count; i++)
3925 {
3926 slotB += name_entry_size;
3927 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3928 {
3929 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3930 if (condition) break;
3931 }
3932 else break;
3933 }
3934 }
3935 }
3936 return condition;
3937 }
3938
3939 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
3940 {
3941 int condition = FALSE;
3942 pcre_uchar *slotA = name_table;
3943 pcre_uchar *slotB;
3944 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3945 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3946 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
3947 int i;
3948
3949 for (i = 0; i < name_count; i++)
3950 {
3951 if (GET2(slotA, 0) == recno) break;
3952 slotA += name_entry_size;
3953 }
3954
3955 if (i < name_count)
3956 {
3957 /* Found a name for the number - there can be only one; duplicate
3958 names for different numbers are allowed, but not vice versa. First
3959 scan down for duplicates. */
3960
3961 slotB = slotA;
3962 while (slotB > name_table)
3963 {
3964 slotB -= name_entry_size;
3965 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3966 {
3967 condition = GET2(slotB, 0) == group_num;
3968 if (condition) break;
3969 }
3970 else break;
3971 }
3972
3973 /* Scan up for duplicates */
3974 if (!condition)
3975 {
3976 slotB = slotA;
3977 for (i++; i < name_count; i++)
3978 {
3979 slotB += name_entry_size;
3980 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3981 {
3982 condition = GET2(slotB, 0) == group_num;
3983 if (condition) break;
3984 }
3985 else break;
3986 }
3987 }
3988 }
3989 return condition;
3990 }
3991
3992 /*
3993 Handling bracketed expressions is probably the most complex part.
3994
3995 Stack layout naming characters:
3996 S - Push the current STR_PTR
3997 0 - Push a 0 (NULL)
3998 A - Push the current STR_PTR. Needed for restoring the STR_PTR
3999 before the next alternative. Not pushed if there are no alternatives.
4000 M - Any values pushed by the current alternative. Can be empty, or anything.
4001 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4002 L - Push the previous local (pointed by localptr) to the stack
4003 () - optional values stored on the stack
4004 ()* - optional, can be stored multiple times
4005
4006 The following list shows the regular expression templates, their PCRE byte codes
4007 and stack layout supported by pcre-sljit.
4008
4009 (?:) OP_BRA | OP_KET A M
4010 () OP_CBRA | OP_KET C M
4011 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4012 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4013 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4014 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4015 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4016 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4017 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4018 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4019 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4020 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4021 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4022 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4023 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4024 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4025 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4026 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4027 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4028 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4029 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4030 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4031
4032
4033 Stack layout naming characters:
4034 A - Push the alternative index (starting from 0) on the stack.
4035 Not pushed if there are no alternatives.
4036 M - Any values pushed by the current alternative. Can be empty, or anything.
4037
4038 The next list shows the possible content of a bracket:
4039 (|) OP_*BRA | OP_ALT ... M A
4040 (?()|) OP_*COND | OP_ALT M A
4041 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4042 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4043 Or nothing, if trace is unnecessary
4044 */
4045
/* Emits the hot-path code for one bracketed group.

cc points at the group, optionally preceded by OP_BRAZERO / OP_BRAMINZERO.
A bracket_fallback record is pushed with PUSH_FALLBACK (defined elsewhere;
it is expected to set the local variable `fallback`), the stack and local
bookkeeping for the group is emitted, and compile_hotpath() is called for a
single alternative only (the first one, or the branch selected at compile
time for a recursion condition). The remaining alternatives are skipped
here.

Returns the position just after the closing ket of the group, or NULL when
sljit reports a compiler error. A conditional group whose condition is
OP_DEF generates no code: its fallback record is unlinked from parent and
bracketend(cc) is returned. */
4046 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4047 {
4048 DEFINE_COMPILER;
4049 fallback_common *fallback;
4050 pcre_uchar opcode;
4051 int localptr = 0;
4052 int offset = 0;
4053 int stacksize;
4054 pcre_uchar *ccbegin;
4055 pcre_uchar *hotpath;
4056 pcre_uchar bra = OP_BRA;
4057 pcre_uchar ket;
4058 assert_fallback *assert;
4059 BOOL has_alternatives;
4060 struct sljit_jump *jump;
4061 struct sljit_jump *skip;
4062 struct sljit_label *rmaxlabel = NULL;
4063 struct sljit_jump *braminzerojump = NULL;
4064
4065 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4066
/* An optional zero-repeat prefix is remembered in bra; cc then moves on to
the bracket opcode itself. */
4067 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4068 {
4069 bra = *cc;
4070 cc++;
4071 opcode = *cc;
4072 }
4073
/* ccbegin keeps the bracket opcode position; hotpath is the first position
after the bracket opcode and its link field. */
4074 opcode = *cc;
4075 ccbegin = cc;
4076 hotpath = ccbegin + 1 + LINK_SIZE;
4077
4078 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4079 {
4080 /* Drop this bracket_fallback. */
4081 parent->top = fallback->prev;
4082 return bracketend(cc);
4083 }
4084
/* The closing ket opcode is read 1 + LINK_SIZE units before the position
returned by bracketend(). Afterwards cc is advanced by the link value of the
bracket, i.e. to the OP_ALT or ket that ends the first alternative. */
4085 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4086 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4087 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4088 cc += GET(cc, 1);
4089
/* For conditional groups has_alternatives is recomputed: it is FALSE when
the recursion condition is resolved at compile time (see the SLJIT_ASSERTs
in the OP_RREF / OP_NRREF handling below). stacksize is only used as a
scratch variable for the referenced group number here. */
4090 has_alternatives = *cc == OP_ALT;
4091 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4092 {
4093 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4094 if (*hotpath == OP_NRREF)
4095 {
4096 stacksize = GET2(hotpath, 1);
4097 if (common->currententry == NULL || stacksize == RREF_ANY)
4098 has_alternatives = FALSE;
4099 else if (common->currententry->start == 0)
4100 has_alternatives = stacksize != 0;
4101 else
4102 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4103 }
4104 }
4105
/* From here on a repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC is
handled as OP_ONCE. */
4106 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4107 opcode = OP_SCOND;
4108 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4109 opcode = OP_ONCE;
4110
4111 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4112 {
4113 /* Capturing brackets has a pre-allocated space. */
4114 offset = GET2(ccbegin, 1 + LINK_SIZE);
4115 localptr = OVECTOR_PRIV(offset);
4116 offset <<= 1;
4117 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4118 hotpath += IMM2_SIZE;
4119 }
4120 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4121 {
4122 /* Other brackets simply allocate the next entry. */
4123 localptr = PRIV_DATA(ccbegin);
4124 SLJIT_ASSERT(localptr != 0);
4125 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4126 if (opcode == OP_ONCE)
4127 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4128 }
4129
4130 /* Instructions before the first alternative. */
/* Up to two slots are reserved: one that is initialised to 0 for repeated
groups, and one that receives the current STR_PTR for OP_BRAZERO. */
4131 stacksize = 0;
4132 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4133 stacksize++;
4134 if (bra == OP_BRAZERO)
4135 stacksize++;
4136
4137 if (stacksize > 0)
4138 allocate_stack(common, stacksize);
4139
4140 stacksize = 0;
4141 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4142 {
4143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4144 stacksize++;
4145 }
4146
4147 if (bra == OP_BRAZERO)
4148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4149
4150 if (bra == OP_BRAMINZERO)
4151 {
4152 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4153 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4154 if (ket != OP_KETRMIN)
4155 {
4156 free_stack(common, 1);
4157 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4158 }
4159 else
4160 {
4161 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4162 {
4163 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4164 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4165 /* Nothing stored during the first run. */
4166 skip = JUMP(SLJIT_JUMP);
4167 JUMPHERE(jump);
4168 /* Checking zero-length iteration. */
4169 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4170 {
4171 /* When we come from outside, localptr contains the previous STR_PTR. */
4172 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4173 }
4174 else
4175 {
4176 /* Except when the whole stack frame must be saved. */
4177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4178 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4179 }
4180 JUMPHERE(skip);
4181 }
4182 else
4183 {
4184 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4185 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4186 JUMPHERE(jump);
4187 }
4188 }
4189 }
4190
/* Labels recorded in the fallback record for re-entering the group: for
OP_KETRMIN the re-entry label is placed before the group body, for
OP_KETRMAX the loop label rmaxlabel is placed here and the re-entry label
is set after the body (further below). */
4191 if (ket == OP_KETRMIN)
4192 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4193
4194 if (ket == OP_KETRMAX)
4195 {
4196 rmaxlabel = LABEL();
4197 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4198 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4199 }
4200
4201 /* Handling capturing brackets and alternatives. */
4202 if (opcode == OP_ONCE)
4203 {
4204 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4205 {
4206 /* Neither capturing brackets nor recursions are found in the block. */
4207 if (ket == OP_KETRMIN)
4208 {
4209 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4210 allocate_stack(common, 2);
4211 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4213 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4214 }
4215 else if (ket == OP_KETRMAX || has_alternatives)
4216 {
4217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4218 allocate_stack(common, 1);
4219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4220 }
4221 else
4222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4223 }
4224 else
4225 {
/* A frame is required: the previous localptr value (and, for repeated or
multi-alternative groups, the current STR_PTR) is stored next to the frame
written by init_frame(). */
4226 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4227 {
4228 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4229 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4230 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4234 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4235 }
4236 else
4237 {
4238 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4240 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4243 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4244 }
4245 }
4246 }
4247 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4248 {
4249 /* Saving the previous values. */
4250 allocate_stack(common, 3);
4251 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4252 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4258 }
4259 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4260 {
4261 /* Saving the previous value. */
4262 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4263 allocate_stack(common, 1);
4264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4265 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4266 }
4267 else if (has_alternatives)
4268 {
4269 /* Pushing the starting string pointer. */
4270 allocate_stack(common, 1);
4271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4272 }
4273
4274 /* Generating code for the first alternative. */
4275 if (opcode == OP_COND || opcode == OP_SCOND)
4276 {
4277 if (*hotpath == OP_CREF)
4278 {
/* The condition fails when the start slot of the referenced group equals
the value kept in OVECTOR(1). */
4279 SLJIT_ASSERT(has_alternatives);
4280 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4281 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4282 hotpath += 1 + IMM2_SIZE;
4283 }
4284 else if (*hotpath == OP_NCREF)
4285 {
/* stacksize temporarily holds the referenced group number. When that group
is set the helper call is skipped; otherwise do_searchovector() is called
with STACK_TOP saved in POSSESSIVE1 around the call, and a zero result
means the condition failed. */
4286 SLJIT_ASSERT(has_alternatives);
4287 stacksize = GET2(hotpath, 1);
4288 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4289
4290 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4293 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4294 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4295 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4296 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4297 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4298 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4299
4300 JUMPHERE(jump);
4301 hotpath += 1 + IMM2_SIZE;
4302 }
4303 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4304 {
4305 /* Never has other case. */
4306 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4307
/* stacksize is reused as a boolean here: non-zero when the recursion
condition is known to hold at compile time. */
4308 stacksize = GET2(hotpath, 1);
4309 if (common->currententry == NULL)
4310 stacksize = 0;
4311 else if (stacksize == RREF_ANY)
4312 stacksize = 1;
4313 else if (common->currententry->start == 0)
4314 stacksize = stacksize == 0;
4315 else
4316 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4317
4318 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4319 {
/* Decided at compile time: either continue with the first branch, or move
hotpath (and cc) to the second branch, or to the ket when there is none. */
4320 SLJIT_ASSERT(!has_alternatives);
4321 if (stacksize != 0)
4322 hotpath += 1 + IMM2_SIZE;
4323 else
4324 {
4325 if (*cc == OP_ALT)
4326 {
4327 hotpath = cc + 1 + LINK_SIZE;
4328 cc += GET(cc, 1);
4329 }
4330 else
4331 hotpath = cc;
4332 }
4333 }
4334 else
4335 {
4336 SLJIT_ASSERT(has_alternatives);
4337
/* Same calling sequence as for OP_NCREF, but POSSESSIVE0 additionally
carries the group number read from the current entry, and
do_searchgroups() is called. */
4338 stacksize = GET2(hotpath, 1);
4339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4342 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4343 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4344 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4345 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4346 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4347 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4348 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4349 hotpath += 1 + IMM2_SIZE;
4350 }
4351 }
4352 else
4353 {
4354 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4355 /* Similar code as PUSH_FALLBACK macro. */
4356 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4357 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4358 return NULL;
4359 memset(assert, 0, sizeof(assert_fallback));
4360 assert->common.cc = hotpath;
4361 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4362 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4363 }
4364 }
4365
/* Body of the selected alternative: the opcodes from hotpath up to cc. */
4366 compile_hotpath(common, hotpath, cc, fallback);
4367 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4368 return NULL;
4369
4370 if (opcode == OP_ONCE)
4371 {
/* STACK_TOP is recomputed from the value kept in localptr after the body of
an OP_ONCE group has matched. */
4372 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4373 {
4374 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4375 /* TMP2 which is set here used by OP_KETRMAX below. */
4376 if (ket == OP_KETRMAX)
4377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4378 else if (ket == OP_KETRMIN)
4379 {
4380 /* Move the STR_PTR to the localptr. */
4381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4382 }
4383 }
4384 else
4385 {
4386 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4387 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4388 if (ket == OP_KETRMAX)
4389 {
4390 /* TMP2 which is set here used by OP_KETRMAX below. */
4391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4392 }
4393 }
4394 }
4395
/* Slots pushed after the alternative has matched: STR_PTR for repeated
groups (or a 0 for a non-repeated group with a zero-repeat prefix), and,
when there are alternatives and the group is not OP_ONCE, a 0 which is the
index of this (first) alternative. */
4396 stacksize = 0;
4397 if (ket != OP_KET || bra != OP_BRA)
4398 stacksize++;
4399 if (has_alternatives && opcode != OP_ONCE)
4400 stacksize++;
4401
4402 if (stacksize > 0)
4403 allocate_stack(common, stacksize);
4404
4405 stacksize = 0;
4406 if (ket != OP_KET)
4407 {
4408 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4409 stacksize++;
4410 }
4411 else if (bra != OP_BRA)
4412 {
4413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4414 stacksize++;
4415 }
4416
4417 if (has_alternatives)
4418 {
4419 if (opcode != OP_ONCE)
4420 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4421 if (ket != OP_KETRMAX)
4422 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4423 }
4424
4425 /* Must be after the hotpath label. */
/* offset is non-zero only for capturing groups: the capture is completed by
writing the saved start (from localptr) and the current STR_PTR into the
ovector pair. */
4426 if (offset != 0)
4427 {
4428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4431 }
4432
4433 if (ket == OP_KETRMAX)
4434 {
4435 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4436 {
4437 if (has_alternatives)
4438 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4439 /* Checking zero-length iteration. */
4440 if (opcode != OP_ONCE)
4441 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4442 else
4443 /* TMP2 must contain the starting STR_PTR. */
4444 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4445 }
4446 else
4447 JUMPTO(SLJIT_JUMP, rmaxlabel);
4448 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4449 }
4450
4451 if (bra == OP_BRAZERO)
4452 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4453
4454 if (bra == OP_BRAMINZERO)
4455 {
4456 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4457 JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath);
4458 if (braminzerojump != NULL)
4459 {
4460 JUMPHERE(braminzerojump);
4461 /* We need to release the end pointer to perform the
4462 fallback for the zero-length iteration. When
4463 framesize is < 0, OP_ONCE will do the release itself. */
4464 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4465 {
4466 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4467 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4468 }
4469 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4470 free_stack(common, 1);
4471 }
4472 /* Continue to the normal fallback. */
4473 }
4474
4475 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4476 decrease_call_count(common);
4477
4478 /* Skip the other alternatives. */
4479 while (*cc == OP_ALT)
4480 cc += GET(cc, 1);
4481 cc += 1 + LINK_SIZE;
4482 return cc;
4483 }
4484
/* Emits the code for an OP_BRAPOS / OP_SBRAPOS / OP_CBRAPOS / OP_SCBRAPOS
group (optionally preceded by OP_BRAPOSZERO) that is closed by OP_KETRPOS.

Unlike compile_bracket_hotpath(), every alternative of the group is compiled
here: for each one compile_hotpath() emits the body, a jump back to the loop
label follows, and the fallback path of that alternative is generated
immediately with compile_fallbackpath().

Returns the position just after OP_KETRPOS and its link field, or NULL when
sljit reports a compiler error. */
4485 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4486 {
4487 DEFINE_COMPILER;
4488 fallback_common *fallback;
4489 pcre_uchar opcode;
4490 int localptr;
4491 int cbraprivptr = 0;
4492 int framesize;
4493 int stacksize;
4494 int offset = 0;
4495 BOOL zero = FALSE;
4496 pcre_uchar *ccbegin = NULL;
4497 int stack;
4498 struct sljit_label *loop = NULL;
4499 struct jump_list *emptymatch = NULL;
4500
4501 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
/* zero records that the group was preceded by OP_BRAPOSZERO. */
4502 if (*cc == OP_BRAPOSZERO)
4503 {
4504 zero = TRUE;
4505 cc++;
4506 }
4507
4508 opcode = *cc;
4509 localptr = PRIV_DATA(cc);
4510 SLJIT_ASSERT(localptr != 0);
4511 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
/* ccbegin is set to the first opcode of the first alternative; capturing
variants carry an extra IMM2_SIZE group number, which also selects the
ovector pair (offset) and the private slot (cbraprivptr). */
4512 switch(opcode)
4513 {
4514 case OP_BRAPOS:
4515 case OP_SBRAPOS:
4516 ccbegin = cc + 1 + LINK_SIZE;
4517 break;
4518
4519 case OP_CBRAPOS:
4520 case OP_SCBRAPOS:
4521 offset = GET2(cc, 1 + LINK_SIZE);
4522 cbraprivptr = OVECTOR_PRIV(offset);
4523 offset <<= 1;
4524 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4525 break;
4526
4527 default:
4528 SLJIT_ASSERT_STOP();
4529 break;
4530 }
4531
4532 framesize = get_framesize(common, cc, FALSE);
4533 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4534 if (framesize < 0)
4535 {
/* No frame: the stack holds the saved ovector pair (capturing variants) or
the current STR_PTR, plus, when !zero, a last slot initialised to 1. That
slot is cleared to 0 after an alternative has matched and is tested after
the loop. localptr receives the STACK_TOP value after the allocation. */
4536 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4537 if (!zero)
4538 stacksize++;
4539 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4540 allocate_stack(common, stacksize);
4541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4542
4543 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4544 {
4545 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4546 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4547 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4549 }
4550 else
4551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4552
4553 if (!zero)
4554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4555 }
4556 else
4557 {
/* A frame is required: the optional flag slot (when !zero) comes first at
STACK(0), then STR_PTR for the non-capturing variants, then the previous
localptr value, followed by the frame written by init_frame(). */
4558 stacksize = framesize + 1;
4559 if (!zero)
4560 stacksize++;
4561 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4562 stacksize++;
4563 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4564 allocate_stack(common, stacksize);
4565
4566 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4567 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4568 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4569 stack = 0;
4570 if (!zero)
4571 {
4572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4573 stack++;
4574 }
4575 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4576 {
4577 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4578 stack++;
4579 }
4580 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4581 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4582 }
4583
4584 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4586
/* Every successfully matched alternative jumps back to this label. */
4587 loop = LABEL();
4588 while (*cc != OP_KETRPOS)
4589 {
/* The fallback lists are reset for each alternative; cc is advanced to the
end of the current alternative before its body is compiled. */
4590 fallback->top = NULL;
4591 fallback->topfallbacks = NULL;
4592 cc += GET(cc, 1);
4593
4594 compile_hotpath(common, ccbegin, cc, fallback);
4595 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4596 return NULL;
4597
4598 if (framesize < 0)
4599 {
4600 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4601
4602 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4603 {
4604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4605 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4608 }
4609 else
4610 {
4611 if (opcode == OP_SBRAPOS)
4612 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4614 }
4615
/* For the OP_S* variants TMP1 holds the position where this iteration
started; an iteration that did not advance STR_PTR leaves the loop. */
4616 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4617 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4618
4619 if (!zero)
4620 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4621 }
4622 else
4623 {
4624 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4625 {
4626 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
4627 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4628 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4629 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4630 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4631 }
4632 else
4633 {
4634 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4635 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
4636 if (opcode == OP_SBRAPOS)
4637 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4638 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
4639 }
4640
4641 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4642 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4643
4644 if (!zero)
4645 {
/* NOTE(review): this is the else branch of the outer "framesize < 0" test,
so only the STACK(0) store below can be reached here. */
4646 if (framesize < 0)
4647 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4648 else
4649 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4650 }
4651 }
4652 JUMPTO(SLJIT_JUMP, loop);
4653 flush_stubs(common);
4654
/* The fallback code of this alternative is emitted right here; its
top-level fallbacks continue at the label that follows it. */
4655 compile_fallbackpath(common, fallback->top);
4656 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4657 return NULL;
4658 set_jumps(fallback->topfallbacks, LABEL());
4659
/* Reload STR_PTR from the saved position before the next alternative is
tried (or before leaving the loop after the last one). */
4660 if (framesize < 0)
4661 {
4662 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4663 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4664 else
4665 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4666 }
4667 else
4668 {
4669 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4670 {
4671 /* Last alternative. */
4672 if (*cc == OP_KETRPOS)
4673 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4674 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4675 }
4676 else
4677 {
4678 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4679 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4680 }
4681 }
4682
4683 if (*cc == OP_KETRPOS)
4684 break;
4685 ccbegin = cc + 1 + LINK_SIZE;
4686 }
4687
/* When the group is not preceded by OP_BRAPOSZERO, a flag slot that is
still non-zero (no alternative cleared it) sends control to the group's
top-level fallbacks. */
4688 fallback->topfallbacks = NULL;
4689 if (!zero)
4690 {
4691 if (framesize < 0)
4692 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
4693 else /* TMP2 is set to [localptr] above. */
4694 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
4695 }
4696
4697 /* None of them matched. */
4698 set_jumps(emptymatch, LABEL());
4699 decrease_call_count(common);
4700 return cc + 1 + LINK_SIZE;
4701 }
4702
4703 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
4704 {
4705 int class_len;
4706
4707 *opcode = *cc;
4708 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
4709 {
4710 cc++;
4711 *type = OP_CHAR;
4712 }
4713 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
4714 {
4715 cc++;
4716 *type = OP_CHARI;
4717 *opcode -= OP_STARI - OP_STAR;
4718 }
4719 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
4720 {
4721 cc++;
4722 *type = OP_NOT;
4723 *opcode -= OP_NOTSTAR - OP_STAR;
4724 }
4725 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
4726 {
4727 cc++;
4728 *type = OP_NOTI;
4729 *opcode -= OP_NOTSTARI - OP_STAR;
4730 }
4731 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
4732 {
4733 cc++;
4734 *opcode -= OP_TYPESTAR - OP_STAR;
4735 *type = 0;
4736 }
4737 else
4738 {
4739 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
4740 *type = *opcode;
4741 cc++;
4742 class_len = (*type < OP_XCLASS) ? (1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
4743 *opcode = cc[class_len - 1];
4744 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
4745 {
4746 *opcode -= OP_CRSTAR - OP_STAR;
4747 if (end != NULL)
4748 *end = cc + class_len;
4749 }
4750 else
4751 {
4752 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
4753 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
4754 *arg2 = GET2(cc, class_len);
4755
4756 if (*arg2 == 0)
4757 {
4758 SLJIT_ASSERT(*arg1 != 0);
4759 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
4760 }
4761 if (*arg1 == *arg2)
4762 *opcode = OP_EXACT;
4763
4764 if (end != NULL)
4765 *end = cc + class_len + 2 * IMM2_SIZE;
4766 }
4767 return cc;
4768 }
4769
4770 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
4771 {
4772 *arg1 = GET2(cc, 0);
4773 cc += IMM2_SIZE;
4774 }
4775
4776 if (*type == 0)
4777 {
4778 *type = *cc;
4779 if (end != NULL)
4780 *end = next_opcode(common, cc);
4781 cc++;
4782 return cc;
4783 }
4784
4785 if (end != NULL)
4786 {
4787 *end = cc + 1;
4788 #ifdef SUPPORT_UTF8
4789 if (common->utf8 && *cc >= 0xc0) *end += PRIV(utf8_table4)[*cc & 0x3f];
4790 #endif
4791 }
4792 return cc;
4793 }
4794
4795 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4796 {
4797 DEFINE_COMPILER;
4798 fallback_common *fallback;
4799 pcre_uchar opcode;
4800 pcre_uchar type;
4801 int arg1 = -1, arg2 = -1;
4802 pcre_uchar* end;
4803 jump_list *nomatch = NULL;
4804 struct sljit_jump *jump = NULL;
4805 struct sljit_label *label;
4806
4807 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
4808
4809 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
4810
4811 switch(opcode)
4812 {
4813 case OP_STAR:
4814 case OP_PLUS:
4815 case OP_UPTO:
4816 case OP_CRRANGE:
4817 if (type == OP_ANYNL || type == OP_EXTUNI)
4818 {
4819 if (opcode == OP_STAR || opcode == OP_UPTO)
4820 {
4821 allocate_stack(common, 2);
4822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4824 }
4825 else
4826 {
4827 allocate_stack(common, 1);
4828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4829 }
4830 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4832
4833 label = LABEL();
4834 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4835 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4836 {
4837 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4838 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4839 if (opcode == OP_CRRANGE && arg2 > 0)
4840 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
4841 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
4842 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
4843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4844 }
4845
4846 allocate_stack(common, 1);
4847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4848 JUMPTO(SLJIT_JUMP, label);
4849 if (jump != NULL)
4850 JUMPHERE(jump);
4851 }
4852 else
4853 {
4854 allocate_stack(common, 2);
4855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4856 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4857 label = LABEL();
4858 compile_char1_hotpath(common, type, cc, &nomatch);
4859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4860 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
4861 {
4862 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4863 JUMPTO(SLJIT_JUMP, label);
4864 }
4865 else
4866 {
4867 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4868 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4869 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4870 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4871 }
4872 set_jumps(nomatch, LABEL());
4873 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
4874 add_jump(compiler, &fallback->topfallbacks,
4875 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
4876 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4877 }
4878 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4879 break;
4880
4881 case OP_MINSTAR:
4882 case OP_MINPLUS:
4883 allocate_stack(common, 1);
4884 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4885 if (opcode == OP_MINPLUS)
4886 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4887 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4888 break;
4889
4890 case OP_MINUPTO:
4891 case OP_CRMINRANGE:
4892 allocate_stack(common, 2);
4893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4895 if (opcode == OP_CRMINRANGE)
4896 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4897 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4898 break;
4899
4900 case OP_QUERY:
4901 case OP_MINQUERY:
4902 allocate_stack(common, 1);
4903 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4904 if (opcode == OP_QUERY)
4905 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4906 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4907 break;
4908
4909 case OP_EXACT:
4910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4911 label = LABEL();
4912 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4913 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4914 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4916 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4917 break;
4918
4919 case OP_POSSTAR:
4920 case OP_POSPLUS:
4921 case OP_POSUPTO:
4922 if (opcode != OP_POSSTAR)
4923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4925 label = LABEL();
4926 compile_char1_hotpath(common, type, cc, &nomatch);
4927 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4928 if (opcode != OP_POSUPTO)
4929 {
4930 if (opcode == OP_POSPLUS)
4931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
4932 JUMPTO(SLJIT_JUMP, label);
4933 }
4934 else
4935 {
4936 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4937 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4938 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4939 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4940 }
4941 set_jumps(nomatch, LABEL());
4942 if (opcode == OP_POSPLUS)
4943 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
4944 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4945 break;
4946
4947 case OP_POSQUERY:
4948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4949 compile_char1_hotpath(common, type, cc, &nomatch);
4950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4951 set_jumps(nomatch, LABEL());
4952 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4953 break;
4954
4955 default:
4956 SLJIT_ASSERT_STOP();
4957 break;
4958 }
4959
4960 decrease_call_count(common);
4961 return end;
4962 }
4963
4964 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4965 {
4966 DEFINE_COMPILER;
4967 fallback_common *fallback;
4968
4969 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4970
4971 if (*cc == OP_FAIL)
4972 {
4973 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4974 return cc + 1;
4975 }
4976
4977 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
4978 {
4979 /* No need to check notempty conditions. */
4980 if (common->acceptlabel == NULL)
4981 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
4982 else
4983 JUMPTO(SLJIT_JUMP, common->acceptlabel);
4984 return cc + 1;
4985 }
4986
4987 if (common->acceptlabel == NULL)
4988 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
4989 else
4990 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
4991 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4992 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
4993 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4994 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
4995 if (common->acceptlabel == NULL)
4996 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4997 else
4998 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
4999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5000 if (common->acceptlabel == NULL)
5001 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5002 else
5003 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5004 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5005 return cc + 1;
5006 }
5007
5008 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5009 {
5010 DEFINE_COMPILER;
5011 int offset = GET2(cc, 1);
5012
5013 /* Data will be discarded anyway... */
5014 if (common->currententry != NULL)
5015 return cc + 1 + IMM2_SIZE;
5016
5017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5018 offset <<= 1;
5019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5021 return cc + 1 + IMM2_SIZE;
5022 }
5023
5024 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5025 {
5026 DEFINE_COMPILER;
5027 fallback_common *fallback;
5028
5029 while (cc < ccend)
5030 {
5031 switch(*cc)
5032 {
5033 case OP_SOD:
5034 case OP_SOM:
5035 case OP_NOT_WORD_BOUNDARY:
5036 case OP_WORD_BOUNDARY:
5037 case OP_NOT_DIGIT:
5038 case OP_DIGIT:
5039 case OP_NOT_WHITESPACE:
5040 case OP_WHITESPACE:
5041 case OP_NOT_WORDCHAR:
5042 case OP_WORDCHAR:
5043 case OP_ANY:
5044 case OP_ALLANY:
5045 case OP_ANYBYTE:
5046 case OP_NOTPROP:
5047 case OP_PROP:
5048 case OP_ANYNL:
5049 case OP_NOT_HSPACE:
5050 case OP_HSPACE:
5051 case OP_NOT_VSPACE:
5052 case OP_VSPACE:
5053 case OP_EXTUNI:
5054 case OP_EODN:
5055 case OP_EOD:
5056 case OP_CIRC:
5057 case OP_CIRCM:
5058 case OP_DOLL:
5059 case OP_DOLLM:
5060 case OP_NOT:
5061 case OP_NOTI:
5062 case OP_REVERSE:
5063 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5064 break;
5065
5066 case OP_SET_SOM:
5067 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5068 allocate_stack(common, 1);
5069 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5071 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5072 cc++;
5073 break;
5074
5075 case OP_CHAR:
5076 case OP_CHARI:
5077 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5078 break;
5079
5080 case OP_STAR:
5081 case OP_MINSTAR:
5082 case OP_PLUS:
5083 case OP_MINPLUS:
5084 case OP_QUERY:
5085 case OP_MINQUERY:
5086 case OP_UPTO:
5087 case OP_MINUPTO:
5088 case OP_EXACT:
5089 case OP_POSSTAR:
5090 case OP_POSPLUS:
5091 case OP_POSQUERY:
5092 case OP_POSUPTO:
5093 case OP_STARI:
5094 case OP_MINSTARI:
5095 case OP_PLUSI:
5096 case OP_MINPLUSI:
5097 case OP_QUERYI:
5098 case OP_MINQUERYI:
5099 case OP_UPTOI:
5100 case OP_MINUPTOI:
5101 case OP_EXACTI:
5102 case OP_POSSTARI:
5103 case OP_POSPLUSI:
5104 case OP_POSQUERYI:
5105 case OP_POSUPTOI:
5106 case OP_NOTSTAR:
5107 case OP_NOTMINSTAR:
5108 case OP_NOTPLUS:
5109 case OP_NOTMINPLUS:
5110 case OP_NOTQUERY:
5111 case OP_NOTMINQUERY:
5112 case OP_NOTUPTO:
5113 case OP_NOTMINUPTO:
5114 case OP_NOTEXACT:
5115 case OP_NOTPOSSTAR:
5116 case OP_NOTPOSPLUS:
5117 case OP_NOTPOSQUERY:
5118 case OP_NOTPOSUPTO:
5119 case OP_NOTSTARI:
5120 case OP_NOTMINSTARI:
5121 case OP_NOTPLUSI:
5122 case OP_NOTMINPLUSI:
5123 case OP_NOTQUERYI:
5124 case OP_NOTMINQUERYI:
5125 case OP_NOTUPTOI:
5126 case OP_NOTMINUPTOI:
5127 case OP_NOTEXACTI:
5128 case OP_NOTPOSSTARI:
5129 case OP_NOTPOSPLUSI:
5130 case OP_NOTPOSQUERYI:
5131 case OP_NOTPOSUPTOI:
5132 case OP_TYPESTAR:
5133 case OP_TYPEMINSTAR:
5134 case OP_TYPEPLUS:
5135 case OP_TYPEMINPLUS:
5136 case OP_TYPEQUERY:
5137 case OP_TYPEMINQUERY:
5138 case OP_TYPEUPTO:
5139 case OP_TYPEMINUPTO:
5140 case OP_TYPEEXACT:
5141 case OP_TYPEPOSSTAR:
5142 case OP_TYPEPOSPLUS:
5143 case OP_TYPEPOSQUERY:
5144 case OP_TYPEPOSUPTO:
5145 cc = compile_iterator_hotpath(common, cc, parent);
5146 break;
5147
5148 case OP_CLASS:
5149 case OP_NCLASS:
5150 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5151 cc = compile_iterator_hotpath(common, cc, parent);
5152 else
5153 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5154 break;
5155
5156 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5157 case OP_XCLASS:
5158 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5159 cc = compile_iterator_hotpath(common, cc, parent);
5160 else
5161 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5162 break;
5163 #endif
5164
5165 case OP_REF:
5166 case OP_REFI:
5167 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5168 cc = compile_ref_iterator_hotpath(common, cc, parent);
5169 else
5170 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5171 break;
5172
5173 case OP_RECURSE:
5174 cc = compile_recurse_hotpath(common, cc, parent);
5175 break;
5176
5177 case OP_ASSERT:
5178 case OP_ASSERT_NOT:
5179 case OP_ASSERTBACK:
5180 case OP_ASSERTBACK_NOT:
5181 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5182 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5183 break;
5184
5185 case OP_BRAMINZERO:
5186 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5187 cc = bracketend(cc + 1);
5188 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5189 {
5190 allocate_stack(common, 1);
5191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5192 }
5193 else
5194 {
5195 allocate_stack(common, 2);
5196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5198 }
5199 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5200 if (cc[1] > OP_ASSERTBACK_NOT)
5201 decrease_call_count(common);
5202 break;
5203
5204 case OP_ONCE:
5205 case OP_ONCE_NC:
5206 case OP_BRA:
5207 case OP_CBRA:
5208 case OP_COND:
5209 case OP_SBRA:
5210 case OP_SCBRA:
5211 case OP_SCOND:
5212 cc = compile_bracket_hotpath(common, cc, parent);
5213 break;
5214
5215 case OP_BRAZERO:
5216 if (cc[1] > OP_ASSERTBACK_NOT)
5217 cc = compile_bracket_hotpath(common, cc, parent);
5218 else
5219 {
5220 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5221 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5222 }
5223 break;
5224
5225 case OP_BRAPOS:
5226 case OP_CBRAPOS:
5227 case OP_SBRAPOS:
5228 case OP_SCBRAPOS:
5229 case OP_BRAPOSZERO:
5230 cc = compile_bracketpos_hotpath(common, cc, parent);
5231 break;
5232
5233 case OP_FAIL:
5234 case OP_ACCEPT:
5235 case OP_ASSERT_ACCEPT:
5236 cc = compile_fail_accept_hotpath(common, cc, parent);
5237 break;
5238
5239 case OP_CLOSE:
5240 cc = compile_close_hotpath(common, cc);
5241 break;
5242
5243 case OP_SKIPZERO:
5244 cc = bracketend(cc + 1);
5245 break;
5246
5247 default:
5248 SLJIT_ASSERT_STOP();
5249 return;
5250 }
5251 if (cc == NULL)
5252 return;
5253 }
5254 SLJIT_ASSERT(cc == ccend);
5255 }
5256
/* The hot path helper macros are dropped here; the fallback path generators
   that follow use COMPILE_FALLBACKPATH and CURRENT_AS instead. */
5257 #undef PUSH_FALLBACK
5258 #undef PUSH_FALLBACK_NOVALUE
5259 #undef FALLBACK_AS
5260
/* Calls compile_fallbackpath(common, current) and returns from the enclosing
   function (without a value) when the sljit compiler reports an error.
   Expects the identifiers "common" and "compiler" to be in scope. */
5261 #define COMPILE_FALLBACKPATH(current) \
5262 do \
5263 { \
5264 compile_fallbackpath(common, (current)); \
5265 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5266 return; \
5267 } \
5268 while (0)
5269
/* Casts the variable named "current" of the enclosing scope to a pointer to
   the given type. Note that "current" is not a macro parameter. */
5270 #define CURRENT_AS(type) ((type*)current)
5271
/* Generates the fallback path of a single item iterator.

   The opcode, the item type and the repeat arguments are decoded again with
   get_iterator_parameters() from current->cc. Depending on the opcode the
   generated code either gives back / consumes one more item and jumps to the
   label stored in the iterator_fallback "hotpath" field, or releases the
   stack slots used by the iterator and falls through. */
5272 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5273 {
5274 DEFINE_COMPILER;
5275 pcre_uchar *cc = current->cc;
5276 pcre_uchar opcode;
5277 pcre_uchar type;
5278 int arg1 = -1, arg2 = -1;
5279 struct sljit_label *label = NULL;
5280 struct sljit_jump *jump = NULL;
5281
5282 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5283
5284 switch(opcode)
5285 {
5286 case OP_STAR:
5287 case OP_PLUS:
5288 case OP_UPTO:
5289 case OP_CRRANGE:
5290 if (type == OP_ANYNL || type == OP_EXTUNI)
5291 {
/* One stack slot is popped per fallback: a non-zero value is loaded into
   STR_PTR and the hot path is resumed; a zero value falls through. */
5292 set_jumps(current->topfallbacks, LABEL());
5293 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5294 free_stack(common, 1);
5295 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5296 }
5297 else
5298 {
/* arg2 is forced to 0 for OP_STAR / OP_UPTO and to 1 for OP_PLUS; for
   OP_CRRANGE the value returned by get_iterator_parameters() is kept. */
5299 if (opcode == OP_STAR || opcode == OP_UPTO)
5300 arg2 = 0;
5301 else if (opcode == OP_PLUS)
5302 arg2 = 1;
/* STACK(1) is a counter and STACK(0) the saved STR_PTR. When the counter is
   not above arg2 + 1 nothing more can be given back: skip to the release. */
5303 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
/* Otherwise decrement the counter, step STR_PTR back by one character,
   store it and continue on the hot path. */
5304 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5305 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5306 skip_char_back(common);
5307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5308 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
/* Only OP_PLUS and OP_CRRANGE bind the topfallbacks list here. */
5309 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5310 set_jumps(current->topfallbacks, LABEL());
5311 JUMPHERE(jump);
5312 free_stack(common, 2);
5313 }
5314 break;
5315
5316 case OP_MINSTAR:
5317 case OP_MINPLUS:
/* Reload the saved STR_PTR, try to match one more item and continue on the
   hot path; if the item does not match, release the slot. */
5318 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5319 if (opcode == OP_MINPLUS)
5320 {
/* The jumps collected so far land here; the list is then reused for the
   failures of the item matcher emitted below. */
5321 set_jumps(current->topfallbacks, LABEL());
5322 current->topfallbacks = NULL;
5323 }
5324 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5326 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5327 set_jumps(current->topfallbacks, LABEL());
5328 free_stack(common, 1);
5329 break;
5330
5331 case OP_MINUPTO:
5332 case OP_CRMINRANGE:
5333 if (opcode == OP_CRMINRANGE)
5334 {
/* Same list reuse as for OP_MINPLUS; "label" marks the start of the loop
   that is repeated while the counter is below arg2 + 1. */
5335 set_jumps(current->topfallbacks, LABEL());
5336 current->topfallbacks = NULL;
5337 label = LABEL();
5338 }
5339 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5340 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5341
/* Item matched: store the new STR_PTR in STACK(0) and the incremented
   counter in STACK(1). */
5342 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5344 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5346
5347 if (opcode == OP_CRMINRANGE)
5348 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5349
/* For OP_CRMINRANGE with arg1 == 0 the hot path is always resumed; otherwise
   it is resumed only while the counter is below arg1 + 2. */
5350 if (opcode == OP_CRMINRANGE && arg1 == 0)
5351 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5352 else
5353 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5354
5355 set_jumps(current->topfallbacks, LABEL());
5356 free_stack(common, 2);
5357 break;
5358
5359 case OP_QUERY:
/* STACK(0) holds the saved STR_PTR; it is replaced by zero, so a second
   fallback through this code finds zero and releases the slot. */
5360 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5361 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5362 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5363 jump = JUMP(SLJIT_JUMP);
/* Entry for the jumps on topfallbacks: restore STR_PTR, clear the slot and
   continue on the hot path. */
5364 set_jumps(current->topfallbacks, LABEL());
5365 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5367 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5368 JUMPHERE(jump);
5369 free_stack(common, 1);
5370 break;
5371
5372 case OP_MINQUERY:
/* A non-zero saved STR_PTR means the item has not been tried yet: clear the
   slot, try to match one item and continue on the hot path. A zero value,
   or a failed match, releases the slot. */
5373 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5374 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5375 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5376 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5377 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5378 set_jumps(current->topfallbacks, LABEL());
5379 JUMPHERE(jump);
5380 free_stack(common, 1);
5381 break;
5382
/* These two only need a landing point for their topfallbacks jumps. */
5383 case OP_EXACT:
5384 case OP_POSPLUS:
5385 set_jumps(current->topfallbacks, LABEL());
5386 break;
5387
/* No fallback code is generated for these opcodes. */
5388 case OP_POSSTAR:
5389 case OP_POSQUERY:
5390 case OP_POSUPTO:
5391 break;
5392
5393 default:
5394 SLJIT_ASSERT_STOP();
5395 break;
5396 }
5397 }
5398
5399 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5400 {
5401 DEFINE_COMPILER;
5402 pcre_uchar *cc = current->cc;
5403 pcre_uchar type;
5404
5405 type = cc[1 + IMM2_SIZE];
5406 if ((type & 0x1) == 0)
5407 {
5408 set_jumps(current->topfallbacks, LABEL());
5409 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5410 free_stack(common, 1);
5411 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5412 return;
5413 }
5414
5415 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5416 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5417 set_jumps(current->topfallbacks, LABEL());
5418 free_stack(common, 2);
5419 }
5420
/* Generates the fallback path of a recursion: the pending topfallbacks jumps
   land here, one stack slot is popped and the popped value is written back
   to OVECTOR(0). */
5421 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5422 {
5423 DEFINE_COMPILER;
5424
5425 set_jumps(current->topfallbacks, LABEL());
/* The value is read into TMP2 before the slot is released. */
5426 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5427 free_stack(common, 1);
5428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5429 }
5430
5431 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5432 {
5433 DEFINE_COMPILER;
5434 pcre_uchar *cc = current->cc;
5435 pcre_uchar bra = OP_BRA;
5436 struct sljit_jump *brajump = NULL;
5437
5438 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5439 if (*cc == OP_BRAZERO)
5440 {
5441 bra = *cc;
5442 cc++;
5443 }
5444
5445 if (bra == OP_BRAZERO)
5446 {
5447 SLJIT_ASSERT(current->topfallbacks == NULL);
5448 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5449 }
5450
5451 if (CURRENT_AS(assert_fallback)->framesize < 0)
5452 {
5453 set_jumps(current->topfallbacks, LABEL());
5454
5455 if (bra == OP_BRAZERO)
5456 {
5457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5458 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5459 free_stack(common, 1);
5460 }
5461 return;
5462 }
5463
5464 if (bra == OP_BRAZERO)
5465 {
5466 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5467 {
5468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5469 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5470 free_stack(common, 1);
5471 return;
5472 }
5473 free_stack(common, 1);
5474 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5475 }
5476
5477 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5478 {
5479 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5480 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5482
5483 set_jumps(current->topfallbacks, LABEL());
5484 }
5485 else
5486 set_jumps(current->topfallbacks, LABEL());
5487
5488 if (bra == OP_BRAZERO)
5489 {
5490 /* We know there is enough place on the stack. */
5491 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5493 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5494 JUMPHERE(brajump);
5495 }
5496 }
5497
5498 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5499 {
5500 DEFINE_COMPILER;
5501 int opcode;
5502 int offset = 0;
5503 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5504 int stacksize;
5505 int count;
5506 pcre_uchar *cc = current->cc;
5507 pcre_uchar *ccbegin;
5508 pcre_uchar *ccprev;
5509 jump_list *jumplist = NULL;
5510 jump_list *jumplistitem = NULL;
5511 pcre_uchar bra = OP_BRA;
5512 pcre_uchar ket;
5513 assert_fallback *assert;
5514 BOOL has_alternatives;
5515 struct sljit_jump *brazero = NULL;
5516 struct sljit_jump *once = NULL;
5517 struct sljit_jump *cond = NULL;
5518 struct sljit_label *rminlabel = NULL;
5519
5520 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5521 {
5522 bra = *cc;
5523 cc++;
5524 }
5525
5526 opcode = *cc;
5527 ccbegin = cc;
5528 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5529 cc += GET(cc, 1);
5530 has_alternatives = *cc == OP_ALT;
5531 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5532 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
5533 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5534 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
5535 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5536 opcode = OP_SCOND;
5537 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5538 opcode = OP_ONCE;
5539
5540 if (ket == OP_KETRMAX)
5541 {
5542 if (bra != OP_BRAZERO)
5543 free_stack(common, 1);
5544 else
5545 {
5546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5547 free_stack(common, 1);
5548 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5549 }
5550 }
5551 else if (ket == OP_KETRMIN)
5552 {
5553 if (bra != OP_BRAMINZERO)
5554 {
5555 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5556 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5557 {
5558 /* Checking zero-length iteration. */
5559 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5560 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5561 else
5562 {
5563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5564 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5565 }
5566 if (opcode != OP_ONCE)
5567 free_stack(common, 1);
5568 }
5569 else
5570 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5571 }
5572 rminlabel = LABEL();
5573 }
5574 else if (bra == OP_BRAZERO)
5575 {
5576 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5577 free_stack(common, 1);
5578 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5579 }
5580
5581 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5582 {
5583 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5584 {
5585 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5586 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5587 }
5588 once = JUMP(SLJIT_JUMP);
5589 }
5590 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5591 {
5592 if (has_alternatives)
5593 {
5594 /* Always exactly one alternative. */
5595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5596 free_stack(common, 1);
5597
5598 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5599 if (SLJIT_UNLIKELY(!jumplistitem))
5600 return;
5601 jumplist = jumplistitem;
5602 jumplistitem->next = NULL;
5603 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5604 }
5605 }
5606 else if (*cc == OP_ALT)
5607 {
5608 /* Build a jump list. Get the last successfully matched branch index. */
5609 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5610 free_stack(common, 1);
5611 count = 1;
5612 do
5613 {
5614 /* Append as the last item. */
5615 if (jumplist != NULL)
5616 {
5617 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
5618 jumplistitem = jumplistitem->next;
5619 }
5620 else
5621 {
5622 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5623 jumplist = jumplistitem;
5624 }
5625
5626 if (SLJIT_UNLIKELY(!jumplistitem))
5627 return;
5628
5629 jumplistitem->next = NULL;
5630 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
5631 cc += GET(cc, 1);
5632 }
5633 while (*cc == OP_ALT);
5634
5635 cc = ccbegin + GET(ccbegin, 1);
5636 }
5637
5638 COMPILE_FALLBACKPATH(current->top);
5639 if (current->topfallbacks)
5640 set_jumps(current->topfallbacks, LABEL());
5641
5642 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5643 {
5644 /* Conditional block always has at most one alternative. */
5645 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
5646 {
5647 SLJIT_ASSERT(has_alternatives);
5648 assert = CURRENT_AS(bracket_fallback)->u.assert;
5649 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
5650 {
5651 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5652 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5653 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5654 }
5655 cond = JUMP(SLJIT_JUMP);
5656 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
5657 }
5658 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
5659 {
5660 SLJIT_ASSERT(has_alternatives);
5661 cond = JUMP(SLJIT_JUMP);
5662 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
5663 }
5664 else
5665 SLJIT_ASSERT(!has_alternatives);
5666 }
5667
5668 if (has_alternatives)
5669 {
5670 count = 1;
5671 do
5672 {
5673 current->top = NULL;
5674 current->topfallbacks = NULL;
5675 current->nextfallbacks = NULL;
5676 if (*cc == OP_ALT)
5677 {
5678 ccprev = cc + 1 + LINK_SIZE;
5679 cc += GET(cc, 1);
5680 if (opcode != OP_COND && opcode != OP_SCOND)
5681 {
5682 if (localptr != 0 && opcode != OP_ONCE)
5683 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5684 else
5685 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5686 }
5687 compile_hotpath(common, ccprev, cc, current);
5688 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5689 return;
5690 }
5691
5692 /* Instructions after the current alternative is succesfully matched. */
5693 /* There is a similar code in compile_bracket_hotpath. */
5694 if (opcode == OP_ONCE)
5695 {
5696 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
5697 {
5698 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5699 /* TMP2 which is set here used by OP_KETRMAX below. */
5700 if (ket == OP_KETRMAX)
5701 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5702 else if (ket == OP_KETRMIN)
5703 {
5704 /* Move the STR_PTR to the localptr. */
5705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5706 }
5707 }
5708 else
5709 {
5710 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
5711 if (ket == OP_KETRMAX)
5712 {
5713 /* TMP2 which is set here used by OP_KETRMAX below. */
5714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5715 }
5716 }
5717 }
5718
5719 stacksize = 0;
5720 if (opcode != OP_ONCE)
5721 stacksize++;
5722 if (ket != OP_KET || bra != OP_BRA)
5723 stacksize++;
5724
5725 if (stacksize > 0) {
5726 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5727 allocate_stack(common, stacksize);
5728 else
5729 {
5730 /* We know we have place at least for one item on the top of the stack. */
5731 SLJIT_ASSERT(stacksize == 1);
5732 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5733 }
5734 }
5735
5736 stacksize = 0;
5737 if (ket != OP_KET || bra != OP_BRA)
5738 {
5739 if (ket != OP_KET)
5740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5741 else
5742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5743 stacksize++;
5744 }
5745
5746 if (opcode != OP_ONCE)
5747 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
5748
5749 if (offset != 0)
5750 {
5751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5752 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5754 }
5755
5756 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
5757
5758 if (opcode != OP_ONCE)
5759 {
5760 SLJIT_ASSERT(jumplist);
5761 JUMPHERE(jumplist->jump);
5762 jumplist = jumplist->next;
5763 }
5764
5765 COMPILE_FALLBACKPATH(current->top);
5766 if (current->topfallbacks)
5767 set_jumps(current->topfallbacks, LABEL());
5768 SLJIT_ASSERT(!current->nextfallbacks);
5769 }
5770 while (*cc == OP_ALT);
5771 SLJIT_ASSERT(!jumplist);
5772
5773 if (cond != NULL)
5774 {
5775 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
5776 assert = CURRENT_AS(bracket_fallback)->u.assert;
5777 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT))
5778 {
5779 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5780 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5782 }
5783 JUMPHERE(cond);
5784 }
5785
5786 /* Free the STR_PTR. */
5787 if (localptr == 0)
5788 free_stack(common, 1);
5789 }
5790
5791 if (offset != 0)
5792 {
5793 /* Using both tmp register is better for instruction scheduling. */
5794 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5795 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
5799 free_stack(common, 3);
5800 }
5801 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5802 {
5803 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
5804 free_stack(common, 1);
5805 }
5806 else if (opcode == OP_ONCE)
5807 {
5808 cc = ccbegin + GET(ccbegin, 1);
5809 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5810 {
5811 /* Reset head and drop saved frame. */
5812 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
5813 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
5814 }
5815 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
5816 {
5817 /* The STR_PTR must be released. */
5818 free_stack(common, 1);
5819 }
5820
5821 JUMPHERE(once);
5822 /* Restore previous localptr */
5823 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
5825 else if (ket == OP_KETRMIN)
5826 {
5827 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5828 /* See the comment below. */
5829 free_stack(common, 2);
5830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5831 }
5832 }
5833
5834 if (ket == OP_KETRMAX)
5835 {
5836 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5837 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
5838 if (bra == OP_BRAZERO)
5839 {
5840 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5841 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5842 JUMPHERE(brazero);
5843 }
5844 free_stack(common, 1);
5845 }
5846 else if (ket == OP_KETRMIN)
5847 {
5848 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5849
5850 /* OP_ONCE removes everything in case of a fallback, so we don't
5851 need to explicitly release the STR_PTR. The extra release would
5852 affect badly the free_stack(2) above. */
5853 if (opcode != OP_ONCE)
5854 free_stack(common, 1);
5855 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
5856 if (opcode == OP_ONCE)
5857 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
5858 else if (bra == OP_BRAMINZERO)
5859 free_stack(common, 1);
5860 }
5861 else if (bra == OP_BRAZERO)
5862 {
5863 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5864 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5865 JUMPHERE(brazero);
5866 }
5867 }
5868
/* Emits the fallback path for a possessive bracket. compile_fallbackpath()
dispatches here for OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and
OP_BRAPOSZERO.

Arguments:
  common     the shared compiler state
  current    the fallback record of the bracket; it is viewed as a
             bracketpos_fallback through CURRENT_AS()

Two layouts are handled, selected by the framesize field of the record:
a negative framesize takes the short path that ends in an early return,
anything else goes through the common revertframes routine. */
5869 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
5870 {
5871 DEFINE_COMPILER;
5872 int offset;
5873 struct sljit_jump *jump;
5874
5875 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
5876 {
5877 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
5878 {
/* Capturing variants: copy the two words found at STACK(0) and STACK(1)
into the ovector pair that belongs to this bracket (presumably the values
saved by the hot path - confirm against compile_bracketpos_hotpath). */
5879 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
5880 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5881 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5884 }
/* The pending top fallbacks land after the ovector copy, directly on the
stack release. */
5885 set_jumps(current->topfallbacks, LABEL());
5886 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5887 return;
5888 }
5889
/* Reload STACK_TOP from the local slot of this bracket and call the shared
revertframes routine (jit_compile() binds that list to do_revertframes). */
5890 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
5891 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5892
5893 if (current->topfallbacks)
5894 {
/* The fall-through path jumps over the stack release; only the code
arriving through topfallbacks executes it. */
5895 jump = JUMP(SLJIT_JUMP);
5896 set_jumps(current->topfallbacks, LABEL());
5897 /* Drop the stack frame. */
5898 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5899 JUMPHERE(jump);
5900 }
/* Store the word found framesize slots above STACK_TOP back into the local
slot of the bracket. */
5901 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
5902 }
5903
5904 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
5905 {
5906 assert_fallback fallback;
5907
5908 current->top = NULL;
5909 current->topfallbacks = NULL;
5910 current->nextfallbacks = NULL;
5911 if (current->cc[1] > OP_ASSERTBACK_NOT)
5912 {
5913 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
5914 compile_bracket_hotpath(common, current->cc, current);
5915 compile_bracket_fallbackpath(common, current->top);
5916 }
5917 else
5918 {
5919 memset(&fallback, 0, sizeof(fallback));
5920 fallback.common.cc = current->cc;
5921 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
5922 /* Manual call of compile_assert_hotpath. */
5923 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
5924 }
5925 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
5926 }
5927
5928 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
5929 {
5930 DEFINE_COMPILER;
5931
5932 while (current)
5933 {
5934 if (current->nextfallbacks != NULL)
5935 set_jumps(current->nextfallbacks, LABEL());
5936 switch(*current->cc)
5937 {
5938 case OP_SET_SOM:
5939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5940 free_stack(common, 1);
5941 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
5942 break;
5943
5944 case OP_STAR:
5945 case OP_MINSTAR:
5946 case OP_PLUS:
5947 case OP_MINPLUS:
5948 case OP_QUERY:
5949 case OP_MINQUERY:
5950 case OP_UPTO:
5951 case OP_MINUPTO:
5952 case OP_EXACT:
5953 case OP_POSSTAR:
5954 case OP_POSPLUS:
5955 case OP_POSQUERY:
5956 case OP_POSUPTO:
5957 case OP_STARI:
5958 case OP_MINSTARI:
5959 case OP_PLUSI:
5960 case OP_MINPLUSI:
5961 case OP_QUERYI:
5962 case OP_MINQUERYI:
5963 case OP_UPTOI:
5964 case OP_MINUPTOI:
5965 case OP_EXACTI:
5966 case OP_POSSTARI:
5967 case OP_POSPLUSI:
5968 case OP_POSQUERYI:
5969 case OP_POSUPTOI:
5970 case OP_NOTSTAR:
5971 case OP_NOTMINSTAR:
5972 case OP_NOTPLUS:
5973 case OP_NOTMINPLUS:
5974 case OP_NOTQUERY:
5975 case OP_NOTMINQUERY:
5976 case OP_NOTUPTO:
5977 case OP_NOTMINUPTO:
5978 case OP_NOTEXACT:
5979 case OP_NOTPOSSTAR:
5980 case OP_NOTPOSPLUS:
5981 case OP_NOTPOSQUERY:
5982 case OP_NOTPOSUPTO:
5983 case OP_NOTSTARI:
5984 case OP_NOTMINSTARI:
5985 case OP_NOTPLUSI:
5986 case OP_NOTMINPLUSI:
5987 case OP_NOTQUERYI:
5988 case OP_NOTMINQUERYI:
5989 case OP_NOTUPTOI:
5990 case OP_NOTMINUPTOI:
5991 case OP_NOTEXACTI:
5992 case OP_NOTPOSSTARI:
5993 case OP_NOTPOSPLUSI:
5994 case OP_NOTPOSQUERYI:
5995 case OP_NOTPOSUPTOI:
5996 case OP_TYPESTAR:
5997 case OP_TYPEMINSTAR:
5998 case OP_TYPEPLUS:
5999 case OP_TYPEMINPLUS:
6000 case OP_TYPEQUERY:
6001 case OP_TYPEMINQUERY:
6002 case OP_TYPEUPTO:
6003 case OP_TYPEMINUPTO:
6004 case OP_TYPEEXACT:
6005 case OP_TYPEPOSSTAR:
6006 case OP_TYPEPOSPLUS:
6007 case OP_TYPEPOSQUERY:
6008 case OP_TYPEPOSUPTO:
6009 case OP_CLASS:
6010 case OP_NCLASS:
6011 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6012 case OP_XCLASS:
6013 #endif
6014 compile_iterator_fallbackpath(common, current);
6015 break;
6016
6017 case OP_REF:
6018 case OP_REFI:
6019 compile_ref_iterator_fallbackpath(common, current);
6020 break;
6021
6022 case OP_RECURSE:
6023 compile_recurse_fallbackpath(common, current);
6024 break;
6025
6026 case OP_ASSERT:
6027 case OP_ASSERT_NOT:
6028 case OP_ASSERTBACK:
6029 case OP_ASSERTBACK_NOT:
6030 compile_assert_fallbackpath(common, current);
6031 break;
6032
6033 case OP_ONCE:
6034 case OP_ONCE_NC:
6035 case OP_BRA:
6036 case OP_CBRA:
6037 case OP_COND:
6038 case OP_SBRA:
6039 case OP_SCBRA:
6040 case OP_SCOND:
6041 compile_bracket_fallbackpath(common, current);
6042 break;
6043
6044 case OP_BRAZERO:
6045 if (current->cc[1] > OP_ASSERTBACK_NOT)
6046 compile_bracket_fallbackpath(common, current);
6047 else
6048 compile_assert_fallbackpath(common, current);
6049 break;
6050
6051 case OP_BRAPOS:
6052 case OP_CBRAPOS:
6053 case OP_SBRAPOS:
6054 case OP_SCBRAPOS:
6055 case OP_BRAPOSZERO:
6056 compile_bracketpos_fallbackpath(common, current);
6057 break;
6058
6059 case OP_BRAMINZERO:
6060 compile_braminzero_fallbackpath(common, current);
6061 break;
6062
6063 case OP_FAIL:
6064 case OP_ACCEPT:
6065 case OP_ASSERT_ACCEPT:
6066 set_jumps(current->topfallbacks, LABEL());
6067 break;
6068
6069 default:
6070 SLJIT_ASSERT_STOP();
6071 break;
6072 }
6073 current = current->prev;
6074 }
6075 }
6076
/* Emits the out-of-line routine for one recursion target, the bracket that
common->currententry refers to. jit_compile() calls this once for every entry
of the common->entries list.

The emitted routine leaves its result in TMP1: 1 when one of the alternatives
reached the accept point, 0 when none of them matched.

Arguments:
  common     the shared compiler state; common->currententry must be set

On a compiler error the function returns early, the caller is expected to
check sljit_get_compiler_error(). */
6077 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6078 {
6079 DEFINE_COMPILER;
6080 pcre_uchar *cc = common->start + common->currententry->start;
6081 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6082 pcre_uchar *ccend = bracketend(cc);
6083 int localsize = get_localsize(common, ccbegin, ccend);
6084 int framesize = get_framesize(common, cc, TRUE);
6085 int alternativesize;
6086 BOOL needsframe;
6087 fallback_common altfallback;
6088 struct sljit_jump *jump;
6089
6090 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means no frame is needed; it is then counted as
zero in the stack size computations below. */
6091 needsframe = framesize >= 0;
6092 if (!needsframe)
6093 framesize = 0;
/* One extra stack slot is reserved when the bracket has alternatives; it
holds STR_PTR so that every alternative can restart from the same place. */
6094 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6095
/* Define the entry label and bind every call recorded so far to it. */
6096 SLJIT_ASSERT(common->currententry->entry == NULL);
6097 common->currententry->entry = LABEL();
6098 set_jumps(common->currententry->calls, common->currententry->entry);
6099
/* TMP2 receives the value produced by the fast enter (presumably the return
address) and is stored in the topmost slot of the newly allocated area. */
6100 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6101 allocate_stack(common, localsize + framesize + alternativesize);
6102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6103 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6105 if (needsframe)
6106 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6107
6108 if (alternativesize > 0)
6109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6110
6111 memset(&altfallback, 0, sizeof(fallback_common));
6112 common->acceptlabel = NULL;
6113 common->accept = NULL;
6114 altfallback.cc = ccbegin;
6115 cc += GET(cc, 1);
/* One iteration per alternative: hot path, jump to the accept point, then
the fallback path, which falls through to the next alternative. */
6116 while (1)
6117 {
6118 altfallback.top = NULL;
6119 altfallback.topfallbacks = NULL;
6120
/* Every alternative except the first reloads the saved STR_PTR. */
6121 if (altfallback.cc != ccbegin)
6122 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6123
6124 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6125 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6126 return;
6127
6128 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6129
6130 compile_fallbackpath(common, altfallback.top);
6131 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6132 return;
6133 set_jumps(altfallback.topfallbacks, LABEL());
6134
6135 if (*cc != OP_ALT)
6136 break;
6137
6138 altfallback.cc = cc + 1 + LINK_SIZE;
6139 cc += GET(cc, 1);
6140 }
6141 /* None of them matched. */
6142 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6143 jump = JUMP(SLJIT_JUMP);
6144
/* Accept point: STACK_TOP is reset to the value saved in RECURSIVE_HEAD.
When a frame exists, revertframes is called with OVECTOR(0) preserved in
TMP3 across the call. */
6145 set_jumps(common->accept, LABEL());
6146 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6147 if (needsframe)
6148 {
6149 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6150 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6151 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6152 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6154 }
6155 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6156
/* Common exit of both outcomes; TMP3 holds the result (0 or 1) and is moved
to TMP1 before returning. */
6157 JUMPHERE(jump);
6158 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6159 free_stack(common, localsize + framesize + alternativesize);
6160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6161 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6163 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6164 }
6165
6166 #undef COMPILE_FALLBACKPATH
6167 #undef CURRENT_AS
6168
/* Generates machine code for a compiled pattern.

Arguments:
  re         the compiled pattern
  extra      the extra block of the pattern; PCRE_EXTRA_STUDY_DATA must be
             set in its flags (asserted below)

On success an executable_function record is allocated, stored in
extra->executable_jit and PCRE_EXTRA_EXECUTABLE_JIT is set in extra->flags.
On any failure (unsupported newline setting, too much local space, out of
memory, compiler error) the function simply returns and "extra" is left
untouched, so the caller sees no JIT code. */
6169 void
6170 PRIV(jit_compile)(const real_pcre *re, pcre_extra *extra)
6171 {
6172 struct sljit_compiler *compiler;
6173 fallback_common rootfallback;
6174 compiler_common common_data;
6175 compiler_common *common = &common_data;
6176 const pcre_uint8 *tables = re->tables;
6177 pcre_study_data *study;
6178 pcre_uchar *ccend;
6179 executable_function *function;
6180 void *executable_func;
6181 struct sljit_label *leave;
6182 struct sljit_label *mainloop = NULL;
6183 struct sljit_label *empty_match_found;
6184 struct sljit_label *empty_match_fallback;
6185 struct sljit_jump *alloc_error;
6186 struct sljit_jump *reqbyte_notfound = NULL;
6187 struct sljit_jump *empty_match;
6188
6189 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6190 study = extra->study_data;
6191
6192 if (!tables)
6193 tables = PRIV(default_tables);
6194
/* The byte code starts right after the name table. */
6195 memset(&rootfallback, 0, sizeof(fallback_common));
6196 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6197
6198 common->compiler = NULL;
6199 common->start = rootfallback.cc;
6200 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6201 common->fcc = tables + fcc_offset;
6202 common->lcc = (sljit_w)(tables + lcc_offset);
6203 common->nltype = NLTYPE_FIXED;
/* Select the newline convention. A two character newline is stored as
(first << 8) | second. An unknown option combination aborts the compilation. */
6204 switch(re->options & PCRE_NEWLINE_BITS)
6205 {
6206 case 0:
6207 /* Compile-time default */
6208 switch (NEWLINE)
6209 {
6210 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6211 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6212 default: common->newline = NEWLINE; break;
6213 }
6214 break;
6215 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6216 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6217 case PCRE_NEWLINE_CR+
6218 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6219 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6220 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6221 default: return;
6222 }
/* The \R convention: pattern options first, then the build-time default. */
6223 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6224 common->bsr_nltype = NLTYPE_ANYCRLF;
6225 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6226 common->bsr_nltype = NLTYPE_ANY;
6227 else
6228 {
6229 #ifdef BSR_ANYCRLF
6230 common->bsr_nltype = NLTYPE_ANYCRLF;
6231 #else
6232 common->bsr_nltype = NLTYPE_ANY;
6233 #endif
6234 }
6235 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6236 common->ctypes = (sljit_w)(tables + ctypes_offset);
6237 common->name_table = (sljit_w)re + re->name_table_offset;
6238 common->name_count = re->name_count;
6239 common->name_entry_size = re->name_entry_size;
/* All jump lists of the shared helper routines start empty; a helper is
emitted at the end only if its list has been filled in. */
6240 common->acceptlabel = NULL;
6241 common->stubs = NULL;
6242 common->entries = NULL;
6243 common->currententry = NULL;
6244 common->accept = NULL;
6245 common->calllimit = NULL;
6246 common->stackalloc = NULL;
6247 common->revertframes = NULL;
6248 common->wordboundary = NULL;
6249 common->anynewline = NULL;
6250 common->hspace = NULL;
6251 common->vspace = NULL;
6252 common->casefulcmp = NULL;
6253 common->caselesscmp = NULL;
6254 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6255 #ifdef SUPPORT_UTF8
6256 common->utf8 = (re->options & PCRE_UTF8) != 0;
6257 #ifdef SUPPORT_UCP
6258 common->useucp = (re->options & PCRE_UCP) != 0;
6259 #endif
6260 common->utf8readchar = NULL;
6261 common->utf8readtype8 = NULL;
6262 #endif
6263 #ifdef SUPPORT_UCP
6264 common->getucd = NULL;
6265 #endif
/* Compute the size of the local area; give up when get_localspace()
reports an error or the area would exceed SLJIT_MAX_LOCAL_SIZE. */
6266 ccend = bracketend(rootfallback.cc);
6267 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6268 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6269 if (common->localsize < 0)
6270 return;
6271 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6272 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6273 return;
/* One int for each code unit of the pattern. From here on every exit has
to release common->localptrs. */
6274 common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6275 if (!common->localptrs)
6276 return;
6277 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6278 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6279
6280 compiler = sljit_create_compiler();
6281 if (!compiler)
6282 {
6283 SLJIT_FREE(common->localptrs);
6284 return;
6285 }
6286 common->compiler = compiler;
6287
6288 /* Main pcre_jit_exec entry. */
6289 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6290
6291 /* Register init. */
6292 reset_ovector(common, (re->top_bracket + 1) * 2);
6293 if ((re->flags & PCRE_REQCHSET) != 0)
6294 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, SLJIT_TEMPORARY_REG1, 0);
6295
/* Load the subject pointers, the stack boundaries and the call limit from
the jit_arguments block received in SLJIT_GENERAL_REG1. */
6296 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
6297 OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
6298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6299 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6301 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6302 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6303 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6304 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6305
6306 /* Main part of the matching */
6307 if ((re->options & PCRE_ANCHORED) == 0)
6308 {
6309 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6310 /* Forward search if possible. */
6311 if ((re->flags & PCRE_FIRSTSET) != 0)
6312 fast_forward_first_byte(common, re->first_byte, (re->options & PCRE_FIRSTLINE) != 0);
6313 else if ((re->flags & PCRE_STARTLINE) != 0)
6314 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
6315 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6316 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6317 }
6318 if ((re->flags & PCRE_REQCHSET) != 0)
6319 reqbyte_notfound = search_requested_char(common, re->req_byte, (re->flags & PCRE_FIRSTSET) != 0);
6320
6321 /* Store the current STR_PTR in OVECTOR(0). */
6322 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6323 /* Copy the limit of allowed recursions. */
6324 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6325
6326 compile_hotpath(common, rootfallback.cc, ccend, &rootfallback);
6327 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6328 {
6329 sljit_free_compiler(compiler);
6330 SLJIT_FREE(common->localptrs);
6331 return;
6332 }
6333
/* A match which ends where it started is an empty match; it is checked
against the notempty flags by the code emitted after flush_stubs() below. */
6334 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6335 empty_match_found = LABEL();
6336
6337 common->acceptlabel = LABEL();
6338 if (common->accept != NULL)
6339 set_jumps(common->accept, common->acceptlabel);
6340
6341 /* This means we have a match. Update the ovector. */
6342 copy_ovector(common, re->top_bracket + 1);
/* "leave" is the single exit of the generated function; the error paths
below load their error code into SLJIT_RETURN_REG and jump here. */
6343 leave = LABEL();
6344 sljit_emit_return(compiler, SLJIT_UNUSED, 0);
6345
6346 empty_match_fallback = LABEL();
6347 compile_fallbackpath(common, rootfallback.top);
6348 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6349 {
6350 sljit_free_compiler(compiler);
6351 SLJIT_FREE(common->localptrs);
6352 return;
6353 }
6354
6355 SLJIT_ASSERT(rootfallback.prev == NULL);
6356
6357 /* Check we have remaining characters. */
6358 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6359
/* Unanchored patterns go back to the main loop while a match is still
possible: at least study->minlength characters must remain when that is
known, and with PCRE_FIRSTLINE the position is compared with FIRSTLINE_END. */
6360 if ((re->options & PCRE_ANCHORED) == 0)
6361 {
6362 if ((re->options & PCRE_FIRSTLINE) == 0)
6363 {
6364 if (study != NULL && study->minlength > 1)
6365 {
6366 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6367 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
6368 }
6369 else
6370 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
6371 }
6372 else
6373 {
6374 if (study != NULL && study->minlength > 1)
6375 {
6376 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6377 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
6378 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
6379 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
6380 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
6381 JUMPTO(SLJIT_C_ZERO, mainloop);
6382 }
6383 else
6384 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop);
6385 }
6386 }
6387
6388 if (reqbyte_notfound != NULL)
6389 JUMPHERE(reqbyte_notfound);
6390 /* Copy OVECTOR(1) to OVECTOR(0) */
6391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6392 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6393 JUMPTO(SLJIT_JUMP, leave);
6394
6395 flush_stubs(common);
6396
/* Empty match handling. With notempty set the match is always rejected.
With notempty_atstart set it is rejected only when STR_PTR equals the
starting position (jit_arguments.str). Otherwise it is accepted. */
6397 JUMPHERE(empty_match);
6398 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6399 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6400 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback);
6401 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6402 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
6403 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6404 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
6405 JUMPTO(SLJIT_JUMP, empty_match_fallback);
6406
/* Emit the routines of the recursion targets collected so far. */
6407 common->currententry = common->entries;
6408 while (common->currententry != NULL)
6409 {
6410 /* Might add new entries. */
6411 compile_recurse(common);
6412 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6413 {
6414 sljit_free_compiler(compiler);
6415 SLJIT_FREE(common->localptrs);
6416 return;
6417 }
6418 flush_stubs(common);
6419 common->currententry = common->currententry->next;
6420 }
6421
6422 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
6423 /* This is a (really) rare case. */
6424 set_jumps(common->stackalloc, LABEL());
6425 /* RETURN_ADDR is not a saved register. */
6426 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
6427 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
6428 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6430 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
6431 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
6432
/* Call sljit_stack_resize(); a non-zero return value is treated as an
allocation failure. On success the stack registers are reloaded and TMP2,
which was saved in LOCALS1 above, is restored. */
6433 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
6434 alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6435 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6437 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
6438 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
6439 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
6440 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6441
6442 /* Allocation failed. */
6443 JUMPHERE(alloc_error);
6444 /* We break the return address cache here, but this is a really rare case. */
6445 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
6446 JUMPTO(SLJIT_JUMP, leave);
6447
6448 /* Call limit reached. */
6449 set_jumps(common->calllimit, LABEL());
6450 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
6451 JUMPTO(SLJIT_JUMP, leave);
6452
/* Shared helper routines; each one is emitted only when the pattern
actually calls it. */
6453 if (common->revertframes != NULL)
6454 {
6455 set_jumps(common->revertframes, LABEL());
6456 do_revertframes(common);
6457 }
6458 if (common->wordboundary != NULL)
6459 {
6460 set_jumps(common->wordboundary, LABEL());
6461 check_wordboundary(common);
6462 }
6463 if (common->anynewline != NULL)
6464 {
6465 set_jumps(common->anynewline, LABEL());
6466 check_anynewline(common);
6467 }
6468 if (common->hspace != NULL)
6469 {
6470 set_jumps(common->hspace, LABEL());
6471 check_hspace(common);
6472 }
6473 if (common->vspace != NULL)
6474 {
6475 set_jumps(common->vspace, LABEL());
6476 check_vspace(common);
6477 }
6478 if (common->casefulcmp != NULL)
6479 {
6480 set_jumps(common->casefulcmp, LABEL());
6481 do_casefulcmp(common);
6482 }
6483 if (common->caselesscmp != NULL)
6484 {
6485 set_jumps(common->caselesscmp, LABEL());
6486 do_caselesscmp(common);
6487 }
6488 #ifdef SUPPORT_UTF8
6489 if (common->utf8readchar != NULL)
6490 {
6491 set_jumps(common->utf8readchar, LABEL());
6492 do_utf8readchar(common);
6493 }
6494 if (common->utf8readtype8 != NULL)
6495 {
6496 set_jumps(common->utf8readtype8, LABEL());
6497 do_utf8readtype8(common);
6498 }
6499 #endif
6500 #ifdef SUPPORT_UCP
6501 if (common->getucd != NULL)
6502 {
6503 set_jumps(common->getucd, LABEL());
6504 do_getucd(common);
6505 }
6506 #endif
6507
/* Generate the code; the compiler and the localptrs array are released
whether the generation succeeds or not. */
6508 SLJIT_FREE(common->localptrs);
6509 executable_func = sljit_generate_code(compiler);
6510 sljit_free_compiler(compiler);
6511 if (executable_func == NULL)
6512 return;
6513
6514 function = SLJIT_MALLOC(sizeof(executable_function));
6515 if (function == NULL)
6516 {
6517 /* This case is highly unlikely since we just recently
6518 freed a lot of memory. Although not impossible. */
6519 sljit_free_code(executable_func);
6520 return;
6521 }
6522
/* Publish the result. No stack callback or user data is assigned yet, so
PRIV(jit_exec) uses the machine stack until pcre_assign_jit_stack() is
called. */
6523 function->executable_func = executable_func;
6524 function->callback = NULL;
6525 function->userdata = NULL;
6526 extra->executable_jit = function;
6527 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
6528 }
6529
6530 static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function)
6531 {
6532 union {
6533 void* executable_func;
6534 jit_function call_executable_func;
6535 } convert_executable_func;
6536 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
6537 struct sljit_stack local_stack;
6538
6539 local_stack.top = (sljit_w)&local_area;
6540 local_stack.base = local_stack.top;
6541 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
6542 local_stack.max_limit = local_stack.limit;
6543 arguments->stack = &local_stack;
6544 convert_executable_func.executable_func = function->executable_func;
6545 return convert_executable_func.call_executable_func(arguments);
6546 }
6547
6548 int
6549 PRIV(jit_exec)(const real_pcre *re, void *executable_func,
6550 const pcre_uchar *subject, int length, int start_offset, int options,
6551 int match_limit, int *offsets, int offsetcount)
6552 {
6553 executable_function *function = (executable_function*)executable_func;
6554 union {
6555 void* executable_func;
6556 jit_function call_executable_func;
6557 } convert_executable_func;
6558 jit_arguments arguments;
6559 int maxoffsetcount;
6560 int retval;
6561
6562 /* Sanity checks should be handled by pcre_exec. */
6563 arguments.stack = NULL;
6564 arguments.str = subject + start_offset;
6565 arguments.begin = subject;
6566 arguments.end = subject + length;
6567 arguments.calllimit = match_limit; /* JIT decreases this value less times. */
6568 arguments.notbol = (options & PCRE_NOTBOL) != 0;
6569 arguments.noteol = (options & PCRE_NOTEOL) != 0;
6570 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
6571 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6572 arguments.offsets = offsets;
6573
6574 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
6575 the output vector for storing captured strings, with the remainder used as
6576 workspace. We don't need the workspace here. For compatibility, we limit the
6577 number of captured strings in the same way as pcre_exec(), so that the user
6578 gets the same result with and without JIT. */
6579
6580 offsetcount = ((offsetcount - (offsetcount % 3)) * 2)/3;
6581 maxoffsetcount = (re->top_bracket + 1) * 2;
6582 if (offsetcount > maxoffsetcount)
6583 offsetcount = maxoffsetcount;
6584 arguments.offsetcount = offsetcount;
6585
6586 if (function->callback)
6587 arguments.stack = (struct sljit_stack*)function->callback(function->userdata);
6588 else
6589 arguments.stack = (struct sljit_stack*)function->userdata;
6590
6591 if (arguments.stack == NULL)
6592 retval = jit_machine_stack_exec(&arguments, function);
6593 else
6594 {
6595 convert_executable_func.executable_func = function->executable_func;
6596 retval = convert_executable_func.call_executable_func(&arguments);
6597 }
6598
6599 if (retval * 2 > offsetcount)
6600 retval = 0;
6601 return retval;
6602 }
6603
6604 void
6605 PRIV(jit_free)(void *executable_func)
6606 {
6607 executable_function *function = (executable_function*)executable_func;
6608 sljit_free_code(function->executable_func);
6609 SLJIT_FREE(function);
6610 }
6611
6612 #ifdef COMPILE_PCRE8
6613 PCRE_EXP_DECL pcre_jit_stack *
6614 pcre_jit_stack_alloc(int startsize, int maxsize)
6615 #else
6616 PCRE_EXP_DECL pcre_jit_stack *
6617 pcre16_jit_stack_alloc(int startsize, int maxsize)
6618 #endif
6619 {
6620 if (startsize < 1 || maxsize < 1)
6621 return NULL;
6622 if (startsize > maxsize)
6623 startsize = maxsize;
6624 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6625 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6626 return (pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize);
6627 }
6628
6629 #ifdef COMPILE_PCRE8
6630 PCRE_EXP_DECL void
6631 pcre_jit_stack_free(pcre_jit_stack *stack)
6632 #else
6633 PCRE_EXP_DECL void
6634 pcre16_jit_stack_free(pcre_jit_stack *stack)
6635 #endif
6636 {
6637 sljit_free_stack((struct sljit_stack*)stack);
6638 }
6639
6640 #ifdef COMPILE_PCRE8
6641 PCRE_EXP_DECL void
6642 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6643 #else
6644 PCRE_EXP_DECL void
6645 pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6646 #endif
6647 {
6648 executable_function *function;
6649 if (extra != NULL &&
6650 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
6651 extra->executable_jit != NULL)
6652 {
6653 function = (executable_function*)extra->executable_jit;
6654 function->callback = callback;
6655 function->userdata = userdata;
6656 }
6657 }
6658
6659 #else /* SUPPORT_JIT */
6660
6661 /* These are dummy functions to avoid linking errors when JIT support is not
6662 being compiled. */
6663
6664 #ifdef COMPILE_PCRE8
6665 PCRE_EXP_DECL pcre_jit_stack *
6666 pcre_jit_stack_alloc(int startsize, int maxsize)
6667 #else
6668 PCRE_EXP_DECL pcre_jit_stack *
6669 pcre16_jit_stack_alloc(int startsize, int maxsize)
6670 #endif
6671 {
6672 (void)startsize;
6673 (void)maxsize;
6674 return NULL;
6675 }
6676
6677 #ifdef COMPILE_PCRE8
6678 PCRE_EXP_DECL void
6679 pcre_jit_stack_free(pcre_jit_stack *stack)
6680 #else
6681 PCRE_EXP_DECL void
6682 pcre16_jit_stack_free(pcre_jit_stack *stack)
6683 #endif
6684 {
6685 (void)stack;
6686 }
6687
6688 #ifdef COMPILE_PCRE8
6689 PCRE_EXP_DECL void
6690 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6691 #else
6692 PCRE_EXP_DECL void
6693 pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6694 #endif
6695 {
6696 (void)extra;
6697 (void)callback;
6698 (void)userdata;
6699 }
6700
6701 #endif
6702
6703 /* End of pcre_jit_compile.c */

  ViewVC Help
Powered by ViewVC 1.1.5