/[pcre]/code/branches/pcre16/pcre_jit_compile.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 774 - (show annotations)
Thu Dec 1 06:08:45 2011 UTC (7 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 206056 byte(s)
better digit parsing, first_byte, req_byte are renamed to first_char req_char respectively
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (pcre_malloc)(size)
56 #define SLJIT_FREE(ptr) (pcre_free)(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size. */
69 #define LOCAL_SPACE_SIZE 32768
70
/* NOTE(review): presumably the step used when the backtracking stack has to
grow; its use lies outside this part of the file - confirm. */
71 #define STACK_GROWTH_RATE 8192
72
73 /* Enable to check that the allocation could destroy temporaries.
When DESTROY_REGISTERS is defined, allocate_stack() emits extra code that
overwrites TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with a marker value. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
/* Argument record passed by pointer to the compiled code (see jit_function).
Generated code reads members by offset; for example reset_ovector() loads
`begin` through SLJIT_OFFSETOF(jit_arguments, begin). */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
/* Bundles a pointer to the generated code with a callback and its user data.
NOTE(review): how callback/userdata are consumed is not visible in this part
of the file - confirm against the execution entry point. */
165 typedef struct executable_function {
166 void *executable_func;
167 pcre_jit_callback callback;
168 void *userdata;
169 } executable_function;
170
/* Singly linked list of emitted jumps that still need a target.
add_jump() pushes new entries at the head; set_jumps() binds every entry
of a list to one label. */
171 typedef struct jump_list {
172 struct sljit_jump *jump;
173 struct jump_list *next;
174 } jump_list;
175
/* Kinds of out-of-line stubs handled by flush_stubs(); stack_alloc is the
only kind defined here. */
176 enum stub_types { stack_alloc };
177
/* One pending out-of-line stub, recorded by add_stub() and emitted later by
flush_stubs(). */
178 typedef struct stub_list {
/* Selects the code flush_stubs() emits for this stub. */
179 enum stub_types type;
/* Extra value stored by add_stub(); not read by flush_stubs(). */
180 int data;
/* Jump that enters the stub; flush_stubs() binds it to the stub's code. */
181 struct sljit_jump *start;
/* Label created when the stub was recorded; the stub jumps back to it. */
182 struct sljit_label *leave;
183 struct stub_list *next;
184 } stub_list;
185
/* Function pointer type taking a jit_arguments record and returning an int.
NOTE(review): presumably the entry point of the generated matcher; the
meaning of the return value is not visible here - confirm. */
186 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
187
188 /* The following structure is the key data type for the recursive
189 code generator. It is allocated by compile_hotpath, and contains
190 the arguments for compile_fallbackpath. Must be the first member
191 of its descendants. */
192 typedef struct fallback_common {
193 /* Concatenation stack. */
194 struct fallback_common *prev;
195 jump_list *nextfallbacks;
196 /* Internal stack (for component operators). */
197 struct fallback_common *top;
198 jump_list *topfallbacks;
199 /* Opcode pointer. */
200 pcre_uchar *cc;
201 } fallback_common;
202
/* Fallback record for assertions; starts with fallback_common as required
for all descendants of that structure. */
203 typedef struct assert_fallback {
204 fallback_common common;
205 jump_list *condfailed;
206 /* Less than 0 (-1) if a frame is not needed. */
207 int framesize;
208 /* Points to our private memory word on the stack. */
209 int localptr;
210 /* For iterators. */
211 struct sljit_label *hotpath;
212 } assert_fallback;
213
/* Fallback record for bracket opcodes; starts with fallback_common as
required for all descendants of that structure. */
214 typedef struct bracket_fallback {
215 fallback_common common;
216 /* Where to continue if an alternative is successfully matched. */
217 struct sljit_label *althotpath;
218 /* For rmin and rmax iterators. */
219 struct sljit_label *recursivehotpath;
220 /* For greedy ? operator. */
221 struct sljit_label *zerohotpath;
222 /* Contains the branches of a failed condition. */
223 union {
224 /* Both for OP_COND, OP_SCOND. */
225 jump_list *condfailed;
226 assert_fallback *assert;
227 /* For OP_ONCE. -1 if not needed. */
228 int framesize;
229 } u;
230 /* Points to our private memory word on the stack. */
231 int localptr;
232 } bracket_fallback;
233
/* Fallback record for the possessive bracket opcodes; starts with
fallback_common as required for all descendants of that structure. */
234 typedef struct bracketpos_fallback {
235 fallback_common common;
236 /* Points to our private memory word on the stack. */
237 int localptr;
238 /* Reverting stack is needed. */
239 int framesize;
240 /* Allocated stack size. */
241 int stacksize;
242 } bracketpos_fallback;
243
/* Fallback record holding one hot path label in addition to the common part.
NOTE(review): presumably used for OP_BRAMINZERO, judging by the name -
confirm at the use site. */
244 typedef struct braminzero_fallback {
245 fallback_common common;
246 struct sljit_label *hotpath;
247 } braminzero_fallback;
248
/* Fallback record for iterators: the common part plus the label of the next
iteration. */
249 typedef struct iterator_fallback {
250 fallback_common common;
251 /* Next iteration. */
252 struct sljit_label *hotpath;
253 } iterator_fallback;
254
/* Linked list node describing one recursion target that is compiled as a
separate function. */
255 typedef struct recurse_entry {
256 struct recurse_entry *next;
257 /* Contains the function entry. */
258 struct sljit_label *entry;
259 /* Collects the calls until the function is created. */
260 jump_list *calls;
261 /* Points to the starting opcode. */
262 int start;
263 } recurse_entry;
264
/* Fallback record that adds nothing to the common part. */
265 typedef struct recurse_fallback {
266 fallback_common common;
267 } recurse_fallback;
268
/* State shared by all code generator functions of one compilation. Nearly
every helper receives it as `common`. */
269 typedef struct compiler_common {
/* The sljit compiler instance; DEFINE_COMPILER copies it into a local. */
270 struct sljit_compiler *compiler;
/* First opcode of the pattern; PRIV_DATA() indexes localptrs relative to it. */
271 pcre_uchar *start;
272 int localsize;
/* Per-opcode offsets of private local words, filled in by set_localptrs(). */
273 int *localptrs;
274 const pcre_uint8 *fcc;
275 sljit_w lcc;
/* Base offset used by OVECTOR_PRIV(). */
276 int cbraptr;
277 int nltype;
278 int newline;
279 int bsr_nltype;
280 int endonly;
281 sljit_w ctypes;
282 sljit_uw name_table;
283 sljit_w name_count;
284 sljit_w name_entry_size;
285 struct sljit_label *acceptlabel;
/* Pending out-of-line stubs: pushed by add_stub(), emitted by flush_stubs(). */
286 stub_list *stubs;
287 recurse_entry *entries;
288 recurse_entry *currententry;
289 jump_list *accept;
/* Jumps taken when CALL_COUNT reaches zero (see decrease_call_count). */
290 jump_list *calllimit;
/* Fast calls emitted by flush_stubs() for stack_alloc stubs. */
291 jump_list *stackalloc;
292 jump_list *revertframes;
293 jump_list *wordboundary;
294 jump_list *anynewline;
295 jump_list *hspace;
296 jump_list *vspace;
297 jump_list *casefulcmp;
298 jump_list *caselesscmp;
299 BOOL jscript_compat;
300 #ifdef SUPPORT_UTF8
/* Checked by next_opcode() to reject OP_ANYBYTE and to skip extra units. */
301 BOOL utf8;
302 #ifdef SUPPORT_UCP
303 BOOL useucp;
304 #endif
305 jump_list *utf8readchar;
306 jump_list *utf8readtype8;
307 #endif
308 #ifdef SUPPORT_UCP
309 jump_list *getucd;
310 #endif
311 } compiler_common;
312
313 /* For byte_sequence_compare. */
314
/* The unions exist only when SLJIT_UNALIGNED is set; each one overlays an
int, a short and an array of character units (four 8-bit units or two
16-bit units, depending on the compile mode). */
315 typedef struct compare_context {
316 int length;
317 int sourcereg;
318 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
319 int ucharptr;
320 union {
321 sljit_i asint;
322 sljit_h asshort;
323 #ifdef COMPILE_PCRE8
324 sljit_ub asbyte;
325 sljit_ub asuchars[4];
326 #else
327 #ifdef COMPILE_PCRE16
328 sljit_uh asuchars[2];
329 #endif
330 #endif
331 } c;
332 union {
333 sljit_i asint;
334 sljit_h asshort;
335 #ifdef COMPILE_PCRE8
336 sljit_ub asbyte;
337 sljit_ub asuchars[4];
338 #else
339 #ifdef COMPILE_PCRE16
340 sljit_uh asuchars[2];
341 #endif
342 #endif
343 } oc;
344 #endif
345 } compare_context;
346
/* Marker words written into saved stack frames by init_frame():
frame_end terminates a frame; frame_setstrbegin is followed by the saved
value of OVECTOR(0). */
347 enum {
348 frame_end = 0,
349 frame_setstrbegin = -1
350 };
351
352 /* Used for accessing the elements of the stack. STACK(i) yields a negative
byte offset: STACK(0) is -sizeof(sljit_w), STACK(1) is -2 * sizeof(sljit_w),
and so on. It is used as the displacement of SLJIT_MEM1(STACK_TOP). */
353 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
354
/* Symbolic names for the sljit registers used by the generated code. */
355 #define TMP1 SLJIT_TEMPORARY_REG1
356 #define TMP2 SLJIT_TEMPORARY_REG3
357 #define TMP3 SLJIT_TEMPORARY_EREG2
358 #define STR_PTR SLJIT_GENERAL_REG1
359 #define STR_END SLJIT_GENERAL_REG2
360 #define STACK_TOP SLJIT_TEMPORARY_REG2
361 #define STACK_LIMIT SLJIT_GENERAL_REG3
362 #define ARGUMENTS SLJIT_GENERAL_EREG1
363 #define CALL_COUNT SLJIT_GENERAL_EREG2
364 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
365
366 /* Locals layout. Each macro is a byte offset, a multiple of sizeof(sljit_w),
used as the displacement of SLJIT_MEM1(SLJIT_LOCALS_REG). */
367 /* These two locals can be used by the current opcode. */
368 #define LOCALS0 (0 * sizeof(sljit_w))
369 #define LOCALS1 (1 * sizeof(sljit_w))
370 /* Two local variables for possessive quantifiers (char1 cannot use them). */
371 #define POSSESSIVE0 (2 * sizeof(sljit_w))
372 #define POSSESSIVE1 (3 * sizeof(sljit_w))
373 /* Head of the last recursion. */
374 #define RECURSIVE_HEAD (4 * sizeof(sljit_w))
375 /* Max limit of recursions. */
376 #define CALL_LIMIT (5 * sizeof(sljit_w))
377 /* Last known position of the requested character. */
378 #define REQ_CHAR_PTR (6 * sizeof(sljit_w))
379 /* End pointer of the first line. */
380 #define FIRSTLINE_END (7 * sizeof(sljit_w))
381 /* The output vector is stored on the stack, and contains pointers
382 to characters. The vector data is divided into two groups: the first
383 group contains the start / end character pointers, and the second is
384 the start pointers when the end of the capturing group has not yet been reached. */
385 #define OVECTOR_START (8 * sizeof(sljit_w))
386 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
/* Second group: addressed relative to common->cbraptr. */
387 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Offset of the private local word assigned to the opcode at cc
(see set_localptrs). Requires a variable named `common` in scope. */
388 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
389
/* Move opcode matching the width of one pcre_uchar in the selected mode. */
390 #ifdef COMPILE_PCRE8
391 #define MOV_UCHAR SLJIT_MOV_UB
392 #else
393 #ifdef COMPILE_PCRE16
394 #define MOV_UCHAR SLJIT_MOV_UH
395 #else
396 #error Unsupported compiling mode
397 #endif
398 #endif
399
400 /* Shortcuts. Every macro except DEFINE_COMPILER expects a local variable
named `compiler`; DEFINE_COMPILER declares it from `common`. */
401 #define DEFINE_COMPILER \
402 struct sljit_compiler *compiler = common->compiler
403 #define OP1(op, dst, dstw, src, srcw) \
404 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
405 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
406 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emits a label at the current position. */
407 #define LABEL() \
408 sljit_emit_label(compiler)
/* Emits a jump whose target is set later. */
409 #define JUMP(type) \
410 sljit_emit_jump(compiler, (type))
/* Emits a jump to an already existing label. */
411 #define JUMPTO(type, label) \
412 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds a previously emitted jump to a label created at the current position. */
413 #define JUMPHERE(jump) \
414 sljit_set_label((jump), sljit_emit_label(compiler))
/* Compare-and-jump variants of JUMP and JUMPTO. */
415 #define CMP(type, src1, src1w, src2, src2w) \
416 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
417 #define CMPTO(type, src1, src1w, src2, src2w, label) \
418 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
419 #define COND_VALUE(op, dst, dstw, type) \
420 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
421
422 static pcre_uchar* bracketend(pcre_uchar* cc)
423 {
424 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
425 do cc += GET(cc, 1); while (*cc == OP_ALT);
426 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
427 cc += 1 + LINK_SIZE;
428 return cc;
429 }
430
431 /* Functions which might need modification for all new supported opcodes:
432 next_opcode
433 get_localspace
434 set_localptrs
435 get_framesize
436 init_frame
437 get_localsize
438 copy_locals
439 compile_hotpath
440 compile_fallbackpath
441 */
442
/* Returns the address of the opcode that follows the one at cc, or NULL when
the opcode is not handled here (the default case, and OP_ANYBYTE when
common->utf8 is set). Bracket-type opcodes are advanced only past their own
header, not past the whole group. */
443 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
444 {
445 SLJIT_UNUSED_ARG(common);
446 switch(*cc)
447 {
/* Opcodes that occupy a single unit. */
448 case OP_SOD:
449 case OP_SOM:
450 case OP_SET_SOM:
451 case OP_NOT_WORD_BOUNDARY:
452 case OP_WORD_BOUNDARY:
453 case OP_NOT_DIGIT:
454 case OP_DIGIT:
455 case OP_NOT_WHITESPACE:
456 case OP_WHITESPACE:
457 case OP_NOT_WORDCHAR:
458 case OP_WORDCHAR:
459 case OP_ANY:
460 case OP_ALLANY:
461 case OP_ANYNL:
462 case OP_NOT_HSPACE:
463 case OP_HSPACE:
464 case OP_NOT_VSPACE:
465 case OP_VSPACE:
466 case OP_EXTUNI:
467 case OP_EODN:
468 case OP_EOD:
469 case OP_CIRC:
470 case OP_CIRCM:
471 case OP_DOLL:
472 case OP_DOLLM:
473 case OP_TYPESTAR:
474 case OP_TYPEMINSTAR:
475 case OP_TYPEPLUS:
476 case OP_TYPEMINPLUS:
477 case OP_TYPEQUERY:
478 case OP_TYPEMINQUERY:
479 case OP_TYPEPOSSTAR:
480 case OP_TYPEPOSPLUS:
481 case OP_TYPEPOSQUERY:
482 case OP_CRSTAR:
483 case OP_CRMINSTAR:
484 case OP_CRPLUS:
485 case OP_CRMINPLUS:
486 case OP_CRQUERY:
487 case OP_CRMINQUERY:
488 case OP_DEF:
489 case OP_BRAZERO:
490 case OP_BRAMINZERO:
491 case OP_BRAPOSZERO:
492 case OP_FAIL:
493 case OP_ACCEPT:
494 case OP_ASSERT_ACCEPT:
495 case OP_SKIPZERO:
496 return cc + 1;
497
/* Single unit as well, but rejected (NULL) when UTF-8 mode is active. */
498 case OP_ANYBYTE:
499 #ifdef SUPPORT_UTF8
500 if (common->utf8) return NULL;
501 #endif
502 return cc + 1;
503
/* Opcode followed by one character. */
504 case OP_CHAR:
505 case OP_CHARI:
506 case OP_NOT:
507 case OP_NOTI:
508
509 case OP_STAR:
510 case OP_MINSTAR:
511 case OP_PLUS:
512 case OP_MINPLUS:
513 case OP_QUERY:
514 case OP_MINQUERY:
515 case OP_POSSTAR:
516 case OP_POSPLUS:
517 case OP_POSQUERY:
518 case OP_STARI:
519 case OP_MINSTARI:
520 case OP_PLUSI:
521 case OP_MINPLUSI:
522 case OP_QUERYI:
523 case OP_MINQUERYI:
524 case OP_POSSTARI:
525 case OP_POSPLUSI:
526 case OP_POSQUERYI:
527 case OP_NOTSTAR:
528 case OP_NOTMINSTAR:
529 case OP_NOTPLUS:
530 case OP_NOTMINPLUS:
531 case OP_NOTQUERY:
532 case OP_NOTMINQUERY:
533 case OP_NOTPOSSTAR:
534 case OP_NOTPOSPLUS:
535 case OP_NOTPOSQUERY:
536 case OP_NOTSTARI:
537 case OP_NOTMINSTARI:
538 case OP_NOTPLUSI:
539 case OP_NOTMINPLUSI:
540 case OP_NOTQUERYI:
541 case OP_NOTMINQUERYI:
542 case OP_NOTPOSSTARI:
543 case OP_NOTPOSPLUSI:
544 case OP_NOTPOSQUERYI:
545 cc += 2;
546 #ifdef SUPPORT_UTF8
/* cc[-1] is the character unit just skipped; when it is 0xc0 or above, a
table-driven number of extra units is skipped too (presumably the remaining
bytes of a multi-byte UTF-8 character - confirm against utf8_table4). */
547 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
548 #endif
549 return cc;
550
/* Opcode, an IMM2_SIZE operand, then one character. */
551 case OP_UPTO:
552 case OP_MINUPTO:
553 case OP_EXACT:
554 case OP_POSUPTO:
555 case OP_UPTOI:
556 case OP_MINUPTOI:
557 case OP_EXACTI:
558 case OP_POSUPTOI:
559 case OP_NOTUPTO:
560 case OP_NOTMINUPTO:
561 case OP_NOTEXACT:
562 case OP_NOTPOSUPTO:
563 case OP_NOTUPTOI:
564 case OP_NOTMINUPTOI:
565 case OP_NOTEXACTI:
566 case OP_NOTPOSUPTOI:
567 cc += 2 + IMM2_SIZE;
568 #ifdef SUPPORT_UTF8
569 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
570 #endif
571 return cc;
572
/* Opcode followed by IMM2_SIZE further units. */
573 case OP_NOTPROP:
574 case OP_PROP:
575 case OP_TYPEUPTO:
576 case OP_TYPEMINUPTO:
577 case OP_TYPEEXACT:
578 case OP_TYPEPOSUPTO:
579 case OP_REF:
580 case OP_REFI:
581 case OP_CREF:
582 case OP_NCREF:
583 case OP_RREF:
584 case OP_NRREF:
585 case OP_CLOSE:
586 cc += 1 + IMM2_SIZE;
587 return cc;
588
/* Opcode followed by two IMM2_SIZE operands. */
589 case OP_CRRANGE:
590 case OP_CRMINRANGE:
591 return cc + 1 + 2 * IMM2_SIZE;
592
/* Opcode followed by 32 bytes of data, counted in pcre_uchar units. */
593 case OP_CLASS:
594 case OP_NCLASS:
595 return cc + 1 + 32 / sizeof(pcre_uchar);
596
597 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The distance to the next opcode is read from the link at offset 1. */
598 case OP_XCLASS:
599 return cc + GET(cc, 1);
600 #endif
601
/* Opcode plus a link field; only this header is skipped. */
602 case OP_RECURSE:
603 case OP_ASSERT:
604 case OP_ASSERT_NOT:
605 case OP_ASSERTBACK:
606 case OP_ASSERTBACK_NOT:
607 case OP_REVERSE:
608 case OP_ONCE:
609 case OP_ONCE_NC:
610 case OP_BRA:
611 case OP_BRAPOS:
612 case OP_COND:
613 case OP_SBRA:
614 case OP_SBRAPOS:
615 case OP_SCOND:
616 case OP_ALT:
617 case OP_KET:
618 case OP_KETRMAX:
619 case OP_KETRMIN:
620 case OP_KETRPOS:
621 return cc + 1 + LINK_SIZE;
622
/* Capturing brackets: opcode, link field and an IMM2_SIZE operand. */
623 case OP_CBRA:
624 case OP_CBRAPOS:
625 case OP_SCBRA:
626 case OP_SCBRAPOS:
627 return cc + 1 + LINK_SIZE + IMM2_SIZE;
628
/* Anything else is not supported. */
629 default:
630 return NULL;
631 }
632 }
633
634 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
635 {
636 int localspace = 0;
637 pcre_uchar *alternative;
638 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
639 while (cc < ccend)
640 {
641 switch(*cc)
642 {
643 case OP_ASSERT:
644 case OP_ASSERT_NOT:
645 case OP_ASSERTBACK:
646 case OP_ASSERTBACK_NOT:
647 case OP_ONCE:
648 case OP_ONCE_NC:
649 case OP_BRAPOS:
650 case OP_SBRA:
651 case OP_SBRAPOS:
652 case OP_SCOND:
653 localspace += sizeof(sljit_w);
654 cc += 1 + LINK_SIZE;
655 break;
656
657 case OP_CBRAPOS:
658 case OP_SCBRAPOS:
659 localspace += sizeof(sljit_w);
660 cc += 1 + LINK_SIZE + IMM2_SIZE;
661 break;
662
663 case OP_COND:
664 /* Might be a hidden SCOND. */
665 alternative = cc + GET(cc, 1);
666 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
667 localspace += sizeof(sljit_w);
668 cc += 1 + LINK_SIZE;
669 break;
670
671 default:
672 cc = next_opcode(common, cc);
673 if (cc == NULL)
674 return -1;
675 break;
676 }
677 }
678 return localspace;
679 }
680
681 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
682 {
683 pcre_uchar *cc = common->start;
684 pcre_uchar *alternative;
685 while (cc < ccend)
686 {
687 switch(*cc)
688 {
689 case OP_ASSERT:
690 case OP_ASSERT_NOT:
691 case OP_ASSERTBACK:
692 case OP_ASSERTBACK_NOT:
693 case OP_ONCE:
694 case OP_ONCE_NC:
695 case OP_BRAPOS:
696 case OP_SBRA:
697 case OP_SBRAPOS:
698 case OP_SCOND:
699 common->localptrs[cc - common->start] = localptr;
700 localptr += sizeof(sljit_w);
701 cc += 1 + LINK_SIZE;
702 break;
703
704 case OP_CBRAPOS:
705 case OP_SCBRAPOS:
706 common->localptrs[cc - common->start] = localptr;
707 localptr += sizeof(sljit_w);
708 cc += 1 + LINK_SIZE + IMM2_SIZE;
709 break;
710
711 case OP_COND:
712 /* Might be a hidden SCOND. */
713 alternative = cc + GET(cc, 1);
714 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
715 {
716 common->localptrs[cc - common->start] = localptr;
717 localptr += sizeof(sljit_w);
718 }
719 cc += 1 + LINK_SIZE;
720 break;
721
722 default:
723 cc = next_opcode(common, cc);
724 SLJIT_ASSERT(cc != NULL);
725 break;
726 }
727 }
728 }
729
730 /* Returns with -1 if no need for frame. */
731 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
732 {
733 pcre_uchar *ccend = bracketend(cc);
734 int length = 0;
735 BOOL possessive = FALSE;
736 BOOL setsom_found = FALSE;
737
738 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
739 {
740 length = 3;
741 possessive = TRUE;
742 }
743
744 cc = next_opcode(common, cc);
745 SLJIT_ASSERT(cc != NULL);
746 while (cc < ccend)
747 switch(*cc)
748 {
749 case OP_SET_SOM:
750 case OP_RECURSE:
751 if (!setsom_found)
752 {
753 length += 2;
754 setsom_found = TRUE;
755 }
756 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
757 break;
758
759 case OP_CBRA:
760 case OP_CBRAPOS:
761 case OP_SCBRA:
762 case OP_SCBRAPOS:
763 length += 3;
764 cc += 1 + LINK_SIZE + IMM2_SIZE;
765 break;
766
767 default:
768 cc = next_opcode(common, cc);
769 SLJIT_ASSERT(cc != NULL);
770 break;
771 }
772
773 /* Possessive quantifiers can use a special case. */
774 if (SLJIT_UNLIKELY(possessive) && length == 3)
775 return -1;
776
777 if (length > 0)
778 return length + 1;
779 return -1;
780 }
781
782 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
783 {
784 DEFINE_COMPILER;
785 pcre_uchar *ccend = bracketend(cc);
786 BOOL setsom_found = FALSE;
787 int offset;
788
789 /* >= 1 + shortest item size (2) */
790 SLJIT_ASSERT(stackpos >= stacktop + 2);
791
792 stackpos = STACK(stackpos);
793 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
794 cc = next_opcode(common, cc);
795 SLJIT_ASSERT(cc != NULL);
796 while (cc < ccend)
797 switch(*cc)
798 {
799 case OP_SET_SOM:
800 case OP_RECURSE:
801 if (!setsom_found)
802 {
803 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
804 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
805 stackpos += (int)sizeof(sljit_w);
806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
807 stackpos += (int)sizeof(sljit_w);
808 setsom_found = TRUE;
809 }
810 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
811 break;
812
813 case OP_CBRA:
814 case OP_CBRAPOS:
815 case OP_SCBRA:
816 case OP_SCBRAPOS:
817 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
819 stackpos += (int)sizeof(sljit_w);
820 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
821 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
823 stackpos += (int)sizeof(sljit_w);
824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
825 stackpos += (int)sizeof(sljit_w);
826
827 cc += 1 + LINK_SIZE + IMM2_SIZE;
828 break;
829
830 default:
831 cc = next_opcode(common, cc);
832 SLJIT_ASSERT(cc != NULL);
833 break;
834 }
835
836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
837 SLJIT_ASSERT(stackpos == STACK(stacktop));
838 }
839
840 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
841 {
842 int localsize = 2;
843 pcre_uchar *alternative;
844 /* Calculate the sum of the local variables. */
845 while (cc < ccend)
846 {
847 switch(*cc)
848 {
849 case OP_ASSERT:
850 case OP_ASSERT_NOT:
851 case OP_ASSERTBACK:
852 case OP_ASSERTBACK_NOT:
853 case OP_ONCE:
854 case OP_ONCE_NC:
855 case OP_BRAPOS:
856 case OP_SBRA:
857 case OP_SBRAPOS:
858 case OP_SCOND:
859 localsize++;
860 cc += 1 + LINK_SIZE;
861 break;
862
863 case OP_CBRA:
864 case OP_SCBRA:
865 localsize++;
866 cc += 1 + LINK_SIZE + IMM2_SIZE;
867 break;
868
869 case OP_CBRAPOS:
870 case OP_SCBRAPOS:
871 localsize += 2;
872 cc += 1 + LINK_SIZE + IMM2_SIZE;
873 break;
874
875 case OP_COND:
876 /* Might be a hidden SCOND. */
877 alternative = cc + GET(cc, 1);
878 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
879 localsize++;
880 cc += 1 + LINK_SIZE;
881 break;
882
883 default:
884 cc = next_opcode(common, cc);
885 SLJIT_ASSERT(cc != NULL);
886 break;
887 }
888 }
889 SLJIT_ASSERT(cc == ccend);
890 return localsize;
891 }
892
/* Emits code that copies the local words owned by the opcodes in [cc, ccend)
between the locals area (addressed through SLJIT_LOCALS_REG) and the stack
(addressed through STACK_TOP).

save == TRUE: locals are written to the stack, starting with RECURSIVE_HEAD.
save == FALSE: the stack contents are written back into the locals.

Values travel through TMP1 and TMP2, used alternately: a register is loaded
on one step and stored one or two steps later. The order of the emitted
instructions is therefore significant.

stackptr and stacktop are stack indices; they are converted to byte offsets
with STACK() on entry. */
893 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
894 BOOL save, int stackptr, int stacktop)
895 {
896 DEFINE_COMPILER;
/* Local offsets produced by the current step; at most two per opcode. */
897 int srcw[2];
898 int count;
/* tmp1next selects the register for the next transfer; the *empty flags
record whether a register currently holds a value not yet written out. */
899 BOOL tmp1next = TRUE;
900 BOOL tmp1empty = TRUE;
901 BOOL tmp2empty = TRUE;
902 pcre_uchar *alternative;
903 enum {
904 start,
905 loop,
906 end
907 } status;
908
/* The `start` step (RECURSIVE_HEAD) is performed only when saving. */
909 status = save ? start : loop;
910 stackptr = STACK(stackptr - 2);
911 stacktop = STACK(stacktop - 1);
912
913 if (!save)
914 {
/* Restoring: skip the first slot (NOTE(review): presumably the saved
RECURSIVE_HEAD, which the save path writes first - confirm), then preload
up to two values into TMP1 and TMP2. */
915 stackptr += sizeof(sljit_w);
916 if (stackptr < stacktop)
917 {
918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
919 stackptr += sizeof(sljit_w);
920 tmp1empty = FALSE;
921 }
922 if (stackptr < stacktop)
923 {
924 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
925 stackptr += sizeof(sljit_w);
926 tmp2empty = FALSE;
927 }
928 /* The tmp1next must be TRUE in either way. */
929 }
930
931 while (status != end)
932 {
933 count = 0;
934 switch(status)
935 {
936 case start:
937 SLJIT_ASSERT(save);
938 count = 1;
939 srcw[0] = RECURSIVE_HEAD;
940 status = loop;
941 break;
942
943 case loop:
944 if (cc >= ccend)
945 {
946 status = end;
947 break;
948 }
949
/* Determine which local words (if any) belong to the opcode at cc. */
950 switch(*cc)
951 {
952 case OP_ASSERT:
953 case OP_ASSERT_NOT:
954 case OP_ASSERTBACK:
955 case OP_ASSERTBACK_NOT:
956 case OP_ONCE:
957 case OP_ONCE_NC:
958 case OP_BRAPOS:
959 case OP_SBRA:
960 case OP_SBRAPOS:
961 case OP_SCOND:
962 count = 1;
963 srcw[0] = PRIV_DATA(cc);
964 SLJIT_ASSERT(srcw[0] != 0);
965 cc += 1 + LINK_SIZE;
966 break;
967
968 case OP_CBRA:
969 case OP_SCBRA:
970 count = 1;
971 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
972 cc += 1 + LINK_SIZE + IMM2_SIZE;
973 break;
974
975 case OP_CBRAPOS:
976 case OP_SCBRAPOS:
/* Two words; srcw[1] is transferred first because the loop below counts down. */
977 count = 2;
978 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
979 srcw[0] = PRIV_DATA(cc);
980 SLJIT_ASSERT(srcw[0] != 0);
981 cc += 1 + LINK_SIZE + IMM2_SIZE;
982 break;
983
984 case OP_COND:
985 /* Might be a hidden SCOND. */
986 alternative = cc + GET(cc, 1);
987 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
988 {
989 count = 1;
990 srcw[0] = PRIV_DATA(cc);
991 SLJIT_ASSERT(srcw[0] != 0);
992 }
993 cc += 1 + LINK_SIZE;
994 break;
995
996 default:
997 cc = next_opcode(common, cc);
998 SLJIT_ASSERT(cc != NULL);
999 break;
1000 }
1001 break;
1002
1003 case end:
1004 SLJIT_ASSERT_STOP();
1005 break;
1006 }
1007
/* Transfer the words collected for this step. */
1008 while (count > 0)
1009 {
1010 count--;
1011 if (save)
1012 {
/* Saving: write out the previous content of the chosen register (if any),
then load the next local into it. */
1013 if (tmp1next)
1014 {
1015 if (!tmp1empty)
1016 {
1017 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1018 stackptr += sizeof(sljit_w);
1019 }
1020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1021 tmp1empty = FALSE;
1022 tmp1next = FALSE;
1023 }
1024 else
1025 {
1026 if (!tmp2empty)
1027 {
1028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1029 stackptr += sizeof(sljit_w);
1030 }
1031 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1032 tmp2empty = FALSE;
1033 tmp1next = TRUE;
1034 }
1035 }
1036 else
1037 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack if anything is left. */
1038 if (tmp1next)
1039 {
1040 SLJIT_ASSERT(!tmp1empty);
1041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1042 tmp1empty = stackptr >= stacktop;
1043 if (!tmp1empty)
1044 {
1045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1046 stackptr += sizeof(sljit_w);
1047 }
1048 tmp1next = FALSE;
1049 }
1050 else
1051 {
1052 SLJIT_ASSERT(!tmp2empty);
1053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1054 tmp2empty = stackptr >= stacktop;
1055 if (!tmp2empty)
1056 {
1057 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1058 stackptr += sizeof(sljit_w);
1059 }
1060 tmp1next = TRUE;
1061 }
1062 }
1063 }
1064 }
1065
1066 if (save)
1067 {
/* Flush the values still held in the registers, oldest first: the register
that would be used next holds the older value. */
1068 if (tmp1next)
1069 {
1070 if (!tmp1empty)
1071 {
1072 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1073 stackptr += sizeof(sljit_w);
1074 }
1075 if (!tmp2empty)
1076 {
1077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1078 stackptr += sizeof(sljit_w);
1079 }
1080 }
1081 else
1082 {
1083 if (!tmp2empty)
1084 {
1085 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1086 stackptr += sizeof(sljit_w);
1087 }
1088 if (!tmp1empty)
1089 {
1090 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1091 stackptr += sizeof(sljit_w);
1092 }
1093 }
1094 }
/* Every opcode was visited and exactly the reserved stack range was used. */
1095 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1096 }
1097
1098 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1099 {
1100 return (value & (value - 1)) == 0;
1101 }
1102
1103 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1104 {
1105 while (list)
1106 {
1107 /* sljit_set_label is clever enough to do nothing
1108 if either the jump or the label is NULL */
1109 sljit_set_label(list->jump, label);
1110 list = list->next;
1111 }
1112 }
1113
1114 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1115 {
1116 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1117 if (list_item)
1118 {
1119 list_item->next = *list;
1120 list_item->jump = jump;
1121 *list = list_item;
1122 }
1123 }
1124
1125 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1126 {
1127 DEFINE_COMPILER;
1128 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1129
1130 if (list_item)
1131 {
1132 list_item->type = type;
1133 list_item->data = data;
1134 list_item->start = start;
1135 list_item->leave = LABEL();
1136 list_item->next = common->stubs;
1137 common->stubs = list_item;
1138 }
1139 }
1140
1141 static void flush_stubs(compiler_common *common)
1142 {
1143 DEFINE_COMPILER;
1144 stub_list* list_item = common->stubs;
1145
1146 while (list_item)
1147 {
1148 JUMPHERE(list_item->start);
1149 switch(list_item->type)
1150 {
1151 case stack_alloc:
1152 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1153 break;
1154 }
1155 JUMPTO(SLJIT_JUMP, list_item->leave);
1156 list_item = list_item->next;
1157 }
1158 common->stubs = NULL;
1159 }
1160
1161 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1162 {
1163 DEFINE_COMPILER;
1164
1165 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1166 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1167 }
1168
1169 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1170 {
1171 /* May destroy all locals and registers except TMP2. */
1172 DEFINE_COMPILER;
1173
1174 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1175 #ifdef DESTROY_REGISTERS
1176 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1177 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1178 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1181 #endif
1182 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1183 }
1184
1185 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1186 {
1187 DEFINE_COMPILER;
1188 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1189 }
1190
1191 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1192 {
1193 DEFINE_COMPILER;
1194 struct sljit_label *loop;
1195 int i;
1196 /* At this point we can freely use all temporary registers. */
1197 /* TMP1 returns with begin - 1. */
1198 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1199 if (length < 8)
1200 {
1201 for (i = 0; i < length; i++)
1202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1203 }
1204 else
1205 {
1206 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1207 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1208 loop = LABEL();
1209 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1211 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1212 }
1213 }
1214
1215 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1216 {
1217 DEFINE_COMPILER;
1218 struct sljit_label *loop;
1219 struct sljit_jump *earlyexit;
1220
1221 /* At this point we can freely use all registers. */
1222 OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1224
1225 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1226 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1227 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1228 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1229 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1230 /* Unlikely, but possible */
1231 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1232 loop = LABEL();
1233 OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1234 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1235 /* Copy the integer value to the output buffer */
1236 #ifdef COMPILE_PCRE16
1237 OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
1238 #endif
1239 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
1240 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1241 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1242 JUMPHERE(earlyexit);
1243
1244 /* Calculate the return value, which is the maximum ovector value. */
1245 if (topbracket > 1)
1246 {
1247 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1248 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1249
1250 /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */
1251 loop = LABEL();
1252 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1253 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1254 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);
1255 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1256 }
1257 else
1258 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1259 }
1260
1261 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1262 {
1263 /* Detects if the character has an othercase. */
1264 unsigned int c;
1265
1266 #ifdef SUPPORT_UTF8
1267 if (common->utf8)
1268 {
1269 GETCHAR(c, cc);
1270 if (c > 127)
1271 {
1272 #ifdef SUPPORT_UCP
1273 return c != UCD_OTHERCASE(c);
1274 #else
1275 return FALSE;
1276 #endif
1277 }
1278 }
1279 else
1280 #endif
1281 c = *cc;
1282 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1283 }
1284
1285 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1286 {
1287 /* Returns with the othercase. */
1288 #ifdef SUPPORT_UTF8
1289 if (common->utf8 && c > 127)
1290 {
1291 #ifdef SUPPORT_UCP
1292 return UCD_OTHERCASE(c);
1293 #else
1294 return c;
1295 #endif
1296 }
1297 #endif
1298 return TABLE_GET(c, common->fcc, c);
1299 }
1300
1301 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1302 {
1303 /* Detects if the character and its othercase has only 1 bit difference. */
1304 unsigned int c, oc, bit;
1305 #ifdef SUPPORT_UTF8
1306 int n;
1307 #endif
1308
1309 #ifdef SUPPORT_UTF8
1310 if (common->utf8)
1311 {
1312 GETCHAR(c, cc);
1313 if (c <= 127)
1314 oc = common->fcc[c];
1315 else
1316 {
1317 #ifdef SUPPORT_UCP
1318 oc = UCD_OTHERCASE(c);
1319 #else
1320 oc = c;
1321 #endif
1322 }
1323 }
1324 else
1325 {
1326 c = *cc;
1327 oc = common->fcc[c];
1328 }
1329 #else
1330 c = *cc;
1331 oc = common->fcc[c];
1332 #endif
1333
1334 SLJIT_ASSERT(c != oc);
1335
1336 bit = c ^ oc;
1337 /* Optimized for English alphabet. */
1338 if (c <= 127 && bit == 0x20)
1339 return (0 << 8) | 0x20;
1340
1341 /* Since c != oc, they must have at least 1 bit difference. */
1342 if (!ispowerof2(bit))
1343 return 0;
1344
1345 #ifdef SUPPORT_UTF8
1346 if (common->utf8 && c > 127)
1347 {
1348 n = PRIV(utf8_table4)[*cc & 0x3f];
1349 while ((bit & 0x3f) == 0)
1350 {
1351 n--;
1352 bit >>= 6;
1353 }
1354 return (n << 8) | bit;
1355 }
1356 #endif
1357 return (0 << 8) | bit;
1358 }
1359
1360 static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1361 {
1362 DEFINE_COMPILER;
1363 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1364 }
1365
1366 static void read_char(compiler_common *common)
1367 {
1368 /* Reads the character into TMP1, updates STR_PTR.
1369 Does not check STR_END. TMP2 Destroyed. */
1370 DEFINE_COMPILER;
1371 #ifdef SUPPORT_UTF8
1372 struct sljit_jump *jump;
1373 #endif
1374
1375 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1376 #ifdef SUPPORT_UTF8
1377 if (common->utf8)
1378 {
1379 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1380 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1381 JUMPHERE(jump);
1382 }
1383 #endif
1384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1385 }
1386
1387 static void peek_char(compiler_common *common)
1388 {
1389 /* Reads the character into TMP1, keeps STR_PTR.
1390 Does not check STR_END. TMP2 Destroyed. */
1391 DEFINE_COMPILER;
1392 #ifdef SUPPORT_UTF8
1393 struct sljit_jump *jump;
1394 #endif
1395
1396 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1397 #ifdef SUPPORT_UTF8
1398 if (common->utf8)
1399 {
1400 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1401 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1402 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1403 JUMPHERE(jump);
1404 }
1405 #endif
1406 }
1407
1408 static void read_char8_type(compiler_common *common)
1409 {
1410 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1411 DEFINE_COMPILER;
1412 #ifdef SUPPORT_UTF8
1413 struct sljit_jump *jump;
1414 #endif
1415
1416 #ifdef SUPPORT_UTF8
1417 if (common->utf8)
1418 {
1419 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1421 /* This can be an extra read in some situations, but hopefully
1422 it is a clever early read in most cases. */
1423 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1424 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1425 add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));
1426 JUMPHERE(jump);
1427 return;
1428 }
1429 #endif
1430 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1431 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1432 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
1433 }
1434
1435 static void skip_char_back(compiler_common *common)
1436 {
1437 /* Goes one character back. Only affects STR_PTR. Does not check begin. */
1438 DEFINE_COMPILER;
1439 #ifdef SUPPORT_UTF8
1440 struct sljit_label *label;
1441
1442 if (common->utf8)
1443 {
1444 label = LABEL();
1445 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1446 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1447 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1448 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1449 return;
1450 }
1451 #endif
1452 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1453 }
1454
1455 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1456 {
1457 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1458 DEFINE_COMPILER;
1459
1460 if (nltype == NLTYPE_ANY)
1461 {
1462 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1463 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1464 }
1465 else if (nltype == NLTYPE_ANYCRLF)
1466 {
1467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1468 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1470 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1471 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1472 }
1473 else
1474 {
1475 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);
1476 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1477 }
1478 }
1479
1480 #ifdef SUPPORT_UTF8
1481 static void do_utf8readchar(compiler_common *common)
1482 {
1483 /* Fast decoding an utf8 character. TMP1 contains the first byte
1484 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1485 DEFINE_COMPILER;
1486 struct sljit_jump *jump;
1487
1488 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1489 /* Searching for the first zero. */
1490 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1491 jump = JUMP(SLJIT_C_NOT_ZERO);
1492 /* 2 byte sequence */
1493 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1495 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1496 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1497 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1499 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
1500 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1501 JUMPHERE(jump);
1502
1503 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1504 jump = JUMP(SLJIT_C_NOT_ZERO);
1505 /* 3 byte sequence */
1506 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1507 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1508 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1509 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1510 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1512 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1513 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);
1514 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1515 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);
1517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1518 JUMPHERE(jump);
1519
1520 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);
1521 jump = JUMP(SLJIT_C_NOT_ZERO);
1522 /* 4 byte sequence */
1523 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1524 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1525 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1526 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1527 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1528 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1529 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1530 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1531 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1532 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1533 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);
1535 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1536 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);
1538 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1539 JUMPHERE(jump);
1540
1541 /* 5 byte sequence */
1542 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1543 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);
1544 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);
1545 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1546 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
1547 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1548 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1549 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1550 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1551 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1552 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1553 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1554 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1555 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1556 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);
1557 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);
1558 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1559 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1560 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);
1561 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1562 }
1563
1564 static void do_utf8readtype8(compiler_common *common)
1565 {
1566 /* Fast decoding an utf8 character type. TMP2 contains the first byte
1567 of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */
1568 DEFINE_COMPILER;
1569 struct sljit_jump *jump;
1570 struct sljit_jump *compare;
1571
1572 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1573
1574 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1575 jump = JUMP(SLJIT_C_NOT_ZERO);
1576 /* 2 byte sequence */
1577 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1579 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1580 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1581 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1582 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1583 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1584 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1585 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1586
1587 JUMPHERE(compare);
1588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1589 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1590 JUMPHERE(jump);
1591
1592 /* We only have types for characters less than 256. */
1593 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1596 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1597 }
1598
1599 #endif
1600
1601 #ifdef SUPPORT_UCP
1602
1603 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1604 #define UCD_BLOCK_MASK 127
1605 #define UCD_BLOCK_SHIFT 7
1606
1607 static void do_getucd(compiler_common *common)
1608 {
1609 /* Search the UCD record for the character comes in TMP1.
1610 Returns chartype in TMP1 and UCD offset in TMP2. */
1611 DEFINE_COMPILER;
1612
1613 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1614
1615 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1616 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1617 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1618 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1619 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1620 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1622 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1623 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1624 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1625 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1626 }
1627 #endif
1628
1629 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1630 {
1631 DEFINE_COMPILER;
1632 struct sljit_label *mainloop;
1633 struct sljit_label *newlinelabel = NULL;
1634 struct sljit_jump *start;
1635 struct sljit_jump *end = NULL;
1636 struct sljit_jump *nl = NULL;
1637 #ifdef SUPPORT_UTF8
1638 struct sljit_jump *singlebyte;
1639 #endif
1640 jump_list *newline = NULL;
1641 BOOL newlinecheck = FALSE;
1642 BOOL readuchar = FALSE;
1643
1644 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1645 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1646 newlinecheck = TRUE;
1647
1648 if (firstline)
1649 {
1650 /* Search for the end of the first line. */
1651 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1652 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1653
1654 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1655 {
1656 mainloop = LABEL();
1657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1658 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1659 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1661 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1662 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1663 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1664 }
1665 else
1666 {
1667 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1668 mainloop = LABEL();
1669 /* Continual stores does not cause data dependency. */
1670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1671 read_char(common);
1672 check_newlinechar(common, common->nltype, &newline, TRUE);
1673 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1675 set_jumps(newline, LABEL());
1676 }
1677
1678 JUMPHERE(end);
1679 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1680 }
1681
1682 start = JUMP(SLJIT_JUMP);
1683
1684 if (newlinecheck)
1685 {
1686 newlinelabel = LABEL();
1687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1688 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1689 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1691 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1693 nl = JUMP(SLJIT_JUMP);
1694 }
1695
1696 mainloop = LABEL();
1697
1698 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1699 #ifdef SUPPORT_UTF8
1700 if (common->utf8) readuchar = TRUE;
1701 #endif
1702 if (newlinecheck) readuchar = TRUE;
1703
1704 if (readuchar)
1705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1706
1707 if (newlinecheck)
1708 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1709
1710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1711 #ifdef SUPPORT_UTF8
1712 if (common->utf8)
1713 {
1714 singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1717 JUMPHERE(singlebyte);
1718 }
1719 #endif
1720 JUMPHERE(start);
1721
1722 if (newlinecheck)
1723 {
1724 JUMPHERE(end);
1725 JUMPHERE(nl);
1726 }
1727
1728 return mainloop;
1729 }
1730
1731 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar firstchar, BOOL caseless, BOOL firstline)
1732 {
1733 DEFINE_COMPILER;
1734 struct sljit_label *start;
1735 struct sljit_jump *leave;
1736 struct sljit_jump *found;
1737 pcre_uchar oc, bit;
1738
1739 if (firstline)
1740 {
1741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1742 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1743 }
1744
1745 start = LABEL();
1746 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1747 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1748
1749 oc = firstchar;
1750 if (caseless)
1751 oc = TABLE_GET(firstchar, common->fcc, firstchar);
1752 if (firstchar == oc)
1753 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstchar);
1754 else
1755 {
1756 bit = firstchar ^ oc;
1757 if (ispowerof2(bit))
1758 {
1759 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1760 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstchar | bit);
1761 }
1762 else
1763 {
1764 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstchar);
1765 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1766 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1767 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1768 found = JUMP(SLJIT_C_NOT_ZERO);
1769 }
1770 }
1771
1772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1773 #ifdef SUPPORT_UTF8
1774 if (common->utf8)
1775 {
1776 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1777 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1778 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1779 }
1780 #endif
1781 JUMPTO(SLJIT_JUMP, start);
1782 JUMPHERE(found);
1783 JUMPHERE(leave);
1784
1785 if (firstline)
1786 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1787 }
1788
1789 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1790 {
1791 DEFINE_COMPILER;
1792 struct sljit_label *loop;
1793 struct sljit_jump *lastchar;
1794 struct sljit_jump *firstchar;
1795 struct sljit_jump *leave;
1796 struct sljit_jump *foundcr = NULL;
1797 struct sljit_jump *notfoundnl;
1798 jump_list *newline = NULL;
1799
1800 if (firstline)
1801 {
1802 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1803 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1804 }
1805
1806 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1807 {
1808 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1809 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1810 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1811 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1812 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1813
1814 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
1815 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1816 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1817 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1818
1819 loop = LABEL();
1820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1821 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1822 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
1823 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
1824 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1825 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1826
1827 JUMPHERE(leave);
1828 JUMPHERE(firstchar);
1829 JUMPHERE(lastchar);
1830
1831 if (firstline)
1832 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1833 return;
1834 }
1835
1836 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1837 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1838 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1839 skip_char_back(common);
1840
1841 loop = LABEL();
1842 read_char(common);
1843 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1844 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1845 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1846 check_newlinechar(common, common->nltype, &newline, FALSE);
1847 set_jumps(newline, loop);
1848
1849 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1850 {
1851 leave = JUMP(SLJIT_JUMP);
1852 JUMPHERE(foundcr);
1853 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1854 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1855 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1856 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1857 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1858 JUMPHERE(notfoundnl);
1859 JUMPHERE(leave);
1860 }
1861 JUMPHERE(lastchar);
1862 JUMPHERE(firstchar);
1863
1864 if (firstline)
1865 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1866 }
1867
1868 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
1869 {
1870 DEFINE_COMPILER;
1871 struct sljit_label *start;
1872 struct sljit_jump *leave;
1873 struct sljit_jump *found;
1874
1875 if (firstline)
1876 {
1877 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1878 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1879 }
1880
1881 start = LABEL();
1882 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1883 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1884 #ifdef SUPPORT_UTF
1885 if (common->utf8)
1886 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1887 #endif
1888 #ifndef COMPILE_PCRE8
1889 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xff);
1890 #endif
1891 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
1892 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
1893 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
1894 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
1895 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
1896 found = JUMP(SLJIT_C_NOT_ZERO);
1897
1898 #ifdef SUPPORT_UTF
1899 if (common->utf8)
1900 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
1901 #endif
1902 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1903 #ifdef SUPPORT_UTF8
1904 if (common->utf8)
1905 {
1906 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1907 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1909 }
1910 #endif
1911 JUMPTO(SLJIT_JUMP, start);
1912 JUMPHERE(found);
1913 JUMPHERE(leave);
1914
1915 if (firstline)
1916 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1917 }
1918
1919 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar reqchar, BOOL caseless, BOOL has_firstchar)
1920 {
1921 DEFINE_COMPILER;
1922 struct sljit_label *loop;
1923 struct sljit_jump *toolong;
1924 struct sljit_jump *alreadyfound;
1925 struct sljit_jump *found;
1926 struct sljit_jump *foundoc = NULL;
1927 struct sljit_jump *notfound;
1928 pcre_uchar oc, bit;
1929
1930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
1931 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
1932 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
1933 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
1934
1935 if (has_firstchar)
1936 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
1937 else
1938 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
1939
1940 loop = LABEL();
1941 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
1942
1943 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1944 oc = reqchar;
1945 if (caseless)
1946 oc = TABLE_GET(reqchar, common->fcc, reqchar);
1947 if (reqchar == oc)
1948 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
1949 else
1950 {
1951 bit = reqchar ^ oc;
1952 if (ispowerof2(bit))
1953 {
1954 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
1955 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar | bit);
1956 }
1957 else
1958 {
1959 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
1960 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
1961 }
1962 }
1963 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1964 JUMPTO(SLJIT_JUMP, loop);
1965
1966 JUMPHERE(found);
1967 if (foundoc)
1968 JUMPHERE(foundoc);
1969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
1970 JUMPHERE(alreadyfound);
1971 JUMPHERE(toolong);
1972 return notfound;
1973 }
1974
1975 static void do_revertframes(compiler_common *common)
1976 {
1977 DEFINE_COMPILER;
1978 struct sljit_jump *jump;
1979 struct sljit_label *mainloop;
1980
1981 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1982 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
1983
1984 /* Drop frames until we reach STACK_TOP. */
1985 mainloop = LABEL();
1986 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1987 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1988 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
1989 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
1990 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
1991 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
1992 JUMPTO(SLJIT_JUMP, mainloop);
1993
1994 JUMPHERE(jump);
1995 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1996 /* End of dropping frames. */
1997 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1998
1999 JUMPHERE(jump);
2000 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2001 /* Set string begin. */
2002 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2003 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2005 JUMPTO(SLJIT_JUMP, mainloop);
2006
2007 JUMPHERE(jump);
2008 /* Unknown command. */
2009 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2010 JUMPTO(SLJIT_JUMP, mainloop);
2011 }
2012
2013 static void check_wordboundary(compiler_common *common)
2014 {
2015 DEFINE_COMPILER;
2016 struct sljit_jump *beginend;
2017 #ifdef SUPPORT_UTF8
2018 struct sljit_jump *jump;
2019 #endif
2020
2021 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2022
2023 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2024 /* Get type of the previous char, and put it to LOCALS1. */
2025 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2026 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2028 beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2029 skip_char_back(common);
2030 read_char(common);
2031
2032 /* Testing char type. */
2033 #ifdef SUPPORT_UCP
2034 if (common->useucp)
2035 {
2036 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2037 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2038 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2039 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2040 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2041 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2042 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2043 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2044 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2045 JUMPHERE(jump);
2046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2047 }
2048 else
2049 #endif
2050 {
2051 #ifdef SUPPORT_UTF8
2052 /* Here LOCALS1 has already been zeroed. */
2053 jump = NULL;
2054 if (common->utf8)
2055 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2056 #endif
2057 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2058 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2059 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2061 #ifdef SUPPORT_UTF8
2062 if (jump != NULL)
2063 JUMPHERE(jump);
2064 #endif
2065 }
2066 JUMPHERE(beginend);
2067
2068 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2069 beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2070 peek_char(common);
2071
2072 /* Testing char type. This is a code duplication. */
2073 #ifdef SUPPORT_UCP
2074 if (common->useucp)
2075 {
2076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2077 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2078 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2079 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2080 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2081 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2082 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2083 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2084 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2085 JUMPHERE(jump);
2086 }
2087 else
2088 #endif
2089 {
2090 #ifdef SUPPORT_UTF8
2091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2092 jump = NULL;
2093 if (common->utf8)
2094 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2095 #endif
2096 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2097 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2098 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2099 #ifdef SUPPORT_UTF8
2100 if (jump != NULL)
2101 JUMPHERE(jump);
2102 #endif
2103 }
2104 JUMPHERE(beginend);
2105
2106 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2107 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2108 }
2109
2110 static void check_anynewline(compiler_common *common)
2111 {
2112 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2113 DEFINE_COMPILER;
2114
2115 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2116
2117 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2118 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2119 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2120 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2121 #ifdef SUPPORT_UTF8
2122 if (common->utf8)
2123 {
2124 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2125 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2127 }
2128 #endif
2129 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2130 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2131 }
2132
2133 static void check_hspace(compiler_common *common)
2134 {
2135 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2136 DEFINE_COMPILER;
2137
2138 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2139
2140 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2141 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2142 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2143 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2144 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2145 #ifdef SUPPORT_UTF8
2146 if (common->utf8)
2147 {
2148 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2149 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2150 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2152 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2153 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2154 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2155 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2156 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2157 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2159 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2160 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2161 }
2162 #endif
2163 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2164
2165 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2166 }
2167
2168 static void check_vspace(compiler_common *common)
2169 {
2170 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2171 DEFINE_COMPILER;
2172
2173 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2174
2175 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2176 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2177 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2179 #ifdef SUPPORT_UTF8
2180 if (common->utf8)
2181 {
2182 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2183 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2185 }
2186 #endif
2187 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2188
2189 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2190 }
2191
2192 #define CHAR1 STR_END
2193 #define CHAR2 STACK_TOP
2194
2195 static void do_casefulcmp(compiler_common *common)
2196 {
2197 DEFINE_COMPILER;
2198 struct sljit_jump *jump;
2199 struct sljit_label *label;
2200
2201 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2202 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2203 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2205 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2206 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2207
2208 label = LABEL();
2209 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2210 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2211 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2212 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2213 JUMPTO(SLJIT_C_NOT_ZERO, label);
2214
2215 JUMPHERE(jump);
2216 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2217 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2218 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2219 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2220 }
2221
2222 #define LCC_TABLE STACK_LIMIT
2223
2224 static void do_caselesscmp(compiler_common *common)
2225 {
2226 DEFINE_COMPILER;
2227 struct sljit_jump *jump;
2228 struct sljit_label *label;
2229
2230 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2231 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2232
2233 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2234 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2236 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2237 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2238 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2239
2240 label = LABEL();
2241 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2242 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2243 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2244 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2245 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2246 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2247 JUMPTO(SLJIT_C_NOT_ZERO, label);
2248
2249 JUMPHERE(jump);
2250 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2251 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2252 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2253 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2254 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2255 }
2256
2257 #undef LCC_TABLE
2258 #undef CHAR1
2259 #undef CHAR2
2260
#ifdef SUPPORT_UTF8
#ifdef SUPPORT_UCP

/* Caseless comparison of two character sequences, done in C because it
would be ineffective to do in JIT level.

Arguments:
  src1   start of the first sequence
  args   JIT arguments; args->ptr is the start of the second sequence and
         args->end is its limit
  end1   end of the first sequence

Returns: the position in the second sequence after the last compared
         character when every character of [src1, end1) matched; NULL when
         the second sequence ran out or a character differed. A character
         matches when it equals its counterpart or the counterpart's
         UCD_OTHERCASE() value. */
static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
  int first_ch, second_ch;
  const pcre_uchar *second = args->ptr;
  const pcre_uchar *second_end = (pcre_uchar *)args->end;

  for (; src1 < end1; )
    {
    if (second >= second_end)
      return NULL;
    GETCHARINC(first_ch, src1);
    GETCHARINC(second_ch, second);
    if (!(first_ch == second_ch || first_ch == UCD_OTHERCASE(second_ch)))
      return NULL;
    }
  return second;
}

#endif
#endif
2284
2285 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2286 compare_context* context, jump_list **fallbacks)
2287 {
2288 DEFINE_COMPILER;
2289 unsigned int othercasebit = 0;
2290 pcre_uchar *othercasechar = NULL;
2291 #ifdef SUPPORT_UTF8
2292 int utf8length;
2293 #endif
2294
2295 if (caseless && char_has_othercase(common, cc))
2296 {
2297 othercasebit = char_get_othercase_bit(common, cc);
2298 SLJIT_ASSERT(othercasebit);
2299 /* Extracting bit difference info. */
2300 #ifdef COMPILE_PCRE8
2301 othercasechar = cc + (othercasebit >> 8);
2302 othercasebit &= 0xff;
2303 #else
2304 #ifdef COMPILE_PCRE16
2305 othercasechar = cc + (othercasebit >> 9);
2306 if ((othercasebit & 0x100) != 0)
2307 othercasebit = (othercasebit & 0xff) << 8;
2308 else
2309 othercasebit &= 0xff;
2310 #endif
2311 #endif
2312 }
2313
2314 if (context->sourcereg == -1)
2315 {
2316 #ifdef COMPILE_PCRE8
2317 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2318 if (context->length >= 4)
2319 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2320 else if (context->length >= 2)
2321 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2322 else
2323 #endif
2324 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2325 #else
2326 #ifdef COMPILE_PCRE16
2327 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2328 if (context->length >= 4)
2329 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2330 else
2331 #endif
2332 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2333 #endif
2334 #endif /* COMPILE_PCRE8 */
2335 context->sourcereg = TMP2;
2336 }
2337
2338 #ifdef SUPPORT_UTF8
2339 utf8length = 1;
2340 if (common->utf8 && *cc >= 0xc0)
2341 utf8length += PRIV(utf8_table4)[*cc & 0x3f];
2342
2343 do
2344 {
2345 #endif
2346
2347 context->length -= IN_UCHARS(1);
2348 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2349
2350 /* Unaligned read is supported. */
2351 if (othercasebit != 0 && othercasechar == cc)
2352 {
2353 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2354 context->oc.asuchars[context->ucharptr] = othercasebit;
2355 }
2356 else
2357 {
2358 context->c.asuchars[context->ucharptr] = *cc;
2359 context->oc.asuchars[context->ucharptr] = 0;
2360 }
2361 context->ucharptr++;
2362
2363 #ifdef COMPILE_PCRE8
2364 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2365 #else
2366 if (context->ucharptr >= 2 || context->length == 0)
2367 #endif
2368 {
2369 if (context->length >= 4)
2370 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2371 #ifdef COMPILE_PCRE8
2372 else if (context->length >= 2)
2373 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2374 else if (context->length >= 1)
2375 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2376 #else
2377 else if (context->length >= 2)
2378 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2379 #endif
2380 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2381
2382 switch(context->ucharptr)
2383 {
2384 case 4 / sizeof(pcre_uchar):
2385 if (context->oc.asint != 0)
2386 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2387 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2388 break;
2389
2390 case 2 / sizeof(pcre_uchar):
2391 if (context->oc.asshort != 0)
2392 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
2393 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
2394 break;
2395
2396 #ifdef COMPILE_PCRE8
2397 case 1:
2398 if (context->oc.asbyte != 0)
2399 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2400 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2401 break;
2402 #endif
2403
2404 default:
2405 SLJIT_ASSERT_STOP();
2406 break;
2407 }
2408 context->ucharptr = 0;
2409 }
2410
2411 #else
2412
2413 /* Unaligned read is unsupported. */
2414 #ifdef COMPILE_PCRE8
2415 if (context->length > 0)
2416 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2417 #else
2418 if (context->length > 0)
2419 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2420 #endif
2421 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2422
2423 if (othercasebit != 0 && othercasechar == cc)
2424 {
2425 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2426 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2427 }
2428 else
2429 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2430
2431 #endif
2432
2433 cc++;
2434 #ifdef SUPPORT_UTF8
2435 utf8length--;
2436 }
2437 while (utf8length > 0);
2438 #endif
2439
2440 return cc;
2441 }
2442
2443 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2444
2445 #define SET_TYPE_OFFSET(value) \
2446 if ((value) != typeoffset) \
2447 { \
2448 if ((value) > typeoffset) \
2449 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2450 else \
2451 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2452 } \
2453 typeoffset = (value);
2454
2455 #define SET_CHAR_OFFSET(value) \
2456 if ((value) != charoffset) \
2457 { \
2458 if ((value) > charoffset) \
2459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2460 else \
2461 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2462 } \
2463 charoffset = (value);
2464
2465 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2466 {
2467 DEFINE_COMPILER;
2468 jump_list *found = NULL;
2469 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2470 unsigned int c;
2471 int compares;
2472 struct sljit_jump *jump = NULL;
2473 pcre_uchar *ccbegin;
2474 #ifdef SUPPORT_UCP
2475 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2476 BOOL charsaved = FALSE;
2477 int typereg = TMP1, scriptreg = TMP1;
2478 unsigned int typeoffset;
2479 #endif
2480 int invertcmp, numberofcmps;
2481 unsigned int charoffset;
2482
2483 /* Although SUPPORT_UTF8 must be defined, we are not necessary in utf8 mode. */
2484 check_input_end(common, fallbacks);
2485 read_char(common);
2486
2487 if ((*cc++ & XCL_MAP) != 0)
2488 {
2489 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2490 #ifndef COMPILE_PCRE8
2491 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2492 #elif defined SUPPORT_UTF8
2493 if (common->utf8)
2494 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2495 #endif
2496
2497 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2498 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2499 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2500 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2501 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2502 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2503
2504 #ifndef COMPILE_PCRE8
2505 JUMPHERE(jump);
2506 #elif defined SUPPORT_UTF8
2507 if (common->utf8)
2508 JUMPHERE(jump);
2509 #endif
2510 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2511 #ifdef SUPPORT_UCP
2512 charsaved = TRUE;
2513 #endif
2514 cc += 32 / sizeof(pcre_uchar);
2515 }
2516
2517 /* Scanning the necessary info. */
2518 ccbegin = cc;
2519 compares = 0;
2520 while (*cc != XCL_END)
2521 {
2522 compares++;
2523 if (*cc == XCL_SINGLE)
2524 {
2525 cc += 2;
2526 #ifdef SUPPORT_UTF8
2527 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2528 #endif
2529 #ifdef SUPPORT_UCP
2530 needschar = TRUE;
2531 #endif
2532 }
2533 else if (*cc == XCL_RANGE)
2534 {
2535 cc += 2;
2536 #ifdef SUPPORT_UTF8
2537 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2538 #endif
2539 cc++;
2540 #ifdef SUPPORT_UTF8
2541 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2542 #endif
2543 #ifdef SUPPORT_UCP
2544 needschar = TRUE;
2545 #endif
2546 }
2547 #ifdef SUPPORT_UCP
2548 else
2549 {
2550 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2551 cc++;
2552 switch(*cc)
2553 {
2554 case PT_ANY:
2555 break;
2556
2557 case PT_LAMP:
2558 case PT_GC:
2559 case PT_PC:
2560 case PT_ALNUM:
2561 needstype = TRUE;
2562 break;
2563
2564 case PT_SC:
2565 needsscript = TRUE;
2566 break;
2567
2568 case PT_SPACE:
2569 case PT_PXSPACE:
2570 case PT_WORD:
2571 needstype = TRUE;
2572 needschar = TRUE;
2573 break;
2574
2575 default:
2576 SLJIT_ASSERT_STOP();
2577 break;
2578 }
2579 cc += 2;
2580 }
2581 #endif
2582 }
2583
2584 #ifdef SUPPORT_UCP
2585 /* Simple register allocation. TMP1 is preferred if possible. */
2586 if (needstype || needsscript)
2587 {
2588 if (needschar && !charsaved)
2589 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2590 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2591 if (needschar)
2592 {
2593 if (needstype)
2594 {
2595 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2596 typereg = RETURN_ADDR;
2597 }
2598
2599 if (needsscript)
2600 scriptreg = TMP3;
2601 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2602 }
2603 else if (needstype && needsscript)
2604 scriptreg = TMP3;
2605 /* In all other cases only one of them was specified, and that can goes to TMP1. */
2606
2607 if (needsscript)
2608 {
2609 if (scriptreg == TMP1)
2610 {
2611 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2612 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2613 }
2614 else
2615 {
2616 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2617 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2618 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2619 }
2620 }
2621 }
2622 #endif
2623
2624 /* Generating code. */
2625 cc = ccbegin;
2626 charoffset = 0;
2627 numberofcmps = 0;
2628 #ifdef SUPPORT_UCP
2629 typeoffset = 0;
2630 #endif
2631
2632 while (*cc != XCL_END)
2633 {
2634 compares--;
2635 invertcmp = (compares == 0 && list != fallbacks);
2636 jump = NULL;
2637
2638 if (*cc == XCL_SINGLE)
2639 {
2640 cc ++;
2641 #ifdef SUPPORT_UTF8
2642 if (common->utf8)
2643 {
2644 GETCHARINC(c, cc);
2645 }
2646 else
2647 #endif
2648 c = *cc++;
2649
2650 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2651 {
2652 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2653 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2654 numberofcmps++;
2655 }
2656 else if (numberofcmps > 0)
2657 {
2658 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2659 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2660 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2661 numberofcmps = 0;
2662 }
2663 else
2664 {
2665 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2666 numberofcmps = 0;
2667 }
2668 }
2669 else if (*cc == XCL_RANGE)
2670 {
2671 cc ++;
2672 #ifdef SUPPORT_UTF8
2673 if (common->utf8)
2674 {
2675 GETCHARINC(c, cc);
2676 }
2677 else
2678 #endif
2679 c = *cc++;
2680 SET_CHAR_OFFSET(c);
2681 #ifdef SUPPORT_UTF8
2682 if (common->utf8)
2683 {
2684 GETCHARINC(c, cc);
2685 }
2686 else
2687 #endif
2688 c = *cc++;
2689 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2690 {
2691 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2692 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2693 numberofcmps++;
2694 }
2695 else if (numberofcmps > 0)
2696 {
2697 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2698 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2699 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2700 numberofcmps = 0;
2701 }
2702 else
2703 {
2704 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2705 numberofcmps = 0;
2706 }
2707 }
2708 #ifdef SUPPORT_UCP
2709 else
2710 {
2711 if (*cc == XCL_NOTPROP)
2712 invertcmp ^= 0x1;
2713 cc++;
2714 switch(*cc)
2715 {
2716 case PT_ANY:
2717 if (list != fallbacks)
2718 {
2719 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2720 continue;
2721 }
2722 else if (cc[-1] == XCL_NOTPROP)
2723 continue;
2724 jump = JUMP(SLJIT_JUMP);
2725 break;
2726
2727 case PT_LAMP:
2728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2729 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2730 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2731 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2732 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2733 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2734 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2735 break;
2736
2737 case PT_GC:
2738 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
2739 SET_TYPE_OFFSET(c);
2740 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
2741 break;
2742
2743 case PT_PC:
2744 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2745 break;
2746
2747 case PT_SC:
2748 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2749 break;
2750
2751 case PT_SPACE:
2752 case PT_PXSPACE:
2753 if (*cc == PT_SPACE)
2754 {
2755 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2756 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2757 }
2758 SET_CHAR_OFFSET(9);
2759 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2760 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2761 if (*cc == PT_SPACE)
2762 JUMPHERE(jump);
2763
2764 SET_TYPE_OFFSET(ucp_Zl);
2765 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2766 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2767 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2768 break;
2769
2770 case PT_WORD:
2771 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2772 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2773 /* ... fall through */
2774
2775 case PT_ALNUM:
2776 SET_TYPE_OFFSET(ucp_Ll);
2777 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2778 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2779 SET_TYPE_OFFSET(ucp_Nd);
2780 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2781 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2782 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2783 break;
2784 }
2785 cc += 2;
2786 }
2787 #endif
2788
2789 if (jump != NULL)
2790 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2791 }
2792
2793 if (found != NULL)
2794 set_jumps(found, LABEL());
2795 }
2796
2797 #undef SET_TYPE_OFFSET
2798 #undef SET_CHAR_OFFSET
2799
2800 #endif
2801
2802 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2803 {
2804 DEFINE_COMPILER;
2805 int length;
2806 unsigned int c, oc, bit;
2807 compare_context context;
2808 struct sljit_jump *jump[4];
2809 #ifdef SUPPORT_UTF8
2810 struct sljit_label *label;
2811 #ifdef SUPPORT_UCP
2812 pcre_uchar propdata[5];
2813 #endif
2814 #endif
2815
2816 switch(type)
2817 {
2818 case OP_SOD:
2819 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2820 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2821 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2822 return cc;
2823
2824 case OP_SOM:
2825 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2826 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2827 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2828 return cc;
2829
2830 case OP_NOT_WORD_BOUNDARY:
2831 case OP_WORD_BOUNDARY:
2832 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
2833 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2834 return cc;
2835
2836 case OP_NOT_DIGIT:
2837 case OP_DIGIT:
2838 check_input_end(common, fallbacks);
2839 read_char8_type(common);
2840 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
2841 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2842 return cc;
2843
2844 case OP_NOT_WHITESPACE:
2845 case OP_WHITESPACE:
2846 check_input_end(common, fallbacks);
2847 read_char8_type(common);
2848 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
2849 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2850 return cc;
2851
2852 case OP_NOT_WORDCHAR:
2853 case OP_WORDCHAR:
2854 check_input_end(common, fallbacks);
2855 read_char8_type(common);
2856 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
2857 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2858 return cc;
2859
2860 case OP_ANY:
2861 check_input_end(common, fallbacks);
2862 read_char(common);
2863 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2864 {
2865 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
2866 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2867 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2868 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
2869 JUMPHERE(jump[1]);
2870 JUMPHERE(jump[0]);
2871 }
2872 else
2873 check_newlinechar(common, common->nltype, fallbacks, TRUE);
2874 return cc;
2875
2876 case OP_ALLANY:
2877 check_input_end(common, fallbacks);
2878 #ifdef SUPPORT_UTF8
2879 if (common->utf8)
2880 {
2881 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2882 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2883 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2884 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2885 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2886 JUMPHERE(jump[0]);
2887 return cc;
2888 }
2889 #endif
2890 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2891 return cc;
2892
2893 case OP_ANYBYTE:
2894 check_input_end(common, fallbacks);
2895 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2896 return cc;
2897
2898 #ifdef SUPPORT_UTF8
2899 #ifdef SUPPORT_UCP
2900 case OP_NOTPROP:
2901 case OP_PROP:
2902 propdata[0] = 0;
2903 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
2904 propdata[2] = cc[0];
2905 propdata[3] = cc[1];
2906 propdata[4] = XCL_END;
2907 compile_xclass_hotpath(common, propdata, fallbacks);
2908 return cc + 2;
2909 #endif
2910 #endif
2911
2912 case OP_ANYNL:
2913 check_input_end(common, fallbacks);
2914 read_char(common);
2915 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2916 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2917 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2918 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2920 jump[3] = JUMP(SLJIT_JUMP);
2921 JUMPHERE(jump[0]);
2922 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
2923 JUMPHERE(jump[1]);
2924 JUMPHERE(jump[2]);
2925 JUMPHERE(jump[3]);
2926 return cc;
2927
2928 case OP_NOT_HSPACE:
2929 case OP_HSPACE:
2930 check_input_end(common, fallbacks);
2931 read_char(common);
2932 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
2933 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2934 return cc;
2935
2936 case OP_NOT_VSPACE:
2937 case OP_VSPACE:
2938 check_input_end(common, fallbacks);
2939 read_char(common);
2940 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
2941 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2942 return cc;
2943
2944 #ifdef SUPPORT_UCP
2945 case OP_EXTUNI:
2946 check_input_end(common, fallbacks);
2947 read_char(common);
2948 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2949 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2950 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
2951
2952 label = LABEL();
2953 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2954 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2955 read_char(common);
2956 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2957 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2958 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
2959
2960 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2961 JUMPHERE(jump[0]);
2962 return cc;
2963 #endif
2964
2965 case OP_EODN:
2966 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2967 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2968 {
2969 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2970 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2971 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2972 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
2973 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
2974 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
2975 }
2976 else if (common->nltype == NLTYPE_FIXED)
2977 {
2978 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2979 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2980 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2981 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2982 }
2983 else
2984 {
2985 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2986 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2987 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2988 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
2989 jump[2] = JUMP(SLJIT_C_GREATER);
2990 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
2991 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 1);
2992 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2993 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
2994
2995 JUMPHERE(jump[1]);
2996 if (common->nltype == NLTYPE_ANYCRLF)
2997 {
2998 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2999 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3000 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3001 }
3002 else
3003 {
3004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3005 read_char(common);
3006 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3007 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3008 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3009 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3010 }
3011 JUMPHERE(jump[2]);
3012 JUMPHERE(jump[3]);
3013 }
3014 JUMPHERE(jump[0]);
3015 return cc;
3016
3017 case OP_EOD:
3018 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3019 return cc;
3020
3021 case OP_CIRC:
3022 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3024 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3025 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3026 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3027 return cc;
3028
3029 case OP_CIRCM:
3030 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3032 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3033 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3034 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3035 jump[0] = JUMP(SLJIT_JUMP);
3036 JUMPHERE(jump[1]);
3037
3038 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, end));
3039 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, STR_PTR, 0));
3040
3041 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3042 {
3043 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3044 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3045 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
3046 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
3047 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3048 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3049 }
3050 else
3051 {
3052 skip_char_back(common);
3053 read_char(common);
3054 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3055 }
3056 JUMPHERE(jump[0]);
3057 return cc;
3058
3059 case OP_DOLL:
3060 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3061 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3062 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3063
3064 if (!common->endonly)
3065 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3066 else
3067 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3068 return cc;
3069
3070 case OP_DOLLM:
3071 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3072 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3073 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3074 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3075 jump[0] = JUMP(SLJIT_JUMP);
3076 JUMPHERE(jump[1]);
3077
3078 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3079 {
3080 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3081 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3082 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3083 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
3084 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3085 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3086 }
3087 else
3088 {
3089 peek_char(common);
3090 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3091 }
3092 JUMPHERE(jump[0]);
3093 return cc;
3094
3095 case OP_CHAR:
3096 case OP_CHARI:
3097 length = 1;
3098 #ifdef SUPPORT_UTF8
3099 if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
3100 #endif
3101 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3102 {
3103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3104 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3105
3106 context.length = IN_UCHARS(length);
3107 context.sourcereg = -1;
3108 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3109 context.ucharptr = 0;
3110 #endif
3111 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3112 }
3113 check_input_end(common, fallbacks);
3114 read_char(common);
3115 #ifdef SUPPORT_UTF8
3116 if (common->utf8)
3117 {
3118 GETCHAR(c, cc);
3119 }
3120 else
3121 #endif
3122 c = *cc;
3123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3124 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3125 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3126 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3127 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3128 return cc + length;
3129
3130 case OP_NOT:
3131 case OP_NOTI:
3132 #ifdef SUPPORT_UTF8
3133 if (common->utf8)
3134 {
3135 length = 1;
3136 if (*cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
3137
3138 check_input_end(common, fallbacks);
3139 GETCHAR(c, cc);
3140
3141 if (c <= 127)
3142 {
3143 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3144 if (type == OP_NOT || !char_has_othercase(common, cc))
3145 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3146 else
3147 {
3148 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3149 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3150 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3151 }
3152 /* Skip the variable-length character. */
3153 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3154 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3155 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3157 JUMPHERE(jump[0]);
3158 return cc + length;
3159 }
3160 else
3161 read_char(common);
3162 }
3163 else
3164 #endif
3165 {
3166 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3167 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3168 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3169 c = *cc;
3170 }
3171
3172 if (type == OP_NOT || !char_has_othercase(common, cc))
3173 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3174 else
3175 {
3176 oc = char_othercase(common, c);
3177 bit = c ^ oc;
3178 if (ispowerof2(bit))
3179 {
3180 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3181 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3182 }
3183 else
3184 {
3185 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3186 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3187 }
3188 }
3189 return cc + 1;
3190
3191 case OP_CLASS:
3192 case OP_NCLASS:
3193 check_input_end(common, fallbacks);
3194 read_char(common);
3195 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3196 jump[0] = NULL;
3197 #ifdef SUPPORT_UTF8
3198 /* This check can only be skipped in pure 8 bit mode. */
3199 if (common->utf8)
3200 #endif
3201 {
3202 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3203 if (type == OP_CLASS)
3204 {
3205 add_jump(compiler, fallbacks, jump[0]);
3206 jump[0] = NULL;
3207 }
3208 }
3209 #endif
3210 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3211 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3212 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3213 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3214 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3215 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3216 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3217 if (jump[0] != NULL)
3218 JUMPHERE(jump[0]);
3219 #endif
3220 return cc + 32 / sizeof(pcre_uchar);
3221
3222 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3223 case OP_XCLASS:
3224 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3225 return cc + GET(cc, 0) - 1;
3226 #endif
3227
3228 case OP_REVERSE:
3229 length = GET(cc, 0);
3230 SLJIT_ASSERT(length > 0);
3231 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3232 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3233 #ifdef SUPPORT_UTF8
3234 if (common->utf8)
3235 {
3236 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3237 label = LABEL();
3238 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0));
3239 skip_char_back(common);
3240 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3241 JUMPTO(SLJIT_C_NOT_ZERO, label);
3242 return cc + LINK_SIZE;
3243 }
3244 #endif
3245 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3246 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3247 return cc + LINK_SIZE;
3248 }
3249 SLJIT_ASSERT_STOP();
3250 return cc;
3251 }
3252
3253 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3254 {
3255 /* This function consumes at least one input character. */
3256 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3257 DEFINE_COMPILER;
3258 pcre_uchar *ccbegin = cc;
3259 compare_context context;
3260 int size;
3261
3262 context.length = 0;
3263 do
3264 {
3265 if (cc >= ccend)
3266 break;
3267
3268 if (*cc == OP_CHAR)
3269 {
3270 size = 1;
3271 #ifdef SUPPORT_UTF8
3272 if (common->utf8 && cc[1] >= 0xc0)
3273 size += PRIV(utf8_table4)[cc[1] & 0x3f];
3274 #endif
3275 }
3276 else if (*cc == OP_CHARI)
3277 {
3278 size = 1;
3279 #ifdef SUPPORT_UTF8
3280 if (common->utf8)
3281 {
3282 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3283 size = 0;
3284 else if (cc[1] >= 0xc0)
3285 size += PRIV(utf8_table4)[cc[1] & 0x3f];
3286 }
3287 else
3288 #endif
3289 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3290 size = 0;
3291 }
3292 else
3293 size = 0;
3294
3295 cc += 1 + size;
3296 context.length += IN_UCHARS(size);
3297 }
3298 while (size > 0 && context.length <= 128);
3299
3300 cc = ccbegin;
3301 if (context.length > 0)
3302 {
3303 /* We have a fixed-length byte sequence. */
3304 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3305 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3306
3307 context.sourcereg = -1;
3308 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3309 context.ucharptr = 0;
3310 #endif
3311 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3312 return cc;
3313 }
3314
3315 /* A non-fixed length character will be checked if length == 0. */
3316 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3317 }
3318
3319 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3320 {
3321 DEFINE_COMPILER;
3322 int offset = GET2(cc, 1) << 1;
3323
3324 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3325 if (!common->jscript_compat)
3326 {
3327 if (fallbacks == NULL)
3328 {
3329 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3330 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3331 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3332 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3333 return JUMP(SLJIT_C_NOT_ZERO);
3334 }
3335 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3336 }
3337 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3338 }
3339
3340 /* Forward definitions. */
3341 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3342 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3343
/* Allocates a fallback record of `size` bytes with sljit_alloc_memory(),
clears it, stores `ccstart` as its pattern position and links it in front of
parent->top.  The variables `compiler`, `parent` and `fallback` must be in
scope at the point of use.  If the compiler reports an error after the
allocation, the enclosing function returns `error`. */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(fallback, 0, (size)); \
    fallback->cc = (ccstart); \
    fallback->prev = parent->top; \
    parent->top = fallback; \
    } \
  while (0)

/* Same as PUSH_FALLBACK, for use inside functions returning void: on a
compiler error the enclosing function simply returns. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->cc = (ccstart); \
    fallback->prev = parent->top; \
    parent->top = fallback; \
    } \
  while (0)

/* Views the in-scope `fallback` pointer as a pointer to the given record type. */
#define FALLBACK_AS(type) ((type*)fallback)
3371
3372 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3373 {
3374 DEFINE_COMPILER;
3375 int offset = GET2(cc, 1) << 1;
3376 struct sljit_jump *jump = NULL;
3377
3378 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3379 if (withchecks && !common->jscript_compat)
3380 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3381
3382 #ifdef SUPPORT_UTF8
3383 #ifdef SUPPORT_UCP
3384 if (common->utf8 && *cc == OP_REFI)
3385 {
3386 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3388 if (withchecks)
3389 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3390
3391 /* Needed to save important temporary registers. */
3392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3393 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3394 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3395 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3396 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3397 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3398 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3399 }
3400 else
3401 #endif
3402 #endif
3403 {
3404 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3405 if (withchecks)
3406 jump = JUMP(SLJIT_C_ZERO);
3407 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3408
3409 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3410 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3411 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3412 }
3413
3414 if (jump != NULL)
3415 {
3416 if (emptyfail)
3417 add_jump(compiler, fallbacks, jump);
3418 else
3419 JUMPHERE(jump);
3420 }
3421 return cc + 1 + IMM2_SIZE;
3422 }
3423
/* Generates the matching (hot) path for a back reference that is followed by
a repeat operator.  cc points at the reference item; the repeat opcode is read
from cc[1 + IMM2_SIZE].  An iterator_fallback record is pushed onto parent and
its hotpath label is set here.

Returns the pattern position after the repeat operator, or NULL when
PUSH_FALLBACK detects a compiler error. */
3424 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3425 {
3426 DEFINE_COMPILER;
3427 fallback_common *fallback;
3428 pcre_uchar type;
3429 struct sljit_label *label;
3430 struct sljit_jump *zerolength;
3431 struct sljit_jump *jump = NULL;
3432 pcre_uchar *ccbegin = cc;
3433 int min = 0, max = 0;
3434 BOOL minimize;
3435
3436 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3437
3438 type = cc[1 + IMM2_SIZE];
/* NOTE(review): this relies on every minimizing repeat opcode (OP_CRMIN*)
having bit 0 set and every greedy one having it clear - confirm against the
opcode numbering. */
3439 minimize = (type & 0x1) != 0;
/* Decode the repeat range and step cc over the repeat operator.  A max of 0
is handled below as "no upper limit". */
3440 switch(type)
3441 {
3442 case OP_CRSTAR:
3443 case OP_CRMINSTAR:
3444 min = 0;
3445 max = 0;
3446 cc += 1 + IMM2_SIZE + 1;
3447 break;
3448 case OP_CRPLUS:
3449 case OP_CRMINPLUS:
3450 min = 1;
3451 max = 0;
3452 cc += 1 + IMM2_SIZE + 1;
3453 break;
3454 case OP_CRQUERY:
3455 case OP_CRMINQUERY:
3456 min = 0;
3457 max = 1;
3458 cc += 1 + IMM2_SIZE + 1;
3459 break;
3460 case OP_CRRANGE:
3461 case OP_CRMINRANGE:
3462 min = GET2(cc, 1 + IMM2_SIZE + 1);
3463 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3464 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3465 break;
3466 default:
3467 SLJIT_ASSERT_STOP();
3468 break;
3469 }
3470
/* Greedy repeat: match the reference as often as allowed, pushing STR_PTR
onto the stack after each iteration that may serve as a backtrack point. */
3471 if (!minimize)
3472 {
3473 if (min == 0)
3474 {
/* Zero iterations are acceptable: push the current STR_PTR and a 0 below it.
With fallbacks == NULL, compile_ref_checks() returns a jump (zerolength)
instead of adding to a fallback list. */
3475 allocate_stack(common, 2);
3476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3478 /* Temporary release of STR_PTR. */
3479 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3480 zerolength = compile_ref_checks(common, ccbegin, NULL);
3481 /* Restore if not zero length. */
3482 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3483 }
3484 else
3485 {
/* At least one iteration is required: push only a 0, and let failed checks
go to the fallback list of this iterator. */
3486 allocate_stack(common, 1);
3487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3488 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3489 }
3490
/* An iteration counter is only needed for bounded ranges; it is kept in the
POSSESSIVE0 local slot. */
3491 if (min > 1 || max > 1)
3492 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3493
/* Start of one iteration.  The group checks were already emitted above, so
the reference is matched with withchecks == FALSE. */
3494 label = LABEL();
3495 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3496
3497 if (min > 1 || max > 1)
3498 {
3499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3500 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3501 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without pushing a backtrack point. */
3502 if (min > 1)
3503 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3504 if (max > 1)
3505 {
/* Leave the loop once max iterations were matched; otherwise push STR_PTR
and try another iteration. */
3506 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3507 allocate_stack(common, 1);
3508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3509 JUMPTO(SLJIT_JUMP, label);
3510 JUMPHERE(jump);
3511 }
3512 }
3513
3514 if (max == 0)
3515 {
3516 /* Includes min > 1 case as well. */
3517 allocate_stack(common, 1);
3518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3519 JUMPTO(SLJIT_JUMP, label);
3520 }
3521
/* The zerolength jump lands here, which is also the hotpath label stored in
the iterator_fallback record. */
3522 JUMPHERE(zerolength);
3523 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3524
3525 decrease_call_count(common);
3526 return cc;
3527 }
3528
/* Minimizing repeat.  Two stack slots are used: STACK(0) receives STR_PTR and
STACK(1) holds the iteration counter (not initialised for OP_CRMINSTAR, which
never reads it). */
3529 allocate_stack(common, 2);
3530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3531 if (type != OP_CRMINSTAR)
3532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3533
3534 if (min == 0)
3535 {
/* Zero iterations are acceptable: save STR_PTR and jump over the first
iteration. */
3536 zerolength = compile_ref_checks(common, ccbegin, NULL);
3537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3538 jump = JUMP(SLJIT_JUMP);
3539 }
3540 else
3541 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3542
/* The hotpath label marks the start of one iteration; the code emitted below
for min > 1 jumps back to it. */
3543 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
/* Fail once the counter in STACK(1) has reached max. */
3544 if (max > 0)
3545 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3546
3547 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3549
3550 if (min > 1)
3551 {
/* Count the iteration and repeat immediately while below the minimum. */
3552 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3553 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3555 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3556 }
3557 else if (max > 0)
3558 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3559
3560 if (jump != NULL)
3561 JUMPHERE(jump);
3562 JUMPHERE(zerolength);
3563
3564 decrease_call_count(common);
3565 return cc;
3566 }
3567
3568 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3569 {
3570 DEFINE_COMPILER;
3571 fallback_common *fallback;
3572 recurse_entry *entry = common->entries;
3573 recurse_entry *prev = NULL;
3574 int start = GET(cc, 1);
3575
3576 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3577 while (entry != NULL)
3578 {
3579 if (entry->start == start)
3580 break;
3581 prev = entry;
3582 entry = entry->next;
3583 }
3584
3585 if (entry == NULL)
3586 {
3587 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3588 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3589 return NULL;
3590 entry->next = NULL;
3591 entry->entry = NULL;
3592 entry->calls = NULL;
3593 entry->start = start;
3594
3595 if (prev != NULL)
3596 prev->next = entry;
3597 else
3598 common->entries = entry;
3599 }
3600
3601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3602 allocate_stack(common, 1);
3603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3604
3605 if (entry->entry == NULL)
3606 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3607 else
3608 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3609 /* Leave if the match is failed. */
3610 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3611 return cc + 1 + LINK_SIZE;
3612 }
3613
/* Generates code for an assertion group.  cc points at the assertion opcode
(checked below to lie in the range OP_ASSERT .. OP_ASSERTBACK_NOT), which may
be preceded by OP_BRAZERO or OP_BRAMINZERO unless conditional is set.

Every alternative of the group is compiled with compile_hotpath() followed by
compile_fallbackpath(), using a temporary fallback_common record.  The jumps
taken when the assertion as a whole fails go to fallback->condfailed if
conditional is set, otherwise to fallback->common.topfallbacks.

common->acceptlabel and common->accept are saved on entry and restored before
every return.  Returns the pattern position 1 + LINK_SIZE past the item that
ends the last alternative, or NULL when a compiler error is detected. */
3614 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3615 {
3616 DEFINE_COMPILER;
3617 int framesize;
3618 int localptr;
3619 fallback_common altfallback;
3620 pcre_uchar *ccbegin;
3621 pcre_uchar opcode;
3622 pcre_uchar bra = OP_BRA;
3623 jump_list *tmp = NULL;
3624 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3625 jump_list **found;
3626 /* Saving previous accept variables. */
3627 struct sljit_label *save_acceptlabel = common->acceptlabel;
3628 struct sljit_jump *jump;
3629 struct sljit_jump *brajump = NULL;
3630 jump_list *save_accept = common->accept;
3631
/* Remember an optional OP_BRAZERO / OP_BRAMINZERO prefix and step over it. */
3632 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3633 {
3634 SLJIT_ASSERT(!conditional);
3635 bra = *cc;
3636 cc++;
3637 }
3638 localptr = PRIV_DATA(cc);
3639 SLJIT_ASSERT(localptr != 0);
3640 framesize = get_framesize(common, cc, FALSE);
3641 fallback->framesize = framesize;
3642 fallback->localptr = localptr;
3643 opcode = *cc;
3644 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* "found" receives the jump emitted after an alternative has matched.  For a
positive assertion this is the local list tmp, which is bound in the "Assert
is successful" section; for a negative assertion a match means failure, so
the jump goes directly to target. */
3645 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin stays at the start of the current alternative, cc moves to its end. */
3646 ccbegin = cc;
3647 cc += GET(cc, 1);
3648
3649 if (bra == OP_BRAMINZERO)
3650 {
3651 /* This is a braminzero fallback path. */
3652 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3653 free_stack(common, 1);
3654 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3655 }
3656
/* Save the state needed to undo the assertion.  With framesize < 0 only
STACK_TOP (in the local slot) and STR_PTR (pushed) are saved; otherwise the
previous value of the local slot is pushed as well and init_frame() is
called.  NOTE(review): framesize < 0 presumably means that no frame has to be
recorded - confirm in get_framesize(). */
3657 if (framesize < 0)
3658 {
3659 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3660 allocate_stack(common, 1);
3661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3662 }
3663 else
3664 {
3665 allocate_stack(common, framesize + 2);
3666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3667 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3668 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3671 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3672 }
3673
/* Compile the alternatives one after another. */
3674 memset(&altfallback, 0, sizeof(fallback_common));
3675 while (1)
3676 {
/* Every alternative starts with empty accept and fallback lists. */
3677 common->acceptlabel = NULL;
3678 common->accept = NULL;
3679 altfallback.top = NULL;
3680 altfallback.topfallbacks = NULL;
3681
/* Second and later alternatives reload the subject position from STACK(0). */
3682 if (*ccbegin == OP_ALT)
3683 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3684
3685 altfallback.cc = ccbegin;
3686 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3687 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3688 {
3689 common->acceptlabel = save_acceptlabel;
3690 common->accept = save_accept;
3691 return NULL;
3692 }
/* The alternative matched when execution reaches this label; jumps collected
in common->accept are bound to it as well. */
3693 common->acceptlabel = LABEL();
3694 if (common->accept != NULL)
3695 set_jumps(common->accept, common->acceptlabel);
3696
3697 /* Reset stack. */
3698 if (framesize < 0)
3699 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3700 else {
3701 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3702 {
3703 /* We don't need to keep the STR_PTR, only the previous localptr. */
3704 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3705 }
3706 else
3707 {
3708 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3709 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3710 }
3711 }
3712
/* For a negative assertion a matching alternative means that the assertion
fails; restore what the failure path expects before jumping away. */
3713 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3714 {
3715 /* We know that STR_PTR was stored on the top of the stack. */
3716 if (conditional)
3717 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3718 else if (bra == OP_BRAZERO)
3719 {
3720 if (framesize < 0)
3721 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3722 else
3723 {
3724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3725 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3726 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3727 }
3728 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3730 }
3731 else if (framesize >= 0)
3732 {
3733 /* For OP_BRA and OP_BRAMINZERO. */
3734 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3735 }
3736 }
3737 add_jump(compiler, found, JUMP(SLJIT_JUMP));
3738
/* Emit the backtracking code of this alternative; its remaining failure
jumps are bound to the label that follows. */
3739 compile_fallbackpath(common, altfallback.top);
3740 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3741 {
3742 common->acceptlabel = save_acceptlabel;
3743 common->accept = save_accept;
3744 return NULL;
3745 }
3746 set_jumps(altfallback.topfallbacks, LABEL());
3747
3748 if (*cc != OP_ALT)
3749 break;
3750
/* Advance to the next alternative. */
3751 ccbegin = cc;
3752 cc += GET(cc, 1);
3753 }
3754 /* None of them matched. */
3755
3756 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3757 {
3758 /* Assert is failed. */
3759 if (conditional || bra == OP_BRAZERO)
3760 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3761
3762 if (framesize < 0)
3763 {
3764 /* The topmost item should be 0. */
3765 if (bra == OP_BRAZERO)
3766 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3767 else
3768 free_stack(common, 1);
3769 }
3770 else
3771 {
3772 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3773 /* The topmost item should be 0. */
3774 if (bra == OP_BRAZERO)
3775 {
3776 free_stack(common, framesize + 1);
3777 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3778 }
3779 else
3780 free_stack(common, framesize + 2);
3781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3782 }
/* Leave the failure path.  With OP_BRAZERO the jump is bound to the hotpath
label further down instead of being added to the failure target. */
3783 jump = JUMP(SLJIT_JUMP);
3784 if (bra != OP_BRAZERO)
3785 add_jump(compiler, target, jump);
3786
3787 /* Assert is successful. */
3788 set_jumps(tmp, LABEL());
3789 if (framesize < 0)
3790 {
3791 /* We know that STR_PTR was stored on the top of the stack. */
3792 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3793 /* Keep the STR_PTR on the top of the stack. */
3794 if (bra == OP_BRAZERO)
3795 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3796 else if (bra == OP_BRAMINZERO)
3797 {
3798 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3799 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3800 }
3801 }
3802 else
3803 {
3804 if (bra == OP_BRA)
3805 {
3806 /* We don't need to keep the STR_PTR, only the previous localptr. */
3807 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3808 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3809 }
3810 else
3811 {
3812 /* We don't need to keep the STR_PTR, only the previous localptr. */
3813 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
3814 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
3816 }
3817 }
3818
/* OP_BRAZERO: the hotpath label is created here and the failure jump above
is bound to it.  OP_BRAMINZERO: jump to the hotpath label (not set in this
function for that case), then emit the code reached through brajump. */
3819 if (bra == OP_BRAZERO)
3820 {
3821 fallback->hotpath = LABEL();
3822 sljit_set_label(jump, fallback->hotpath);
3823 }
3824 else if (bra == OP_BRAMINZERO)
3825 {
3826 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3827 JUMPHERE(brajump);
3828 if (framesize >= 0)
3829 {
3830 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3831 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3833 }
3834 set_jumps(fallback->common.topfallbacks, LABEL());
3835 }
3836 }
3837 else
3838 {
3839 /* AssertNot is successful. */
3840 if (framesize < 0)
3841 {
3842 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3843 if (bra != OP_BRA)
3844 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3845 else
3846 free_stack(common, 1);
3847 }
3848 else
3849 {
3850 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3851 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3852 /* The topmost item should be 0. */
3853 if (bra != OP_BRA)
3854 {
3855 free_stack(common, framesize + 1);
3856 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3857 }
3858 else
3859 free_stack(common, framesize + 2);
3860 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3861 }
3862
3863 if (bra == OP_BRAZERO)
3864 fallback->hotpath = LABEL();
3865 else if (bra == OP_BRAMINZERO)
3866 {
3867 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3868 JUMPHERE(brajump);
3869 }
3870
/* With a BRAZERO / BRAMINZERO prefix the failure jumps collected so far are
bound right here and the list is emptied. */
3871 if (bra != OP_BRA)
3872 {
3873 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
3874 set_jumps(fallback->common.topfallbacks, LABEL());
3875 fallback->common.topfallbacks = NULL;
3876 }
3877 }
3878
/* Restore the accept state of the enclosing context. */
3879 common->acceptlabel = save_acceptlabel;
3880 common->accept = save_accept;
3881 return cc + 1 + LINK_SIZE;
3882 }
3883
3884 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
3885 {
3886 int condition = FALSE;
3887 pcre_uchar *slotA = name_table;
3888 pcre_uchar *slotB;
3889 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3890 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3891 sljit_w no_capture;
3892 int i;
3893
3894 locals += OVECTOR_START / sizeof(sljit_w);
3895 no_capture = locals[1];
3896
3897 for (i = 0; i < name_count; i++)
3898 {
3899 if (GET2(slotA, 0) == refno) break;
3900 slotA += name_entry_size;
3901 }
3902
3903 if (i < name_count)
3904 {
3905 /* Found a name for the number - there can be only one; duplicate names
3906 for different numbers are allowed, but not vice versa. First scan down
3907 for duplicates. */
3908
3909 slotB = slotA;
3910 while (slotB > name_table)
3911 {
3912 slotB -= name_entry_size;
3913 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3914 {
3915 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3916 if (condition) break;
3917 }
3918 else break;
3919 }
3920
3921 /* Scan up for duplicates */
3922 if (!condition)
3923 {
3924 slotB = slotA;
3925 for (i++; i < name_count; i++)
3926 {
3927 slotB += name_entry_size;
3928 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3929 {
3930 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3931 if (condition) break;
3932 }
3933 else break;
3934 }
3935 }
3936 }
3937 return condition;
3938 }
3939
3940 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
3941 {
3942 int condition = FALSE;
3943 pcre_uchar *slotA = name_table;
3944 pcre_uchar *slotB;
3945 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3946 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3947 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
3948 int i;
3949
3950 for (i = 0; i < name_count; i++)
3951 {
3952 if (GET2(slotA, 0) == recno) break;
3953 slotA += name_entry_size;
3954 }
3955
3956 if (i < name_count)
3957 {
3958 /* Found a name for the number - there can be only one; duplicate
3959 names for different numbers are allowed, but not vice versa. First
3960 scan down for duplicates. */
3961
3962 slotB = slotA;
3963 while (slotB > name_table)
3964 {
3965 slotB -= name_entry_size;
3966 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3967 {
3968 condition = GET2(slotB, 0) == group_num;
3969 if (condition) break;
3970 }
3971 else break;
3972 }
3973
3974 /* Scan up for duplicates */
3975 if (!condition)
3976 {
3977 slotB = slotA;
3978 for (i++; i < name_count; i++)
3979 {
3980 slotB += name_entry_size;
3981 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3982 {
3983 condition = GET2(slotB, 0) == group_num;
3984 if (condition) break;
3985 }
3986 else break;
3987 }
3988 }
3989 }
3990 return condition;
3991 }
3992
3993 /*
3994 Handling bracketed expressions is probably the most complex part.
3995
3996 Stack layout naming characters:
3997 S - Push the current STR_PTR
3998 0 - Push a 0 (NULL)
3999 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4000 before the next alternative. Not pushed if there are no alternatives.
4001 M - Any values pushed by the current alternative. Can be empty, or anything.
4002 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4003 L - Push the previous local (pointed by localptr) to the stack
4004 () - optional values stored on the stack
4005 ()* - optional, can be stored multiple times
4006
4007 The following list shows the regular expression templates, their PCRE byte codes
4008 and stack layout supported by pcre-sljit.
4009
4010 (?:) OP_BRA | OP_KET A M
4011 () OP_CBRA | OP_KET C M
4012 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4013 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4014 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4015 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4016 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4017 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4018 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4019 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4020 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4021 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4022 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4023 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4024 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4025 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4026 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4027 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4028 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4029 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4030 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4031 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4032
4033
4034 Stack layout naming characters:
4035 A - Push the alternative index (starting from 0) on the stack.
4036 Not pushed if there are no alternatives.
4037 M - Any values pushed by the current alternative. Can be empty, or anything.
4038
4039 The next list shows the possible content of a bracket:
4040 (|) OP_*BRA | OP_ALT ... M A
4041 (?()|) OP_*COND | OP_ALT M A
4042 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4043 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4044 Or nothing, if trace is unnecessary
4045 */
4046
4047 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4048 {
4049 DEFINE_COMPILER;
4050 fallback_common *fallback;
4051 pcre_uchar opcode;
4052 int localptr = 0;
4053 int offset = 0;
4054 int stacksize;
4055 pcre_uchar *ccbegin;
4056 pcre_uchar *hotpath;
4057 pcre_uchar bra = OP_BRA;
4058 pcre_uchar ket;
4059 assert_fallback *assert;
4060 BOOL has_alternatives;
4061 struct sljit_jump *jump;
4062 struct sljit_jump *skip;
4063 struct sljit_label *rmaxlabel = NULL;
4064 struct sljit_jump *braminzerojump = NULL;
4065
4066 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4067
4068 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4069 {
4070 bra = *cc;
4071 cc++;
4072 opcode = *cc;
4073 }
4074
4075 opcode = *cc;
4076 ccbegin = cc;
4077 hotpath = ccbegin + 1 + LINK_SIZE;
4078
4079 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4080 {
4081 /* Drop this bracket_fallback. */
4082 parent->top = fallback->prev;
4083 return bracketend(cc);
4084 }
4085
4086 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4087 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4088 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4089 cc += GET(cc, 1);
4090
4091 has_alternatives = *cc == OP_ALT;
4092 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4093 {
4094 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4095 if (*hotpath == OP_NRREF)
4096 {
4097 stacksize = GET2(hotpath, 1);
4098 if (common->currententry == NULL || stacksize == RREF_ANY)
4099 has_alternatives = FALSE;
4100 else if (common->currententry->start == 0)
4101 has_alternatives = stacksize != 0;
4102 else
4103 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4104 }
4105 }
4106
4107 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4108 opcode = OP_SCOND;
4109 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4110 opcode = OP_ONCE;
4111
4112 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4113 {
4114 /* Capturing brackets has a pre-allocated space. */
4115 offset = GET2(ccbegin, 1 + LINK_SIZE);
4116 localptr = OVECTOR_PRIV(offset);
4117 offset <<= 1;
4118 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4119 hotpath += IMM2_SIZE;
4120 }
4121 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4122 {
4123 /* Other brackets simply allocate the next entry. */
4124 localptr = PRIV_DATA(ccbegin);
4125 SLJIT_ASSERT(localptr != 0);
4126 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4127 if (opcode == OP_ONCE)
4128 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4129 }
4130
4131 /* Instructions before the first alternative. */
4132 stacksize = 0;
4133 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4134 stacksize++;
4135 if (bra == OP_BRAZERO)
4136 stacksize++;
4137
4138 if (stacksize > 0)
4139 allocate_stack(common, stacksize);
4140
4141 stacksize = 0;
4142 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4143 {
4144 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4145 stacksize++;
4146 }
4147
4148 if (bra == OP_BRAZERO)
4149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4150
4151 if (bra == OP_BRAMINZERO)
4152 {
4153 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4154 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4155 if (ket != OP_KETRMIN)
4156 {
4157 free_stack(common, 1);
4158 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4159 }
4160 else
4161 {
4162 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4163 {
4164 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4165 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4166 /* Nothing stored during the first run. */
4167 skip = JUMP(SLJIT_JUMP);
4168 JUMPHERE(jump);
4169 /* Checking zero-length iteration. */
4170 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4171 {
4172 /* When we come from outside, localptr contains the previous STR_PTR. */
4173 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4174 }
4175 else
4176 {
4177 /* Except when the whole stack frame must be saved. */
4178 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4179 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4180 }
4181 JUMPHERE(skip);
4182 }
4183 else
4184 {
4185 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4186 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4187 JUMPHERE(jump);
4188 }
4189 }
4190 }
4191
4192 if (ket == OP_KETRMIN)
4193 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4194
4195 if (ket == OP_KETRMAX)
4196 {
4197 rmaxlabel = LABEL();
4198 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4199 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4200 }
4201
4202 /* Handling capturing brackets and alternatives. */
4203 if (opcode == OP_ONCE)
4204 {
4205 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4206 {
4207 /* Neither capturing brackets nor recursions are not found in the block. */
4208 if (ket == OP_KETRMIN)
4209 {
4210 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4211 allocate_stack(common, 2);
4212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4214 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4215 }
4216 else if (ket == OP_KETRMAX || has_alternatives)
4217 {
4218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4219 allocate_stack(common, 1);
4220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4221 }
4222 else
4223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4224 }
4225 else
4226 {
4227 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4228 {
4229 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4231 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4235 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4236 }
4237 else
4238 {
4239 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4241 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4244 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4245 }
4246 }
4247 }
4248 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4249 {
4250 /* Saving the previous values. */
4251 allocate_stack(common, 3);
4252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4257 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4259 }
4260 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4261 {
4262 /* Saving the previous value. */
4263 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4264 allocate_stack(common, 1);
4265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4267 }
4268 else if (has_alternatives)
4269 {
4270 /* Pushing the starting string pointer. */
4271 allocate_stack(common, 1);
4272 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4273 }
4274
4275 /* Generating code for the first alternative. */
4276 if (opcode == OP_COND || opcode == OP_SCOND)
4277 {
4278 if (*hotpath == OP_CREF)
4279 {
4280 SLJIT_ASSERT(has_alternatives);
4281 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4282 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4283 hotpath += 1 + IMM2_SIZE;
4284 }
4285 else if (*hotpath == OP_NCREF)
4286 {
4287 SLJIT_ASSERT(has_alternatives);
4288 stacksize = GET2(hotpath, 1);
4289 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4290
4291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4293 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4294 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4295 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4296 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4297 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4298 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4299 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4300
4301 JUMPHERE(jump);
4302 hotpath += 1 + IMM2_SIZE;
4303 }
4304 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4305 {
4306 /* Never has other case. */
4307 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4308
4309 stacksize = GET2(hotpath, 1);
4310 if (common->currententry == NULL)
4311 stacksize = 0;
4312 else if (stacksize == RREF_ANY)
4313 stacksize = 1;
4314 else if (common->currententry->start == 0)
4315 stacksize = stacksize == 0;
4316 else
4317 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4318
4319 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4320 {
4321 SLJIT_ASSERT(!has_alternatives);
4322 if (stacksize != 0)
4323 hotpath += 1 + IMM2_SIZE;
4324 else
4325 {
4326 if (*cc == OP_ALT)
4327 {
4328 hotpath = cc + 1 + LINK_SIZE;
4329 cc += GET(cc, 1);
4330 }
4331 else
4332 hotpath = cc;
4333 }
4334 }
4335 else
4336 {
4337 SLJIT_ASSERT(has_alternatives);
4338
4339 stacksize = GET2(hotpath, 1);
4340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4342 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4344 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4345 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4346 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4347 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4348 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4349 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4350 hotpath += 1 + IMM2_SIZE;
4351 }
4352 }
4353 else
4354 {
4355 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4356 /* Similar code as PUSH_FALLBACK macro. */
4357 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4358 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4359 return NULL;
4360 memset(assert, 0, sizeof(assert_fallback));
4361 assert->common.cc = hotpath;
4362 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4363 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4364 }
4365 }
4366
4367 compile_hotpath(common, hotpath, cc, fallback);
4368 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4369 return NULL;
4370
4371 if (opcode == OP_ONCE)
4372 {
4373 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4374 {
4375 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4376 /* TMP2 which is set here used by OP_KETRMAX below. */
4377 if (ket == OP_KETRMAX)
4378 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4379 else if (ket == OP_KETRMIN)
4380 {
4381 /* Move the STR_PTR to the localptr. */
4382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4383 }
4384 }
4385 else
4386 {
4387 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4388 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4389 if (ket == OP_KETRMAX)
4390 {
4391 /* TMP2 which is set here used by OP_KETRMAX below. */
4392 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4393 }
4394 }
4395 }
4396
4397 stacksize = 0;
4398 if (ket != OP_KET || bra != OP_BRA)
4399 stacksize++;
4400 if (has_alternatives && opcode != OP_ONCE)
4401 stacksize++;
4402
4403 if (stacksize > 0)
4404 allocate_stack(common, stacksize);
4405
4406 stacksize = 0;
4407 if (ket != OP_KET)
4408 {
4409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4410 stacksize++;
4411 }
4412 else if (bra != OP_BRA)
4413 {
4414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4415 stacksize++;
4416 }
4417
4418 if (has_alternatives)
4419 {
4420 if (opcode != OP_ONCE)
4421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4422 if (ket != OP_KETRMAX)
4423 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4424 }
4425
4426 /* Must be after the hotpath label. */
4427 if (offset != 0)
4428 {
4429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4432 }
4433
4434 if (ket == OP_KETRMAX)
4435 {
4436 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4437 {
4438 if (has_alternatives)
4439 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4440 /* Checking zero-length iteration. */
4441 if (opcode != OP_ONCE)
4442 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4443 else
4444 /* TMP2 must contain the starting STR_PTR. */
4445 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4446 }
4447 else
4448 JUMPTO(SLJIT_JUMP, rmaxlabel);
4449 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4450 }
4451
4452 if (bra == OP_BRAZERO)
4453 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4454
4455 if (bra == OP_BRAMINZERO)
4456 {
4457 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4458 JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath);
4459 if (braminzerojump != NULL)
4460 {
4461 JUMPHERE(braminzerojump);
4462 /* We need to release the end pointer to perform the
4463 fallback for the zero-length iteration. When
4464 framesize is < 0, OP_ONCE will do the release itself. */
4465 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4466 {
4467 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4468 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4469 }
4470 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4471 free_stack(common, 1);
4472 }
4473 /* Continue to the normal fallback. */
4474 }
4475
4476 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4477 decrease_call_count(common);
4478
4479 /* Skip the other alternatives. */
4480 while (*cc == OP_ALT)
4481 cc += GET(cc, 1);
4482 cc += 1 + LINK_SIZE;
4483 return cc;
4484 }
4485
4486 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4487 {
4488 DEFINE_COMPILER;
4489 fallback_common *fallback;
4490 pcre_uchar opcode;
4491 int localptr;
4492 int cbraprivptr = 0;
4493 int framesize;
4494 int stacksize;
4495 int offset = 0;
4496 BOOL zero = FALSE;
4497 pcre_uchar *ccbegin = NULL;
4498 int stack;
4499 struct sljit_label *loop = NULL;
4500 struct jump_list *emptymatch = NULL;
4501
4502 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
4503 if (*cc == OP_BRAPOSZERO)
4504 {
4505 zero = TRUE;
4506 cc++;
4507 }
4508
4509 opcode = *cc;
4510 localptr = PRIV_DATA(cc);
4511 SLJIT_ASSERT(localptr != 0);
4512 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
4513 switch(opcode)
4514 {
4515 case OP_BRAPOS:
4516 case OP_SBRAPOS:
4517 ccbegin = cc + 1 + LINK_SIZE;
4518 break;
4519
4520 case OP_CBRAPOS:
4521 case OP_SCBRAPOS:
4522 offset = GET2(cc, 1 + LINK_SIZE);
4523 cbraprivptr = OVECTOR_PRIV(offset);
4524 offset <<= 1;
4525 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4526 break;
4527
4528 default:
4529 SLJIT_ASSERT_STOP();
4530 break;
4531 }
4532
4533 framesize = get_framesize(common, cc, FALSE);
4534 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4535 if (framesize < 0)
4536 {
4537 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4538 if (!zero)
4539 stacksize++;
4540 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4541 allocate_stack(common, stacksize);
4542 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4543
4544 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4545 {
4546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4547 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4549 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4550 }
4551 else
4552 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4553
4554 if (!zero)
4555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4556 }
4557 else
4558 {
4559 stacksize = framesize + 1;
4560 if (!zero)
4561 stacksize++;
4562 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4563 stacksize++;
4564 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4565 allocate_stack(common, stacksize);
4566
4567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4568 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4569 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4570 stack = 0;
4571 if (!zero)
4572 {
4573 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4574 stack++;
4575 }
4576 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4577 {
4578 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4579 stack++;
4580 }
4581 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4582 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4583 }
4584
4585 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4587
4588 loop = LABEL();
4589 while (*cc != OP_KETRPOS)
4590 {
4591 fallback->top = NULL;
4592 fallback->topfallbacks = NULL;
4593 cc += GET(cc, 1);
4594
4595 compile_hotpath(common, ccbegin, cc, fallback);
4596 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4597 return NULL;
4598
4599 if (framesize < 0)
4600 {
4601 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4602
4603 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4604 {
4605 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4609 }
4610 else
4611 {
4612 if (opcode == OP_SBRAPOS)
4613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4614 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4615 }
4616
4617 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4618 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4619
4620 if (!zero)
4621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4622 }
4623 else
4624 {
4625 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4626 {
4627 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
4628 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4629 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4630 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4632 }
4633 else
4634 {
4635 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4636 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
4637 if (opcode == OP_SBRAPOS)
4638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4639 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
4640 }
4641
4642 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4643 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4644
4645 if (!zero)
4646 {
4647 if (framesize < 0)
4648 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4649 else
4650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4651 }
4652 }
4653 JUMPTO(SLJIT_JUMP, loop);
4654 flush_stubs(common);
4655
4656 compile_fallbackpath(common, fallback->top);
4657 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4658 return NULL;
4659 set_jumps(fallback->topfallbacks, LABEL());
4660
4661 if (framesize < 0)
4662 {
4663 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4664 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4665 else
4666 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4667 }
4668 else
4669 {
4670 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4671 {
4672 /* Last alternative. */
4673 if (*cc == OP_KETRPOS)
4674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4675 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4676 }
4677 else
4678 {
4679 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4680 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4681 }
4682 }
4683
4684 if (*cc == OP_KETRPOS)
4685 break;
4686 ccbegin = cc + 1 + LINK_SIZE;
4687 }
4688
4689 fallback->topfallbacks = NULL;
4690 if (!zero)
4691 {
4692 if (framesize < 0)
4693 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
4694 else /* TMP2 is set to [localptr] above. */
4695 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
4696 }
4697
4698 /* None of them matched. */
4699 set_jumps(emptymatch, LABEL());
4700 decrease_call_count(common);
4701 return cc + 1 + LINK_SIZE;
4702 }
4703
4704 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
4705 {
4706 int class_len;
4707
4708 *opcode = *cc;
4709 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
4710 {
4711 cc++;
4712 *type = OP_CHAR;
4713 }
4714 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
4715 {
4716 cc++;
4717 *type = OP_CHARI;
4718 *opcode -= OP_STARI - OP_STAR;
4719 }
4720 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
4721 {
4722 cc++;
4723 *type = OP_NOT;
4724 *opcode -= OP_NOTSTAR - OP_STAR;
4725 }
4726 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
4727 {
4728 cc++;
4729 *type = OP_NOTI;
4730 *opcode -= OP_NOTSTARI - OP_STAR;
4731 }
4732 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
4733 {
4734 cc++;
4735 *opcode -= OP_TYPESTAR - OP_STAR;
4736 *type = 0;
4737 }
4738 else
4739 {
4740 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
4741 *type = *opcode;
4742 cc++;
4743 class_len = (*type < OP_XCLASS) ? (1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
4744 *opcode = cc[class_len - 1];
4745 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
4746 {
4747 *opcode -= OP_CRSTAR - OP_STAR;
4748 if (end != NULL)
4749 *end = cc + class_len;
4750 }
4751 else
4752 {
4753 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
4754 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
4755 *arg2 = GET2(cc, class_len);
4756
4757 if (*arg2 == 0)
4758 {
4759 SLJIT_ASSERT(*arg1 != 0);
4760 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
4761 }
4762 if (*arg1 == *arg2)
4763 *opcode = OP_EXACT;
4764
4765 if (end != NULL)
4766 *end = cc + class_len + 2 * IMM2_SIZE;
4767 }
4768 return cc;
4769 }
4770
4771 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
4772 {
4773 *arg1 = GET2(cc, 0);
4774 cc += IMM2_SIZE;
4775 }
4776
4777 if (*type == 0)
4778 {
4779 *type = *cc;
4780 if (end != NULL)
4781 *end = next_opcode(common, cc);
4782 cc++;
4783 return cc;
4784 }
4785
4786 if (end != NULL)
4787 {
4788 *end = cc + 1;
4789 #ifdef SUPPORT_UTF8
4790 if (common->utf8 && *cc >= 0xc0) *end += PRIV(utf8_table4)[*cc & 0x3f];
4791 #endif
4792 }
4793 return cc;
4794 }
4795
4796 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4797 {
4798 DEFINE_COMPILER;
4799 fallback_common *fallback;
4800 pcre_uchar opcode;
4801 pcre_uchar type;
4802 int arg1 = -1, arg2 = -1;
4803 pcre_uchar* end;
4804 jump_list *nomatch = NULL;
4805 struct sljit_jump *jump = NULL;
4806 struct sljit_label *label;
4807
4808 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
4809
4810 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
4811
4812 switch(opcode)
4813 {
4814 case OP_STAR:
4815 case OP_PLUS:
4816 case OP_UPTO:
4817 case OP_CRRANGE:
4818 if (type == OP_ANYNL || type == OP_EXTUNI)
4819 {
4820 if (opcode == OP_STAR || opcode == OP_UPTO)
4821 {
4822 allocate_stack(common, 2);
4823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4825 }
4826 else
4827 {
4828 allocate_stack(common, 1);
4829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4830 }
4831 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4833
4834 label = LABEL();
4835 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4836 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4837 {
4838 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4839 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4840 if (opcode == OP_CRRANGE && arg2 > 0)
4841 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
4842 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
4843 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
4844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4845 }
4846
4847 allocate_stack(common, 1);
4848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4849 JUMPTO(SLJIT_JUMP, label);
4850 if (jump != NULL)
4851 JUMPHERE(jump);
4852 }
4853 else
4854 {
4855 allocate_stack(common, 2);
4856 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4858 label = LABEL();
4859 compile_char1_hotpath(common, type, cc, &nomatch);
4860 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4861 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
4862 {
4863 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4864 JUMPTO(SLJIT_JUMP, label);
4865 }
4866 else
4867 {
4868 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4869 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4870 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4871 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4872 }
4873 set_jumps(nomatch, LABEL());
4874 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
4875 add_jump(compiler, &fallback->topfallbacks,
4876 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
4877 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4878 }
4879 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4880 break;
4881
4882 case OP_MINSTAR:
4883 case OP_MINPLUS:
4884 allocate_stack(common, 1);
4885 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4886 if (opcode == OP_MINPLUS)
4887 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4888 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4889 break;
4890
4891 case OP_MINUPTO:
4892 case OP_CRMINRANGE:
4893 allocate_stack(common, 2);
4894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4896 if (opcode == OP_CRMINRANGE)
4897 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4898 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4899 break;
4900
4901 case OP_QUERY:
4902 case OP_MINQUERY:
4903 allocate_stack(common, 1);
4904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4905 if (opcode == OP_QUERY)
4906 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4907 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4908 break;
4909
4910 case OP_EXACT:
4911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4912 label = LABEL();
4913 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4915 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4917 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4918 break;
4919
4920 case OP_POSSTAR:
4921 case OP_POSPLUS:
4922 case OP_POSUPTO:
4923 if (opcode != OP_POSSTAR)
4924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4925 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4926 label = LABEL();
4927 compile_char1_hotpath(common, type, cc, &nomatch);
4928 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4929 if (opcode != OP_POSUPTO)
4930 {
4931 if (opcode == OP_POSPLUS)
4932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
4933 JUMPTO(SLJIT_JUMP, label);
4934 }
4935 else
4936 {
4937 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4938 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4940 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4941 }
4942 set_jumps(nomatch, LABEL());
4943 if (opcode == OP_POSPLUS)
4944 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
4945 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4946 break;
4947
4948 case OP_POSQUERY:
4949 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4950 compile_char1_hotpath(common, type, cc, &nomatch);
4951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4952 set_jumps(nomatch, LABEL());
4953 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4954 break;
4955
4956 default:
4957 SLJIT_ASSERT_STOP();
4958 break;
4959 }
4960
4961 decrease_call_count(common);
4962 return end;
4963 }
4964
4965 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4966 {
4967 DEFINE_COMPILER;
4968 fallback_common *fallback;
4969
4970 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4971
4972 if (*cc == OP_FAIL)
4973 {
4974 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4975 return cc + 1;
4976 }
4977
4978 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
4979 {
4980 /* No need to check notempty conditions. */
4981 if (common->acceptlabel == NULL)
4982 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
4983 else
4984 JUMPTO(SLJIT_JUMP, common->acceptlabel);
4985 return cc + 1;
4986 }
4987
4988 if (common->acceptlabel == NULL)
4989 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
4990 else
4991 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
4992 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4993 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
4994 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4995 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
4996 if (common->acceptlabel == NULL)
4997 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4998 else
4999 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5000 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5001 if (common->acceptlabel == NULL)
5002 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5003 else
5004 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5005 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5006 return cc + 1;
5007 }
5008
5009 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5010 {
5011 DEFINE_COMPILER;
5012 int offset = GET2(cc, 1);
5013
5014 /* Data will be discarded anyway... */
5015 if (common->currententry != NULL)
5016 return cc + 1 + IMM2_SIZE;
5017
5018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5019 offset <<= 1;
5020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5022 return cc + 1 + IMM2_SIZE;
5023 }
5024
/* Generates the hotpath (forward matching) code for the opcodes in the range
   [cc, ccend). Each opcode is dispatched to the compile_*_hotpath helper that
   handles it; every helper returns the address of the next opcode.

   common - shared compiler state.
   cc     - first opcode to compile.
   ccend  - end of the range; the function asserts that the walk stops
            exactly there.
   parent - fallback record of the enclosing construct. Helpers that take a
            jump list are given parent->top->nextfallbacks when parent->top
            is not NULL, otherwise parent->topfallbacks.

   The function returns early, before reaching ccend, when a helper returns
   NULL or when an unhandled opcode is met. */
5025 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5026 {
5027 DEFINE_COMPILER;
/* Not assigned directly in this function; presumably set by the
   PUSH_FALLBACK_NOVALUE macro, which is defined outside this view. */
5028 fallback_common *fallback;
5029
5030 while (cc < ccend)
5031 {
5032 switch(*cc)
5033 {
/* Opcodes handled by compile_char1_hotpath, which receives the opcode
   itself and the address just after it. */
5034 case OP_SOD:
5035 case OP_SOM:
5036 case OP_NOT_WORD_BOUNDARY:
5037 case OP_WORD_BOUNDARY:
5038 case OP_NOT_DIGIT:
5039 case OP_DIGIT:
5040 case OP_NOT_WHITESPACE:
5041 case OP_WHITESPACE:
5042 case OP_NOT_WORDCHAR:
5043 case OP_WORDCHAR:
5044 case OP_ANY:
5045 case OP_ALLANY:
5046 case OP_ANYBYTE:
5047 case OP_NOTPROP:
5048 case OP_PROP:
5049 case OP_ANYNL:
5050 case OP_NOT_HSPACE:
5051 case OP_HSPACE:
5052 case OP_NOT_VSPACE:
5053 case OP_VSPACE:
5054 case OP_EXTUNI:
5055 case OP_EODN:
5056 case OP_EOD:
5057 case OP_CIRC:
5058 case OP_CIRCM:
5059 case OP_DOLL:
5060 case OP_DOLLM:
5061 case OP_NOT:
5062 case OP_NOTI:
5063 case OP_REVERSE:
5064 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5065 break;
5066
/* OP_SET_SOM: push the old OVECTOR(0) on the stack and replace it with the
   current STR_PTR. */
5067 case OP_SET_SOM:
5068 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5069 allocate_stack(common, 1);
5070 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5071 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5072 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5073 cc++;
5074 break;
5075
/* Literal characters; ccend is passed on to compile_charn_hotpath as well. */
5076 case OP_CHAR:
5077 case OP_CHARI:
5078 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5079 break;
5080
/* Every repeated single-item form goes to compile_iterator_hotpath. */
5081 case OP_STAR:
5082 case OP_MINSTAR:
5083 case OP_PLUS:
5084 case OP_MINPLUS:
5085 case OP_QUERY:
5086 case OP_MINQUERY:
5087 case OP_UPTO:
5088 case OP_MINUPTO:
5089 case OP_EXACT:
5090 case OP_POSSTAR:
5091 case OP_POSPLUS:
5092 case OP_POSQUERY:
5093 case OP_POSUPTO:
5094 case OP_STARI:
5095 case OP_MINSTARI:
5096 case OP_PLUSI:
5097 case OP_MINPLUSI:
5098 case OP_QUERYI:
5099 case OP_MINQUERYI:
5100 case OP_UPTOI:
5101 case OP_MINUPTOI:
5102 case OP_EXACTI:
5103 case OP_POSSTARI:
5104 case OP_POSPLUSI:
5105 case OP_POSQUERYI:
5106 case OP_POSUPTOI:
5107 case OP_NOTSTAR:
5108 case OP_NOTMINSTAR:
5109 case OP_NOTPLUS:
5110 case OP_NOTMINPLUS:
5111 case OP_NOTQUERY:
5112 case OP_NOTMINQUERY:
5113 case OP_NOTUPTO:
5114 case OP_NOTMINUPTO:
5115 case OP_NOTEXACT:
5116 case OP_NOTPOSSTAR:
5117 case OP_NOTPOSPLUS:
5118 case OP_NOTPOSQUERY:
5119 case OP_NOTPOSUPTO:
5120 case OP_NOTSTARI:
5121 case OP_NOTMINSTARI:
5122 case OP_NOTPLUSI:
5123 case OP_NOTMINPLUSI:
5124 case OP_NOTQUERYI:
5125 case OP_NOTMINQUERYI:
5126 case OP_NOTUPTOI:
5127 case OP_NOTMINUPTOI:
5128 case OP_NOTEXACTI:
5129 case OP_NOTPOSSTARI:
5130 case OP_NOTPOSPLUSI:
5131 case OP_NOTPOSQUERYI:
5132 case OP_NOTPOSUPTOI:
5133 case OP_TYPESTAR:
5134 case OP_TYPEMINSTAR:
5135 case OP_TYPEPLUS:
5136 case OP_TYPEMINPLUS:
5137 case OP_TYPEQUERY:
5138 case OP_TYPEMINQUERY:
5139 case OP_TYPEUPTO:
5140 case OP_TYPEMINUPTO:
5141 case OP_TYPEEXACT:
5142 case OP_TYPEPOSSTAR:
5143 case OP_TYPEPOSPLUS:
5144 case OP_TYPEPOSQUERY:
5145 case OP_TYPEPOSUPTO:
5146 cc = compile_iterator_hotpath(common, cc, parent);
5147 break;
5148
/* Character classes: the opcode found 32 bytes after the class opcode is
   inspected. If it lies in OP_CRSTAR..OP_CRMINRANGE the class is compiled as
   an iterator, otherwise as a single item. */
5149 case OP_CLASS:
5150 case OP_NCLASS:
5151 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5152 cc = compile_iterator_hotpath(common, cc, parent);
5153 else
5154 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5155 break;
5156
5157 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* Same decision for OP_XCLASS; GET(cc, 1) is used as the distance from the
   opcode to whatever follows the class. */
5158 case OP_XCLASS:
5159 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5160 cc = compile_iterator_hotpath(common, cc, parent);
5161 else
5162 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5163 break;
5164 #endif
5165
/* Back references: a repeat opcode directly after the IMM2_SIZE operand
   selects the iterator variant. */
5166 case OP_REF:
5167 case OP_REFI:
5168 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5169 cc = compile_ref_iterator_hotpath(common, cc, parent);
5170 else
5171 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5172 break;
5173
5174 case OP_RECURSE:
5175 cc = compile_recurse_hotpath(common, cc, parent);
5176 break;
5177
/* Stand-alone assertions get an assert_fallback record of their own. */
5178 case OP_ASSERT:
5179 case OP_ASSERT_NOT:
5180 case OP_ASSERTBACK:
5181 case OP_ASSERTBACK_NOT:
5182 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5183 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5184 break;
5185
/* OP_BRAMINZERO: the whole bracket is skipped on the hotpath (cc jumps to
   bracketend). Only stack slots are prepared: one slot holding STR_PTR when
   the bracket does not end with OP_KETRMIN, otherwise two slots holding 0
   and STR_PTR. */
5186 case OP_BRAMINZERO:
5187 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5188 cc = bracketend(cc + 1);
5189 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5190 {
5191 allocate_stack(common, 1);
5192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5193 }
5194 else
5195 {
5196 allocate_stack(common, 2);
5197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5199 }
5200 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
/* NOTE(review): cc already points past the bracket here, so cc[1] is not the
   opcode that followed OP_BRAMINZERO (compare the OP_BRAZERO case below) -
   confirm that this is intended. */
5201 if (cc[1] > OP_ASSERTBACK_NOT)
5202 decrease_call_count(common);
5203 break;
5204
5205 case OP_ONCE:
5206 case OP_ONCE_NC:
5207 case OP_BRA:
5208 case OP_CBRA:
5209 case OP_COND:
5210 case OP_SBRA:
5211 case OP_SCBRA:
5212 case OP_SCOND:
5213 cc = compile_bracket_hotpath(common, cc, parent);
5214 break;
5215
/* OP_BRAZERO: an opcode above OP_ASSERTBACK_NOT after the prefix is compiled
   as a bracket, anything else as an assertion. */
5216 case OP_BRAZERO:
5217 if (cc[1] > OP_ASSERTBACK_NOT)
5218 cc = compile_bracket_hotpath(common, cc, parent);
5219 else
5220 {
5221 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5222 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5223 }
5224 break;
5225
5226 case OP_BRAPOS:
5227 case OP_CBRAPOS:
5228 case OP_SBRAPOS:
5229 case OP_SCBRAPOS:
5230 case OP_BRAPOSZERO:
5231 cc = compile_bracketpos_hotpath(common, cc, parent);
5232 break;
5233
5234 case OP_FAIL:
5235 case OP_ACCEPT:
5236 case OP_ASSERT_ACCEPT:
5237 cc = compile_fail_accept_hotpath(common, cc, parent);
5238 break;
5239
5240 case OP_CLOSE:
5241 cc = compile_close_hotpath(common, cc);
5242 break;
5243
/* OP_SKIPZERO: step over the following bracket without emitting any code. */
5244 case OP_SKIPZERO:
5245 cc = bracketend(cc + 1);
5246 break;
5247
/* Any opcode not listed above is a hard error. */
5248 default:
5249 SLJIT_ASSERT_STOP();
5250 return;
5251 }
/* A helper signalled failure by returning NULL. */
5252 if (cc == NULL)
5253 return;
5254 }
5255 SLJIT_ASSERT(cc == ccend);
5256 }
5257
/* The fallback-record helper macros are not needed past this point. */
5258 #undef PUSH_FALLBACK
5259 #undef PUSH_FALLBACK_NOVALUE
5260 #undef FALLBACK_AS
5261
/* Calls compile_fallbackpath(common, current) and returns from the enclosing
   function when the sljit compiler reports an error afterwards. It may only
   be used inside a void function that has `common` and `compiler` in
   scope. */
5262 #define COMPILE_FALLBACKPATH(current) \
5263 do \
5264 { \
5265 compile_fallbackpath(common, (current)); \
5266 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5267 return; \
5268 } \
5269 while (0)
5270
/* Casts the variable named `current` in the caller's scope to a pointer to
   the given fallback record type. */
5271 #define CURRENT_AS(type) ((type*)current)
5272
/* Emits the fallback (retry) code of a single-item iterator.

   common  - shared compiler state.
   current - fallback record of the iterator; it is accessed as an
             iterator_fallback to obtain the hotpath label that the emitted
             code jumps back to.

   The iterator is decoded again from current->cc with
   get_iterator_parameters, and the emitted code depends on the opcode
   reported by it. The stack slots read here are the ones that the hotpath
   code of the same opcode stored. */
5273 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5274 {
5275 DEFINE_COMPILER;
5276 pcre_uchar *cc = current->cc;
5277 pcre_uchar opcode;
5278 pcre_uchar type;
5279 int arg1 = -1, arg2 = -1;
5280 struct sljit_label *label = NULL;
5281 struct sljit_jump *jump = NULL;
5282
5283 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5284
5285 switch(opcode)
5286 {
5287 case OP_STAR:
5288 case OP_PLUS:
5289 case OP_UPTO:
5290 case OP_CRRANGE:
5291 if (type == OP_ANYNL || type == OP_EXTUNI)
5292 {
/* Pop one saved STR_PTR. A non-zero value re-enters the hotpath at that
   position; a zero value falls through, which ends the retries. */
5293 set_jumps(current->topfallbacks, LABEL());
5294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5295 free_stack(common, 1);
5296 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5297 }
5298 else
5299 {
/* STACK(0) holds the current end position and STACK(1) a counter. arg2 is
   forced to 0 for OP_STAR / OP_UPTO and to 1 for OP_PLUS; OP_CRRANGE keeps
   the value decoded above. */
5300 if (opcode == OP_STAR || opcode == OP_UPTO)
5301 arg2 = 0;
5302 else if (opcode == OP_PLUS)
5303 arg2 = 1;
/* No retry is left once the counter is at or below arg2 + 1. Otherwise
   decrement the counter, step the saved position back by one character and
   re-enter the hotpath. */
5304 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5305 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5306 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5307 skip_char_back(common);
5308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5309 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
/* Failures recorded in topfallbacks (OP_PLUS / OP_CRRANGE only) and the
   "no retry left" jump both end up at the stack release below. */
5310 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5311 set_jumps(current->topfallbacks, LABEL());
5312 JUMPHERE(jump);
5313 free_stack(common, 2);
5314 }
5315 break;
5316
5317 case OP_MINSTAR:
5318 case OP_MINPLUS:
/* Reload the saved position, match one more item, save the new position and
   continue on the hotpath. If the item does not match, the slot is freed. */
5319 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5320 if (opcode == OP_MINPLUS)
5321 {
/* Bind the pending topfallbacks jumps after the reload, then clear the list
   so that it can collect the failures of the item compiled below. */
5322 set_jumps(current->topfallbacks, LABEL());
5323 current->topfallbacks = NULL;
5324 }
5325 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5327 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5328 set_jumps(current->topfallbacks, LABEL());
5329 free_stack(common, 1);
5330 break;
5331
5332 case OP_MINUPTO:
5333 case OP_CRMINRANGE:
/* STACK(0) holds the saved position and STACK(1) a counter. */
5334 if (opcode == OP_CRMINRANGE)
5335 {
/* Bind the pending topfallbacks jumps here and clear the list; `label` marks
   the start of the "match one more item" sequence. */
5336 set_jumps(current->topfallbacks, LABEL());
5337 current->topfallbacks = NULL;
5338 label = LABEL();
5339 }
5340 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5341 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5342
/* Save the new position and the incremented counter. */
5343 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5345 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5346 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5347
/* OP_CRMINRANGE: keep matching in place while the counter is below
   arg2 + 1. */
5348 if (opcode == OP_CRMINRANGE)
5349 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5350
/* Return to the hotpath: always for OP_CRMINRANGE with arg1 == 0, otherwise
   only while the counter is below arg1 + 2. */
5351 if (opcode == OP_CRMINRANGE && arg1 == 0)
5352 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5353 else
5354 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5355
5356 set_jumps(current->topfallbacks, LABEL());
5357 free_stack(common, 2);
5358 break;
5359
5360 case OP_QUERY:
/* Reached by falling through from the preceding fallback code: reload the
   saved position and clear the slot. A non-zero position re-enters the
   hotpath; a zero one skips ahead to the stack release. */
5361 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5362 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5363 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5364 jump = JUMP(SLJIT_JUMP);
/* Reached through topfallbacks: reload the position, clear the slot and
   continue on the hotpath. */
5365 set_jumps(current->topfallbacks, LABEL());
5366 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5368 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5369 JUMPHERE(jump);
5370 free_stack(common, 1);
5371 break;
5372
5373 case OP_MINQUERY:
/* Reload the saved position and clear the slot. A zero position goes
   straight to the stack release; otherwise the item is matched once and
   control returns to the hotpath. */
5374 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5376 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5377 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5378 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5379 set_jumps(current->topfallbacks, LABEL());
5380 JUMPHERE(jump);
5381 free_stack(common, 1);
5382 break;
5383
/* Only the failure jumps have to be bound; no stack slot is released. */
5384 case OP_EXACT:
5385 case OP_POSPLUS:
5386 set_jumps(current->topfallbacks, LABEL());
5387 break;
5388
/* No fallback code is emitted for these opcodes. */
5389 case OP_POSSTAR:
5390 case OP_POSQUERY:
5391 case OP_POSUPTO:
5392 break;
5393
5394 default:
5395 SLJIT_ASSERT_STOP();
5396 break;
5397 }
5398 }
5399
5400 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5401 {
5402 DEFINE_COMPILER;
5403 pcre_uchar *cc = current->cc;
5404 pcre_uchar type;
5405
5406 type = cc[1 + IMM2_SIZE];
5407 if ((type & 0x1) == 0)
5408 {
5409 set_jumps(current->topfallbacks, LABEL());
5410 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5411 free_stack(common, 1);
5412 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5413 return;
5414 }
5415
5416 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5417 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5418 set_jumps(current->topfallbacks, LABEL());
5419 free_stack(common, 2);
5420 }
5421
/* Emits the fallback code of a recursion: pending failure jumps are bound
   here, one word is popped from the stack and written back to OVECTOR(0).

   common  - shared compiler state.
   current - fallback record whose topfallbacks list is bound to this code. */
5422 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5423 {
5424 DEFINE_COMPILER;
5425
5426 set_jumps(current->topfallbacks, LABEL());
/* The value is read into TMP2 before the slot is released. */
5427 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5428 free_stack(common, 1);
5429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5430 }
5431
5432 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5433 {
5434 DEFINE_COMPILER;
5435 pcre_uchar *cc = current->cc;
5436 pcre_uchar bra = OP_BRA;
5437 struct sljit_jump *brajump = NULL;
5438
5439 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5440 if (*cc == OP_BRAZERO)
5441 {
5442 bra = *cc;
5443 cc++;
5444 }
5445
5446 if (bra == OP_BRAZERO)
5447 {
5448 SLJIT_ASSERT(current->topfallbacks == NULL);
5449 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5450 }
5451
5452 if (CURRENT_AS(assert_fallback)->framesize < 0)
5453 {
5454 set_jumps(current->topfallbacks, LABEL());
5455
5456 if (bra == OP_BRAZERO)
5457 {
5458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5459 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5460 free_stack(common, 1);
5461 }
5462 return;
5463 }
5464
5465 if (bra == OP_BRAZERO)
5466 {
5467 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5468 {
5469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5470 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5471 free_stack(common, 1);
5472 return;
5473 }
5474 free_stack(common, 1);
5475 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5476 }
5477
5478 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5479 {
5480 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5481 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5483
5484 set_jumps(current->topfallbacks, LABEL());
5485 }
5486 else
5487 set_jumps(current->topfallbacks, LABEL());
5488
5489 if (bra == OP_BRAZERO)
5490 {
5491 /* We know there is enough place on the stack. */
5492 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5494 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5495 JUMPHERE(brajump);
5496 }
5497 }
5498
/* Emits the fallback code of a bracket (group).

   common  - shared compiler state.
   current - fallback record of the bracket; accessed as a bracket_fallback
             for localptr, the u.* union members and the hotpath labels
             (recursivehotpath, althotpath, zerohotpath).

   The generated code is laid out in this order:
     1. handling of the repeat state for OP_KETRMAX / OP_KETRMIN brackets and
        for a plain OP_BRAZERO prefix;
     2. selection of the alternative whose fallback has to run (a jump list
        indexed by the branch number popped from the stack), or the special
        handling of OP_ONCE and conditional brackets;
     3. the fallback of the first alternative, then for every further
        alternative its hotpath code followed by its own fallback;
     4. restoration of the state the bracket saved (capture offsets,
        localptr, frames);
     5. the final retry or exit for repeated and zero-minimum brackets.

   The function returns early when a jump list item cannot be allocated or
   when the sljit compiler reports an error. */
5499 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5500 {
5501 DEFINE_COMPILER;
5502 int opcode;
5503 int offset = 0;
5504 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5505 int stacksize;
5506 int count;
5507 pcre_uchar *cc = current->cc;
5508 pcre_uchar *ccbegin;
5509 pcre_uchar *ccprev;
5510 jump_list *jumplist = NULL;
5511 jump_list *jumplistitem = NULL;
5512 pcre_uchar bra = OP_BRA;
5513 pcre_uchar ket;
5514 assert_fallback *assert;
5515 BOOL has_alternatives;
5516 struct sljit_jump *brazero = NULL;
5517 struct sljit_jump *once = NULL;
5518 struct sljit_jump *cond = NULL;
5519 struct sljit_label *rminlabel = NULL;
5520
/* Remember an OP_BRAZERO / OP_BRAMINZERO prefix in `bra` and step over it. */
5521 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5522 {
5523 bra = *cc;
5524 cc++;
5525 }
5526
/* Decode the bracket: `ket` is the opcode that closes it and cc is moved to
   the end of the first alternative. */
5527 opcode = *cc;
5528 ccbegin = cc;
5529 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5530 cc += GET(cc, 1);
5531 has_alternatives = *cc == OP_ALT;
/* For conditional brackets has_alternatives is recomputed: it is true when
   the condition is an assertion or when a condfailed list exists. */
5532 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5533 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
/* Capturing brackets: the group number is doubled so that it can be used as
   an OVECTOR index. */
5534 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5535 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* Normalise the opcode: a repeated OP_COND is handled as OP_SCOND and
   OP_ONCE_NC as OP_ONCE. */
5536 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5537 opcode = OP_SCOND;
5538 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5539 opcode = OP_ONCE;
5540
/* Step 1: repeat state. */
5541 if (ket == OP_KETRMAX)
5542 {
/* Drop the top stack word. With OP_BRAZERO it is examined first: a zero
   value branches to the `brazero` exit bound near the end. */
5543 if (bra != OP_BRAZERO)
5544 free_stack(common, 1);
5545 else
5546 {
5547 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5548 free_stack(common, 1);
5549 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5550 }
5551 }
5552 else if (ket == OP_KETRMIN)
5553 {
/* Without the OP_BRAMINZERO prefix, reload STR_PTR and try one more
   iteration by jumping to recursivehotpath. */
5554 if (bra != OP_BRAMINZERO)
5555 {
5556 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5557 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5558 {
5559 /* Checking zero-length iteration. */
/* The retry is taken only if STR_PTR differs from the saved start, which is
   read from localptr directly or, for OP_ONCE with a frame, from the frame
   that localptr points to. */
5560 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5561 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5562 else
5563 {
5564 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5565 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5566 }
5567 if (opcode != OP_ONCE)
5568 free_stack(common, 1);
5569 }
5570 else
5571 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5572 }
/* Target of the "undo one more iteration" jump emitted in step 5. */
5573 rminlabel = LABEL();
5574 }
5575 else if (bra == OP_BRAZERO)
5576 {
/* Non-repeated bracket with OP_BRAZERO: pop the top word; a non-zero value
   branches to the `brazero` exit bound at the end. */
5577 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5578 free_stack(common, 1);
5579 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5580 }
5581
/* Step 2: decide which alternative's fallback has to run. */
5582 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5583 {
/* With a saved frame, restore STACK_TOP from localptr and call revertframes.
   In both cases jump over the alternatives to the `once` label bound in
   step 4. */
5584 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5585 {
5586 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5587 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5588 }
5589 once = JUMP(SLJIT_JUMP);
5590 }
5591 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5592 {
5593 if (has_alternatives)
5594 {
5595 /* Always exactly one alternative. */
/* Pop the branch index; the single list item is a jump taken when the index
   equals 1. */
5596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5597 free_stack(common, 1);
5598
5599 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5600 if (SLJIT_UNLIKELY(!jumplistitem))
5601 return;
5602 jumplist = jumplistitem;
5603 jumplistitem->next = NULL;
5604 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5605 }
5606 }
5607 else if (*cc == OP_ALT)
5608 {
5609 /* Build a jump list. Get the last successfully matched branch index. */
5610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5611 free_stack(common, 1);
/* One compare-and-jump per OP_ALT, testing the indexes 1, 2, ... in order. */
5612 count = 1;
5613 do
5614 {
5615 /* Append as the last item. */
5616 if (jumplist != NULL)
5617 {
5618 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
5619 jumplistitem = jumplistitem->next;
5620 }
5621 else
5622 {
5623 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5624 jumplist = jumplistitem;
5625 }
5626
5627 if (SLJIT_UNLIKELY(!jumplistitem))
5628 return;
5629
5630 jumplistitem->next = NULL;
5631 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
5632 cc += GET(cc, 1);
5633 }
5634 while (*cc == OP_ALT);
5635
/* Rewind cc to the end of the first alternative. */
5636 cc = ccbegin + GET(ccbegin, 1);
5637 }
5638
/* Step 3: fallback of the first alternative's contents. */
5639 COMPILE_FALLBACKPATH(current->top);
5640 if (current->topfallbacks)
5641 set_jumps(current->topfallbacks, LABEL());
5642
5643 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5644 {
5645 /* Conditional block always has at most one alternative. */
5646 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
5647 {
/* The condition is an assertion. For a positive assertion with a saved
   frame the frame is reverted first. `cond` then jumps over the alternative
   code, and the assertion's condfailed jumps are bound right after it. */
5648 SLJIT_ASSERT(has_alternatives);
5649 assert = CURRENT_AS(bracket_fallback)->u.assert;
5650 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
5651 {
5652 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5653 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5654 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5655 }
5656 cond = JUMP(SLJIT_JUMP);
5657 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
5658 }
5659 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
5660 {
/* Other conditions: same layout, using the bracket's own condfailed list. */
5661 SLJIT_ASSERT(has_alternatives);
5662 cond = JUMP(SLJIT_JUMP);
5663 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
5664 }
5665 else
5666 SLJIT_ASSERT(!has_alternatives);
5667 }
5668
5669 if (has_alternatives)
5670 {
/* For each remaining alternative: compile its hotpath, emit the code that
   runs after it matched, bind the jump list entry of the previous branch,
   then emit the fallback of the alternative itself. */
5671 count = 1;
5672 do
5673 {
/* Start the alternative with an empty fallback record. */
5674 current->top = NULL;
5675 current->topfallbacks = NULL;
5676 current->nextfallbacks = NULL;
5677 if (*cc == OP_ALT)
5678 {
5679 ccprev = cc + 1 + LINK_SIZE;
5680 cc += GET(cc, 1);
/* Non-conditional brackets restart the alternative from the saved start
   position, taken from localptr or from the top of the stack. */
5681 if (opcode != OP_COND && opcode != OP_SCOND)
5682 {
5683 if (localptr != 0 && opcode != OP_ONCE)
5684 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5685 else
5686 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5687 }
5688 compile_hotpath(common, ccprev, cc, current);
5689 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5690 return;
5691 }
5692
5693 /* Instructions after the current alternative is successfully matched. */
5694 /* There is a similar code in compile_bracket_hotpath. */
5695 if (opcode == OP_ONCE)
5696 {
5697 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
5698 {
5699 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5700 /* TMP2 which is set here used by OP_KETRMAX below. */
5701 if (ket == OP_KETRMAX)
5702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5703 else if (ket == OP_KETRMIN)
5704 {
5705 /* Move the STR_PTR to the localptr. */
5706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5707 }
5708 }
5709 else
5710 {
5711 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
5712 if (ket == OP_KETRMAX)
5713 {
5714 /* TMP2 which is set here used by OP_KETRMAX below. */
5715 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5716 }
5717 }
5718 }
5719
/* Count the stack words to push: one for the branch index (not for OP_ONCE)
   and one more when the bracket is repeated or carries a zero prefix. */
5720 stacksize = 0;
5721 if (opcode != OP_ONCE)
5722 stacksize++;
5723 if (ket != OP_KET || bra != OP_BRA)
5724 stacksize++;
5725
5726 if (stacksize > 0) {
5727 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5728 allocate_stack(common, stacksize);
5729 else
5730 {
5731 /* We know we have place at least for one item on the top of the stack. */
5732 SLJIT_ASSERT(stacksize == 1);
5733 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5734 }
5735 }
5736
/* Fill the slots: STR_PTR for a repeated bracket or 0 for a plain zero
   prefix, followed by the 1-based index of this alternative. */
5737 stacksize = 0;
5738 if (ket != OP_KET || bra != OP_BRA)
5739 {
5740 if (ket != OP_KET)
5741 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5742 else
5743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5744 stacksize++;
5745 }
5746
5747 if (opcode != OP_ONCE)
5748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
5749
/* Capturing bracket: store the pair (value kept in localptr, STR_PTR) in the
   group's OVECTOR entries. */
5750 if (offset != 0)
5751 {
5752 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5755 }
5756
/* The alternative matched: continue at the bracket's althotpath label. */
5757 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
5758
/* Bind the next jump list entry here, so that the branch index it tests
   leads to the fallback code emitted below. */
5759 if (opcode != OP_ONCE)
5760 {
5761 SLJIT_ASSERT(jumplist);
5762 JUMPHERE(jumplist->jump);
5763 jumplist = jumplist->next;
5764 }
5765
5766 COMPILE_FALLBACKPATH(current->top);
5767 if (current->topfallbacks)
5768 set_jumps(current->topfallbacks, LABEL());
5769 SLJIT_ASSERT(!current->nextfallbacks);
5770 }
5771 while (*cc == OP_ALT);
5772 SLJIT_ASSERT(!jumplist);
5773
5774 if (cond != NULL)
5775 {
/* Conditional bracket: for a negated assertion condition with a saved frame
   the frame is reverted here; then the `cond` jump emitted above is bound. */
5776 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
5777 assert = CURRENT_AS(bracket_fallback)->u.assert;
5778 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT))
5779 
5780 {
5781 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5782 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5784 }
5785 JUMPHERE(cond);
5786 }
5787
5788 /* Free the STR_PTR. */
5789 if (localptr == 0)
5790 free_stack(common, 1);
5791 }
5792
/* Step 4: restore the state that the bracket saved. */
5793 if (offset != 0)
5794 {
5795 /* Using both tmp register is better for instruction scheduling. */
/* Three words are popped: the two OVECTOR values of the capture group and
   the previous content of localptr. */
5796 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5797 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5799 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5800 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
5801 free_stack(common, 3);
5802 }
5803 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5804 {
/* Only the previous content of localptr was saved. */
5805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
5806 free_stack(common, 1);
5807 }
5808 else if (opcode == OP_ONCE)
5809 {
5810 cc = ccbegin + GET(ccbegin, 1);
5811 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5812 {
5813 /* Reset head and drop saved frame. */
5814 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
5815 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
5816 }
5817 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
5818 {
5819 /* The STR_PTR must be released. */
5820 free_stack(common, 1);
5821 }
5822
/* Target of the jump emitted for OP_ONCE in step 2. */
5823 JUMPHERE(once);
5824 /* Restore previous localptr */
5825 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
5827 else if (ket == OP_KETRMIN)
5828 {
5829 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5830 /* See the comment below. */
5831 free_stack(common, 2);
5832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5833 }
5834 }
5835
/* Step 5: final retry or exit. */
5836 if (ket == OP_KETRMAX)
5837 {
/* A non-zero saved position retries at recursivehotpath. With OP_BRAZERO the
   next saved word is loaded and control goes to zerohotpath; the `brazero`
   jump from step 1 is bound after that. */
5838 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5839 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
5840 if (bra == OP_BRAZERO)
5841 {
5842 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5843 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5844 JUMPHERE(brazero);
5845 }
5846 free_stack(common, 1);
5847 }
5848 else if (ket == OP_KETRMIN)
5849 {
/* A non-zero top word means another iteration has to be undone: loop back
   to rminlabel. */
5850 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5851
5852 /* OP_ONCE removes everything in case of a fallback, so we don't
5853 need to explicitly release the STR_PTR. The extra release would
5854 affect badly the free_stack(2) above. */
5855 if (opcode != OP_ONCE)
5856 free_stack(common, 1);
5857 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
5858 if (opcode == OP_ONCE)
5859 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
5860 else if (bra == OP_BRAMINZERO)
5861 free_stack(common, 1);
5862 }
5863 else if (bra == OP_BRAZERO)
5864 {
/* Non-repeated OP_BRAZERO bracket: reload STR_PTR and continue at
   zerohotpath; the `brazero` jump from step 1 is bound after that. */
5865 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5866 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5867 JUMPHERE(brazero);
5868 }
5869 }
5869
5870 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
5871 {
5872 DEFINE_COMPILER;
5873 int offset;
5874 struct sljit_jump *jump;
5875
5876 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
5877 {
5878 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
5879 {
5880 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
5881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5882 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5885 }
5886 set_jumps(current->topfallbacks, LABEL());
5887 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5888 return;
5889 }
5890
5891 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
5892 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5893
5894 if (current->topfallbacks)
5895 {
5896 jump = JUMP(SLJIT_JUMP);
5897 set_jumps(current->topfallbacks, LABEL());
5898 /* Drop the stack frame. */
5899 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5900 JUMPHERE(jump);
5901 }
5902 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
5903 }
5904
5905 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
5906 {
5907 assert_fallback fallback;
5908
5909 current->top = NULL;
5910 current->topfallbacks = NULL;
5911 current->nextfallbacks = NULL;
5912 if (current->cc[1] > OP_ASSERTBACK_NOT)
5913 {
5914 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
5915 compile_bracket_hotpath(common, current->cc, current);
5916 compile_bracket_fallbackpath(common, current->top);
5917 }
5918 else
5919 {
5920 memset(&fallback, 0, sizeof(fallback));
5921 fallback.common.cc = current->cc;
5922 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
5923 /* Manual call of compile_assert_hotpath. */
5924 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
5925 }
5926 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
5927 }
5928
5929 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
5930 {
5931 DEFINE_COMPILER;
5932
5933 while (current)
5934 {
5935 if (current->nextfallbacks != NULL)
5936 set_jumps(current->nextfallbacks, LABEL());
5937 switch(*current->cc)
5938 {
5939 case OP_SET_SOM:
5940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5941 free_stack(common, 1);
5942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
5943 break;
5944
5945 case OP_STAR:
5946 case OP_MINSTAR:
5947 case OP_PLUS:
5948 case OP_MINPLUS:
5949 case OP_QUERY:
5950 case OP_MINQUERY:
5951 case OP_UPTO:
5952 case OP_MINUPTO:
5953 case OP_EXACT:
5954 case OP_POSSTAR:
5955 case OP_POSPLUS:
5956 case OP_POSQUERY:
5957 case OP_POSUPTO:
5958 case OP_STARI:
5959 case OP_MINSTARI:
5960 case OP_PLUSI:
5961 case OP_MINPLUSI:
5962 case OP_QUERYI:
5963 case OP_MINQUERYI:
5964 case OP_UPTOI:
5965 case OP_MINUPTOI:
5966 case OP_EXACTI:
5967 case OP_POSSTARI:
5968 case OP_POSPLUSI:
5969 case OP_POSQUERYI:
5970 case OP_POSUPTOI:
5971 case OP_NOTSTAR:
5972 case OP_NOTMINSTAR:
5973 case OP_NOTPLUS:
5974 case OP_NOTMINPLUS:
5975 case OP_NOTQUERY:
5976 case OP_NOTMINQUERY:
5977 case OP_NOTUPTO:
5978 case OP_NOTMINUPTO:
5979 case OP_NOTEXACT:
5980 case OP_NOTPOSSTAR:
5981 case OP_NOTPOSPLUS:
5982 case OP_NOTPOSQUERY:
5983 case OP_NOTPOSUPTO:
5984 case OP_NOTSTARI:
5985 case OP_NOTMINSTARI:
5986 case OP_NOTPLUSI:
5987 case OP_NOTMINPLUSI:
5988 case OP_NOTQUERYI:
5989 case OP_NOTMINQUERYI:
5990 case OP_NOTUPTOI:
5991 case OP_NOTMINUPTOI:
5992 case OP_NOTEXACTI:
5993 case OP_NOTPOSSTARI:
5994 case OP_NOTPOSPLUSI:
5995 case OP_NOTPOSQUERYI:
5996 case OP_NOTPOSUPTOI:
5997 case OP_TYPESTAR:
5998 case OP_TYPEMINSTAR:
5999 case OP_TYPEPLUS:
6000 case OP_TYPEMINPLUS:
6001 case OP_TYPEQUERY:
6002 case OP_TYPEMINQUERY:
6003 case OP_TYPEUPTO:
6004 case OP_TYPEMINUPTO:
6005 case OP_TYPEEXACT:
6006 case OP_TYPEPOSSTAR:
6007 case OP_TYPEPOSPLUS:
6008 case OP_TYPEPOSQUERY:
6009 case OP_TYPEPOSUPTO:
6010 case OP_CLASS:
6011 case OP_NCLASS:
6012 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6013 case OP_XCLASS:
6014 #endif
6015 compile_iterator_fallbackpath(common, current);
6016 break;
6017
6018 case OP_REF:
6019 case OP_REFI:
6020 compile_ref_iterator_fallbackpath(common, current);
6021 break;
6022
6023 case OP_RECURSE:
6024 compile_recurse_fallbackpath(common, current);
6025 break;
6026
6027 case OP_ASSERT:
6028 case OP_ASSERT_NOT:
6029 case OP_ASSERTBACK:
6030 case OP_ASSERTBACK_NOT:
6031 compile_assert_fallbackpath(common, current);
6032 break;
6033
6034 case OP_ONCE:
6035 case OP_ONCE_NC:
6036 case OP_BRA:
6037 case OP_CBRA:
6038 case OP_COND:
6039 case OP_SBRA:
6040 case OP_SCBRA:
6041 case OP_SCOND:
6042 compile_bracket_fallbackpath(common, current);
6043 break;
6044
6045 case OP_BRAZERO:
6046 if (current->cc[1] > OP_ASSERTBACK_NOT)
6047 compile_bracket_fallbackpath(common, current);
6048 else
6049 compile_assert_fallbackpath(common, current);
6050 break;
6051
6052 case OP_BRAPOS:
6053 case OP_CBRAPOS:
6054 case OP_SBRAPOS:
6055 case OP_SCBRAPOS:
6056 case OP_BRAPOSZERO:
6057 compile_bracketpos_fallbackpath(common, current);
6058 break;
6059
6060 case OP_BRAMINZERO:
6061 compile_braminzero_fallbackpath(common, current);
6062 break;
6063
6064 case OP_FAIL:
6065 case OP_ACCEPT:
6066 case OP_ASSERT_ACCEPT:
6067 set_jumps(current->topfallbacks, LABEL());
6068 break;
6069
6070 default:
6071 SLJIT_ASSERT_STOP();
6072 break;
6073 }
6074 current = current->prev;
6075 }
6076 }
6077
6078 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6079 {
6080 DEFINE_COMPILER;
6081 pcre_uchar *cc = common->start + common->currententry->start;
6082 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6083 pcre_uchar *ccend = bracketend(cc);
6084 int localsize = get_localsize(common, ccbegin, ccend);
6085 int framesize = get_framesize(common, cc, TRUE);
6086 int alternativesize;
6087 BOOL needsframe;
6088 fallback_common altfallback;
6089 struct sljit_jump *jump;
6090
6091 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6092 needsframe = framesize >= 0;
6093 if (!needsframe)
6094 framesize = 0;
6095 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6096
6097 SLJIT_ASSERT(common->currententry->entry == NULL);
6098 common->currententry->entry = LABEL();
6099 set_jumps(common->currententry->calls, common->currententry->entry);
6100
6101 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6102 allocate_stack(common, localsize + framesize + alternativesize);
6103 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6104 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6106 if (needsframe)
6107 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6108
6109 if (alternativesize > 0)
6110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6111
6112 memset(&altfallback, 0, sizeof(fallback_common));
6113 common->acceptlabel = NULL;
6114 common->accept = NULL;
6115 altfallback.cc = ccbegin;
6116 cc += GET(cc, 1);
6117 while (1)
6118 {
6119 altfallback.top = NULL;
6120 altfallback.topfallbacks = NULL;
6121
6122 if (altfallback.cc != ccbegin)
6123 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6124
6125 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6126 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6127 return;
6128
6129 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6130
6131 compile_fallbackpath(common, altfallback.top);
6132 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6133 return;
6134 set_jumps(altfallback.topfallbacks, LABEL());
6135
6136 if (*cc != OP_ALT)
6137 break;
6138
6139 altfallback.cc = cc + 1 + LINK_SIZE;
6140 cc += GET(cc, 1);
6141 }
6142 /* None of them matched. */
6143 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6144 jump = JUMP(SLJIT_JUMP);
6145
6146 set_jumps(common->accept, LABEL());
6147 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6148 if (needsframe)
6149 {
6150 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6151 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6152 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6153 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6155 }
6156 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6157
6158 JUMPHERE(jump);
6159 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6160 free_stack(common, localsize + framesize + alternativesize);
6161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6162 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6164 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6165 }
6166
6167 #undef COMPILE_FALLBACKPATH
6168 #undef CURRENT_AS
6169
6170 void
6171 PRIV(jit_compile)(const real_pcre *re, pcre_extra *extra)
6172 {
6173 struct sljit_compiler *compiler;
6174 fallback_common rootfallback;
6175 compiler_common common_data;
6176 compiler_common *common = &common_data;
6177 const pcre_uint8 *tables = re->tables;
6178 pcre_study_data *study;
6179 pcre_uchar *ccend;
6180 executable_function *function;
6181 void *executable_func;
6182 struct sljit_label *leave;
6183 struct sljit_label *mainloop = NULL;
6184 struct sljit_label *empty_match_found;
6185 struct sljit_label *empty_match_fallback;
6186 struct sljit_jump *alloc_error;
6187 struct sljit_jump *reqbyte_notfound = NULL;
6188 struct sljit_jump *empty_match;
6189
6190 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6191 study = extra->study_data;
6192
6193 if (!tables)
6194 tables = PRIV(default_tables);
6195
6196 memset(&rootfallback, 0, sizeof(fallback_common));
6197 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6198
6199 common->compiler = NULL;
6200 common->start = rootfallback.cc;
6201 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6202 common->fcc = tables + fcc_offset;
6203 common->lcc = (sljit_w)(tables + lcc_offset);
6204 common->nltype = NLTYPE_FIXED;
6205 switch(re->options & PCRE_NEWLINE_BITS)
6206 {
6207 case 0:
6208 /* Compile-time default */
6209 switch (NEWLINE)
6210 {
6211 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6212 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6213 default: common->newline = NEWLINE; break;
6214 }
6215 break;
6216 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6217 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6218 case PCRE_NEWLINE_CR+
6219 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6220 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6221 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6222 default: return;
6223 }
6224 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6225 common->bsr_nltype = NLTYPE_ANYCRLF;
6226 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6227 common->bsr_nltype = NLTYPE_ANY;
6228 else
6229 {
6230 #ifdef BSR_ANYCRLF
6231 common->bsr_nltype = NLTYPE_ANYCRLF;
6232 #else
6233 common->bsr_nltype = NLTYPE_ANY;
6234 #endif
6235 }
6236 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6237 common->ctypes = (sljit_w)(tables + ctypes_offset);
6238 common->name_table = (sljit_w)re + re->name_table_offset;
6239 common->name_count = re->name_count;
6240 common->name_entry_size = re->name_entry_size;
6241 common->acceptlabel = NULL;
6242 common->stubs = NULL;
6243 common->entries = NULL;
6244 common->currententry = NULL;
6245 common->accept = NULL;
6246 common->calllimit = NULL;
6247 common->stackalloc = NULL;
6248 common->revertframes = NULL;
6249 common->wordboundary = NULL;
6250 common->anynewline = NULL;
6251 common->hspace = NULL;
6252 common->vspace = NULL;
6253 common->casefulcmp = NULL;
6254 common->caselesscmp = NULL;
6255 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6256 #ifdef SUPPORT_UTF8
6257 common->utf8 = (re->options & PCRE_UTF8) != 0;
6258 #ifdef SUPPORT_UCP
6259 common->useucp = (re->options & PCRE_UCP) != 0;
6260 #endif
6261 common->utf8readchar = NULL;
6262 common->utf8readtype8 = NULL;
6263 #endif
6264 #ifdef SUPPORT_UCP
6265 common->getucd = NULL;
6266 #endif
6267 ccend = bracketend(rootfallback.cc);
6268 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6269 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6270 if (common->localsize < 0)
6271 return;
6272 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6273 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6274 return;
6275 common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6276 if (!common->localptrs)
6277 return;
6278 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6279 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6280
6281 compiler = sljit_create_compiler();
6282 if (!compiler)
6283 {
6284 SLJIT_FREE(common->localptrs);
6285 return;
6286 }
6287 common->compiler = compiler;
6288
6289 /* Main pcre_jit_exec entry. */
6290 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6291
6292 /* Register init. */
6293 reset_ovector(common, (re->top_bracket + 1) * 2);
6294 if ((re->flags & PCRE_REQCHSET) != 0)
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0);
6296
6297 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
6298 OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
6299 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6300 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6301 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6302 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6303 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6304 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6306
6307 /* Main part of the matching */
6308 if ((re->options & PCRE_ANCHORED) == 0)
6309 {
6310 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6311 /* Forward search if possible. */
6312 if ((re->flags & PCRE_FIRSTSET) != 0)
6313 fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6314 else if ((re->flags & PCRE_STARTLINE) != 0)
6315 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
6316 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6317 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6318 }
6319 if ((re->flags & PCRE_REQCHSET) != 0)
6320 reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
6321
6322 /* Store the current STR_PTR in OVECTOR(0). */
6323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6324 /* Copy the limit of allowed recursions. */
6325 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6326
6327 compile_hotpath(common, rootfallback.cc, ccend, &rootfallback);
6328 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6329 {
6330 sljit_free_compiler(compiler);
6331 SLJIT_FREE(common->localptrs);
6332 return;
6333 }
6334
6335 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6336 empty_match_found = LABEL();
6337
6338 common->acceptlabel = LABEL();
6339 if (common->accept != NULL)
6340 set_jumps(common->accept, common->acceptlabel);
6341
6342 /* This means we have a match. Update the ovector. */
6343 copy_ovector(common, re->top_bracket + 1);
6344 leave = LABEL();
6345 sljit_emit_return(compiler, SLJIT_UNUSED, 0);
6346
6347 empty_match_fallback = LABEL();
6348 compile_fallbackpath(common, rootfallback.top);
6349 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6350 {
6351 sljit_free_compiler(compiler);
6352 SLJIT_FREE(common->localptrs);
6353 return;
6354 }
6355
6356 SLJIT_ASSERT(rootfallback.prev == NULL);
6357
6358 /* Check we have remaining characters. */
6359 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6360
6361 if ((re->options & PCRE_ANCHORED) == 0)
6362 {
6363 if ((re->options & PCRE_FIRSTLINE) == 0)
6364 {
6365 if (study != NULL && study->minlength > 1)
6366 {
6367 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6368 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
6369 }
6370 else
6371 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
6372 }
6373 else
6374 {
6375 if (study != NULL && study->minlength > 1)
6376 {
6377 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6378 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
6379 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
6380 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
6381 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
6382 JUMPTO(SLJIT_C_ZERO, mainloop);
6383 }
6384 else
6385 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop);
6386 }
6387 }
6388
6389 if (reqbyte_notfound != NULL)
6390 JUMPHERE(reqbyte_notfound);
6391 /* Copy OVECTOR(1) to OVECTOR(0) */
6392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6393 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6394 JUMPTO(SLJIT_JUMP, leave);
6395
6396 flush_stubs(common);
6397
6398 JUMPHERE(empty_match);
6399 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6400 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6401 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback);
6402 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6403 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
6404 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6405 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
6406 JUMPTO(SLJIT_JUMP, empty_match_fallback);
6407
6408 common->currententry = common->entries;
6409 while (common->currententry != NULL)
6410 {
6411 /* Might add new entries. */
6412 compile_recurse(common);
6413 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6414 {
6415 sljit_free_compiler(compiler);
6416 SLJIT_FREE(common->localptrs);
6417 return;
6418 }
6419 flush_stubs(common);
6420 common->currententry = common->currententry->next;
6421 }
6422
6423 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
6424 /* This is a (really) rare case. */
6425 set_jumps(common->stackalloc, LABEL());
6426 /* RETURN_ADDR is not a saved register. */
6427 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
6428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
6429 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6430 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6431 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
6432 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
6433
6434 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
6435 alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6436 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6437 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6438 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
6439 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
6440 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
6441 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6442
6443 /* Allocation failed. */
6444 JUMPHERE(alloc_error);
6445 /* We break the return address cache here, but this is a really rare case. */
6446 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
6447 JUMPTO(SLJIT_JUMP, leave);
6448
6449 /* Call limit reached. */
6450 set_jumps(common->calllimit, LABEL());
6451 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
6452 JUMPTO(SLJIT_JUMP, leave);
6453
6454 if (common->revertframes != NULL)
6455 {
6456 set_jumps(common->revertframes, LABEL());
6457 do_revertframes(common);
6458 }
6459 if (common->wordboundary != NULL)
6460 {
6461 set_jumps(common->wordboundary, LABEL());
6462 check_wordboundary(common);
6463 }
6464 if (common->anynewline != NULL)
6465 {
6466 set_jumps(common->anynewline, LABEL());
6467 check_anynewline(common);
6468 }
6469 if (common->hspace != NULL)
6470 {
6471 set_jumps(common->hspace, LABEL());
6472 check_hspace(common);
6473 }
6474 if (common->vspace != NULL)
6475 {
6476 set_jumps(common->vspace, LABEL());
6477 check_vspace(common);
6478 }
6479 if (common->casefulcmp != NULL)
6480 {
6481 set_jumps(common->casefulcmp, LABEL());
6482 do_casefulcmp(common);
6483 }
6484 if (common->caselesscmp != NULL)
6485 {
6486 set_jumps(common->caselesscmp, LABEL());
6487 do_caselesscmp(common);
6488 }
6489 #ifdef SUPPORT_UTF8
6490 if (common->utf8readchar != NULL)
6491 {
6492 set_jumps(common->utf8readchar, LABEL());
6493 do_utf8readchar(common);
6494 }
6495 if (common->utf8readtype8 != NULL)
6496 {
6497 set_jumps(common->utf8readtype8, LABEL());
6498 do_utf8readtype8(common);
6499 }
6500 #endif
6501 #ifdef SUPPORT_UCP
6502 if (common->getucd != NULL)
6503 {
6504 set_jumps(common->getucd, LABEL());
6505 do_getucd(common);
6506 }
6507 #endif
6508
6509 SLJIT_FREE(common->localptrs);
6510 executable_func = sljit_generate_code(compiler);
6511 sljit_free_compiler(compiler);
6512 if (executable_func == NULL)
6513 return;
6514
6515 function = SLJIT_MALLOC(sizeof(executable_function));
6516 if (function == NULL)
6517 {
6518 /* This case is highly unlikely since we just recently
6519 freed a lot of memory. Although not impossible. */
6520 sljit_free_code(executable_func);
6521 return;
6522 }
6523
6524 function->executable_func = executable_func;
6525 function->callback = NULL;
6526 function->userdata = NULL;
6527 extra->executable_jit = function;
6528 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
6529 }
6530
6531 static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function)
6532 {
6533 union {
6534 void* executable_func;
6535 jit_function call_executable_func;
6536 } convert_executable_func;
6537 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
6538 struct sljit_stack local_stack;
6539
6540 local_stack.top = (sljit_w)&local_area;
6541 local_stack.base = local_stack.top;
6542 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
6543 local_stack.max_limit = local_stack.limit;
6544 arguments->stack = &local_stack;
6545 convert_executable_func.executable_func = function->executable_func;
6546 return convert_executable_func.call_executable_func(arguments);
6547 }
6548
6549 int
6550 PRIV(jit_exec)(const real_pcre *re, void *executable_func,
6551 const pcre_uchar *subject, int length, int start_offset, int options,
6552 int match_limit, int *offsets, int offsetcount)
6553 {
6554 executable_function *function = (executable_function*)executable_func;
6555 union {
6556 void* executable_func;
6557 jit_function call_executable_func;
6558 } convert_executable_func;
6559 jit_arguments arguments;
6560 int maxoffsetcount;
6561 int retval;
6562
6563 /* Sanity checks should be handled by pcre_exec. */
6564 arguments.stack = NULL;
6565 arguments.str = subject + start_offset;
6566 arguments.begin = subject;
6567 arguments.end = subject + length;
6568 arguments.calllimit = match_limit; /* JIT decreases this value less times. */
6569 arguments.notbol = (options & PCRE_NOTBOL) != 0;
6570 arguments.noteol = (options & PCRE_NOTEOL) != 0;
6571 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
6572 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6573 arguments.offsets = offsets;
6574
6575 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
6576 the output vector for storing captured strings, with the remainder used as
6577 workspace. We don't need the workspace here. For compatibility, we limit the
6578 number of captured strings in the same way as pcre_exec(), so that the user
6579 gets the same result with and without JIT. */
6580
6581 offsetcount = ((offsetcount - (offsetcount % 3)) * 2)/3;
6582 maxoffsetcount = (re->top_bracket + 1) * 2;
6583 if (offsetcount > maxoffsetcount)
6584 offsetcount = maxoffsetcount;
6585 arguments.offsetcount = offsetcount;
6586
6587 if (function->callback)
6588 arguments.stack = (struct sljit_stack*)function->callback(function->userdata);
6589 else
6590 arguments.stack = (struct sljit_stack*)function->userdata;
6591
6592 if (arguments.stack == NULL)
6593 retval = jit_machine_stack_exec(&arguments, function);
6594 else
6595 {
6596 convert_executable_func.executable_func = function->executable_func;
6597 retval = convert_executable_func.call_executable_func(&arguments);
6598 }
6599
6600 if (retval * 2 > offsetcount)
6601 retval = 0;
6602 return retval;
6603 }
6604
6605 void
6606 PRIV(jit_free)(void *executable_func)
6607 {
6608 executable_function *function = (executable_function*)executable_func;
6609 sljit_free_code(function->executable_func);
6610 SLJIT_FREE(function);
6611 }
6612
6613 #ifdef COMPILE_PCRE8
6614 PCRE_EXP_DECL pcre_jit_stack *
6615 pcre_jit_stack_alloc(int startsize, int maxsize)
6616 #else
6617 PCRE_EXP_DECL pcre_jit_stack *
6618 pcre16_jit_stack_alloc(int startsize, int maxsize)
6619 #endif
6620 {
6621 if (startsize < 1 || maxsize < 1)
6622 return NULL;
6623 if (startsize > maxsize)
6624 startsize = maxsize;
6625 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6626 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6627 return (pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize);
6628 }
6629
6630 #ifdef COMPILE_PCRE8
6631 PCRE_EXP_DECL void
6632 pcre_jit_stack_free(pcre_jit_stack *stack)
6633 #else
6634 PCRE_EXP_DECL void
6635 pcre16_jit_stack_free(pcre_jit_stack *stack)
6636 #endif
6637 {
6638 sljit_free_stack((struct sljit_stack*)stack);
6639 }
6640
6641 #ifdef COMPILE_PCRE8
6642 PCRE_EXP_DECL void
6643 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6644 #else
6645 PCRE_EXP_DECL void
6646 pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6647 #endif
6648 {
6649 executable_function *function;
6650 if (extra != NULL &&
6651 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
6652 extra->executable_jit != NULL)
6653 {
6654 function = (executable_function*)extra->executable_jit;
6655 function->callback = callback;
6656 function->userdata = userdata;
6657 }
6658 }
6659
6660 #else /* SUPPORT_JIT */
6661
6662 /* These are dummy functions to avoid linking errors when JIT support is not
6663 being compiled. */
6664
6665 #ifdef COMPILE_PCRE8
6666 PCRE_EXP_DECL pcre_jit_stack *
6667 pcre_jit_stack_alloc(int startsize, int maxsize)
6668 #else
6669 PCRE_EXP_DECL pcre_jit_stack *
6670 pcre16_jit_stack_alloc(int startsize, int maxsize)
6671 #endif
6672 {
6673 (void)startsize;
6674 (void)maxsize;
6675 return NULL;
6676 }
6677
6678 #ifdef COMPILE_PCRE8
6679 PCRE_EXP_DECL void
6680 pcre_jit_stack_free(pcre_jit_stack *stack)
6681 #else
6682 PCRE_EXP_DECL void
6683 pcre16_jit_stack_free(pcre_jit_stack *stack)
6684 #endif
6685 {
6686 (void)stack;
6687 }
6688
6689 #ifdef COMPILE_PCRE8
6690 PCRE_EXP_DECL void
6691 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6692 #else
6693 PCRE_EXP_DECL void
6694 pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6695 #endif
6696 {
6697 (void)extra;
6698 (void)callback;
6699 (void)userdata;
6700 }
6701
6702 #endif
6703
6704 /* End of pcre_jit_compile.c */

  ViewVC Help
Powered by ViewVC 1.1.5