/[pcre]/code/branches/pcre16/pcre_jit_compile.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 767 - (show annotations)
Sat Nov 26 12:48:56 2011 UTC (8 years ago) by zherczeg
File MIME type: text/plain
File size: 204742 byte(s)
Make simple patterns work in PCRE16
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (pcre_malloc)(size)
56 #define SLJIT_FREE(ptr) (pcre_free)(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Amount of memory allocated on the stack. Fast, but limited in size. */
#define LOCAL_SPACE_SIZE 32768

/* NOTE(review): presumably the step by which the backtracking stack is
enlarged; its use is not visible in this part of the file. */
#define STACK_GROWTH_RATE 8192

/* In SLJIT debug builds allocate_stack() overwrites the temporaries, so code
that wrongly relies on them surviving an allocation is caught early. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
165 typedef struct executable_function {
166 void *executable_func;
167 pcre_jit_callback callback;
168 void *userdata;
169 } executable_function;
170
/* Singly linked list of emitted jumps; new items are prepended by add_jump()
and all of them receive their target label in set_jumps(). */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
175
/* Kinds of out-of-line stubs handled by flush_stubs(). */
enum stub_types {
  stack_alloc
};
177
178 typedef struct stub_list {
179 enum stub_types type;
180 int data;
181 struct sljit_jump *start;
182 struct sljit_label *leave;
183 struct stub_list *next;
184 } stub_list;
185
186 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
187
188 /* The following structure is the key data type for the recursive
189 code generator. It is allocated by compile_hotpath, and contains
190 the aguments for compile_fallbackpath. Must be the first member
191 of its descendants. */
192 typedef struct fallback_common {
193 /* Concatenation stack. */
194 struct fallback_common *prev;
195 jump_list *nextfallbacks;
196 /* Internal stack (for component operators). */
197 struct fallback_common *top;
198 jump_list *topfallbacks;
199 /* Opcode pointer. */
200 pcre_uchar *cc;
201 } fallback_common;
202
203 typedef struct assert_fallback {
204 fallback_common common;
205 jump_list *condfailed;
206 /* Less than 0 (-1) if a frame is not needed. */
207 int framesize;
208 /* Points to our private memory word on the stack. */
209 int localptr;
210 /* For iterators. */
211 struct sljit_label *hotpath;
212 } assert_fallback;
213
214 typedef struct bracket_fallback {
215 fallback_common common;
216 /* Where to coninue if an alternative is successfully matched. */
217 struct sljit_label *althotpath;
218 /* For rmin and rmax iterators. */
219 struct sljit_label *recursivehotpath;
220 /* For greedy ? operator. */
221 struct sljit_label *zerohotpath;
222 /* Contains the branches of a failed condition. */
223 union {
224 /* Both for OP_COND, OP_SCOND. */
225 jump_list *condfailed;
226 assert_fallback *assert;
227 /* For OP_ONCE. -1 if not needed. */
228 int framesize;
229 } u;
230 /* Points to our private memory word on the stack. */
231 int localptr;
232 } bracket_fallback;
233
234 typedef struct bracketpos_fallback {
235 fallback_common common;
236 /* Points to our private memory word on the stack. */
237 int localptr;
238 /* Reverting stack is needed. */
239 int framesize;
240 /* Allocated stack size. */
241 int stacksize;
242 } bracketpos_fallback;
243
244 typedef struct braminzero_fallback {
245 fallback_common common;
246 struct sljit_label *hotpath;
247 } braminzero_fallback;
248
249 typedef struct iterator_fallback {
250 fallback_common common;
251 /* Next iteration. */
252 struct sljit_label *hotpath;
253 } iterator_fallback;
254
255 typedef struct recurse_entry {
256 struct recurse_entry *next;
257 /* Contains the function entry. */
258 struct sljit_label *entry;
259 /* Collects the calls until the function is not created. */
260 jump_list *calls;
261 /* Points to the starting opcode. */
262 int start;
263 } recurse_entry;
264
265 typedef struct recurse_fallback {
266 fallback_common common;
267 } recurse_fallback;
268
269 typedef struct compiler_common {
270 struct sljit_compiler *compiler;
271 pcre_uchar *start;
272 int localsize;
273 int *localptrs;
274 const pcre_uint8 *fcc;
275 sljit_w lcc;
276 int cbraptr;
277 int nltype;
278 int newline;
279 int bsr_nltype;
280 int endonly;
281 sljit_w ctypes;
282 sljit_uw name_table;
283 sljit_w name_count;
284 sljit_w name_entry_size;
285 struct sljit_label *acceptlabel;
286 stub_list *stubs;
287 recurse_entry *entries;
288 recurse_entry *currententry;
289 jump_list *accept;
290 jump_list *calllimit;
291 jump_list *stackalloc;
292 jump_list *revertframes;
293 jump_list *wordboundary;
294 jump_list *anynewline;
295 jump_list *hspace;
296 jump_list *vspace;
297 jump_list *casefulcmp;
298 jump_list *caselesscmp;
299 BOOL jscript_compat;
300 #ifdef SUPPORT_UTF8
301 BOOL utf8;
302 #ifdef SUPPORT_UCP
303 BOOL useucp;
304 #endif
305 jump_list *utf8readchar;
306 jump_list *utf8readtype8;
307 #endif
308 #ifdef SUPPORT_UCP
309 jump_list *getucd;
310 #endif
311 } compiler_common;
312
/* Context for byte_sequence_compare. When unaligned access is available the
two unions give int / short / code unit views of the same storage. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_h asshort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_h asshort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
346
/* Marker values written into saved stack frames by init_frame(): frame_end
closes a frame, frame_setstrbegin precedes the saved OVECTOR(0) value. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1
};
351
/* Byte offset of the i-th stack element: element 0 lies one machine word
below the base address, and larger indices lie further below. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))
354
/* Symbolic names of the sljit registers used by the generated code. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_GENERAL_REG1
#define STR_END       SLJIT_GENERAL_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_GENERAL_REG3
#define ARGUMENTS     SLJIT_GENERAL_EREG1
#define CALL_COUNT    SLJIT_GENERAL_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
365
/* Layout of the local variables; every value is a byte offset and every
slot is one sljit_w wide. */
/* Two scratch locals which the current opcode may use freely. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Head of the last recursion. */
#define RECURSIVE_HEAD   (4 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT       (5 * sizeof(sljit_w))
/* Last known position of the requested byte. */
#define REQ_BYTE_PTR     (6 * sizeof(sljit_w))
/* End pointer of the first line. */
#define FIRSTLINE_END    (7 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers to
characters. The vector data is divided into two groups: the first group
holds the start / end character pointers, and the second holds the start
pointers while the end of the capturing group has not yet been reached. */
#define OVECTOR_START    (8 * sizeof(sljit_w))
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
/* Private data offset assigned to the opcode at cc (see set_localptrs). */
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
389
390 #ifdef COMPILE_PCRE8
391 #define MOV_UCHAR SLJIT_MOV_UB
392 #else
393 #ifdef COMPILE_PCRE16
394 #define MOV_UCHAR SLJIT_MOV_UH
395 #else
396 #error Unsupported compiling mode
397 #endif
398 #endif
399
/* Shorthand wrappers around the sljit emitter API. Except DEFINE_COMPILER,
all of them expect a local variable named 'compiler' to be in scope;
DEFINE_COMPILER declares it from 'common'. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
/* Emit a one / two operand instruction. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emit a label at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)
/* Emit a jump whose target is set later. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump to an already known label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Make a previously emitted jump land at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
/* Emit a compare-and-jump; CMPTO also sets its target label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
/* Emit a conditional value instruction of the given condition type. */
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
421
422 static pcre_uchar* bracketend(pcre_uchar* cc)
423 {
424 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
425 do cc += GET(cc, 1); while (*cc == OP_ALT);
426 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
427 cc += 1 + LINK_SIZE;
428 return cc;
429 }
430
431 /* Functions which might need modification for every newly supported opcode:
432 next_opcode
433 get_localspace
434 set_localptrs
435 get_framesize
436 init_frame
437 get_localsize
438 copy_locals
439 compile_hotpath
440 compile_fallbackpath
441 */
442
443 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
444 {
445 SLJIT_UNUSED_ARG(common);
446 switch(*cc)
447 {
448 case OP_SOD:
449 case OP_SOM:
450 case OP_SET_SOM:
451 case OP_NOT_WORD_BOUNDARY:
452 case OP_WORD_BOUNDARY:
453 case OP_NOT_DIGIT:
454 case OP_DIGIT:
455 case OP_NOT_WHITESPACE:
456 case OP_WHITESPACE:
457 case OP_NOT_WORDCHAR:
458 case OP_WORDCHAR:
459 case OP_ANY:
460 case OP_ALLANY:
461 case OP_ANYNL:
462 case OP_NOT_HSPACE:
463 case OP_HSPACE:
464 case OP_NOT_VSPACE:
465 case OP_VSPACE:
466 case OP_EXTUNI:
467 case OP_EODN:
468 case OP_EOD:
469 case OP_CIRC:
470 case OP_CIRCM:
471 case OP_DOLL:
472 case OP_DOLLM:
473 case OP_TYPESTAR:
474 case OP_TYPEMINSTAR:
475 case OP_TYPEPLUS:
476 case OP_TYPEMINPLUS:
477 case OP_TYPEQUERY:
478 case OP_TYPEMINQUERY:
479 case OP_TYPEPOSSTAR:
480 case OP_TYPEPOSPLUS:
481 case OP_TYPEPOSQUERY:
482 case OP_CRSTAR:
483 case OP_CRMINSTAR:
484 case OP_CRPLUS:
485 case OP_CRMINPLUS:
486 case OP_CRQUERY:
487 case OP_CRMINQUERY:
488 case OP_DEF:
489 case OP_BRAZERO:
490 case OP_BRAMINZERO:
491 case OP_BRAPOSZERO:
492 case OP_FAIL:
493 case OP_ACCEPT:
494 case OP_ASSERT_ACCEPT:
495 case OP_SKIPZERO:
496 return cc + 1;
497
498 case OP_ANYBYTE:
499 #ifdef SUPPORT_UTF8
500 if (common->utf8) return NULL;
501 #endif
502 return cc + 1;
503
504 case OP_CHAR:
505 case OP_CHARI:
506 case OP_NOT:
507 case OP_NOTI:
508
509 case OP_STAR:
510 case OP_MINSTAR:
511 case OP_PLUS:
512 case OP_MINPLUS:
513 case OP_QUERY:
514 case OP_MINQUERY:
515 case OP_POSSTAR:
516 case OP_POSPLUS:
517 case OP_POSQUERY:
518 case OP_STARI:
519 case OP_MINSTARI:
520 case OP_PLUSI:
521 case OP_MINPLUSI:
522 case OP_QUERYI:
523 case OP_MINQUERYI:
524 case OP_POSSTARI:
525 case OP_POSPLUSI:
526 case OP_POSQUERYI:
527 case OP_NOTSTAR:
528 case OP_NOTMINSTAR:
529 case OP_NOTPLUS:
530 case OP_NOTMINPLUS:
531 case OP_NOTQUERY:
532 case OP_NOTMINQUERY:
533 case OP_NOTPOSSTAR:
534 case OP_NOTPOSPLUS:
535 case OP_NOTPOSQUERY:
536 case OP_NOTSTARI:
537 case OP_NOTMINSTARI:
538 case OP_NOTPLUSI:
539 case OP_NOTMINPLUSI:
540 case OP_NOTQUERYI:
541 case OP_NOTMINQUERYI:
542 case OP_NOTPOSSTARI:
543 case OP_NOTPOSPLUSI:
544 case OP_NOTPOSQUERYI:
545 cc += 2;
546 #ifdef SUPPORT_UTF8
547 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
548 #endif
549 return cc;
550
551 case OP_UPTO:
552 case OP_MINUPTO:
553 case OP_EXACT:
554 case OP_POSUPTO:
555 case OP_UPTOI:
556 case OP_MINUPTOI:
557 case OP_EXACTI:
558 case OP_POSUPTOI:
559 case OP_NOTUPTO:
560 case OP_NOTMINUPTO:
561 case OP_NOTEXACT:
562 case OP_NOTPOSUPTO:
563 case OP_NOTUPTOI:
564 case OP_NOTMINUPTOI:
565 case OP_NOTEXACTI:
566 case OP_NOTPOSUPTOI:
567 cc += 4;
568 #ifdef SUPPORT_UTF8
569 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
570 #endif
571 return cc;
572
573 case OP_NOTPROP:
574 case OP_PROP:
575 case OP_TYPEUPTO:
576 case OP_TYPEMINUPTO:
577 case OP_TYPEEXACT:
578 case OP_TYPEPOSUPTO:
579 case OP_REF:
580 case OP_REFI:
581 case OP_CREF:
582 case OP_NCREF:
583 case OP_RREF:
584 case OP_NRREF:
585 case OP_CLOSE:
586 cc += 3;
587 return cc;
588
589 case OP_CRRANGE:
590 case OP_CRMINRANGE:
591 return cc + 5;
592
593 case OP_CLASS:
594 case OP_NCLASS:
595 return cc + 33;
596
597 #ifdef SUPPORT_UTF8
598 case OP_XCLASS:
599 return cc + GET(cc, 1);
600 #endif
601
602 case OP_RECURSE:
603 case OP_ASSERT:
604 case OP_ASSERT_NOT:
605 case OP_ASSERTBACK:
606 case OP_ASSERTBACK_NOT:
607 case OP_REVERSE:
608 case OP_ONCE:
609 case OP_ONCE_NC:
610 case OP_BRA:
611 case OP_BRAPOS:
612 case OP_COND:
613 case OP_SBRA:
614 case OP_SBRAPOS:
615 case OP_SCOND:
616 case OP_ALT:
617 case OP_KET:
618 case OP_KETRMAX:
619 case OP_KETRMIN:
620 case OP_KETRPOS:
621 return cc + 1 + LINK_SIZE;
622
623 case OP_CBRA:
624 case OP_CBRAPOS:
625 case OP_SCBRA:
626 case OP_SCBRAPOS:
627 return cc + 1 + LINK_SIZE + 2;
628
629 default:
630 return NULL;
631 }
632 }
633
634 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
635 {
636 int localspace = 0;
637 pcre_uchar *alternative;
638 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
639 while (cc < ccend)
640 {
641 switch(*cc)
642 {
643 case OP_ASSERT:
644 case OP_ASSERT_NOT:
645 case OP_ASSERTBACK:
646 case OP_ASSERTBACK_NOT:
647 case OP_ONCE:
648 case OP_ONCE_NC:
649 case OP_BRAPOS:
650 case OP_SBRA:
651 case OP_SBRAPOS:
652 case OP_SCOND:
653 localspace += sizeof(sljit_w);
654 cc += 1 + LINK_SIZE;
655 break;
656
657 case OP_CBRAPOS:
658 case OP_SCBRAPOS:
659 localspace += sizeof(sljit_w);
660 cc += 1 + LINK_SIZE + 2;
661 break;
662
663 case OP_COND:
664 /* Might be a hidden SCOND. */
665 alternative = cc + GET(cc, 1);
666 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
667 localspace += sizeof(sljit_w);
668 cc += 1 + LINK_SIZE;
669 break;
670
671 default:
672 cc = next_opcode(common, cc);
673 if (cc == NULL)
674 return -1;
675 break;
676 }
677 }
678 return localspace;
679 }
680
681 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
682 {
683 pcre_uchar *cc = common->start;
684 pcre_uchar *alternative;
685 while (cc < ccend)
686 {
687 switch(*cc)
688 {
689 case OP_ASSERT:
690 case OP_ASSERT_NOT:
691 case OP_ASSERTBACK:
692 case OP_ASSERTBACK_NOT:
693 case OP_ONCE:
694 case OP_ONCE_NC:
695 case OP_BRAPOS:
696 case OP_SBRA:
697 case OP_SBRAPOS:
698 case OP_SCOND:
699 common->localptrs[cc - common->start] = localptr;
700 localptr += sizeof(sljit_w);
701 cc += 1 + LINK_SIZE;
702 break;
703
704 case OP_CBRAPOS:
705 case OP_SCBRAPOS:
706 common->localptrs[cc - common->start] = localptr;
707 localptr += sizeof(sljit_w);
708 cc += 1 + LINK_SIZE + 2;
709 break;
710
711 case OP_COND:
712 /* Might be a hidden SCOND. */
713 alternative = cc + GET(cc, 1);
714 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
715 {
716 common->localptrs[cc - common->start] = localptr;
717 localptr += sizeof(sljit_w);
718 }
719 cc += 1 + LINK_SIZE;
720 break;
721
722 default:
723 cc = next_opcode(common, cc);
724 SLJIT_ASSERT(cc != NULL);
725 break;
726 }
727 }
728 }
729
730 /* Returns with -1 if no need for frame. */
731 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
732 {
733 pcre_uchar *ccend = bracketend(cc);
734 int length = 0;
735 BOOL possessive = FALSE;
736 BOOL setsom_found = FALSE;
737
738 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
739 {
740 length = 3;
741 possessive = TRUE;
742 }
743
744 cc = next_opcode(common, cc);
745 SLJIT_ASSERT(cc != NULL);
746 while (cc < ccend)
747 switch(*cc)
748 {
749 case OP_SET_SOM:
750 case OP_RECURSE:
751 if (!setsom_found)
752 {
753 length += 2;
754 setsom_found = TRUE;
755 }
756 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
757 break;
758
759 case OP_CBRA:
760 case OP_CBRAPOS:
761 case OP_SCBRA:
762 case OP_SCBRAPOS:
763 length += 3;
764 cc += 1 + LINK_SIZE + 2;
765 break;
766
767 default:
768 cc = next_opcode(common, cc);
769 SLJIT_ASSERT(cc != NULL);
770 break;
771 }
772
773 /* Possessive quantifiers can use a special case. */
774 if (SLJIT_UNLIKELY(possessive) && length == 3)
775 return -1;
776
777 if (length > 0)
778 return length + 1;
779 return -1;
780 }
781
782 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
783 {
784 DEFINE_COMPILER;
785 pcre_uchar *ccend = bracketend(cc);
786 BOOL setsom_found = FALSE;
787 int offset;
788
789 /* >= 1 + shortest item size (2) */
790 SLJIT_ASSERT(stackpos >= stacktop + 2);
791
792 stackpos = STACK(stackpos);
793 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
794 cc = next_opcode(common, cc);
795 SLJIT_ASSERT(cc != NULL);
796 while (cc < ccend)
797 switch(*cc)
798 {
799 case OP_SET_SOM:
800 case OP_RECURSE:
801 if (!setsom_found)
802 {
803 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
804 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
805 stackpos += (int)sizeof(sljit_w);
806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
807 stackpos += (int)sizeof(sljit_w);
808 setsom_found = TRUE;
809 }
810 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
811 break;
812
813 case OP_CBRA:
814 case OP_CBRAPOS:
815 case OP_SCBRA:
816 case OP_SCBRAPOS:
817 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
819 stackpos += (int)sizeof(sljit_w);
820 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
821 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
823 stackpos += (int)sizeof(sljit_w);
824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
825 stackpos += (int)sizeof(sljit_w);
826
827 cc += 1 + LINK_SIZE + 2;
828 break;
829
830 default:
831 cc = next_opcode(common, cc);
832 SLJIT_ASSERT(cc != NULL);
833 break;
834 }
835
836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
837 SLJIT_ASSERT(stackpos == STACK(stacktop));
838 }
839
840 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
841 {
842 int localsize = 2;
843 pcre_uchar *alternative;
844 /* Calculate the sum of the local variables. */
845 while (cc < ccend)
846 {
847 switch(*cc)
848 {
849 case OP_ASSERT:
850 case OP_ASSERT_NOT:
851 case OP_ASSERTBACK:
852 case OP_ASSERTBACK_NOT:
853 case OP_ONCE:
854 case OP_ONCE_NC:
855 case OP_BRAPOS:
856 case OP_SBRA:
857 case OP_SBRAPOS:
858 case OP_SCOND:
859 localsize++;
860 cc += 1 + LINK_SIZE;
861 break;
862
863 case OP_CBRA:
864 case OP_SCBRA:
865 localsize++;
866 cc += 1 + LINK_SIZE + 2;
867 break;
868
869 case OP_CBRAPOS:
870 case OP_SCBRAPOS:
871 localsize += 2;
872 cc += 1 + LINK_SIZE + 2;
873 break;
874
875 case OP_COND:
876 /* Might be a hidden SCOND. */
877 alternative = cc + GET(cc, 1);
878 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
879 localsize++;
880 cc += 1 + LINK_SIZE;
881 break;
882
883 default:
884 cc = next_opcode(common, cc);
885 SLJIT_ASSERT(cc != NULL);
886 break;
887 }
888 }
889 SLJIT_ASSERT(cc == ccend);
890 return localsize;
891 }
892
/* Emits the code which copies local variables between the locals area
(SLJIT_LOCALS_REG based) and the stack (STACK_TOP based).
   save == TRUE:  RECURSIVE_HEAD and then the locals of the opcodes in
                  [cc, ccend) are written to the stack.
   save == FALSE: the locals of the same opcodes are read back from the
                  stack; the first stack word is skipped and RECURSIVE_HEAD
                  is not written here.
stackptr and stacktop are stack slot indices delimiting the area used.
The opcode cases parallel those of get_localsize. Every word travels through
TMP1 or TMP2, which are used alternately, so both registers are overwritten
by the emitted code. */
893 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
894 BOOL save, int stackptr, int stacktop)
895 {
896 DEFINE_COMPILER;
/* Locals offsets produced by the current step; consumed from the highest index down. */
897 int srcw[2];
898 int count;
/* tmp1next selects the register of the next word; tmpNempty tells whether
the register currently holds a word which has not been written out yet. */
899 BOOL tmp1next = TRUE;
900 BOOL tmp1empty = TRUE;
901 BOOL tmp2empty = TRUE;
902 pcre_uchar *alternative;
903 enum {
904 start,
905 loop,
906 end
907 } status;
908
/* Only saving goes through the 'start' state, which handles RECURSIVE_HEAD. */
909 status = save ? start : loop;
/* Turn the slot indices into byte offsets (see STACK()). */
910 stackptr = STACK(stackptr - 2);
911 stacktop = STACK(stacktop - 1);
912
913 if (!save)
914 {
/* Skip the first stack word, then preload up to two words into TMP1 / TMP2. */
915 stackptr += sizeof(sljit_w);
916 if (stackptr < stacktop)
917 {
918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
919 stackptr += sizeof(sljit_w);
920 tmp1empty = FALSE;
921 }
922 if (stackptr < stacktop)
923 {
924 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
925 stackptr += sizeof(sljit_w);
926 tmp2empty = FALSE;
927 }
928 /* The tmp1next must be TRUE in either way. */
929 }
930
/* Each iteration selects 0..2 locals (srcw / count), then moves them. */
931 while (status != end)
932 {
933 count = 0;
934 switch(status)
935 {
936 case start:
937 SLJIT_ASSERT(save);
938 count = 1;
939 srcw[0] = RECURSIVE_HEAD;
940 status = loop;
941 break;
942
943 case loop:
944 if (cc >= ccend)
945 {
946 status = end;
947 break;
948 }
949
950 switch(*cc)
951 {
/* Opcodes with one private word. */
952 case OP_ASSERT:
953 case OP_ASSERT_NOT:
954 case OP_ASSERTBACK:
955 case OP_ASSERTBACK_NOT:
956 case OP_ONCE:
957 case OP_ONCE_NC:
958 case OP_BRAPOS:
959 case OP_SBRA:
960 case OP_SBRAPOS:
961 case OP_SCOND:
962 count = 1;
963 srcw[0] = PRIV_DATA(cc);
964 SLJIT_ASSERT(srcw[0] != 0);
965 cc += 1 + LINK_SIZE;
966 break;
967
/* Capturing brackets: the OVECTOR_PRIV word of the bracket. */
968 case OP_CBRA:
969 case OP_SCBRA:
970 count = 1;
971 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
972 cc += 1 + LINK_SIZE + 2;
973 break;
974
/* Possessive capturing brackets: the OVECTOR_PRIV word is handled first
(index 1), then the private word (index 0). */
975 case OP_CBRAPOS:
976 case OP_SCBRAPOS:
977 count = 2;
978 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
979 srcw[0] = PRIV_DATA(cc);
980 SLJIT_ASSERT(srcw[0] != 0);
981 cc += 1 + LINK_SIZE + 2;
982 break;
983
984 case OP_COND:
985 /* Might be a hidden SCOND. */
986 alternative = cc + GET(cc, 1);
987 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
988 {
989 count = 1;
990 srcw[0] = PRIV_DATA(cc);
991 SLJIT_ASSERT(srcw[0] != 0);
992 }
993 cc += 1 + LINK_SIZE;
994 break;
995
996 default:
997 cc = next_opcode(common, cc);
998 SLJIT_ASSERT(cc != NULL);
999 break;
1000 }
1001 break;
1002
1003 case end:
1004 SLJIT_ASSERT_STOP();
1005 break;
1006 }
1007
1008 while (count > 0)
1009 {
1010 count--;
1011 if (save)
1012 {
/* Saving: write out the word the register still holds (if any), then
load the next local into it. The store of this new word is deferred
until the register is needed again, or until the final flush. */
1013 if (tmp1next)
1014 {
1015 if (!tmp1empty)
1016 {
1017 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1018 stackptr += sizeof(sljit_w);
1019 }
1020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1021 tmp1empty = FALSE;
1022 tmp1next = FALSE;
1023 }
1024 else
1025 {
1026 if (!tmp2empty)
1027 {
1028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1029 stackptr += sizeof(sljit_w);
1030 }
1031 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1032 tmp2empty = FALSE;
1033 tmp1next = TRUE;
1034 }
1035 }
1036 else
1037 {
/* Restoring: the register already holds the word of this local; store
it, then refill the register from the stack if words remain. */
1038 if (tmp1next)
1039 {
1040 SLJIT_ASSERT(!tmp1empty);
1041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1042 tmp1empty = stackptr >= stacktop;
1043 if (!tmp1empty)
1044 {
1045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1046 stackptr += sizeof(sljit_w);
1047 }
1048 tmp1next = FALSE;
1049 }
1050 else
1051 {
1052 SLJIT_ASSERT(!tmp2empty);
1053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1054 tmp2empty = stackptr >= stacktop;
1055 if (!tmp2empty)
1056 {
1057 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1058 stackptr += sizeof(sljit_w);
1059 }
1060 tmp1next = TRUE;
1061 }
1062 }
1063 }
1064 }
1065
/* Saving: flush the words still held in the registers, oldest first
(the register which would be used next holds the older word). */
1066 if (save)
1067 {
1068 if (tmp1next)
1069 {
1070 if (!tmp1empty)
1071 {
1072 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1073 stackptr += sizeof(sljit_w);
1074 }
1075 if (!tmp2empty)
1076 {
1077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1078 stackptr += sizeof(sljit_w);
1079 }
1080 }
1081 else
1082 {
1083 if (!tmp2empty)
1084 {
1085 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1086 stackptr += sizeof(sljit_w);
1087 }
1088 if (!tmp1empty)
1089 {
1090 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1091 stackptr += sizeof(sljit_w);
1092 }
1093 }
1094 }
/* All opcodes were visited and the stack area was used up exactly. */
1095 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1096 }
1097
1098 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1099 {
1100 return (value & (value - 1)) == 0;
1101 }
1102
1103 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1104 {
1105 while (list)
1106 {
1107 /* sljit_set_label is clever enough to do nothing
1108 if either the jump or the label is NULL */
1109 sljit_set_label(list->jump, label);
1110 list = list->next;
1111 }
1112 }
1113
1114 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1115 {
1116 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1117 if (list_item)
1118 {
1119 list_item->next = *list;
1120 list_item->jump = jump;
1121 *list = list_item;
1122 }
1123 }
1124
1125 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1126 {
1127 DEFINE_COMPILER;
1128 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1129
1130 if (list_item)
1131 {
1132 list_item->type = type;
1133 list_item->data = data;
1134 list_item->start = start;
1135 list_item->leave = LABEL();
1136 list_item->next = common->stubs;
1137 common->stubs = list_item;
1138 }
1139 }
1140
1141 static void flush_stubs(compiler_common *common)
1142 {
1143 DEFINE_COMPILER;
1144 stub_list* list_item = common->stubs;
1145
1146 while (list_item)
1147 {
1148 JUMPHERE(list_item->start);
1149 switch(list_item->type)
1150 {
1151 case stack_alloc:
1152 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1153 break;
1154 }
1155 JUMPTO(SLJIT_JUMP, list_item->leave);
1156 list_item = list_item->next;
1157 }
1158 common->stubs = NULL;
1159 }
1160
1161 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1162 {
1163 DEFINE_COMPILER;
1164
1165 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1166 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1167 }
1168
1169 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1170 {
1171 /* May destroy all locals and registers except TMP2. */
1172 DEFINE_COMPILER;
1173
1174 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1175 #ifdef DESTROY_REGISTERS
1176 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1177 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1178 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1181 #endif
1182 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1183 }
1184
1185 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1186 {
1187 DEFINE_COMPILER;
1188 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1189 }
1190
1191 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1192 {
1193 DEFINE_COMPILER;
1194 struct sljit_label *loop;
1195 int i;
1196 /* At this point we can freely use all temporary registers. */
1197 /* TMP1 returns with begin - 1. */
1198 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1199 if (length < 8)
1200 {
1201 for (i = 0; i < length; i++)
1202 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1203 }
1204 else
1205 {
1206 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1207 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1208 loop = LABEL();
1209 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1211 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1212 }
1213 }
1214
1215 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1216 {
1217 DEFINE_COMPILER;
1218 struct sljit_label *loop;
1219 struct sljit_jump *earlyexit;
1220
1221 /* At this point we can freely use all registers. */
1222 OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1224
1225 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1226 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1227 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1228 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1229 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1230 /* Unlikely, but possible */
1231 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1232 loop = LABEL();
1233 OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1234 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1235 /* Copy the integer value to the output buffer */
1236 #ifdef COMPILE_PCRE16
1237 OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
1238 #endif
1239 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
1240 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1241 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1242 JUMPHERE(earlyexit);
1243
1244 /* Calculate the return value, which is the maximum ovector value. */
1245 if (topbracket > 1)
1246 {
1247 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1248 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1249
1250 /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */
1251 loop = LABEL();
1252 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1253 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1254 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);
1255 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1256 }
1257 else
1258 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1259 }
1260
1261 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1262 {
1263 /* Detects if the character has an othercase. */
1264 unsigned int c;
1265
1266 #ifdef SUPPORT_UTF8
1267 if (common->utf8)
1268 {
1269 GETCHAR(c, cc);
1270 if (c > 127)
1271 {
1272 #ifdef SUPPORT_UCP
1273 return c != UCD_OTHERCASE(c);
1274 #else
1275 return FALSE;
1276 #endif
1277 }
1278 }
1279 else
1280 #endif
1281 c = *cc;
1282 return common->fcc[c] != c;
1283 }
1284
1285 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1286 {
1287 /* Returns with the othercase. */
1288 #ifdef SUPPORT_UTF8
1289 if (common->utf8 && c > 127)
1290 {
1291 #ifdef SUPPORT_UCP
1292 return UCD_OTHERCASE(c);
1293 #else
1294 return c;
1295 #endif
1296 }
1297 #endif
1298 return common->fcc[c];
1299 }
1300
1301 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1302 {
1303 /* Detects if the character and its othercase has only 1 bit difference. */
1304 unsigned int c, oc, bit;
1305 #ifdef SUPPORT_UTF8
1306 int n;
1307 #endif
1308
1309 #ifdef SUPPORT_UTF8
1310 if (common->utf8)
1311 {
1312 GETCHAR(c, cc);
1313 if (c <= 127)
1314 oc = common->fcc[c];
1315 else
1316 {
1317 #ifdef SUPPORT_UCP
1318 oc = UCD_OTHERCASE(c);
1319 #else
1320 oc = c;
1321 #endif
1322 }
1323 }
1324 else
1325 {
1326 c = *cc;
1327 oc = common->fcc[c];
1328 }
1329 #else
1330 c = *cc;
1331 oc = common->fcc[c];
1332 #endif
1333
1334 SLJIT_ASSERT(c != oc);
1335
1336 bit = c ^ oc;
1337 /* Optimized for English alphabet. */
1338 if (c <= 127 && bit == 0x20)
1339 return (0 << 8) | 0x20;
1340
1341 /* Since c != oc, they must have at least 1 bit difference. */
1342 if (!ispowerof2(bit))
1343 return 0;
1344
1345 #ifdef SUPPORT_UTF8
1346 if (common->utf8 && c > 127)
1347 {
1348 n = PRIV(utf8_table4)[*cc & 0x3f];
1349 while ((bit & 0x3f) == 0)
1350 {
1351 n--;
1352 bit >>= 6;
1353 }
1354 return (n << 8) | bit;
1355 }
1356 #endif
1357 return (0 << 8) | bit;
1358 }
1359
1360 static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1361 {
1362 DEFINE_COMPILER;
1363 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1364 }
1365
1366 static void read_char(compiler_common *common)
1367 {
1368 /* Reads the character into TMP1, updates STR_PTR.
1369 Does not check STR_END. TMP2 Destroyed. */
1370 DEFINE_COMPILER;
1371 #ifdef SUPPORT_UTF8
1372 struct sljit_jump *jump;
1373 #endif
1374
1375 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1376 #ifdef SUPPORT_UTF8
1377 if (common->utf8)
1378 {
1379 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1380 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1381 JUMPHERE(jump);
1382 }
1383 #endif
1384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1385 }
1386
1387 static void peek_char(compiler_common *common)
1388 {
1389 /* Reads the character into TMP1, keeps STR_PTR.
1390 Does not check STR_END. TMP2 Destroyed. */
1391 DEFINE_COMPILER;
1392 #ifdef SUPPORT_UTF8
1393 struct sljit_jump *jump;
1394 #endif
1395
1396 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1397 #ifdef SUPPORT_UTF8
1398 if (common->utf8)
1399 {
1400 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1401 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1402 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1403 JUMPHERE(jump);
1404 }
1405 #endif
1406 }
1407
1408 static void read_char8_type(compiler_common *common)
1409 {
1410 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1411 DEFINE_COMPILER;
1412 #ifdef SUPPORT_UTF8
1413 struct sljit_jump *jump;
1414 #endif
1415
1416 #ifdef SUPPORT_UTF8
1417 if (common->utf8)
1418 {
1419 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1421 /* This can be an extra read in some situations, but hopefully
1422 it is a clever early read in most cases. */
1423 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1424 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1425 add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));
1426 JUMPHERE(jump);
1427 return;
1428 }
1429 #endif
1430 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1431 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1432 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
1433 }
1434
1435 static void skip_char_back(compiler_common *common)
1436 {
1437 /* Goes one character back. Only affects STR_PTR. Does not check begin. */
1438 DEFINE_COMPILER;
1439 #ifdef SUPPORT_UTF8
1440 struct sljit_label *label;
1441
1442 if (common->utf8)
1443 {
1444 label = LABEL();
1445 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1446 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1447 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1448 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1449 return;
1450 }
1451 #endif
1452 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1453 }
1454
1455 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1456 {
1457 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1458 DEFINE_COMPILER;
1459
1460 if (nltype == NLTYPE_ANY)
1461 {
1462 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1463 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1464 }
1465 else if (nltype == NLTYPE_ANYCRLF)
1466 {
1467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1468 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1470 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1471 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1472 }
1473 else
1474 {
1475 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);
1476 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1477 }
1478 }
1479
1480 #ifdef SUPPORT_UTF8
1481 static void do_utf8readchar(compiler_common *common)
1482 {
1483 /* Fast decoding an utf8 character. TMP1 contains the first byte
1484 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1485 DEFINE_COMPILER;
1486 struct sljit_jump *jump;
1487
1488 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1489 /* Searching for the first zero. */
1490 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1491 jump = JUMP(SLJIT_C_NOT_ZERO);
1492 /* 2 byte sequence */
1493 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1495 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1496 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1497 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1499 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
1500 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1501 JUMPHERE(jump);
1502
1503 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1504 jump = JUMP(SLJIT_C_NOT_ZERO);
1505 /* 3 byte sequence */
1506 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1507 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1508 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1509 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1510 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1512 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1513 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);
1514 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1515 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);
1517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1518 JUMPHERE(jump);
1519
1520 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);
1521 jump = JUMP(SLJIT_C_NOT_ZERO);
1522 /* 4 byte sequence */
1523 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1524 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1525 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1526 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1527 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1528 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1529 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1530 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1531 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1532 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1533 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);
1535 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1536 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);
1538 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1539 JUMPHERE(jump);
1540
1541 /* 5 byte sequence */
1542 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1543 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);
1544 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);
1545 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1546 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
1547 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1548 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1549 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1550 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1551 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1552 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1553 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1554 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1555 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1556 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);
1557 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);
1558 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1559 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1560 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);
1561 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1562 }
1563
1564 static void do_utf8readtype8(compiler_common *common)
1565 {
1566 /* Fast decoding an utf8 character type. TMP2 contains the first byte
1567 of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */
1568 DEFINE_COMPILER;
1569 struct sljit_jump *jump;
1570 struct sljit_jump *compare;
1571
1572 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1573
1574 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1575 jump = JUMP(SLJIT_C_NOT_ZERO);
1576 /* 2 byte sequence */
1577 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1579 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1580 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1581 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1582 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1583 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1584 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1585 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1586
1587 JUMPHERE(compare);
1588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1589 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1590 JUMPHERE(jump);
1591
1592 /* We only have types for characters less than 256. */
1593 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1596 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1597 }
1598
1599 #endif
1600
1601 #ifdef SUPPORT_UCP
1602
1603 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1604 #define UCD_BLOCK_MASK 127
1605 #define UCD_BLOCK_SHIFT 7
1606
1607 static void do_getucd(compiler_common *common)
1608 {
1609 /* Search the UCD record for the character comes in TMP1.
1610 Returns chartype in TMP1 and UCD offset in TMP2. */
1611 DEFINE_COMPILER;
1612
1613 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1614
1615 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1616 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1617 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1618 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1619 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1620 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1622 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1623 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1624 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1625 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1626 }
1627 #endif
1628
1629 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1630 {
1631 DEFINE_COMPILER;
1632 struct sljit_label *mainloop;
1633 struct sljit_label *newlinelabel = NULL;
1634 struct sljit_jump *start;
1635 struct sljit_jump *end = NULL;
1636 struct sljit_jump *nl = NULL;
1637 #ifdef SUPPORT_UTF8
1638 struct sljit_jump *singlebyte;
1639 #endif
1640 jump_list *newline = NULL;
1641 BOOL newlinecheck = FALSE;
1642 BOOL readuchar = FALSE;
1643
1644 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1645 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1646 newlinecheck = TRUE;
1647
1648 if (firstline)
1649 {
1650 /* Search for the end of the first line. */
1651 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1652 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1653
1654 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1655 {
1656 mainloop = LABEL();
1657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1658 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1659 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1661 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1662 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1663 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1664 }
1665 else
1666 {
1667 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1668 mainloop = LABEL();
1669 /* Continual stores does not cause data dependency. */
1670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1671 read_char(common);
1672 check_newlinechar(common, common->nltype, &newline, TRUE);
1673 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1675 set_jumps(newline, LABEL());
1676 }
1677
1678 JUMPHERE(end);
1679 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1680 }
1681
1682 start = JUMP(SLJIT_JUMP);
1683
1684 if (newlinecheck)
1685 {
1686 newlinelabel = LABEL();
1687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1688 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1689 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1691 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1693 nl = JUMP(SLJIT_JUMP);
1694 }
1695
1696 mainloop = LABEL();
1697
1698 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1699 #ifdef SUPPORT_UTF8
1700 if (common->utf8) readuchar = TRUE;
1701 #endif
1702 if (newlinecheck) readuchar = TRUE;
1703
1704 if (readuchar)
1705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1706
1707 if (newlinecheck)
1708 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1709
1710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1711 #ifdef SUPPORT_UTF8
1712 if (common->utf8)
1713 {
1714 singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1717 JUMPHERE(singlebyte);
1718 }
1719 #endif
1720 JUMPHERE(start);
1721
1722 if (newlinecheck)
1723 {
1724 JUMPHERE(end);
1725 JUMPHERE(nl);
1726 }
1727
1728 return mainloop;
1729 }
1730
1731 static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)
1732 {
1733 DEFINE_COMPILER;
1734 struct sljit_label *start;
1735 struct sljit_jump *leave;
1736 struct sljit_jump *found;
1737 pcre_uint16 oc, bit;
1738
1739 if (firstline)
1740 {
1741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1742 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1743 }
1744
1745 start = LABEL();
1746 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1747 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1748
1749 if ((firstbyte & REQ_CASELESS) == 0)
1750 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);
1751 else
1752 {
1753 firstbyte &= 0xff;
1754 oc = common->fcc[firstbyte];
1755 bit = firstbyte ^ oc;
1756 if (ispowerof2(bit))
1757 {
1758 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1759 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);
1760 }
1761 else
1762 {
1763 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);
1764 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1765 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1766 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1767 found = JUMP(SLJIT_C_NOT_ZERO);
1768 }
1769 }
1770
1771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1772 #ifdef SUPPORT_UTF8
1773 if (common->utf8)
1774 {
1775 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1776 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1778 }
1779 #endif
1780 JUMPTO(SLJIT_JUMP, start);
1781 JUMPHERE(found);
1782 JUMPHERE(leave);
1783
1784 if (firstline)
1785 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1786 }
1787
1788 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1789 {
1790 DEFINE_COMPILER;
1791 struct sljit_label *loop;
1792 struct sljit_jump *lastchar;
1793 struct sljit_jump *firstchar;
1794 struct sljit_jump *leave;
1795 struct sljit_jump *foundcr = NULL;
1796 struct sljit_jump *notfoundnl;
1797 jump_list *newline = NULL;
1798
1799 if (firstline)
1800 {
1801 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1802 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1803 }
1804
1805 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1806 {
1807 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1808 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1809 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1810 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1811 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1812
1813 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
1814 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1815 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1816 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1817
1818 loop = LABEL();
1819 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1820 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1821 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
1822 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
1823 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1824 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1825
1826 JUMPHERE(leave);
1827 JUMPHERE(firstchar);
1828 JUMPHERE(lastchar);
1829
1830 if (firstline)
1831 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1832 return;
1833 }
1834
1835 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1837 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1838 skip_char_back(common);
1839
1840 loop = LABEL();
1841 read_char(common);
1842 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1843 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1844 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1845 check_newlinechar(common, common->nltype, &newline, FALSE);
1846 set_jumps(newline, loop);
1847
1848 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1849 {
1850 leave = JUMP(SLJIT_JUMP);
1851 JUMPHERE(foundcr);
1852 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1853 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1855 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1857 JUMPHERE(notfoundnl);
1858 JUMPHERE(leave);
1859 }
1860 JUMPHERE(lastchar);
1861 JUMPHERE(firstchar);
1862
1863 if (firstline)
1864 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1865 }
1866
1867 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
1868 {
1869 DEFINE_COMPILER;
1870 struct sljit_label *start;
1871 struct sljit_jump *leave;
1872 struct sljit_jump *found;
1873
1874 if (firstline)
1875 {
1876 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1877 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1878 }
1879
1880 start = LABEL();
1881 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1882 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1883 #ifdef SUPPORT_UTF8
1884 if (common->utf8)
1885 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1886 #endif
1887 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
1888 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
1889 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
1890 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
1891 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
1892 found = JUMP(SLJIT_C_NOT_ZERO);
1893
1894 #ifdef SUPPORT_UTF8
1895 if (common->utf8)
1896 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
1897 #endif
1898 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1899 #ifdef SUPPORT_UTF8
1900 if (common->utf8)
1901 {
1902 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1903 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1904 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1905 }
1906 #endif
1907 JUMPTO(SLJIT_JUMP, start);
1908 JUMPHERE(found);
1909 JUMPHERE(leave);
1910
1911 if (firstline)
1912 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1913 }
1914
1915 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)
1916 {
1917 DEFINE_COMPILER;
1918 struct sljit_label *loop;
1919 struct sljit_jump *toolong;
1920 struct sljit_jump *alreadyfound;
1921 struct sljit_jump *found;
1922 struct sljit_jump *foundoc = NULL;
1923 struct sljit_jump *notfound;
1924 pcre_uint16 oc, bit;
1925
1926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);
1927 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
1928 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
1929 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
1930
1931 if (has_firstbyte)
1932 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
1933 else
1934 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
1935
1936 loop = LABEL();
1937 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
1938
1939 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1940 if ((reqbyte & REQ_CASELESS) == 0)
1941 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);
1942 else
1943 {
1944 reqbyte &= 0xff;
1945 oc = common->fcc[reqbyte];
1946 bit = reqbyte ^ oc;
1947 if (ispowerof2(bit))
1948 {
1949 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
1950 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);
1951 }
1952 else
1953 {
1954 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);
1955 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
1956 }
1957 }
1958 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1959 JUMPTO(SLJIT_JUMP, loop);
1960
1961 JUMPHERE(found);
1962 if (foundoc)
1963 JUMPHERE(foundoc);
1964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);
1965 JUMPHERE(alreadyfound);
1966 JUMPHERE(toolong);
1967 return notfound;
1968 }
1969
1970 static void do_revertframes(compiler_common *common)
1971 {
1972 DEFINE_COMPILER;
1973 struct sljit_jump *jump;
1974 struct sljit_label *mainloop;
1975
1976 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1977 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
1978
1979 /* Drop frames until we reach STACK_TOP. */
1980 mainloop = LABEL();
1981 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1982 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1983 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
1984 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
1985 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
1986 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
1987 JUMPTO(SLJIT_JUMP, mainloop);
1988
1989 JUMPHERE(jump);
1990 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1991 /* End of dropping frames. */
1992 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1993
1994 JUMPHERE(jump);
1995 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
1996 /* Set string begin. */
1997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
1998 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
1999 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2000 JUMPTO(SLJIT_JUMP, mainloop);
2001
2002 JUMPHERE(jump);
2003 /* Unknown command. */
2004 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2005 JUMPTO(SLJIT_JUMP, mainloop);
2006 }
2007
/* Emits a fast-call routine that tests for a word boundary at STR_PTR.
   A 0/1 "is a word character" value is computed for the character before
   STR_PTR (kept in LOCALS1) and for the character at STR_PTR (kept in TMP2).
   The routine returns with the flags produced by XOR-ing the two values, so
   the zero flag is set when both sides are of the same kind. The return
   address is kept in LOCALS0; TMP1 and TMP2 are overwritten.
   NOTE(review): presumably this is the target of the calls queued on
   common->wordboundary - confirm where that list is bound. */
2008 static void check_wordboundary(compiler_common *common)
2009 {
2010 DEFINE_COMPILER;
2011 struct sljit_jump *beginend;
2012 #ifdef SUPPORT_UTF8
2013 struct sljit_jump *jump;
2014 #endif
2015
/* The table based paths below isolate the word bit with a right shift by 4,
   which is only valid while ctype_word is 0x10. */
2016 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2017
2018 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2019 /* Get type of the previous char, and put it to LOCALS1. */
2020 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2022 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* At (or before) the start of the subject there is no previous character:
   skip the test and keep the 0 stored in LOCALS1 above. */
2023 beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* NOTE(review): presumably skip_char_back moves STR_PTR to the previous
   character and read_char reads it into TMP1 while moving STR_PTR forward
   again - confirm in those helpers. */
2024 skip_char_back(common);
2025 read_char(common);
2026
2027 /* Testing char type. */
2028 #ifdef SUPPORT_UCP
2029 if (common->useucp)
2030 {
/* Underscore is accepted directly: TMP2 is preset to 1 and the type test is
   jumped over. */
2031 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2032 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
/* NOTE(review): the code below treats TMP1 as the character type after the
   getucd call - confirm in the getucd routine. */
2033 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two range tests done as "subtract the lower bound, unsigned compare with
   the width": ucp_Ll..ucp_Lu first, then ucp_Nd..ucp_No. TMP2 becomes 1 when
   either range contains the type. */
2034 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2035 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2036 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2037 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2038 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2039 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2040 JUMPHERE(jump);
2041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2042 }
2043 else
2044 #endif
2045 {
2046 #ifdef SUPPORT_UTF8
2047 /* Here LOCALS1 has already been zeroed. */
2048 jump = NULL;
/* In utf8 mode characters above 255 bypass the ctypes table and keep the
   value 0 in LOCALS1. */
2049 if (common->utf8)
2050 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2051 #endif
/* Load the ctypes entry of the character and reduce it to the ctype_word bit. */
2052 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2053 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2054 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2056 #ifdef SUPPORT_UTF8
2057 if (jump != NULL)
2058 JUMPHERE(jump);
2059 #endif
2060 }
2061 JUMPHERE(beginend);
2062
/* Second half: the same classification for the character at STR_PTR, with
   the result in TMP2. At the end of the subject TMP2 stays 0. */
2063 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2064 beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2065 peek_char(common);
2066
2067 /* Testing char type. This is a code duplication. */
2068 #ifdef SUPPORT_UCP
2069 if (common->useucp)
2070 {
2071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2072 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2073 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2074 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2075 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2076 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2077 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2078 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2079 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2080 JUMPHERE(jump);
2081 }
2082 else
2083 #endif
2084 {
2085 #ifdef SUPPORT_UTF8
/* TMP2 is cleared again here, after peek_char.
   NOTE(review): presumably because peek_char may overwrite TMP2 - confirm. */
2086 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2087 jump = NULL;
2088 if (common->utf8)
2089 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2090 #endif
2091 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2092 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2093 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2094 #ifdef SUPPORT_UTF8
2095 if (jump != NULL)
2096 JUMPHERE(jump);
2097 #endif
2098 }
2099 JUMPHERE(beginend);
2100
/* The flags of (TMP2 XOR LOCALS1) are the result; the value itself is discarded. */
2101 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2102 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2103 }
2104
/* Emits a fast-call routine that classifies the character in TMP1 as a
   newline. The individual test results are OR-ed together in TMP2 and the
   final COND_VALUE also sets the flags, so the zero flag is clear on a hit.
   Accepted values: 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in utf8 mode. */
2105 static void check_anynewline(compiler_common *common)
2106 {
2107 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 itself is also modified: it is rebased by 0x0a (and gets bit 0 set in
   utf8 mode). */
2108 DEFINE_COMPILER;
2109
2110 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2111
/* Rebase TMP1 to 0x0a so the 0x0a..0x0d range needs a single unsigned compare. */
2112 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2114 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2115 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2116 #ifdef SUPPORT_UTF8
2117 if (common->utf8)
2118 {
/* Record the 0x85 result, then force bit 0 of the rebased value to 1 so that
   0x2028 and 0x2029 are both caught by one equality test. */
2119 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2120 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2122 }
2123 #endif
/* Fold in the last pending comparison and leave the flags for the caller. */
2124 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2125 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2126 }
2127
/* Emits a fast-call routine that classifies the character in TMP1 as a
   horizontal space. The individual test results are OR-ed together in TMP2
   and the final COND_VALUE also sets the flags, so the zero flag is clear on
   a hit. Accepted values: 0x09, 0x20 and 0xa0, plus, in utf8 mode, 0x1680,
   0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000. */
2128 static void check_hspace(compiler_common *common)
2129 {
2130 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
2131 DEFINE_COMPILER;
2132
2133 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2134
/* Each compare only sets flags; the COND_VALUE that follows it (possibly in
   the next section) records the outcome in TMP2. */
2135 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2136 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2137 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2138 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2140 #ifdef SUPPORT_UTF8
2141 if (common->utf8)
2142 {
2143 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2144 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2145 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2147 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased to 0x2000 (TMP1 is modified), which turns
   0x2000..0x200a into one unsigned compare. */
2148 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2149 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2150 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2152 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2154 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2156 }
2157 #endif
/* Fold in the last pending comparison and leave the flags for the caller. */
2158 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2159
2160 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2161 }
2162
/* Emits a fast-call routine that classifies the character in TMP1 as a
   vertical space. The individual test results are OR-ed together in TMP2 and
   the final COND_VALUE also sets the flags, so the zero flag is clear on a
   hit. Accepted values: 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in utf8
   mode. */
2163 static void check_vspace(compiler_common *common)
2164 {
2165 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* TMP1 itself is also modified: it is rebased by 0x0a (and gets bit 0 set in
   utf8 mode). */
2166 DEFINE_COMPILER;
2167
2168 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2169
/* Rebase TMP1 to 0x0a so the 0x0a..0x0d range needs a single unsigned compare. */
2170 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2171 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2172 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2173 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2174 #ifdef SUPPORT_UTF8
2175 if (common->utf8)
2176 {
/* Record the 0x85 result, then force bit 0 of the rebased value to 1 so that
   0x2028 and 0x2029 are both caught by one equality test. The flags requested
   by SLJIT_SET_E on this COND_VALUE are replaced by the compare two lines
   below before anything reads them. */
2177 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2178 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2179 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2180 }
2181 #endif
/* Fold in the last pending comparison and leave the flags for the caller. */
2182 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2183
2184 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2185 }
2186
/* Scratch names for the two compare routines below: the registers normally
   used as STR_END and STACK_TOP are borrowed to hold the two bytes being
   compared. Both routines save and restore them. */
2187 #define CHAR1 STR_END
2188 #define CHAR2 STACK_TOP
2189
/* Emits a fast-call routine that compares two byte sequences exactly.
   On entry TMP1 points to the first sequence, TMP2 holds the number of bytes
   and STR_PTR points TMP2 bytes past the start of the second sequence (it is
   moved back by TMP2 first). TMP2 is decremented once per matching byte, so
   it reaches zero only when the loop runs to completion; a mismatch leaves
   the loop early. TMP1 and TMP3 are overwritten and LOCALS0 is used as a
   save slot. */
2190 static void do_casefulcmp(compiler_common *common)
2191 {
2192 DEFINE_COMPILER;
2193 struct sljit_jump *jump;
2194 struct sljit_label *label;
2195
2196 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2197 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Preserve the borrowed registers: CHAR1 in TMP3, CHAR2 in LOCALS0. */
2198 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one byte back because the loads in the loop address
   "base + 1".
   NOTE(review): SLJIT_MOVU_UB is presumably an updating load that also
   advances the base register by that offset - confirm in the SLJIT
   documentation. */
2200 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2201 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2202
2203 label = LABEL();
2204 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2205 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2206 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2207 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2208 JUMPTO(SLJIT_C_NOT_ZERO, label);
2209
/* Common exit for match and mismatch: undo the one byte bias on STR_PTR and
   restore the borrowed registers. */
2210 JUMPHERE(jump);
2211 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2212 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2213 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2214 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2215 }
2216
/* The register normally used as STACK_LIMIT is borrowed to hold the address
   of the lower casing table while the routine below runs. */
2217 #define LCC_TABLE STACK_LIMIT
2218
/* Emits a fast-call routine that compares two byte sequences after mapping
   every byte through the common->lcc table.
   On entry TMP1 points to the first sequence, TMP2 holds the number of bytes
   and STR_PTR points TMP2 bytes past the start of the second sequence (it is
   moved back by TMP2 first). TMP2 is decremented once per matching byte, so
   it reaches zero only when the loop runs to completion; a mismatch leaves
   the loop early. TMP1 and TMP3 are overwritten; LOCALS0 and LOCALS1 are used
   as save slots. */
2219 static void do_caselesscmp(compiler_common *common)
2220 {
2221 DEFINE_COMPILER;
2222 struct sljit_jump *jump;
2223 struct sljit_label *label;
2224
2225 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2226 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2227
/* Preserve the three borrowed registers, then load the table address. */
2228 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2230 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2231 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one byte back because the loads in the loop address
   "base + 1".
   NOTE(review): SLJIT_MOVU_UB is presumably an updating load that also
   advances the base register by that offset - confirm in the SLJIT
   documentation. */
2232 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2233 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2234
2235 label = LABEL();
2236 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2237 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
/* Replace both bytes by their table entries before comparing them. */
2238 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2239 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2240 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2241 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2242 JUMPTO(SLJIT_C_NOT_ZERO, label);
2243
/* Common exit for match and mismatch: undo the one byte bias on STR_PTR and
   restore the borrowed registers. */
2244 JUMPHERE(jump);
2245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2246 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2247 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2248 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2249 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2250 }
2251
/* The register aliases are private to the two compare routines above. */
2252 #undef LCC_TABLE
2253 #undef CHAR1
2254 #undef CHAR2
2255
2256 #ifdef SUPPORT_UTF8
2257 #ifdef SUPPORT_UCP
2258
2259 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2260 {
2261 /* This function would be ineffective to do in JIT level. */
2262 int c1, c2;
2263 const pcre_uchar *src2 = args->ptr;
2264 const pcre_uchar *end2 = (pcre_uchar *)args->end;
2265
2266 while (src1 < end1)
2267 {
2268 if (src2 >= end2)
2269 return 0;
2270 GETCHARINC(c1, src1);
2271 GETCHARINC(c2, src2);
2272 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return 0;
2273 }
2274 return src2;
2275 }
2276
2277 #endif
2278 #endif
2279
/* Emits the comparison of the subject against one pattern character at cc
   (in utf8 mode: every unit of that character, sized through utf8_table4).
   The subject data is addressed as STR_PTR - context->length, and
   context->length is reduced by IN_UCHARS(1) for every unit handled.
   Mismatches jump to the fallbacks list.

   When caseless is set and the character has an other case, the single bit
   that differs between the two cases is OR-ed into the subject value and into
   the expected value, so both cases are accepted by one comparison.

   The context carries state between calls: sourcereg is -1 on the first call
   and then alternates between TMP1 and TMP2, which lets the load of the next
   chunk be emitted before the comparison of the current one. With
   SLJIT_UNALIGNED the units are first collected in context->c / context->oc
   and compared in chunks.

   Returns cc advanced past the units that were consumed. */
2280 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2281 compare_context* context, jump_list **fallbacks)
2282 {
2283 DEFINE_COMPILER;
2284 unsigned int othercasebit = 0;
2285 pcre_uchar *othercasechar = NULL;
2286 #ifdef SUPPORT_UTF8
2287 int utf8length;
2288 #endif
2289
2290 if (caseless && char_has_othercase(common, cc))
2291 {
2292 othercasebit = char_get_othercase_bit(common, cc);
2293 SLJIT_ASSERT(othercasebit);
2294 /* Extracting bit difference info. */
/* The packed value is split into the unit that carries the differing bit
   (othercasechar) and the bit mask itself (othercasebit). */
2295 #ifdef COMPILE_PCRE8
2296 othercasechar = cc + (othercasebit >> 8);
2297 othercasebit &= 0xff;
2298 #else
2299 #ifdef COMPILE_PCRE16
/* Here bit 0x100 selects the upper byte of the 16 bit unit: the mask is moved
   up by 8 when it is set. */
2300 othercasechar = cc + (othercasebit >> 9);
2301 if ((othercasebit & 0x100) != 0)
2302 othercasebit = (othercasebit & 0xff) << 8;
2303 else
2304 othercasebit &= 0xff;
2305 #endif
2306 #endif
2307 }
2308
/* First call for this context: load the first chunk into TMP1 and make TMP2
   the register for the next load. */
2309 if (context->sourcereg == -1)
2310 {
2311 #ifdef COMPILE_PCRE8
2312 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2313 if (context->length >= 4)
2314 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2315 else if (context->length >= 2)
2316 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2317 else
2318 #endif
2319 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2320 #else
2321 #ifdef COMPILE_PCRE16
2322 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2323 if (context->length >= 4)
2324 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2325 else
2326 #endif
/* NOTE(review): this first load is SLJIT_MOV_SH, while the path without
   unaligned reads further down loads later units with SLJIT_MOV_UH and
   compares against the unsigned *cc. Confirm that the first unit is compared
   correctly for values of 0x8000 and above when SLJIT_UNALIGNED is off. */
2327 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2328 #endif
2329 #endif /* COMPILE_PCRE8 */
2330 context->sourcereg = TMP2;
2331 }
2332
2333 #ifdef SUPPORT_UTF8
/* Number of units to process: 1, plus the count of extra units taken from
   utf8_table4 when the first unit is 0xc0 or above in utf8 mode. */
2334 utf8length = 1;
2335 if (common->utf8 && *cc >= 0xc0)
2336 utf8length += PRIV(utf8_table4)[*cc & 0x3f];
2337
2338 do
2339 {
2340 #endif
2341
2342 context->length -= IN_UCHARS(1);
2343 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2344
2345 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit forced to 1 where applicable)
   in c and the corresponding OR mask in oc. */
2346 if (othercasebit != 0 && othercasechar == cc)
2347 {
2348 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2349 context->oc.asuchars[context->ucharptr] = othercasebit;
2350 }
2351 else
2352 {
2353 context->c.asuchars[context->ucharptr] = *cc;
2354 context->oc.asuchars[context->ucharptr] = 0;
2355 }
2356 context->ucharptr++;
2357
/* Flush the collected units when the chunk is complete or nothing remains to
   be compared. */
2358 #ifdef COMPILE_PCRE8
2359 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2360 #else
2361 if (context->ucharptr >= 2 || context->length == 0)
2362 #endif
2363 {
/* Emit the load of the next chunk first (its width depends on how much is
   left), then switch registers and compare the chunk loaded earlier. */
2364 if (context->length >= 4)
2365 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2366 #ifdef COMPILE_PCRE8
2367 else if (context->length >= 2)
2368 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2369 else if (context->length >= 1)
2370 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2371 #else
2372 else if (context->length >= 2)
2373 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2374 #endif
2375 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2376
/* The case labels are written in bytes divided by the unit size, so they
   select on the number of collected units. */
2377 switch(context->ucharptr)
2378 {
2379 case 4 / sizeof(pcre_uchar):
2380 if (context->oc.asint != 0)
2381 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2382 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2383 break;
2384
2385 case 2 / sizeof(pcre_uchar):
2386 if (context->oc.asshort != 0)
2387 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
2388 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
2389 break;
2390
2391 #ifdef COMPILE_PCRE8
2392 case 1:
2393 if (context->oc.asbyte != 0)
2394 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2395 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2396 break;
2397 #endif
2398
2399 default:
2400 SLJIT_ASSERT_STOP();
2401 break;
2402 }
2403 context->ucharptr = 0;
2404 }
2405
2406 #else
2407
2408 /* Unaligned read is unsupported. */
/* One unit at a time: load the next unit (if any is left), switch registers
   and compare the unit that was loaded before. */
2409 #ifdef COMPILE_PCRE8
2410 if (context->length > 0)
2411 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2412 #else
2413 if (context->length > 0)
2414 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2415 #endif
2416 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2417
2418 if (othercasebit != 0 && othercasechar == cc)
2419 {
2420 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2421 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2422 }
2423 else
2424 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2425
2426 #endif
2427
2428 cc++;
2429 #ifdef SUPPORT_UTF8
2430 utf8length--;
2431 }
2432 while (utf8length > 0);
2433 #endif
2434
2435 return cc;
2436 }
2437
2438 #ifdef SUPPORT_UTF8
2439
/* Helpers for compile_xclass_hotpath. The generated code keeps the character
   (TMP1) and its type (typereg) biased by a compile time offset, so a range
   test needs only one unsigned comparison. Each macro emits the SUB or ADD
   that moves the register from the current bias to the requested one (nothing
   is emitted when the bias is unchanged) and then records the new bias in
   typeoffset / charoffset.

   Both macros rely on the caller's locals (typereg and typeoffset, or
   charoffset) and evaluate their argument more than once, so the argument
   must be free of side effects. They are wrapped in do { } while (0) so that
   they act as a single statement, including inside an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
2459
2460 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2461 {
2462 DEFINE_COMPILER;
2463 jump_list *found = NULL;
2464 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2465 unsigned int c;
2466 int compares;
2467 struct sljit_jump *jump = NULL;
2468 pcre_uchar *ccbegin;
2469 #ifdef SUPPORT_UCP
2470 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2471 BOOL charsaved = FALSE;
2472 int typereg = TMP1, scriptreg = TMP1;
2473 unsigned int typeoffset;
2474 #endif
2475 int invertcmp, numberofcmps;
2476 unsigned int charoffset;
2477
2478 /* Although SUPPORT_UTF8 must be defined, we are not necessary in utf8 mode. */
2479 check_input_end(common, fallbacks);
2480 read_char(common);
2481
2482 if ((*cc++ & XCL_MAP) != 0)
2483 {
2484 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2485 if (common->utf8)
2486 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2487
2488 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2489 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2490 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2491 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2492 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2493 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2494
2495 if (common->utf8)
2496 JUMPHERE(jump);
2497 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2498 #ifdef SUPPORT_UCP
2499 charsaved = TRUE;
2500 #endif
2501 cc += 32;
2502 }
2503
2504 /* Scanning the necessary info. */
2505 ccbegin = cc;
2506 compares = 0;
2507 while (*cc != XCL_END)
2508 {
2509 compares++;
2510 if (*cc == XCL_SINGLE)
2511 {
2512 cc += 2;
2513 #ifdef SUPPORT_UTF8
2514 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2515 #endif
2516 #ifdef SUPPORT_UCP
2517 needschar = TRUE;
2518 #endif
2519 }
2520 else if (*cc == XCL_RANGE)
2521 {
2522 cc += 2;
2523 #ifdef SUPPORT_UTF8
2524 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2525 #endif
2526 cc++;
2527 #ifdef SUPPORT_UTF8
2528 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];
2529 #endif
2530 #ifdef SUPPORT_UCP
2531 needschar = TRUE;
2532 #endif
2533 }
2534 #ifdef SUPPORT_UCP
2535 else
2536 {
2537 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2538 cc++;
2539 switch(*cc)
2540 {
2541 case PT_ANY:
2542 break;
2543
2544 case PT_LAMP:
2545 case PT_GC:
2546 case PT_PC:
2547 case PT_ALNUM:
2548 needstype = TRUE;
2549 break;
2550
2551 case PT_SC:
2552 needsscript = TRUE;
2553 break;
2554
2555 case PT_SPACE:
2556 case PT_PXSPACE:
2557 case PT_WORD:
2558 needstype = TRUE;
2559 needschar = TRUE;
2560 break;
2561
2562 default:
2563 SLJIT_ASSERT_STOP();
2564 break;
2565 }
2566 cc += 2;
2567 }
2568 #endif
2569 }
2570
2571 #ifdef SUPPORT_UCP
2572 /* Simple register allocation. TMP1 is preferred if possible. */
2573 if (needstype || needsscript)
2574 {
2575 if (needschar && !charsaved)
2576 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2577 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2578 if (needschar)
2579 {
2580 if (needstype)
2581 {
2582 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2583 typereg = RETURN_ADDR;
2584 }
2585
2586 if (needsscript)
2587 scriptreg = TMP3;
2588 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2589 }
2590 else if (needstype && needsscript)
2591 scriptreg = TMP3;
2592 /* In all other cases only one of them was specified, and that can goes to TMP1. */
2593
2594 if (needsscript)
2595 {
2596 if (scriptreg == TMP1)
2597 {
2598 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2599 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2600 }
2601 else
2602 {
2603 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2604 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2605 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2606 }
2607 }
2608 }
2609 #endif
2610
2611 /* Generating code. */
2612 cc = ccbegin;
2613 charoffset = 0;
2614 numberofcmps = 0;
2615 #ifdef SUPPORT_UCP
2616 typeoffset = 0;
2617 #endif
2618
2619 while (*cc != XCL_END)
2620 {
2621 compares--;
2622 invertcmp = (compares == 0 && list != fallbacks);
2623 jump = NULL;
2624
2625 if (*cc == XCL_SINGLE)
2626 {
2627 cc ++;
2628 #ifdef SUPPORT_UTF8
2629 if (common->utf8)
2630 {
2631 GETCHARINC(c, cc);
2632 }
2633 else
2634 #endif
2635 c = *cc++;
2636
2637 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2638 {
2639 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2640 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2641 numberofcmps++;
2642 }
2643 else if (numberofcmps > 0)
2644 {
2645 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2646 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2647 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2648 numberofcmps = 0;
2649 }
2650 else
2651 {
2652 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2653 numberofcmps = 0;
2654 }
2655 }
2656 else if (*cc == XCL_RANGE)
2657 {
2658 cc ++;
2659 #ifdef SUPPORT_UTF8
2660 if (common->utf8)
2661 {
2662 GETCHARINC(c, cc);
2663 }
2664 else
2665 #endif
2666 c = *cc++;
2667 SET_CHAR_OFFSET(c);
2668 #ifdef SUPPORT_UTF8
2669 if (common->utf8)
2670 {
2671 GETCHARINC(c, cc);
2672 }
2673 else
2674 #endif
2675 c = *cc++;
2676 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2677 {
2678 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2679 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2680 numberofcmps++;
2681 }
2682 else if (numberofcmps > 0)
2683 {
2684 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2685 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2686 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2687 numberofcmps = 0;
2688 }
2689 else
2690 {
2691 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2692 numberofcmps = 0;
2693 }
2694 }
2695 #ifdef SUPPORT_UCP
2696 else
2697 {
2698 if (*cc == XCL_NOTPROP)
2699 invertcmp ^= 0x1;
2700 cc++;
2701 switch(*cc)
2702 {
2703 case PT_ANY:
2704 if (list != fallbacks)
2705 {
2706 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2707 continue;
2708 }
2709 else if (cc[-1] == XCL_NOTPROP)
2710 continue;
2711 jump = JUMP(SLJIT_JUMP);
2712 break;
2713
2714 case PT_LAMP:
2715 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2716 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2717 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2718 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2719 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2720 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2721 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2722 break;
2723
2724 case PT_GC:
2725 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
2726 SET_TYPE_OFFSET(c);
2727 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
2728 break;
2729
2730 case PT_PC:
2731 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2732 break;
2733
2734 case PT_SC:
2735 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2736 break;
2737
2738 case PT_SPACE:
2739 case PT_PXSPACE:
2740 if (*cc == PT_SPACE)
2741 {
2742 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2743 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2744 }
2745 SET_CHAR_OFFSET(9);
2746 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2747 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2748 if (*cc == PT_SPACE)
2749 JUMPHERE(jump);
2750
2751 SET_TYPE_OFFSET(ucp_Zl);
2752 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2753 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2754 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2755 break;
2756
2757 case PT_WORD:
2758 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2759 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2760 /* ... fall through */
2761
2762 case PT_ALNUM:
2763 SET_TYPE_OFFSET(ucp_Ll);
2764 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2765 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2766 SET_TYPE_OFFSET(ucp_Nd);
2767 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2768 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2769 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2770 break;
2771 }
2772 cc += 2;
2773 }
2774 #endif
2775
2776 if (jump != NULL)
2777 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2778 }
2779
2780 if (found != NULL)
2781 set_jumps(found, LABEL());
2782 }
2783
2784 #undef SET_TYPE_OFFSET
2785 #undef SET_CHAR_OFFSET
2786
2787 #endif
2788
2789 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2790 {
2791 DEFINE_COMPILER;
2792 int length;
2793 unsigned int c, oc, bit;
2794 compare_context context;
2795 struct sljit_jump *jump[4];
2796 #ifdef SUPPORT_UTF8
2797 struct sljit_label *label;
2798 #ifdef SUPPORT_UCP
2799 pcre_uchar propdata[5];
2800 #endif
2801 #endif
2802
2803 switch(type)
2804 {
2805 case OP_SOD:
2806 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2807 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2808 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2809 return cc;
2810
2811 case OP_SOM:
2812 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2813 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2814 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2815 return cc;
2816
2817 case OP_NOT_WORD_BOUNDARY:
2818 case OP_WORD_BOUNDARY:
2819 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
2820 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2821 return cc;
2822
2823 case OP_NOT_DIGIT:
2824 case OP_DIGIT:
2825 check_input_end(common, fallbacks);
2826 read_char8_type(common);
2827 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
2828 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2829 return cc;
2830
2831 case OP_NOT_WHITESPACE:
2832 case OP_WHITESPACE:
2833 check_input_end(common, fallbacks);
2834 read_char8_type(common);
2835 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
2836 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2837 return cc;
2838
2839 case OP_NOT_WORDCHAR:
2840 case OP_WORDCHAR:
2841 check_input_end(common, fallbacks);
2842 read_char8_type(common);
2843 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
2844 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2845 return cc;
2846
2847 case OP_ANY:
2848 check_input_end(common, fallbacks);
2849 read_char(common);
2850 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2851 {
2852 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
2853 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2854 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2855 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
2856 JUMPHERE(jump[1]);
2857 JUMPHERE(jump[0]);
2858 }
2859 else
2860 check_newlinechar(common, common->nltype, fallbacks, TRUE);
2861 return cc;
2862
2863 case OP_ALLANY:
2864 check_input_end(common, fallbacks);
2865 #ifdef SUPPORT_UTF8
2866 if (common->utf8)
2867 {
2868 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2869 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2870 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2871 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2872 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2873 JUMPHERE(jump[0]);
2874 return cc;
2875 }
2876 #endif
2877 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2878 return cc;
2879
2880 case OP_ANYBYTE:
2881 check_input_end(common, fallbacks);
2882 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2883 return cc;
2884
2885 #ifdef SUPPORT_UTF8
2886 #ifdef SUPPORT_UCP
2887 case OP_NOTPROP:
2888 case OP_PROP:
2889 propdata[0] = 0;
2890 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
2891 propdata[2] = cc[0];
2892 propdata[3] = cc[1];
2893 propdata[4] = XCL_END;
2894 compile_xclass_hotpath(common, propdata, fallbacks);
2895 return cc + 2;
2896 #endif
2897 #endif
2898
2899 case OP_ANYNL:
2900 check_input_end(common, fallbacks);
2901 read_char(common);
2902 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2903 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2904 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2905 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2907 jump[3] = JUMP(SLJIT_JUMP);
2908 JUMPHERE(jump[0]);
2909 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
2910 JUMPHERE(jump[1]);
2911 JUMPHERE(jump[2]);
2912 JUMPHERE(jump[3]);
2913 return cc;
2914
2915 case OP_NOT_HSPACE:
2916 case OP_HSPACE:
2917 check_input_end(common, fallbacks);
2918 read_char(common);
2919 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
2920 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2921 return cc;
2922
2923 case OP_NOT_VSPACE:
2924 case OP_VSPACE:
2925 check_input_end(common, fallbacks);
2926 read_char(common);
2927 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
2928 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2929 return cc;
2930
2931 #ifdef SUPPORT_UCP
2932 case OP_EXTUNI:
2933 check_input_end(common, fallbacks);
2934 read_char(common);
2935 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2936 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2937 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
2938
2939 label = LABEL();
2940 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2941 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2942 read_char(common);
2943 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2944 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2945 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
2946
2947 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2948 JUMPHERE(jump[0]);
2949 return cc;
2950 #endif
2951
2952 case OP_EODN:
2953 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2954 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2955 {
2956 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2957 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2958 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2959 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
2960 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
2961 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
2962 }
2963 else if (common->nltype == NLTYPE_FIXED)
2964 {
2965 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2966 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2967 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2968 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2969 }
2970 else
2971 {
2972 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2973 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2974 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2975 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
2976 jump[2] = JUMP(SLJIT_C_GREATER);
2977 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
2978 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 1);
2979 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2980 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
2981
2982 JUMPHERE(jump[1]);
2983 if (common->nltype == NLTYPE_ANYCRLF)
2984 {
2985 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2986 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
2987 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2988 }
2989 else
2990 {
2991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
2992 read_char(common);
2993 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
2994 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2995 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
2996 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2997 }
2998 JUMPHERE(jump[2]);
2999 JUMPHERE(jump[3]);
3000 }
3001 JUMPHERE(jump[0]);
3002 return cc;
3003
3004 case OP_EOD:
3005 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3006 return cc;
3007
3008 case OP_CIRC:
3009 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3010 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3011 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3012 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3013 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3014 return cc;
3015
3016 case OP_CIRCM:
3017 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3019 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3020 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3021 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3022 jump[0] = JUMP(SLJIT_JUMP);
3023 JUMPHERE(jump[1]);
3024
3025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, end));
3026 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, STR_PTR, 0));
3027
3028 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3029 {
3030 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3031 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3032 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
3033 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
3034 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3035 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3036 }
3037 else
3038 {
3039 skip_char_back(common);
3040 read_char(common);
3041 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3042 }
3043 JUMPHERE(jump[0]);
3044 return cc;
3045
3046 case OP_DOLL:
3047 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3048 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3049 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3050
3051 if (!common->endonly)
3052 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3053 else
3054 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3055 return cc;
3056
3057 case OP_DOLLM:
3058 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3059 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3060 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3061 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3062 jump[0] = JUMP(SLJIT_JUMP);
3063 JUMPHERE(jump[1]);
3064
3065 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3066 {
3067 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3068 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3069 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3070 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
3071 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3072 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3073 }
3074 else
3075 {
3076 peek_char(common);
3077 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3078 }
3079 JUMPHERE(jump[0]);
3080 return cc;
3081
3082 case OP_CHAR:
3083 case OP_CHARI:
3084 length = IN_UCHARS(1);
3085 #ifdef SUPPORT_UTF8
3086 if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
3087 #endif
3088 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3089 {
3090 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3091 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3092
3093 context.length = length;
3094 context.sourcereg = -1;
3095 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3096 context.ucharptr = 0;
3097 #endif
3098 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3099 }
3100 check_input_end(common, fallbacks);
3101 read_char(common);
3102 #ifdef SUPPORT_UTF8
3103 if (common->utf8)
3104 {
3105 GETCHAR(c, cc);
3106 }
3107 else
3108 #endif
3109 c = *cc;
3110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3111 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3112 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3113 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3114 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3115 return cc + length;
3116
3117 case OP_NOT:
3118 case OP_NOTI:
3119 length = 1;
3120 #ifdef SUPPORT_UTF8
3121 if (common->utf8)
3122 {
3123 if (*cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
3124
3125 check_input_end(common, fallbacks);
3126 GETCHAR(c, cc);
3127
3128 if (c <= 127)
3129 {
3130 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3131 if (type == OP_NOT || !char_has_othercase(common, cc))
3132 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3133 else
3134 {
3135 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3136 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3137 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3138 }
3139 /* Skip the variable-length character. */
3140 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3141 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3142 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3144 JUMPHERE(jump[0]);
3145 return cc + length;
3146 }
3147 else
3148 read_char(common);
3149 }
3150 else
3151 #endif
3152 {
3153 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3154 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3155 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);
3156 c = *cc;
3157 }
3158
3159 if (type == OP_NOT || !char_has_othercase(common, cc))
3160 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3161 else
3162 {
3163 oc = char_othercase(common, c);
3164 bit = c ^ oc;
3165 if (ispowerof2(bit))
3166 {
3167 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3168 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3169 }
3170 else
3171 {
3172 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3173 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3174 }
3175 }
3176 return cc + length;
3177
3178 case OP_CLASS:
3179 case OP_NCLASS:
3180 check_input_end(common, fallbacks);
3181 read_char(common);
3182 #ifdef SUPPORT_UTF8
3183 jump[0] = NULL;
3184 if (common->utf8)
3185 {
3186 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3187 if (type == OP_CLASS)
3188 {
3189 add_jump(compiler, fallbacks, jump[0]);
3190 jump[0] = NULL;
3191 }
3192 }
3193 #endif
3194 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3195 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3196 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3197 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3198 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3199 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3200 #ifdef SUPPORT_UTF8
3201 if (jump[0] != NULL)
3202 JUMPHERE(jump[0]);
3203 #endif
3204 return cc + 32;
3205
3206 #ifdef SUPPORT_UTF8
3207 case OP_XCLASS:
3208 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3209 return cc + GET(cc, 0) - 1;
3210 #endif
3211
3212 case OP_REVERSE:
3213 length = GET(cc, 0);
3214 SLJIT_ASSERT(length > 0);
3215 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3217 #ifdef SUPPORT_UTF8
3218 if (common->utf8)
3219 {
3220 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3221 label = LABEL();
3222 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0));
3223 skip_char_back(common);
3224 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3225 JUMPTO(SLJIT_C_NOT_ZERO, label);
3226 return cc + LINK_SIZE;
3227 }
3228 #endif
3229 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3230 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3231 return cc + LINK_SIZE;
3232 }
3233 SLJIT_ASSERT_STOP();
3234 return cc;
3235 }
3236
3237 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3238 {
3239 /* This function consumes at least one input character. */
3240 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3241 DEFINE_COMPILER;
3242 pcre_uchar *ccbegin = cc;
3243 compare_context context;
3244 int size;
3245
3246 context.length = 0;
3247 do
3248 {
3249 if (cc >= ccend)
3250 break;
3251
3252 if (*cc == OP_CHAR)
3253 {
3254 size = 1;
3255 #ifdef SUPPORT_UTF8
3256 if (common->utf8 && cc[1] >= 0xc0)
3257 size += PRIV(utf8_table4)[cc[1] & 0x3f];
3258 #endif
3259 }
3260 else if (*cc == OP_CHARI)
3261 {
3262 size = 1;
3263 #ifdef SUPPORT_UTF8
3264 if (common->utf8)
3265 {
3266 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3267 size = 0;
3268 else if (cc[1] >= 0xc0)
3269 size += PRIV(utf8_table4)[cc[1] & 0x3f];
3270 }
3271 else
3272 #endif
3273 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3274 size = 0;
3275 }
3276 else
3277 size = 0;
3278
3279 cc += 1 + size;
3280 context.length += IN_UCHARS(size);
3281 }
3282 while (size > 0 && context.length <= 128);
3283
3284 cc = ccbegin;
3285 if (context.length > 0)
3286 {
3287 /* We have a fixed-length byte sequence. */
3288 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3289 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3290
3291 context.sourcereg = -1;
3292 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3293 context.ucharptr = 0;
3294 #endif
3295 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3296 sljit_emit_op0(compiler, SLJIT_NOP);
3297 return cc;
3298 }
3299
3300 /* A non-fixed length character will be checked if length == 0. */
3301 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3302 }
3303
3304 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3305 {
3306 DEFINE_COMPILER;
3307 int offset = GET2(cc, 1) << 1;
3308
3309 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3310 if (!common->jscript_compat)
3311 {
3312 if (fallbacks == NULL)
3313 {
3314 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3315 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3316 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3317 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3318 return JUMP(SLJIT_C_NOT_ZERO);
3319 }
3320 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3321 }
3322 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3323 }
3324
3325 /* Forward definitions. */
3326 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3327 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3328
/* Allocates a zero-filled fallback descriptor of `size` bytes, links it on the
top of parent's fallback list and stores `ccstart` as its opcode position.
The expansion site must have `compiler`, `parent` and `fallback` in scope.
If the compiler reports an error after the allocation, the enclosing function
returns `error`. Every macro argument is parenthesised where it is used as an
expression. */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3341
/* Same as PUSH_FALLBACK, for functions returning void: allocates a zero-filled
fallback descriptor of `size` bytes, links it on the top of parent's fallback
list and stores `ccstart` as its opcode position. The expansion site must have
`compiler`, `parent` and `fallback` in scope. If the compiler reports an error
after the allocation, the enclosing function simply returns. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3354
/* Views the local `fallback` pointer as a pointer to the given, more specific
fallback structure type. */
#define FALLBACK_AS(type) ((type *)fallback)
3356
3357 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3358 {
3359 DEFINE_COMPILER;
3360 int offset = GET2(cc, 1) << 1;
3361 struct sljit_jump *jump = NULL;
3362
3363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3364 if (withchecks && !common->jscript_compat)
3365 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3366
3367 #ifdef SUPPORT_UTF8
3368 #ifdef SUPPORT_UCP
3369 if (common->utf8 && *cc == OP_REFI)
3370 {
3371 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3372 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3373 if (withchecks)
3374 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3375
3376 /* Needed to save important temporary registers. */
3377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3378 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3379 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3380 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3381 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3382 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3383 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3384 }
3385 else
3386 #endif
3387 #endif
3388 {
3389 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3390 if (withchecks)
3391 jump = JUMP(SLJIT_C_ZERO);
3392 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3393
3394 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3395 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3396 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3397 }
3398
3399 if (jump != NULL)
3400 {
3401 if (emptyfail)
3402 add_jump(compiler, fallbacks, jump);
3403 else
3404 JUMPHERE(jump);
3405 }
3406 return cc + 3;
3407 }
3408
3409 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3410 {
3411 DEFINE_COMPILER;
3412 fallback_common *fallback;
3413 pcre_uchar type;
3414 struct sljit_label *label;
3415 struct sljit_jump *zerolength;
3416 struct sljit_jump *jump = NULL;
3417 pcre_uchar *ccbegin = cc;
3418 int min = 0, max = 0;
3419 BOOL minimize;
3420
3421 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3422
3423 type = cc[3];
3424 minimize = (type & 0x1) != 0;
3425 switch(type)
3426 {
3427 case OP_CRSTAR:
3428 case OP_CRMINSTAR:
3429 min = 0;
3430 max = 0;
3431 cc += 4;
3432 break;
3433 case OP_CRPLUS:
3434 case OP_CRMINPLUS:
3435 min = 1;
3436 max = 0;
3437 cc += 4;
3438 break;
3439 case OP_CRQUERY:
3440 case OP_CRMINQUERY:
3441 min = 0;
3442 max = 1;
3443 cc += 4;
3444 break;
3445 case OP_CRRANGE:
3446 case OP_CRMINRANGE:
3447 min = GET2(cc, 3 + 1);
3448 max = GET2(cc, 3 + 3);
3449 cc += 8;
3450 break;
3451 default:
3452 SLJIT_ASSERT_STOP();
3453 break;
3454 }
3455
3456 if (!minimize)
3457 {
3458 if (min == 0)
3459 {
3460 allocate_stack(common, 2);
3461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3463 /* Temporary release of STR_PTR. */
3464 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3465 zerolength = compile_ref_checks(common, ccbegin, NULL);
3466 /* Restore if not zero length. */
3467 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3468 }
3469 else
3470 {
3471 allocate_stack(common, 1);
3472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3473 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3474 }
3475
3476 if (min > 1 || max > 1)
3477 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3478
3479 label = LABEL();
3480 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3481
3482 if (min > 1 || max > 1)
3483 {
3484 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3485 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3486 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
3487 if (min > 1)
3488 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3489 if (max > 1)
3490 {
3491 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3492 allocate_stack(common, 1);
3493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3494 JUMPTO(SLJIT_JUMP, label);
3495 JUMPHERE(jump);
3496 }
3497 }
3498
3499 if (max == 0)
3500 {
3501 /* Includes min > 1 case as well. */
3502 allocate_stack(common, 1);
3503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3504 JUMPTO(SLJIT_JUMP, label);
3505 }
3506
3507 JUMPHERE(zerolength);
3508 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3509
3510 decrease_call_count(common);
3511 return cc;
3512 }
3513
3514 allocate_stack(common, 2);
3515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3516 if (type != OP_CRMINSTAR)
3517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3518
3519 if (min == 0)
3520 {
3521 zerolength = compile_ref_checks(common, ccbegin, NULL);
3522 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3523 jump = JUMP(SLJIT_JUMP);
3524 }
3525 else
3526 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3527
3528 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3529 if (max > 0)
3530 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3531
3532 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3534
3535 if (min > 1)
3536 {
3537 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3538 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3540 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3541 }
3542 else if (max > 0)
3543 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3544
3545 if (jump != NULL)
3546 JUMPHERE(jump);
3547 JUMPHERE(zerolength);
3548
3549 decrease_call_count(common);
3550 return cc;
3551 }
3552
3553 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3554 {
3555 DEFINE_COMPILER;
3556 fallback_common *fallback;
3557 recurse_entry *entry = common->entries;
3558 recurse_entry *prev = NULL;
3559 int start = GET(cc, 1);
3560
3561 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3562 while (entry != NULL)
3563 {
3564 if (entry->start == start)
3565 break;
3566 prev = entry;
3567 entry = entry->next;
3568 }
3569
3570 if (entry == NULL)
3571 {
3572 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3573 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3574 return NULL;
3575 entry->next = NULL;
3576 entry->entry = NULL;
3577 entry->calls = NULL;
3578 entry->start = start;
3579
3580 if (prev != NULL)
3581 prev->next = entry;
3582 else
3583 common->entries = entry;
3584 }
3585
3586 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3587 allocate_stack(common, 1);
3588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3589
3590 if (entry->entry == NULL)
3591 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3592 else
3593 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3594 /* Leave if the match is failed. */
3595 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3596 return cc + 1 + LINK_SIZE;
3597 }
3598
3599 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3600 {
3601 DEFINE_COMPILER;
3602 int framesize;
3603 int localptr;
3604 fallback_common altfallback;
3605 pcre_uchar *ccbegin;
3606 pcre_uchar opcode;
3607 pcre_uchar bra = OP_BRA;
3608 jump_list *tmp = NULL;
3609 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3610 jump_list **found;
3611 /* Saving previous accept variables. */
3612 struct sljit_label *save_acceptlabel = common->acceptlabel;
3613 struct sljit_jump *jump;
3614 struct sljit_jump *brajump = NULL;
3615 jump_list *save_accept = common->accept;
3616
3617 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3618 {
3619 SLJIT_ASSERT(!conditional);
3620 bra = *cc;
3621 cc++;
3622 }
3623 localptr = PRIV_DATA(cc);
3624 SLJIT_ASSERT(localptr != 0);
3625 framesize = get_framesize(common, cc, FALSE);
3626 fallback->framesize = framesize;
3627 fallback->localptr = localptr;
3628 opcode = *cc;
3629 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
3630 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
3631 ccbegin = cc;
3632 cc += GET(cc, 1);
3633
3634 if (bra == OP_BRAMINZERO)
3635 {
3636 /* This is a braminzero fallback path. */
3637 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3638 free_stack(common, 1);
3639 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3640 }
3641
3642 if (framesize < 0)
3643 {
3644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3645 allocate_stack(common, 1);
3646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3647 }
3648 else
3649 {
3650 allocate_stack(common, framesize + 2);
3651 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3652 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3653 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3654 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3656 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3657 }
3658
3659 memset(&altfallback, 0, sizeof(fallback_common));
3660 while (1)
3661 {
3662 common->acceptlabel = NULL;
3663 common->accept = NULL;
3664 altfallback.top = NULL;
3665 altfallback.topfallbacks = NULL;
3666
3667 if (*ccbegin == OP_ALT)
3668 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3669
3670 altfallback.cc = ccbegin;
3671 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3672 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3673 {
3674 common->acceptlabel = save_acceptlabel;
3675 common->accept = save_accept;
3676 return NULL;
3677 }
3678 common->acceptlabel = LABEL();
3679 if (common->accept != NULL)
3680 set_jumps(common->accept, common->acceptlabel);
3681
3682 /* Reset stack. */
3683 if (framesize < 0)
3684 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3685 else {
3686 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3687 {
3688 /* We don't need to keep the STR_PTR, only the previous localptr. */
3689 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3690 }
3691 else
3692 {
3693 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3694 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3695 }
3696 }
3697
3698 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3699 {
3700 /* We know that STR_PTR was stored on the top of the stack. */
3701 if (conditional)
3702 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3703 else if (bra == OP_BRAZERO)
3704 {
3705 if (framesize < 0)
3706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3707 else
3708 {
3709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3710 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3711 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3712 }
3713 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3714 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3715 }
3716 else if (framesize >= 0)
3717 {
3718 /* For OP_BRA and OP_BRAMINZERO. */
3719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3720 }
3721 }
3722 add_jump(compiler, found, JUMP(SLJIT_JUMP));
3723
3724 compile_fallbackpath(common, altfallback.top);
3725 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3726 {
3727 common->acceptlabel = save_acceptlabel;
3728 common->accept = save_accept;
3729 return NULL;
3730 }
3731 set_jumps(altfallback.topfallbacks, LABEL());
3732
3733 if (*cc != OP_ALT)
3734 break;
3735
3736 ccbegin = cc;
3737 cc += GET(cc, 1);
3738 }
3739 /* None of them matched. */
3740
3741 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3742 {
3743 /* Assert is failed. */
3744 if (conditional || bra == OP_BRAZERO)
3745 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3746
3747 if (framesize < 0)
3748 {
3749 /* The topmost item should be 0. */
3750 if (bra == OP_BRAZERO)
3751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3752 else
3753 free_stack(common, 1);
3754 }
3755 else
3756 {
3757 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3758 /* The topmost item should be 0. */
3759 if (bra == OP_BRAZERO)
3760 {
3761 free_stack(common, framesize + 1);
3762 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3763 }
3764 else
3765 free_stack(common, framesize + 2);
3766 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3767 }
3768 jump = JUMP(SLJIT_JUMP);
3769 if (bra != OP_BRAZERO)
3770 add_jump(compiler, target, jump);
3771
3772 /* Assert is successful. */
3773 set_jumps(tmp, LABEL());
3774 if (framesize < 0)
3775 {
3776 /* We know that STR_PTR was stored on the top of the stack. */
3777 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3778 /* Keep the STR_PTR on the top of the stack. */
3779 if (bra == OP_BRAZERO)
3780 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3781 else if (bra == OP_BRAMINZERO)
3782 {
3783 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3784 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3785 }
3786 }
3787 else
3788 {
3789 if (bra == OP_BRA)
3790 {
3791 /* We don't need to keep the STR_PTR, only the previous localptr. */
3792 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3793 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3794 }
3795 else
3796 {
3797 /* We don't need to keep the STR_PTR, only the previous localptr. */
3798 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
3799 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3800 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
3801 }
3802 }
3803
3804 if (bra == OP_BRAZERO)
3805 {
3806 fallback->hotpath = LABEL();
3807 sljit_set_label(jump, fallback->hotpath);
3808 }
3809 else if (bra == OP_BRAMINZERO)
3810 {
3811 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3812 JUMPHERE(brajump);
3813 if (framesize >= 0)
3814 {
3815 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3816 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3817 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3818 }
3819 set_jumps(fallback->common.topfallbacks, LABEL());
3820 }
3821 }
3822 else
3823 {
3824 /* AssertNot is successful. */
3825 if (framesize < 0)
3826 {
3827 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3828 if (bra != OP_BRA)
3829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3830 else
3831 free_stack(common, 1);
3832 }
3833 else
3834 {
3835 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3837 /* The topmost item should be 0. */
3838 if (bra != OP_BRA)
3839 {
3840 free_stack(common, framesize + 1);
3841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3842 }
3843 else
3844 free_stack(common, framesize + 2);
3845 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3846 }
3847
3848 if (bra == OP_BRAZERO)
3849 fallback->hotpath = LABEL();
3850 else if (bra == OP_BRAMINZERO)
3851 {
3852 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3853 JUMPHERE(brajump);
3854 }
3855
3856 if (bra != OP_BRA)
3857 {
3858 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
3859 set_jumps(fallback->common.topfallbacks, LABEL());
3860 fallback->common.topfallbacks = NULL;
3861 }
3862 }
3863
3864 common->acceptlabel = save_acceptlabel;
3865 common->accept = save_accept;
3866 return cc + 1 + LINK_SIZE;
3867 }
3868
3869 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
3870 {
3871 int condition = FALSE;
3872 pcre_uchar *slotA = name_table;
3873 pcre_uchar *slotB;
3874 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3875 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3876 sljit_w no_capture;
3877 int i;
3878
3879 locals += OVECTOR_START / sizeof(sljit_w);
3880 no_capture = locals[1];
3881
3882 for (i = 0; i < name_count; i++)
3883 {
3884 if (GET2(slotA, 0) == refno) break;
3885 slotA += name_entry_size;
3886 }
3887
3888 if (i < name_count)
3889 {
3890 /* Found a name for the number - there can be only one; duplicate names
3891 for different numbers are allowed, but not vice versa. First scan down
3892 for duplicates. */
3893
3894 slotB = slotA;
3895 while (slotB > name_table)
3896 {
3897 slotB -= name_entry_size;
3898 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3899 {
3900 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3901 if (condition) break;
3902 }
3903 else break;
3904 }
3905
3906 /* Scan up for duplicates */
3907 if (!condition)
3908 {
3909 slotB = slotA;
3910 for (i++; i < name_count; i++)
3911 {
3912 slotB += name_entry_size;
3913 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3914 {
3915 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3916 if (condition) break;
3917 }
3918 else break;
3919 }
3920 }
3921 }
3922 return condition;
3923 }
3924
3925 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
3926 {
3927 int condition = FALSE;
3928 pcre_uchar *slotA = name_table;
3929 pcre_uchar *slotB;
3930 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3931 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3932 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
3933 int i;
3934
3935 for (i = 0; i < name_count; i++)
3936 {
3937 if (GET2(slotA, 0) == recno) break;
3938 slotA += name_entry_size;
3939 }
3940
3941 if (i < name_count)
3942 {
3943 /* Found a name for the number - there can be only one; duplicate
3944 names for different numbers are allowed, but not vice versa. First
3945 scan down for duplicates. */
3946
3947 slotB = slotA;
3948 while (slotB > name_table)
3949 {
3950 slotB -= name_entry_size;
3951 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3952 {
3953 condition = GET2(slotB, 0) == group_num;
3954 if (condition) break;
3955 }
3956 else break;
3957 }
3958
3959 /* Scan up for duplicates */
3960 if (!condition)
3961 {
3962 slotB = slotA;
3963 for (i++; i < name_count; i++)
3964 {
3965 slotB += name_entry_size;
3966 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3967 {
3968 condition = GET2(slotB, 0) == group_num;
3969 if (condition) break;
3970 }
3971 else break;
3972 }
3973 }
3974 }
3975 return condition;
3976 }
3977
3978 /*
3979 Handling bracketed expressions is probably the most complex part.
3980
3981 Stack layout naming characters:
3982 S - Push the current STR_PTR
3983 0 - Push a 0 (NULL)
3984 A - Push the current STR_PTR. Needed for restoring the STR_PTR
3985 before the next alternative. Not pushed if there are no alternatives.
3986 M - Any values pushed by the current alternative. Can be empty, or anything.
3987 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
3988 L - Push the previous local (pointed by localptr) to the stack
3989 () - optional values stored on the stack
3990 ()* - optional, can be stored multiple times
3991
3992 The following list shows the regular expression templates, their PCRE byte codes
3993 and stack layout supported by pcre-sljit.
3994
3995 (?:) OP_BRA | OP_KET A M
3996 () OP_CBRA | OP_KET C M
3997 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
3998 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
3999 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4000 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4001 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4002 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4003 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4004 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4005 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4006 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4007 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4008 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4009 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4010 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4011 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4012 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4013 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4014 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4015 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4016 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4017
4018
4019 Stack layout naming characters:
4020 A - Push the alternative index (starting from 0) on the stack.
4021 Not pushed if there are no alternatives.
4022 M - Any values pushed by the current alternative. Can be empty, or anything.
4023
4024 The next list shows the possible content of a bracket:
4025 (|) OP_*BRA | OP_ALT ... M A
4026 (?()|) OP_*COND | OP_ALT M A
4027 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4028 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4029 Or nothing, if trace is unnecessary
4030 */
4031
/* Emits the hot-path code for a bracket group starting at cc. The group may be
preceded by OP_BRAZERO or OP_BRAMINZERO. A bracket_fallback record is created
with PUSH_FALLBACK, and only the first alternative is compiled here (through
compile_hotpath); the remaining OP_ALT branches are skipped at the end.

Arguments:
  common   compiler state
  cc       points to OP_BRAZERO / OP_BRAMINZERO or to the bracket opcode
  parent   fallback record of the enclosing construct

Returns:   pointer past the closing ket and its link, or NULL after a
           compiler error. For a condition starting with OP_DEF nothing is
           emitted and bracketend(cc) is returned. */
4032 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4033 {
4034 DEFINE_COMPILER;
4035 fallback_common *fallback;
4036 pcre_uchar opcode;
4037 int localptr = 0;
4038 int offset = 0;
4039 int stacksize;
4040 pcre_uchar *ccbegin;
4041 pcre_uchar *hotpath;
4042 pcre_uchar bra = OP_BRA;
4043 pcre_uchar ket;
4044 assert_fallback *assert;
4045 BOOL has_alternatives;
4046 struct sljit_jump *jump;
4047 struct sljit_jump *skip;
4048 struct sljit_label *rmaxlabel = NULL;
4049 struct sljit_jump *braminzerojump = NULL;
4050
4051 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4052
/* An optional zero-repeat prefix is remembered in bra and skipped. */
4053 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4054 {
4055 bra = *cc;
4056 cc++;
/* NOTE(review): redundant store, opcode is assigned again right after this block. */
4057 opcode = *cc;
4058 }
4059
/* ccbegin keeps the position of the bracket opcode; hotpath points just past
the opcode and its link. */
4060 opcode = *cc;
4061 ccbegin = cc;
4062 hotpath = ccbegin + 1 + LINK_SIZE;
4063
/* A conditional group whose condition is OP_DEF produces no code at all. */
4064 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4065 {
4066 /* Drop this bracket_fallback. */
4067 parent->top = fallback->prev;
4068 return bracketend(cc);
4069 }
4070
/* The closing opcode of the group tells whether it is repeated. */
4071 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4072 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4073 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Move cc to the end of the first alternative (an OP_ALT or the ket). */
4074 cc += GET(cc, 1);
4075
4076 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed: it is FALSE when the
recursion condition can be decided while compiling (see the asserts in the
OP_RREF / OP_NRREF handling below). stacksize is only a scratch variable for
the reference number here. */
4077 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4078 {
4079 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4080 if (*hotpath == OP_NRREF)
4081 {
4082 stacksize = GET2(hotpath, 1);
4083 if (common->currententry == NULL || stacksize == RREF_ANY)
4084 has_alternatives = FALSE;
4085 else if (common->currententry->start == 0)
4086 has_alternatives = stacksize != 0;
4087 else
4088 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4089 }
4090 }
4091
/* From here on a repeated OP_COND is treated as OP_SCOND and OP_ONCE_NC as
OP_ONCE. */
4092 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4093 opcode = OP_SCOND;
4094 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4095 opcode = OP_ONCE;
4096
4097 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4098 {
4099 /* Capturing brackets has a pre-allocated space. */
4100 offset = GET2(ccbegin, 1 + LINK_SIZE);
4101 localptr = OVECTOR_PRIV(offset);
4102 offset <<= 1;
4103 FALLBACK_AS(bracket_fallback)->localptr = localptr;
/* Skip the group number operand. NOTE(review): the width is hard-coded as 2 -
confirm this is right for every code unit size. */
4104 hotpath += 2;
4105 }
4106 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4107 {
4108 /* Other brackets simply allocate the next entry. */
4109 localptr = PRIV_DATA(ccbegin);
4110 SLJIT_ASSERT(localptr != 0);
4111 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4112 if (opcode == OP_ONCE)
4113 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4114 }
4115
4116 /* Instructions before the first alternative. */
/* The slots are counted first, allocated in one step, and filled afterwards. */
4117 stacksize = 0;
4118 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4119 stacksize++;
4120 if (bra == OP_BRAZERO)
4121 stacksize++;
4122
4123 if (stacksize > 0)
4124 allocate_stack(common, stacksize);
4125
4126 stacksize = 0;
4127 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4128 {
4129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4130 stacksize++;
4131 }
4132
4133 if (bra == OP_BRAZERO)
4134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4135
4136 if (bra == OP_BRAMINZERO)
4137 {
4138 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4139 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4140 if (ket != OP_KETRMIN)
4141 {
4142 free_stack(common, 1);
4143 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4144 }
4145 else
4146 {
4147 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4148 {
4149 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4150 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4151 /* Nothing stored during the first run. */
4152 skip = JUMP(SLJIT_JUMP);
4153 JUMPHERE(jump);
4154 /* Checking zero-length iteration. */
4155 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4156 {
4157 /* When we come from outside, localptr contains the previous STR_PTR. */
4158 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4159 }
4160 else
4161 {
4162 /* Except when the whole stack frame must be saved. */
4163 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4164 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4165 }
4166 JUMPHERE(skip);
4167 }
4168 else
4169 {
4170 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4171 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4172 JUMPHERE(jump);
4173 }
4174 }
4175 }
4176
/* Labels recorded in the fallback record for re-entering the group. */
4177 if (ket == OP_KETRMIN)
4178 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4179
4180 if (ket == OP_KETRMAX)
4181 {
4182 rmaxlabel = LABEL();
4183 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4184 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4185 }
4186
4187 /* Handling capturing brackets and alternatives. */
4188 if (opcode == OP_ONCE)
4189 {
4190 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4191 {
4192 /* Neither capturing brackets nor recursions are found in the block. */
4193 if (ket == OP_KETRMIN)
4194 {
4195 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4196 allocate_stack(common, 2);
4197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4199 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4200 }
4201 else if (ket == OP_KETRMAX || has_alternatives)
4202 {
4203 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4204 allocate_stack(common, 1);
4205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4206 }
4207 else
4208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4209 }
4210 else
4211 {
/* A frame is needed: the previous localptr value (and STR_PTR when the group
repeats or has alternatives) is pushed before init_frame fills the frame. */
4212 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4213 {
4214 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4215 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4216 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4220 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4221 }
4222 else
4223 {
4224 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4226 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4229 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4230 }
4231 }
4232 }
4233 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4234 {
4235 /* Saving the previous values. */
4236 allocate_stack(common, 3);
4237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4238 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4244 }
4245 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4246 {
4247 /* Saving the previous value. */
4248 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4249 allocate_stack(common, 1);
4250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4252 }
4253 else if (has_alternatives)
4254 {
4255 /* Pushing the starting string pointer. */
4256 allocate_stack(common, 1);
4257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4258 }
4259
4260 /* Generating code for the first alternative. */
4261 if (opcode == OP_COND || opcode == OP_SCOND)
4262 {
4263 if (*hotpath == OP_CREF)
4264 {
/* The condition fails when the referenced OVECTOR entry still equals
OVECTOR(1). */
4265 SLJIT_ASSERT(has_alternatives);
4266 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4267 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4268 hotpath += 3;
4269 }
4270 else if (*hotpath == OP_NCREF)
4271 {
/* Same test as OP_CREF first; only when it does not decide the condition is
do_searchovector called. STACK_TOP is kept in POSSESSIVE1 across the call,
and the table dimensions are passed through LOCALS0 / LOCALS1. */
4272 SLJIT_ASSERT(has_alternatives);
4273 stacksize = GET2(hotpath, 1);
4274 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4275
4276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4278 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4279 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4280 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4281 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4282 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4283 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4284 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4285
4286 JUMPHERE(jump);
4287 hotpath += 3;
4288 }
4289 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4290 {
4291 /* Never has other case. */
4292 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4293
/* stacksize becomes a boolean here: non-zero when the condition is known to
hold at compile time. */
4294 stacksize = GET2(hotpath, 1);
4295 if (common->currententry == NULL)
4296 stacksize = 0;
4297 else if (stacksize == RREF_ANY)
4298 stacksize = 1;
4299 else if (common->currententry->start == 0)
4300 stacksize = stacksize == 0;
4301 else
4302 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4303
4304 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4305 {
/* Decided at compile time: compile either the first branch (after the
condition) or the branch that follows OP_ALT, or nothing at all. */
4306 SLJIT_ASSERT(!has_alternatives);
4307 if (stacksize != 0)
4308 hotpath += 3;
4309 else
4310 {
4311 if (*cc == OP_ALT)
4312 {
4313 hotpath = cc + 1 + LINK_SIZE;
4314 cc += GET(cc, 1);
4315 }
4316 else
4317 hotpath = cc;
4318 }
4319 }
4320 else
4321 {
/* Named recursion check that needs the name table: do_searchgroups is called
with the current recursion's group number passed through POSSESSIVE0. */
4322 SLJIT_ASSERT(has_alternatives);
4323
4324 stacksize = GET2(hotpath, 1);
4325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4327 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4328 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4329 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4330 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4331 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4332 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4333 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4334 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4335 hotpath += 3;
4336 }
4337 }
4338 else
4339 {
/* The condition is an assertion: it gets its own assert_fallback record and
is compiled by compile_assert_hotpath, which returns the new hotpath. */
4340 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4341 /* Similar code as PUSH_FALLBACK macro. */
4342 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4343 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4344 return NULL;
4345 memset(assert, 0, sizeof(assert_fallback));
4346 assert->common.cc = hotpath;
4347 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4348 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4349 }
4350 }
4351
/* Compile the body of the selected alternative, from hotpath up to cc. */
4352 compile_hotpath(common, hotpath, cc, fallback);
4353 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4354 return NULL;
4355
/* For OP_ONCE, STACK_TOP is recomputed from the value kept in localptr once
the body has matched. */
4356 if (opcode == OP_ONCE)
4357 {
4358 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4359 {
4360 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4361 /* TMP2 which is set here used by OP_KETRMAX below. */
4362 if (ket == OP_KETRMAX)
4363 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4364 else if (ket == OP_KETRMIN)
4365 {
4366 /* Move the STR_PTR to the localptr. */
4367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4368 }
4369 }
4370 else
4371 {
4372 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4373 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4374 if (ket == OP_KETRMAX)
4375 {
4376 /* TMP2 which is set here used by OP_KETRMAX below. */
4377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4378 }
4379 }
4380 }
4381
/* Slots pushed after the alternative: one holding STR_PTR (repeated group) or
0 (non-repeated group with a zero-repeat prefix), and one holding the
alternative index 0 when there are alternatives and the group is not
OP_ONCE. */
4382 stacksize = 0;
4383 if (ket != OP_KET || bra != OP_BRA)
4384 stacksize++;
4385 if (has_alternatives && opcode != OP_ONCE)
4386 stacksize++;
4387
4388 if (stacksize > 0)
4389 allocate_stack(common, stacksize);
4390
4391 stacksize = 0;
4392 if (ket != OP_KET)
4393 {
4394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4395 stacksize++;
4396 }
4397 else if (bra != OP_BRA)
4398 {
4399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4400 stacksize++;
4401 }
4402
4403 if (has_alternatives)
4404 {
4405 if (opcode != OP_ONCE)
4406 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4407 if (ket != OP_KETRMAX)
4408 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4409 }
4410
4411 /* Must be after the hotpath label. */
/* offset is non-zero only for capturing brackets: the start kept in localptr
and the current STR_PTR are written to the group's OVECTOR pair. */
4412 if (offset != 0)
4413 {
4414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4416 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4417 }
4418
/* Greedy repeat: jump back to rmaxlabel, except when the checked kinds of
group matched without moving STR_PTR. */
4419 if (ket == OP_KETRMAX)
4420 {
4421 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4422 {
4423 if (has_alternatives)
4424 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4425 /* Checking zero-length iteration. */
4426 if (opcode != OP_ONCE)
4427 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4428 else
4429 /* TMP2 must contain the starting STR_PTR. */
4430 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4431 }
4432 else
4433 JUMPTO(SLJIT_JUMP, rmaxlabel);
4434 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4435 }
4436
4437 if (bra == OP_BRAZERO)
4438 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4439
4440 if (bra == OP_BRAMINZERO)
4441 {
4442 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4443 JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath);
4444 if (braminzerojump != NULL)
4445 {
4446 JUMPHERE(braminzerojump);
4447 /* We need to release the end pointer to perform the
4448 fallback for the zero-length iteration. When
4449 framesize is < 0, OP_ONCE will do the release itself. */
4450 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4451 {
4452 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4453 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4454 }
4455 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4456 free_stack(common, 1);
4457 }
4458 /* Continue to the normal fallback. */
4459 }
4460
4461 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4462 decrease_call_count(common);
4463
4464 /* Skip the other alternatives. */
4465 while (*cc == OP_ALT)
4466 cc += GET(cc, 1);
4467 cc += 1 + LINK_SIZE;
4468 return cc;
4469 }
4470
4471 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4472 {
4473 DEFINE_COMPILER;
4474 fallback_common *fallback;
4475 pcre_uchar opcode;
4476 int localptr;
4477 int cbraprivptr = 0;
4478 int framesize;
4479 int stacksize;
4480 int offset = 0;
4481 BOOL zero = FALSE;
4482 pcre_uchar *ccbegin = NULL;
4483 int stack;
4484 struct sljit_label *loop = NULL;
4485 struct jump_list *emptymatch = NULL;
4486
4487 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
4488 if (*cc == OP_BRAPOSZERO)
4489 {
4490 zero = TRUE;
4491 cc++;
4492 }
4493
4494 opcode = *cc;
4495 localptr = PRIV_DATA(cc);
4496 SLJIT_ASSERT(localptr != 0);
4497 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
4498 switch(opcode)
4499 {
4500 case OP_BRAPOS:
4501 case OP_SBRAPOS:
4502 ccbegin = cc + 1 + LINK_SIZE;
4503 break;
4504
4505 case OP_CBRAPOS:
4506 case OP_SCBRAPOS:
4507 offset = GET2(cc, 1 + LINK_SIZE);
4508 cbraprivptr = OVECTOR_PRIV(offset);
4509 offset <<= 1;
4510 ccbegin = cc + 1 + LINK_SIZE + 2;
4511 break;
4512
4513 default:
4514 SLJIT_ASSERT_STOP();
4515 break;
4516 }
4517
4518 framesize = get_framesize(common, cc, FALSE);
4519 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4520 if (framesize < 0)
4521 {
4522 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4523 if (!zero)
4524 stacksize++;
4525 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4526 allocate_stack(common, stacksize);
4527 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4528
4529 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4530 {
4531 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4532 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4535 }
4536 else
4537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4538
4539 if (!zero)
4540 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4541 }
4542 else
4543 {
4544 stacksize = framesize + 1;
4545 if (!zero)
4546 stacksize++;
4547 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4548 stacksize++;
4549 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4550 allocate_stack(common, stacksize);
4551
4552 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4553 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4555 stack = 0;
4556 if (!zero)
4557 {
4558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4559 stack++;
4560 }
4561 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4562 {
4563 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4564 stack++;
4565 }
4566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4567 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4568 }
4569
4570 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4572
4573 loop = LABEL();
4574 while (*cc != OP_KETRPOS)
4575 {
4576 fallback->top = NULL;
4577 fallback->topfallbacks = NULL;
4578 cc += GET(cc, 1);
4579
4580 compile_hotpath(common, ccbegin, cc, fallback);
4581 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4582 return NULL;
4583
4584 if (framesize < 0)
4585 {
4586 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4587
4588 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4589 {
4590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4591 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4592 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4594 }
4595 else
4596 {
4597 if (opcode == OP_SBRAPOS)
4598 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4599 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4600 }
4601
4602 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4603 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4604
4605 if (!zero)
4606 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4607 }
4608 else
4609 {
4610 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4611 {
4612 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
4613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4614 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4616 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4617 }
4618 else
4619 {
4620 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4621 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
4622 if (opcode == OP_SBRAPOS)
4623 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4624 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
4625 }
4626
4627 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4628 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4629
4630 if (!zero)
4631 {
4632 if (framesize < 0)
4633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4634 else
4635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4636 }
4637 }
4638 JUMPTO(SLJIT_JUMP, loop);
4639 flush_stubs(common);
4640
4641 compile_fallbackpath(common, fallback->top);
4642 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4643 return NULL;
4644 set_jumps(fallback->topfallbacks, LABEL());
4645
4646 if (framesize < 0)
4647 {
4648 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4649 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4650 else
4651 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4652 }
4653 else
4654 {
4655 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4656 {
4657 /* Last alternative. */
4658 if (*cc == OP_KETRPOS)
4659 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4660 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4661 }
4662 else
4663 {
4664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4665 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4666 }
4667 }
4668
4669 if (*cc == OP_KETRPOS)
4670 break;
4671 ccbegin = cc + 1 + LINK_SIZE;
4672 }
4673
4674 fallback->topfallbacks = NULL;
4675 if (!zero)
4676 {
4677 if (framesize < 0)
4678 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
4679 else /* TMP2 is set to [localptr] above. */
4680 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
4681 }
4682
4683 /* None of them matched. */
4684 set_jumps(emptymatch, LABEL());
4685 decrease_call_count(common);
4686 return cc + 1 + LINK_SIZE;
4687 }
4688
4689 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
4690 {
4691 int class_len;
4692
4693 *opcode = *cc;
4694 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
4695 {
4696 cc++;
4697 *type = OP_CHAR;
4698 }
4699 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
4700 {
4701 cc++;
4702 *type = OP_CHARI;
4703 *opcode -= OP_STARI - OP_STAR;
4704 }
4705 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
4706 {
4707 cc++;
4708 *type = OP_NOT;
4709 *opcode -= OP_NOTSTAR - OP_STAR;
4710 }
4711 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
4712 {
4713 cc++;
4714 *type = OP_NOTI;
4715 *opcode -= OP_NOTSTARI - OP_STAR;
4716 }
4717 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
4718 {
4719 cc++;
4720 *opcode -= OP_TYPESTAR - OP_STAR;
4721 *type = 0;
4722 }
4723 else
4724 {
4725 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
4726 *type = *opcode;
4727 cc++;
4728 class_len = (*type < OP_XCLASS) ? 33 : GET(cc, 0);
4729 *opcode = cc[class_len - 1];
4730 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
4731 {
4732 *opcode -= OP_CRSTAR - OP_STAR;
4733 if (end != NULL)
4734 *end = cc + class_len;
4735 }
4736 else
4737 {
4738 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
4739 *arg1 = GET2(cc, (class_len + 2));
4740 *arg2 = GET2(cc, class_len);
4741
4742 if (*arg2 == 0)
4743 {
4744 SLJIT_ASSERT(*arg1 != 0);
4745 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
4746 }
4747 if (*arg1 == *arg2)
4748 *opcode = OP_EXACT;
4749
4750 if (end != NULL)
4751 *end = cc + class_len + 4;
4752 }
4753 return cc;
4754 }
4755
4756 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
4757 {
4758 *arg1 = GET2(cc, 0);
4759 cc += 2;
4760 }
4761
4762 if (*type == 0)
4763 {
4764 *type = *cc;
4765 if (end != NULL)
4766 *end = next_opcode(common, cc);
4767 cc++;
4768 return cc;
4769 }
4770
4771 if (end != NULL)
4772 {
4773 *end = cc + 1;
4774 #ifdef SUPPORT_UTF8
4775 if (common->utf8 && *cc >= 0xc0) *end += PRIV(utf8_table4)[*cc & 0x3f];
4776 #endif
4777 }
4778 return cc;
4779 }
4780
4781 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4782 {
4783 DEFINE_COMPILER;
4784 fallback_common *fallback;
4785 pcre_uchar opcode;
4786 pcre_uchar type;
4787 int arg1 = -1, arg2 = -1;
4788 pcre_uchar* end;
4789 jump_list *nomatch = NULL;
4790 struct sljit_jump *jump = NULL;
4791 struct sljit_label *label;
4792
4793 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
4794
4795 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
4796
4797 switch(opcode)
4798 {
4799 case OP_STAR:
4800 case OP_PLUS:
4801 case OP_UPTO:
4802 case OP_CRRANGE:
4803 if (type == OP_ANYNL || type == OP_EXTUNI)
4804 {
4805 if (opcode == OP_STAR || opcode == OP_UPTO)
4806 {
4807 allocate_stack(common, 2);
4808 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4809 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4810 }
4811 else
4812 {
4813 allocate_stack(common, 1);
4814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4815 }
4816 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4817 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4818
4819 label = LABEL();
4820 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4821 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4822 {
4823 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4824 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4825 if (opcode == OP_CRRANGE && arg2 > 0)
4826 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
4827 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
4828 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
4829 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4830 }
4831
4832 allocate_stack(common, 1);
4833 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4834 JUMPTO(SLJIT_JUMP, label);
4835 if (jump != NULL)
4836 JUMPHERE(jump);
4837 }
4838 else
4839 {
4840 allocate_stack(common, 2);
4841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4843 label = LABEL();
4844 compile_char1_hotpath(common, type, cc, &nomatch);
4845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4846 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
4847 {
4848 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4849 JUMPTO(SLJIT_JUMP, label);
4850 }
4851 else
4852 {
4853 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4854 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4856 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4857 }
4858 set_jumps(nomatch, LABEL());
4859 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
4860 add_jump(compiler, &fallback->topfallbacks,
4861 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
4862 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4863 }
4864 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4865 break;
4866
4867 case OP_MINSTAR:
4868 case OP_MINPLUS:
4869 allocate_stack(common, 1);
4870 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4871 if (opcode == OP_MINPLUS)
4872 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4873 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4874 break;
4875
4876 case OP_MINUPTO:
4877 case OP_CRMINRANGE:
4878 allocate_stack(common, 2);
4879 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4880 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4881 if (opcode == OP_CRMINRANGE)
4882 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4883 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4884 break;
4885
4886 case OP_QUERY:
4887 case OP_MINQUERY:
4888 allocate_stack(common, 1);
4889 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4890 if (opcode == OP_QUERY)
4891 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4892 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4893 break;
4894
4895 case OP_EXACT:
4896 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4897 label = LABEL();
4898 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4899 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4900 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4901 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4902 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4903 break;
4904
4905 case OP_POSSTAR:
4906 case OP_POSPLUS:
4907 case OP_POSUPTO:
4908 if (opcode != OP_POSSTAR)
4909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4911 label = LABEL();
4912 compile_char1_hotpath(common, type, cc, &nomatch);
4913 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4914 if (opcode != OP_POSUPTO)
4915 {
4916 if (opcode == OP_POSPLUS)
4917 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
4918 JUMPTO(SLJIT_JUMP, label);
4919 }
4920 else
4921 {
4922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4923 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4925 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4926 }
4927 set_jumps(nomatch, LABEL());
4928 if (opcode == OP_POSPLUS)
4929 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
4930 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4931 break;
4932
4933 case OP_POSQUERY:
4934 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4935 compile_char1_hotpath(common, type, cc, &nomatch);
4936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4937 set_jumps(nomatch, LABEL());
4938 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4939 break;
4940
4941 default:
4942 SLJIT_ASSERT_STOP();
4943 break;
4944 }
4945
4946 decrease_call_count(common);
4947 return end;
4948 }
4949
/* Emits the hot path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT (the opcodes
compile_hotpath dispatches to this function). Every path consumes exactly one
code unit and returns cc + 1.
NOTE(review): PUSH_FALLBACK is defined outside this chunk; it presumably
allocates the record assigned to 'fallback' and returns early on failure -
confirm against its definition. */
4950 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4951 {
4952 DEFINE_COMPILER;
4953 fallback_common *fallback;
4954
4955 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4956
/* OP_FAIL: unconditionally jump to this item's fallback list. */
4957 if (*cc == OP_FAIL)
4958 {
4959 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4960 return cc + 1;
4961 }
4962
/* OP_ASSERT_ACCEPT, or any accept while common->currententry is set:
accept unconditionally. The jump is either queued on common->accept or sent
straight to common->acceptlabel when that label already exists. */
4963 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
4964 {
4965 /* No need to check notempty conditions. */
4966 if (common->acceptlabel == NULL)
4967 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
4968 else
4969 JUMPTO(SLJIT_JUMP, common->acceptlabel);
4970 return cc + 1;
4971 }
4972
/* Remaining case: the accept is conditional on the notempty flags.
Step 1: accept at once when STR_PTR differs from the value in OVECTOR(0)
(presumably the match start, i.e. the match is not empty - TODO confirm). */
4973 if (common->acceptlabel == NULL)
4974 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
4975 else
4976 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
/* Step 2: load the jit_arguments pointer; a non-zero 'notempty' byte sends
execution to the fallback path. */
4977 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4978 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
4979 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* Step 3: a zero 'notempty_atstart' byte means no restriction: accept. */
4980 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
4981 if (common->acceptlabel == NULL)
4982 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4983 else
4984 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
/* Step 4: accept when the 'str' field of jit_arguments differs from STR_PTR;
otherwise fall through to the unconditional fallback jump. */
4985 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4986 if (common->acceptlabel == NULL)
4987 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
4988 else
4989 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
4990 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4991 return cc + 1;
4992 }
4993
4994 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
4995 {
4996 DEFINE_COMPILER;
4997 int offset = GET2(cc, 1);
4998
4999 /* Data will be discarded anyway... */
5000 if (common->currententry != NULL)
5001 return cc + 3;
5002
5003 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5004 offset <<= 1;
5005 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5007 return cc + 3;
5008 }
5009
/* Emits the hot (forward matching) path for the opcode sequence [cc, ccend).
Each opcode is dispatched to the specialised compile_*_hotpath helper, which
returns the address of the next opcode to process. Compilation of the
sequence stops early when a helper returns NULL or an unknown opcode is met.
'parent' is the enclosing fallback record: failure jumps of simple items are
appended to parent->top->nextfallbacks when a fallback has already been
pushed, otherwise to parent->topfallbacks.
NOTE(review): PUSH_FALLBACK_NOVALUE and FALLBACK_AS are defined outside this
chunk; they presumably allocate/cast the local 'fallback' record - confirm. */
5010 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5011 {
5012 DEFINE_COMPILER;
5013 fallback_common *fallback;
5014
5015 while (cc < ccend)
5016 {
5017 switch(*cc)
5018 {
/* Single-item opcodes handled directly by compile_char1_hotpath. */
5019 case OP_SOD:
5020 case OP_SOM:
5021 case OP_NOT_WORD_BOUNDARY:
5022 case OP_WORD_BOUNDARY:
5023 case OP_NOT_DIGIT:
5024 case OP_DIGIT:
5025 case OP_NOT_WHITESPACE:
5026 case OP_WHITESPACE:
5027 case OP_NOT_WORDCHAR:
5028 case OP_WORDCHAR:
5029 case OP_ANY:
5030 case OP_ALLANY:
5031 case OP_ANYBYTE:
5032 case OP_NOTPROP:
5033 case OP_PROP:
5034 case OP_ANYNL:
5035 case OP_NOT_HSPACE:
5036 case OP_HSPACE:
5037 case OP_NOT_VSPACE:
5038 case OP_VSPACE:
5039 case OP_EXTUNI:
5040 case OP_EODN:
5041 case OP_EOD:
5042 case OP_CIRC:
5043 case OP_CIRCM:
5044 case OP_DOLL:
5045 case OP_DOLLM:
5046 case OP_NOT:
5047 case OP_NOTI:
5048 case OP_REVERSE:
5049 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5050 break;
5051
/* OP_SET_SOM: push the previous OVECTOR(0) value on the stack and overwrite
OVECTOR(0) with the current STR_PTR. */
5052 case OP_SET_SOM:
5053 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5054 allocate_stack(common, 1);
5055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5057 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5058 cc++;
5059 break;
5060
5061 case OP_CHAR:
5062 case OP_CHARI:
5063 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5064 break;
5065
/* Every repeated single-item opcode goes through compile_iterator_hotpath. */
5066 case OP_STAR:
5067 case OP_MINSTAR:
5068 case OP_PLUS:
5069 case OP_MINPLUS:
5070 case OP_QUERY:
5071 case OP_MINQUERY:
5072 case OP_UPTO:
5073 case OP_MINUPTO:
5074 case OP_EXACT:
5075 case OP_POSSTAR:
5076 case OP_POSPLUS:
5077 case OP_POSQUERY:
5078 case OP_POSUPTO:
5079 case OP_STARI:
5080 case OP_MINSTARI:
5081 case OP_PLUSI:
5082 case OP_MINPLUSI:
5083 case OP_QUERYI:
5084 case OP_MINQUERYI:
5085 case OP_UPTOI:
5086 case OP_MINUPTOI:
5087 case OP_EXACTI:
5088 case OP_POSSTARI:
5089 case OP_POSPLUSI:
5090 case OP_POSQUERYI:
5091 case OP_POSUPTOI:
5092 case OP_NOTSTAR:
5093 case OP_NOTMINSTAR:
5094 case OP_NOTPLUS:
5095 case OP_NOTMINPLUS:
5096 case OP_NOTQUERY:
5097 case OP_NOTMINQUERY:
5098 case OP_NOTUPTO:
5099 case OP_NOTMINUPTO:
5100 case OP_NOTEXACT:
5101 case OP_NOTPOSSTAR:
5102 case OP_NOTPOSPLUS:
5103 case OP_NOTPOSQUERY:
5104 case OP_NOTPOSUPTO:
5105 case OP_NOTSTARI:
5106 case OP_NOTMINSTARI:
5107 case OP_NOTPLUSI:
5108 case OP_NOTMINPLUSI:
5109 case OP_NOTQUERYI:
5110 case OP_NOTMINQUERYI:
5111 case OP_NOTUPTOI:
5112 case OP_NOTMINUPTOI:
5113 case OP_NOTEXACTI:
5114 case OP_NOTPOSSTARI:
5115 case OP_NOTPOSPLUSI:
5116 case OP_NOTPOSQUERYI:
5117 case OP_NOTPOSUPTOI:
5118 case OP_TYPESTAR:
5119 case OP_TYPEMINSTAR:
5120 case OP_TYPEPLUS:
5121 case OP_TYPEMINPLUS:
5122 case OP_TYPEQUERY:
5123 case OP_TYPEMINQUERY:
5124 case OP_TYPEUPTO:
5125 case OP_TYPEMINUPTO:
5126 case OP_TYPEEXACT:
5127 case OP_TYPEPOSSTAR:
5128 case OP_TYPEPOSPLUS:
5129 case OP_TYPEPOSQUERY:
5130 case OP_TYPEPOSUPTO:
5131 cc = compile_iterator_hotpath(common, cc, parent);
5132 break;
5133
/* A class is compiled as an iterator when the code unit at index 33 is a
repeat opcode (OP_CRSTAR..OP_CRMINRANGE), otherwise as a single item.
NOTE(review): the hard-coded index 33 (and cc[3] for OP_REF below)
presumably assumes 8-bit code units - confirm for 16-bit builds. */
5134 case OP_CLASS:
5135 case OP_NCLASS:
5136 if (cc[33] >= OP_CRSTAR && cc[33] <= OP_CRMINRANGE)
5137 cc = compile_iterator_hotpath(common, cc, parent);
5138 else
5139 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5140 break;
5141
5142 #ifdef SUPPORT_UTF8
/* Same decision for an extended class; its length is read with GET(cc, 1). */
5143 case OP_XCLASS:
5144 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5145 cc = compile_iterator_hotpath(common, cc, parent);
5146 else
5147 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5148 break;
5149 #endif
5150
/* Back references: repeated ones use the dedicated iterator compiler. */
5151 case OP_REF:
5152 case OP_REFI:
5153 if (cc[3] >= OP_CRSTAR && cc[3] <= OP_CRMINRANGE)
5154 cc = compile_ref_iterator_hotpath(common, cc, parent);
5155 else
5156 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5157 break;
5158
5159 case OP_RECURSE:
5160 cc = compile_recurse_hotpath(common, cc, parent);
5161 break;
5162
5163 case OP_ASSERT:
5164 case OP_ASSERT_NOT:
5165 case OP_ASSERTBACK:
5166 case OP_ASSERTBACK_NOT:
5167 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5168 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5169 break;
5170
/* OP_BRAMINZERO: the hot path skips the whole bracket (cc is moved to its
end) after saving STR_PTR on the stack; when the bracket ends in OP_KETRMIN
two slots are used, the first initialised to 0. The bracket body itself is
not compiled here. */
5171 case OP_BRAMINZERO:
5172 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5173 cc = bracketend(cc + 1);
5174 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5175 {
5176 allocate_stack(common, 1);
5177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5178 }
5179 else
5180 {
5181 allocate_stack(common, 2);
5182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5183 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5184 }
5185 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5186 if (cc[1] > OP_ASSERTBACK_NOT)
5187 decrease_call_count(common);
5188 break;
5189
5190 case OP_ONCE:
5191 case OP_ONCE_NC:
5192 case OP_BRA:
5193 case OP_CBRA:
5194 case OP_COND:
5195 case OP_SBRA:
5196 case OP_SCBRA:
5197 case OP_SCOND:
5198 cc = compile_bracket_hotpath(common, cc, parent);
5199 break;
5200
/* OP_BRAZERO precedes either a bracket (opcode above OP_ASSERTBACK_NOT) or
an assertion; pick the matching compiler. */
5201 case OP_BRAZERO:
5202 if (cc[1] > OP_ASSERTBACK_NOT)
5203 cc = compile_bracket_hotpath(common, cc, parent);
5204 else
5205 {
5206 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5207 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5208 }
5209 break;
5210
5211 case OP_BRAPOS:
5212 case OP_CBRAPOS:
5213 case OP_SBRAPOS:
5214 case OP_SCBRAPOS:
5215 case OP_BRAPOSZERO:
5216 cc = compile_bracketpos_hotpath(common, cc, parent);
5217 break;
5218
5219 case OP_FAIL:
5220 case OP_ACCEPT:
5221 case OP_ASSERT_ACCEPT:
5222 cc = compile_fail_accept_hotpath(common, cc, parent);
5223 break;
5224
5225 case OP_CLOSE:
5226 cc = compile_close_hotpath(common, cc);
5227 break;
5228
/* OP_SKIPZERO: the following bracket is skipped without emitting code. */
5229 case OP_SKIPZERO:
5230 cc = bracketend(cc + 1);
5231 break;
5232
5233 default:
5234 SLJIT_ASSERT_STOP();
5235 return;
5236 }
/* A NULL result from a helper aborts compilation of this sequence
(presumably an allocation or compiler error - TODO confirm). */
5237 if (cc == NULL)
5238 return;
5239 }
5240 SLJIT_ASSERT(cc == ccend);
5241 }
5242
/* The fallback-record helper macros are not used past this point. */
5243 #undef PUSH_FALLBACK
5244 #undef PUSH_FALLBACK_NOVALUE
5245 #undef FALLBACK_AS
5246
/* Compiles the fallback path of 'current' and returns from the enclosing
function (which must return void) when the sljit compiler reports an error.
Relies on locals named 'common' and 'compiler' being in scope. */
5247 #define COMPILE_FALLBACKPATH(current) \
5248 do \
5249 { \
5250 compile_fallbackpath(common, (current)); \
5251 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5252 return; \
5253 } \
5254 while (0)
5255
/* Casts the local variable 'current' to a pointer to the given fallback
record type. */
5256 #define CURRENT_AS(type) ((type*)current)
5257
/* Emits the fallback (backtracking) path for a repeated single item.
The repeat is decoded again from current->cc with get_iterator_parameters,
which yields the normalised opcode, the item type and the two repeat
arguments (arg1/arg2; their exact meaning is defined by that helper, outside
this chunk). The generated code either resumes the hot path at
CURRENT_AS(iterator_fallback)->hotpath with a different repeat count, or
releases the stack slots of the iterator and falls through to the fallback
code emitted after it. */
5258 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5259 {
5260 DEFINE_COMPILER;
5261 pcre_uchar *cc = current->cc;
5262 pcre_uchar opcode;
5263 pcre_uchar type;
5264 int arg1 = -1, arg2 = -1;
5265 struct sljit_label *label = NULL;
5266 struct sljit_jump *jump = NULL;
5267
5268 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5269
5270 switch(opcode)
5271 {
5272 case OP_STAR:
5273 case OP_PLUS:
5274 case OP_UPTO:
5275 case OP_CRRANGE:
5276 if (type == OP_ANYNL || type == OP_EXTUNI)
5277 {
/* One saved STR_PTR per stack slot: pop it and retry the hot path unless
the popped value is 0. */
5278 set_jumps(current->topfallbacks, LABEL());
5279 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5280 free_stack(common, 1);
5281 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5282 }
5283 else
5284 {
/* STACK(0) holds the saved STR_PTR and STACK(1) a counter. arg2 is reused
here as the lower limit (0 for STAR/UPTO, 1 for PLUS, unchanged for
CRRANGE): once the counter is <= arg2 + 1 nothing more can be given back. */
5285 if (opcode == OP_STAR || opcode == OP_UPTO)
5286 arg2 = 0;
5287 else if (opcode == OP_PLUS)
5288 arg2 = 1;
5289 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
/* Otherwise decrement the counter, step STR_PTR back by one character,
store it and resume the hot path. */
5290 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5291 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5292 skip_char_back(common);
5293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5294 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
/* Only PLUS and CRRANGE have hot-path failure jumps to bind here. */
5295 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5296 set_jumps(current->topfallbacks, LABEL());
5297 JUMPHERE(jump);
5298 free_stack(common, 2);
5299 }
5300 break;
5301
5302 case OP_MINSTAR:
5303 case OP_MINPLUS:
/* Reload the saved STR_PTR and try to match one more item. For MINPLUS the
jumps queued so far are bound after the reload and the list is reset, so
that it only collects the failures of the item emitted below. */
5304 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5305 if (opcode == OP_MINPLUS)
5306 {
5307 set_jumps(current->topfallbacks, LABEL());
5308 current->topfallbacks = NULL;
5309 }
5310 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5312 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
/* The extra item did not match: release the slot and fall through. */
5313 set_jumps(current->topfallbacks, LABEL());
5314 free_stack(common, 1);
5315 break;
5316
5317 case OP_MINUPTO:
5318 case OP_CRMINRANGE:
/* Same scheme as above with a counter kept in STACK(1). For CRMINRANGE
'label' marks the start of the loop used further down. */
5319 if (opcode == OP_CRMINRANGE)
5320 {
5321 set_jumps(current->topfallbacks, LABEL());
5322 current->topfallbacks = NULL;
5323 label = LABEL();
5324 }
5325 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5326 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5327
/* Item matched: store the new STR_PTR and the incremented counter. */
5328 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5329 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5330 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5331 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5332
/* CRMINRANGE: keep matching while the counter is below arg2 + 1. */
5333 if (opcode == OP_CRMINRANGE)
5334 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5335
/* Resume the hot path: always for CRMINRANGE with arg1 == 0, otherwise only
while the counter is below arg1 + 2. */
5336 if (opcode == OP_CRMINRANGE && arg1 == 0)
5337 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5338 else
5339 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5340
5341 set_jumps(current->topfallbacks, LABEL());
5342 free_stack(common, 2);
5343 break;
5344
5345 case OP_QUERY:
/* Load the saved STR_PTR and replace it with 0 so the retry happens only
once: a non-zero value resumes the hot path, zero skips to the release. */
5346 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5348 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5349 jump = JUMP(SLJIT_JUMP);
/* Entry for a failed item match in the hot path: same reload and zeroing,
then the hot path is resumed unconditionally. */
5350 set_jumps(current->topfallbacks, LABEL());
5351 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5353 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5354 JUMPHERE(jump);
5355 free_stack(common, 1);
5356 break;
5357
5358 case OP_MINQUERY:
/* Load and zero the saved STR_PTR; if it was already 0 the single retry has
been used up. Otherwise try to match the item now and resume the hot path. */
5359 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5361 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5362 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5363 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5364 set_jumps(current->topfallbacks, LABEL());
5365 JUMPHERE(jump);
5366 free_stack(common, 1);
5367 break;
5368
/* Nothing is emitted here except the landing label for the failure jumps. */
5369 case OP_EXACT:
5370 case OP_POSPLUS:
5371 set_jumps(current->topfallbacks, LABEL());
5372 break;
5373
/* No fallback code is emitted for these opcodes. */
5374 case OP_POSSTAR:
5375 case OP_POSQUERY:
5376 case OP_POSUPTO:
5377 break;
5378
5379 default:
5380 SLJIT_ASSERT_STOP();
5381 break;
5382 }
5383 }
5384
5385 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5386 {
5387 DEFINE_COMPILER;
5388 pcre_uchar *cc = current->cc;
5389 pcre_uchar type;
5390
5391 type = cc[3];
5392 if ((type & 0x1) == 0)
5393 {
5394 set_jumps(current->topfallbacks, LABEL());
5395 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5396 free_stack(common, 1);
5397 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5398 return;
5399 }
5400
5401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5402 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5403 set_jumps(current->topfallbacks, LABEL());
5404 free_stack(common, 2);
5405 }
5406
/* Emits the fallback path of a recursion: the failure jumps land here, one
value is popped from the stack and written back to OVECTOR(0). */
5407 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5408 {
5409 DEFINE_COMPILER;
5410
5411 set_jumps(current->topfallbacks, LABEL());
/* The slot is read before it is released; the store happens afterwards. */
5412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5413 free_stack(common, 1);
5414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5415 }
5416
/* Emits the fallback path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.
OP_BRAMINZERO is never expected here. With OP_BRAZERO one stack slot holds a
saved STR_PTR: a non-zero value means the assertion can be retried through
CURRENT_AS(assert_fallback)->hotpath, and the slot is zeroed before doing
so. */
5417 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5418 {
5419 DEFINE_COMPILER;
5420 pcre_uchar *cc = current->cc;
5421 pcre_uchar bra = OP_BRA;
5422 struct sljit_jump *brajump = NULL;
5423
5424 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5425 if (*cc == OP_BRAZERO)
5426 {
5427 bra = *cc;
5428 cc++;
5429 }
5430
5431 if (bra == OP_BRAZERO)
5432 {
5433 SLJIT_ASSERT(current->topfallbacks == NULL);
5434 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5435 }
5436
/* Negative framesize: there is no frame to revert, so only the landing label
and the optional OP_BRAZERO retry are emitted. */
5437 if (CURRENT_AS(assert_fallback)->framesize < 0)
5438 {
5439 set_jumps(current->topfallbacks, LABEL());
5440
5441 if (bra == OP_BRAZERO)
5442 {
5443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5444 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5445 free_stack(common, 1);
5446 }
5447 return;
5448 }
5449
5450 if (bra == OP_BRAZERO)
5451 {
/* Negative assertions need no frame handling: retry or release and leave. */
5452 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5453 {
5454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5455 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5456 free_stack(common, 1);
5457 return;
5458 }
/* Positive assertion: release the slot; a zero STR_PTR skips everything up
to the JUMPHERE(brajump) at the end. */
5459 free_stack(common, 1);
5460 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5461 }
5462
5463 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5464 {
/* Reload STACK_TOP from the assertion's local slot, call the shared
revertframes routine, then store the word found at offset
framesize * sizeof(sljit_w) from STACK_TOP back into that local slot. */
5465 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5466 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5468
5469 set_jumps(current->topfallbacks, LABEL());
5470 }
5471 else
5472 set_jumps(current->topfallbacks, LABEL());
5473
5474 if (bra == OP_BRAZERO)
5475 {
5476 /* We know there is enough space on the stack. */
/* Re-claim the slot released above without a limit check, zero it and retry
the assertion through the hot path. */
5477 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5479 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5480 JUMPHERE(brajump);
5481 }
5482 }
5483
5484 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5485 {
5486 DEFINE_COMPILER;
5487 int opcode;
5488 int offset = 0;
5489 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5490 int stacksize;
5491 int count;
5492 pcre_uchar *cc = current->cc;
5493 pcre_uchar *ccbegin;
5494 pcre_uchar *ccprev;
5495 jump_list *jumplist = NULL;
5496 jump_list *jumplistitem = NULL;
5497 pcre_uchar bra = OP_BRA;
5498 pcre_uchar ket;
5499 assert_fallback *assert;
5500 BOOL has_alternatives;
5501 struct sljit_jump *brazero = NULL;
5502 struct sljit_jump *once = NULL;
5503 struct sljit_jump *cond = NULL;
5504 struct sljit_label *rminlabel = NULL;
5505
5506 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5507 {
5508 bra = *cc;
5509 cc++;
5510 }
5511
5512 opcode = *cc;
5513 ccbegin = cc;
5514 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5515 cc += GET(cc, 1);
5516 has_alternatives = *cc == OP_ALT;
5517 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5518 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
5519 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5520 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
5521 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5522 opcode = OP_SCOND;
5523 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5524 opcode = OP_ONCE;
5525
5526 if (ket == OP_KETRMAX)
5527 {
5528 if (bra != OP_BRAZERO)
5529 free_stack(common, 1);
5530 else
5531 {
5532 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5533 free_stack(common, 1);
5534 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5535 }
5536 }
5537 else if (ket == OP_KETRMIN)
5538 {
5539 if (bra != OP_BRAMINZERO)
5540 {
5541 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5542 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5543 {
5544 /* Checking zero-length iteration. */
5545 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5546 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5547 else
5548 {
5549 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5550 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5551 }
5552 if (opcode != OP_ONCE)
5553 free_stack(common, 1);
5554 }
5555 else
5556 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5557 }
5558 rminlabel = LABEL();
5559 }
5560 else if (bra == OP_BRAZERO)
5561 {
5562 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5563 free_stack(common, 1);
5564 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5565 }
5566
5567 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5568 {
5569 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5570 {
5571 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5572 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5573 }
5574 once = JUMP(SLJIT_JUMP);
5575 }
5576 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5577 {
5578 if (has_alternatives)
5579 {
5580 /* Always exactly one alternative. */
5581 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5582 free_stack(common, 1);
5583
5584 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5585 if (SLJIT_UNLIKELY(!jumplistitem))
5586 return;
5587 jumplist = jumplistitem;
5588 jumplistitem->next = NULL;
5589 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5590 }
5591 }
5592 else if (*cc == OP_ALT)
5593 {
5594 /* Build a jump list. Get the last successfully matched branch index. */
5595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5596 free_stack(common, 1);
5597 count = 1;
5598 do
5599 {
5600 /* Append as the last item. */
5601 if (jumplist != NULL)
5602 {
5603 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
5604 jumplistitem = jumplistitem->next;
5605 }
5606 else
5607 {
5608 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5609 jumplist = jumplistitem;
5610 }
5611
5612 if (SLJIT_UNLIKELY(!jumplistitem))
5613 return;
5614
5615 jumplistitem->next = NULL;
5616 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
5617 cc += GET(cc, 1);
5618 }
5619 while (*cc == OP_ALT);
5620
5621 cc = ccbegin + GET(ccbegin, 1);
5622 }
5623
5624 COMPILE_FALLBACKPATH(current->top);
5625 if (current->topfallbacks)
5626 set_jumps(current->topfallbacks, LABEL());
5627
5628 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5629 {
5630 /* Conditional block always has at most one alternative. */
5631 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
5632 {
5633 SLJIT_ASSERT(has_alternatives);
5634 assert = CURRENT_AS(bracket_fallback)->u.assert;
5635 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
5636 {
5637 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5638 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5640 }
5641 cond = JUMP(SLJIT_JUMP);
5642 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
5643 }
5644 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
5645 {
5646 SLJIT_ASSERT(has_alternatives);
5647 cond = JUMP(SLJIT_JUMP);
5648 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
5649 }
5650 else
5651 SLJIT_ASSERT(!has_alternatives);
5652 }
5653
5654 if (has_alternatives)
5655 {
5656 count = 1;
5657 do
5658 {
5659 current->top = NULL;
5660 current->topfallbacks = NULL;
5661 current->nextfallbacks = NULL;
5662 if (*cc == OP_ALT)
5663 {
5664 ccprev = cc + 1 + LINK_SIZE;
5665 cc += GET(cc, 1);
5666 if (opcode != OP_COND && opcode != OP_SCOND)
5667 {
5668 if (localptr != 0 && opcode != OP_ONCE)
5669 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5670 else
5671 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5672 }
5673 compile_hotpath(common, ccprev, cc, current);
5674 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5675 return;
5676 }
5677
5678 /* Instructions after the current alternative is succesfully matched. */
5679 /* There is a similar code in compile_bracket_hotpath. */
5680 if (opcode == OP_ONCE)
5681 {
5682 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
5683 {
5684 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5685 /* TMP2 which is set here used by OP_KETRMAX below. */
5686 if (ket == OP_KETRMAX)
5687 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5688 else if (ket == OP_KETRMIN)
5689 {
5690 /* Move the STR_PTR to the localptr. */
5691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5692 }
5693 }
5694 else
5695 {
5696 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
5697 if (ket == OP_KETRMAX)
5698 {
5699 /* TMP2 which is set here used by OP_KETRMAX below. */
5700 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5701 }
5702 }
5703 }
5704
5705 stacksize = 0;
5706 if (opcode != OP_ONCE)
5707 stacksize++;
5708 if (ket != OP_KET || bra != OP_BRA)
5709 stacksize++;
5710
5711 if (stacksize > 0) {
5712 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5713 allocate_stack(common, stacksize);
5714 else
5715 {
5716 /* We know we have place at least for one item on the top of the stack. */
5717 SLJIT_ASSERT(stacksize == 1);
5718 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5719 }
5720 }
5721
5722 stacksize = 0;
5723 if (ket != OP_KET || bra != OP_BRA)
5724 {
5725 if (ket != OP_KET)
5726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5727 else
5728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5729 stacksize++;
5730 }
5731
5732 if (opcode != OP_ONCE)
5733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
5734
5735 if (offset != 0)
5736 {
5737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5739 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5740 }
5741
5742 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
5743
5744 if (opcode != OP_ONCE)
5745 {
5746 SLJIT_ASSERT(jumplist);
5747 JUMPHERE(jumplist->jump);
5748 jumplist = jumplist->next;
5749 }
5750
5751 COMPILE_FALLBACKPATH(current->top);
5752 if (current->topfallbacks)
5753 set_jumps(current->topfallbacks, LABEL());
5754 SLJIT_ASSERT(!current->nextfallbacks);
5755 }
5756 while (*cc == OP_ALT);
5757 SLJIT_ASSERT(!jumplist);
5758
5759 if (cond != NULL)
5760 {
5761 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
5762 assert = CURRENT_AS(bracket_fallback)->u.assert;
5763 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT))
5764 {
5765 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5766 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5767 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5768 }
5769 JUMPHERE(cond);
5770 }
5771
5772 /* Free the STR_PTR. */
5773 if (localptr == 0)
5774 free_stack(common, 1);
5775 }
5776
5777 if (offset != 0)
5778 {
5779 /* Using both tmp register is better for instruction scheduling. */
5780 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5781 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
5785 free_stack(common, 3);
5786 }
5787 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5788 {
5789 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
5790 free_stack(common, 1);
5791 }
5792 else if (opcode == OP_ONCE)
5793 {
5794 cc = ccbegin + GET(ccbegin, 1);
5795 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5796 {
5797 /* Reset head and drop saved frame. */
5798 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
5799 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
5800 }
5801 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
5802 {
5803 /* The STR_PTR must be released. */
5804 free_stack(common, 1);
5805 }
5806
5807 JUMPHERE(once);
5808 /* Restore previous localptr */
5809 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5810 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
5811 else if (ket == OP_KETRMIN)
5812 {
5813 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5814 /* See the comment below. */
5815 free_stack(common, 2);
5816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5817 }
5818 }
5819
5820 if (ket == OP_KETRMAX)
5821 {
5822 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5823 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
5824 if (bra == OP_BRAZERO)
5825 {
5826 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5827 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5828 JUMPHERE(brazero);
5829 }
5830 free_stack(common, 1);
5831 }
5832 else if (ket == OP_KETRMIN)
5833 {
5834 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5835
5836 /* OP_ONCE removes everything in case of a fallback, so we don't
5837 need to explicitly release the STR_PTR. The extra release would
5838 affect badly the free_stack(2) above. */
5839 if (opcode != OP_ONCE)
5840 free_stack(common, 1);
5841 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
5842 if (opcode == OP_ONCE)
5843 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
5844 else if (bra == OP_BRAMINZERO)
5845 free_stack(common, 1);
5846 }
5847 else if (bra == OP_BRAZERO)
5848 {
5849 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5850 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5851 JUMPHERE(brazero);
5852 }
5853 }
5854
5855 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
5856 {
5857 DEFINE_COMPILER;
5858 int offset;
5859 struct sljit_jump *jump;
5860
5861 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
5862 {
5863 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
5864 {
5865 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
5866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5869 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5870 }
5871 set_jumps(current->topfallbacks, LABEL());
5872 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5873 return;
5874 }
5875
5876 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
5877 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5878
5879 if (current->topfallbacks)
5880 {
5881 jump = JUMP(SLJIT_JUMP);
5882 set_jumps(current->topfallbacks, LABEL());
5883 /* Drop the stack frame. */
5884 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5885 JUMPHERE(jump);
5886 }
5887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
5888 }
5889
5890 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
5891 {
5892 assert_fallback fallback;
5893
5894 current->top = NULL;
5895 current->topfallbacks = NULL;
5896 current->nextfallbacks = NULL;
5897 if (current->cc[1] > OP_ASSERTBACK_NOT)
5898 {
5899 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
5900 compile_bracket_hotpath(common, current->cc, current);
5901 compile_bracket_fallbackpath(common, current->top);
5902 }
5903 else
5904 {
5905 memset(&fallback, 0, sizeof(fallback));
5906 fallback.common.cc = current->cc;
5907 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
5908 /* Manual call of compile_assert_hotpath. */
5909 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
5910 }
5911 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
5912 }
5913
5914 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
5915 {
5916 DEFINE_COMPILER;
5917
5918 while (current)
5919 {
5920 if (current->nextfallbacks != NULL)
5921 set_jumps(current->nextfallbacks, LABEL());
5922 switch(*current->cc)
5923 {
5924 case OP_SET_SOM:
5925 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5926 free_stack(common, 1);
5927 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
5928 break;
5929
5930 case OP_STAR:
5931 case OP_MINSTAR:
5932 case OP_PLUS:
5933 case OP_MINPLUS:
5934 case OP_QUERY:
5935 case OP_MINQUERY:
5936 case OP_UPTO:
5937 case OP_MINUPTO:
5938 case OP_EXACT:
5939 case OP_POSSTAR:
5940 case OP_POSPLUS:
5941 case OP_POSQUERY:
5942 case OP_POSUPTO:
5943 case OP_STARI:
5944 case OP_MINSTARI:
5945 case OP_PLUSI:
5946 case OP_MINPLUSI:
5947 case OP_QUERYI:
5948 case OP_MINQUERYI:
5949 case OP_UPTOI:
5950 case OP_MINUPTOI:
5951 case OP_EXACTI:
5952 case OP_POSSTARI:
5953 case OP_POSPLUSI:
5954 case OP_POSQUERYI:
5955 case OP_POSUPTOI:
5956 case OP_NOTSTAR:
5957 case OP_NOTMINSTAR:
5958 case OP_NOTPLUS:
5959 case OP_NOTMINPLUS:
5960 case OP_NOTQUERY:
5961 case OP_NOTMINQUERY:
5962 case OP_NOTUPTO:
5963 case OP_NOTMINUPTO:
5964 case OP_NOTEXACT:
5965 case OP_NOTPOSSTAR:
5966 case OP_NOTPOSPLUS:
5967 case OP_NOTPOSQUERY:
5968 case OP_NOTPOSUPTO:
5969 case OP_NOTSTARI:
5970 case OP_NOTMINSTARI:
5971 case OP_NOTPLUSI:
5972 case OP_NOTMINPLUSI:
5973 case OP_NOTQUERYI:
5974 case OP_NOTMINQUERYI:
5975 case OP_NOTUPTOI:
5976 case OP_NOTMINUPTOI:
5977 case OP_NOTEXACTI:
5978 case OP_NOTPOSSTARI:
5979 case OP_NOTPOSPLUSI:
5980 case OP_NOTPOSQUERYI:
5981 case OP_NOTPOSUPTOI:
5982 case OP_TYPESTAR:
5983 case OP_TYPEMINSTAR:
5984 case OP_TYPEPLUS:
5985 case OP_TYPEMINPLUS:
5986 case OP_TYPEQUERY:
5987 case OP_TYPEMINQUERY:
5988 case OP_TYPEUPTO:
5989 case OP_TYPEMINUPTO:
5990 case OP_TYPEEXACT:
5991 case OP_TYPEPOSSTAR:
5992 case OP_TYPEPOSPLUS:
5993 case OP_TYPEPOSQUERY:
5994 case OP_TYPEPOSUPTO:
5995 case OP_CLASS:
5996 case OP_NCLASS:
5997 case OP_XCLASS:
5998 compile_iterator_fallbackpath(common, current);
5999 break;
6000
6001 case OP_REF:
6002 case OP_REFI:
6003 compile_ref_iterator_fallbackpath(common, current);
6004 break;
6005
6006 case OP_RECURSE:
6007 compile_recurse_fallbackpath(common, current);
6008 break;
6009
6010 case OP_ASSERT:
6011 case OP_ASSERT_NOT:
6012 case OP_ASSERTBACK:
6013 case OP_ASSERTBACK_NOT:
6014 compile_assert_fallbackpath(common, current);
6015 break;
6016
6017 case OP_ONCE:
6018 case OP_ONCE_NC:
6019 case OP_BRA:
6020 case OP_CBRA:
6021 case OP_COND:
6022 case OP_SBRA:
6023 case OP_SCBRA:
6024 case OP_SCOND:
6025 compile_bracket_fallbackpath(common, current);
6026 break;
6027
6028 case OP_BRAZERO:
6029 if (current->cc[1] > OP_ASSERTBACK_NOT)
6030 compile_bracket_fallbackpath(common, current);
6031 else
6032 compile_assert_fallbackpath(common, current);
6033 break;
6034
6035 case OP_BRAPOS:
6036 case OP_CBRAPOS:
6037 case OP_SBRAPOS:
6038 case OP_SCBRAPOS:
6039 case OP_BRAPOSZERO:
6040 compile_bracketpos_fallbackpath(common, current);
6041 break;
6042
6043 case OP_BRAMINZERO:
6044 compile_braminzero_fallbackpath(common, current);
6045 break;
6046
6047 case OP_FAIL:
6048 case OP_ACCEPT:
6049 case OP_ASSERT_ACCEPT:
6050 set_jumps(current->topfallbacks, LABEL());
6051 break;
6052
6053 default:
6054 SLJIT_ASSERT_STOP();
6055 break;
6056 }
6057 current = current->prev;
6058 }
6059 }
6060
6061 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6062 {
6063 DEFINE_COMPILER;
6064 pcre_uchar *cc = common->start + common->currententry->start;
6065 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : 2);
6066 pcre_uchar *ccend = bracketend(cc);
6067 int localsize = get_localsize(common, ccbegin, ccend);
6068 int framesize = get_framesize(common, cc, TRUE);
6069 int alternativesize;
6070 BOOL needsframe;
6071 fallback_common altfallback;
6072 struct sljit_jump *jump;
6073
6074 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6075 needsframe = framesize >= 0;
6076 if (!needsframe)
6077 framesize = 0;
6078 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6079
6080 SLJIT_ASSERT(common->currententry->entry == NULL);
6081 common->currententry->entry = LABEL();
6082 set_jumps(common->currententry->calls, common->currententry->entry);
6083
6084 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6085 allocate_stack(common, localsize + framesize + alternativesize);
6086 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6087 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6089 if (needsframe)
6090 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6091
6092 if (alternativesize > 0)
6093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6094
6095 memset(&altfallback, 0, sizeof(fallback_common));
6096 common->acceptlabel = NULL;
6097 common->accept = NULL;
6098 altfallback.cc = ccbegin;
6099 cc += GET(cc, 1);
6100 while (1)
6101 {
6102 altfallback.top = NULL;
6103 altfallback.topfallbacks = NULL;
6104
6105 if (altfallback.cc != ccbegin)
6106 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6107
6108 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6109 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6110 return;
6111
6112 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6113
6114 compile_fallbackpath(common, altfallback.top);
6115 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6116 return;
6117 set_jumps(altfallback.topfallbacks, LABEL());
6118
6119 if (*cc != OP_ALT)
6120 break;
6121
6122 altfallback.cc = cc + 1 + LINK_SIZE;
6123 cc += GET(cc, 1);
6124 }
6125 /* None of them matched. */
6126 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6127 jump = JUMP(SLJIT_JUMP);
6128
6129 set_jumps(common->accept, LABEL());
6130 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6131 if (needsframe)
6132 {
6133 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6134 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6135 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6136 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6137 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6138 }
6139 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6140
6141 JUMPHERE(jump);
6142 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6143 free_stack(common, localsize + framesize + alternativesize);
6144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6145 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6147 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6148 }
6149
6150 #undef COMPILE_FALLBACKPATH
6151 #undef CURRENT_AS
6152
6153 void
6154 PRIV(jit_compile)(const real_pcre *re, pcre_extra *extra)
6155 {
6156 struct sljit_compiler *compiler;
6157 fallback_common rootfallback;
6158 compiler_common common_data;
6159 compiler_common *common = &common_data;
6160 const pcre_uint8 *tables = re->tables;
6161 pcre_study_data *study;
6162 pcre_uchar *ccend;
6163 executable_function *function;
6164 void *executable_func;
6165 struct sljit_label *leave;
6166 struct sljit_label *mainloop = NULL;
6167 struct sljit_label *empty_match_found;
6168 struct sljit_label *empty_match_fallback;
6169 struct sljit_jump *alloc_error;
6170 struct sljit_jump *reqbyte_notfound = NULL;
6171 struct sljit_jump *empty_match;
6172
6173 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6174 study = extra->study_data;
6175
6176 if (!tables)
6177 tables = PRIV(default_tables);
6178
6179 memset(&rootfallback, 0, sizeof(fallback_common));
6180 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6181
6182 common->compiler = NULL;
6183 common->start = rootfallback.cc;
6184 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6185 common->fcc = tables + fcc_offset;
6186 common->lcc = (sljit_w)(tables + lcc_offset);
6187 common->nltype = NLTYPE_FIXED;
6188 switch(re->options & PCRE_NEWLINE_BITS)
6189 {
6190 case 0:
6191 /* Compile-time default */
6192 switch (NEWLINE)
6193 {
6194 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6195 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6196 default: common->newline = NEWLINE; break;
6197 }
6198 break;
6199 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6200 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6201 case PCRE_NEWLINE_CR+
6202 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6203 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6204 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6205 default: return;
6206 }
6207 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6208 common->bsr_nltype = NLTYPE_ANYCRLF;
6209 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6210 common->bsr_nltype = NLTYPE_ANY;
6211 else
6212 {
6213 #ifdef BSR_ANYCRLF
6214 common->bsr_nltype = NLTYPE_ANYCRLF;
6215 #else
6216 common->bsr_nltype = NLTYPE_ANY;
6217 #endif
6218 }
6219 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6220 common->ctypes = (sljit_w)(tables + ctypes_offset);
6221 common->name_table = (sljit_w)re + re->name_table_offset;
6222 common->name_count = re->name_count;
6223 common->name_entry_size = re->name_entry_size;
6224 common->acceptlabel = NULL;
6225 common->stubs = NULL;
6226 common->entries = NULL;
6227 common->currententry = NULL;
6228 common->accept = NULL;
6229 common->calllimit = NULL;
6230 common->stackalloc = NULL;
6231 common->revertframes = NULL;
6232 common->wordboundary = NULL;
6233 common->anynewline = NULL;
6234 common->hspace = NULL;
6235 common->vspace = NULL;
6236 common->casefulcmp = NULL;
6237 common->caselesscmp = NULL;
6238 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6239 #ifdef SUPPORT_UTF8
6240 common->utf8 = (re->options & PCRE_UTF8) != 0;
6241 #ifdef SUPPORT_UCP
6242 common->useucp = (re->options & PCRE_UCP) != 0;
6243 #endif
6244 common->utf8readchar = NULL;
6245 common->utf8readtype8 = NULL;
6246 #endif
6247 #ifdef SUPPORT_UCP
6248 common->getucd = NULL;
6249 #endif
6250 ccend = bracketend(rootfallback.cc);
6251 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6252 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6253 if (common->localsize < 0)
6254 return;
6255 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6256 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6257 return;
6258 common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6259 if (!common->localptrs)
6260 return;
6261 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6262 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6263
6264 compiler = sljit_create_compiler();
6265 if (!compiler)
6266 {
6267 SLJIT_FREE(common->localptrs);
6268 return;
6269 }
6270 common->compiler = compiler;
6271
6272 /* Main pcre_jit_exec entry. */
6273 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6274
6275 /* Register init. */
6276 reset_ovector(common, (re->top_bracket + 1) * 2);
6277 if ((re->flags & PCRE_REQCHSET) != 0)
6278 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, SLJIT_TEMPORARY_REG1, 0);
6279
6280 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
6281 OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
6282 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6283 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6284 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6285 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6286 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6287 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6288 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6289
6290 /* Main part of the matching */
6291 if ((re->options & PCRE_ANCHORED) == 0)
6292 {
6293 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6294 /* Forward search if possible. */
6295 if ((re->flags & PCRE_FIRSTSET) != 0)
6296 fast_forward_first_byte(common, re->first_byte, (re->options & PCRE_FIRSTLINE) != 0);
6297 else if ((re->flags & PCRE_STARTLINE) != 0)
6298 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
6299 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6300 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6301 }
6302 if ((re->flags & PCRE_REQCHSET) != 0)
6303 reqbyte_notfound = search_requested_char(common, re->req_byte, (re->flags & PCRE_FIRSTSET) != 0);
6304
6305 /* Store the current STR_PTR in OVECTOR(0). */
6306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6307 /* Copy the limit of allowed recursions. */
6308 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6309
6310 compile_hotpath(common, rootfallback.cc, ccend, &rootfallback);
6311 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6312 {
6313 sljit_free_compiler(compiler);
6314 SLJIT_FREE(common->localptrs);
6315 return;
6316 }
6317
6318 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6319 empty_match_found = LABEL();
6320
6321 common->acceptlabel = LABEL();
6322 if (common->accept != NULL)
6323 set_jumps(common->accept, common->acceptlabel);
6324
6325 /* This means we have a match. Update the ovector. */
6326 copy_ovector(common, re->top_bracket + 1);
6327 leave = LABEL();
6328 sljit_emit_return(compiler, SLJIT_UNUSED, 0);
6329
6330 empty_match_fallback = LABEL();
6331 compile_fallbackpath(common, rootfallback.top);
6332 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6333 {
6334 sljit_free_compiler(compiler);
6335 SLJIT_FREE(common->localptrs);
6336 return;
6337 }
6338
6339 SLJIT_ASSERT(rootfallback.prev == NULL);
6340
6341 /* Check we have remaining characters. */
6342 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6343
6344 if ((re->options & PCRE_ANCHORED) == 0)
6345 {
6346 if ((re->options & PCRE_FIRSTLINE) == 0)
6347 {
6348 if (study != NULL && study->minlength > 1)
6349 {
6350 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6351 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
6352 }
6353 else
6354 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
6355 }
6356 else
6357 {
6358 if (study != NULL && study->minlength > 1)
6359 {
6360 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
6361 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
6362 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
6363 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
6364 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
6365 JUMPTO(SLJIT_C_ZERO, mainloop);
6366 }
6367 else
6368 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop);
6369 }
6370 }
6371
6372 if (reqbyte_notfound != NULL)
6373 JUMPHERE(reqbyte_notfound);
6374 /* Copy OVECTOR(1) to OVECTOR(0) */
6375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6376 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6377 JUMPTO(SLJIT_JUMP, leave);
6378
6379 flush_stubs(common);
6380
6381 JUMPHERE(empty_match);
6382 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6383 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6384 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback);
6385 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6386 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
6387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6388 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
6389 JUMPTO(SLJIT_JUMP, empty_match_fallback);
6390
6391 common->currententry = common->entries;
6392 while (common->currententry != NULL)
6393 {
6394 /* Might add new entries. */
6395 compile_recurse(common);
6396 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6397 {
6398 sljit_free_compiler(compiler);
6399 SLJIT_FREE(common->localptrs);
6400 return;
6401 }
6402 flush_stubs(common);
6403 common->currententry = common->currententry->next;
6404 }
6405
6406 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
6407 /* This is a (really) rare case. */
6408 set_jumps(common->stackalloc, LABEL());
6409 /* RETURN_ADDR is not a saved register. */
6410 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
6411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
6412 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6414 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
6415 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
6416
6417 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
6418 alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6419 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6421 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
6422 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
6423 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
6424 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6425
6426 /* Allocation failed. */
6427 JUMPHERE(alloc_error);
6428 /* We break the return address cache here, but this is a really rare case. */
6429 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
6430 JUMPTO(SLJIT_JUMP, leave);
6431
6432 /* Call limit reached. */
6433 set_jumps(common->calllimit, LABEL());
6434 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
6435 JUMPTO(SLJIT_JUMP, leave);
6436
6437 if (common->revertframes != NULL)
6438 {
6439 set_jumps(common->revertframes, LABEL());
6440 do_revertframes(common);
6441 }
6442 if (common->wordboundary != NULL)
6443 {
6444 set_jumps(common->wordboundary, LABEL());
6445 check_wordboundary(common);
6446 }
6447 if (common->anynewline != NULL)
6448 {
6449 set_jumps(common->anynewline, LABEL());
6450 check_anynewline(common);
6451 }
6452 if (common->hspace != NULL)
6453 {
6454 set_jumps(common->hspace, LABEL());
6455 check_hspace(common);
6456 }
6457 if (common->vspace != NULL)
6458 {
6459 set_jumps(common->vspace, LABEL());
6460 check_vspace(common);
6461 }
6462 if (common->casefulcmp != NULL)
6463 {
6464 set_jumps(common->casefulcmp, LABEL());
6465 do_casefulcmp(common);
6466 }
6467 if (common->caselesscmp != NULL)
6468 {
6469 set_jumps(common->caselesscmp, LABEL());
6470 do_caselesscmp(common);
6471 }
6472 #ifdef SUPPORT_UTF8
6473 if (common->utf8readchar != NULL)
6474 {
6475 set_jumps(common->utf8readchar, LABEL());
6476 do_utf8readchar(common);
6477 }
6478 if (common->utf8readtype8 != NULL)
6479 {
6480 set_jumps(common->utf8readtype8, LABEL());
6481 do_utf8readtype8(common);
6482 }
6483 #endif
6484 #ifdef SUPPORT_UCP
6485 if (common->getucd != NULL)
6486 {
6487 set_jumps(common->getucd, LABEL());
6488 do_getucd(common);
6489 }
6490 #endif
6491
6492 SLJIT_FREE(common->localptrs);
6493 executable_func = sljit_generate_code(compiler);
6494 sljit_free_compiler(compiler);
6495 if (executable_func == NULL)
6496 return;
6497
6498 function = SLJIT_MALLOC(sizeof(executable_function));
6499 if (function == NULL)
6500 {
6501 /* This case is highly unlikely since we just recently
6502 freed a lot of memory. Although not impossible. */
6503 sljit_free_code(executable_func);
6504 return;
6505 }
6506
6507 function->executable_func = executable_func;
6508 function->callback = NULL;
6509 function->userdata = NULL;
6510 extra->executable_jit = function;
6511 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
6512 }
6513
6514 static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function)
6515 {
6516 union {
6517 void* executable_func;
6518 jit_function call_executable_func;
6519 } convert_executable_func;
6520 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
6521 struct sljit_stack local_stack;
6522
6523 local_stack.top = (sljit_w)&local_area;
6524 local_stack.base = local_stack.top;
6525 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
6526 local_stack.max_limit = local_stack.limit;
6527 arguments->stack = &local_stack;
6528 convert_executable_func.executable_func = function->executable_func;
6529 return convert_executable_func.call_executable_func(arguments);
6530 }
6531
6532 int
6533 PRIV(jit_exec)(const real_pcre *re, void *executable_func,
6534 const pcre_uchar *subject, int length, int start_offset, int options,
6535 int match_limit, int *offsets, int offsetcount)
6536 {
6537 executable_function *function = (executable_function*)executable_func;
6538 union {
6539 void* executable_func;
6540 jit_function call_executable_func;
6541 } convert_executable_func;
6542 jit_arguments arguments;
6543 int maxoffsetcount;
6544 int retval;
6545
6546 /* Sanity checks should be handled by pcre_exec. */
6547 arguments.stack = NULL;
6548 arguments.str = subject + start_offset;
6549 arguments.begin = subject;
6550 arguments.end = subject + length;
6551 arguments.calllimit = match_limit; /* JIT decreases this value less times. */
6552 arguments.notbol = (options & PCRE_NOTBOL) != 0;
6553 arguments.noteol = (options & PCRE_NOTEOL) != 0;
6554 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
6555 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6556 arguments.offsets = offsets;
6557
6558 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
6559 the output vector for storing captured strings, with the remainder used as
6560 workspace. We don't need the workspace here. For compatibility, we limit the
6561 number of captured strings in the same way as pcre_exec(), so that the user
6562 gets the same result with and without JIT. */
6563
6564 offsetcount = ((offsetcount - (offsetcount % 3)) * 2)/3;
6565 maxoffsetcount = (re->top_bracket + 1) * 2;
6566 if (offsetcount > maxoffsetcount)
6567 offsetcount = maxoffsetcount;
6568 arguments.offsetcount = offsetcount;
6569
6570 if (function->callback)
6571 arguments.stack = (struct sljit_stack*)function->callback(function->userdata);
6572 else
6573 arguments.stack = (struct sljit_stack*)function->userdata;
6574
6575 if (arguments.stack == NULL)
6576 retval = jit_machine_stack_exec(&arguments, function);
6577 else
6578 {
6579 convert_executable_func.executable_func = function->executable_func;
6580 retval = convert_executable_func.call_executable_func(&arguments);
6581 }
6582
6583 if (retval * 2 > offsetcount)
6584 retval = 0;
6585 return retval;
6586 }
6587
6588 void
6589 PRIV(jit_free)(void *executable_func)
6590 {
6591 executable_function *function = (executable_function*)executable_func;
6592 sljit_free_code(function->executable_func);
6593 SLJIT_FREE(function);
6594 }
6595
6596 #ifdef COMPILE_PCRE8
6597 PCRE_EXP_DECL pcre_jit_stack *
6598 pcre_jit_stack_alloc(int startsize, int maxsize)
6599 #else
6600 PCRE_EXP_DECL pcre_jit_stack *
6601 pcre16_jit_stack_alloc(int startsize, int maxsize)
6602 #endif
6603 {
6604 if (startsize < 1 || maxsize < 1)
6605 return NULL;
6606 if (startsize > maxsize)
6607 startsize = maxsize;
6608 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6609 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6610 return (pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize);
6611 }
6612
6613 #ifdef COMPILE_PCRE8
6614 PCRE_EXP_DECL void
6615 pcre_jit_stack_free(pcre_jit_stack *stack)
6616 #else
6617 PCRE_EXP_DECL void
6618 pcre16_jit_stack_free(pcre_jit_stack *stack)
6619 #endif
6620 {
6621 sljit_free_stack((struct sljit_stack*)stack);
6622 }
6623
6624 #ifdef COMPILE_PCRE8
6625 PCRE_EXP_DECL void
6626 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6627 #else
6628 PCRE_EXP_DECL void
6629 pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6630 #endif
6631 {
6632 executable_function *function;
6633 if (extra != NULL &&
6634 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
6635 extra->executable_jit != NULL)
6636 {
6637 function = (executable_function*)extra->executable_jit;
6638 function->callback = callback;
6639 function->userdata = userdata;
6640 }
6641 }
6642
6643 #else /* SUPPORT_JIT */
6644
6645 /* These are dummy functions to avoid linking errors when JIT support is not
6646 being compiled. */
6647
6648 #ifdef COMPILE_PCRE8
6649 PCRE_EXP_DECL pcre_jit_stack *
6650 pcre_jit_stack_alloc(int startsize, int maxsize)
6651 #else
6652 PCRE_EXP_DECL pcre_jit_stack *
6653 pcre16_jit_stack_alloc(int startsize, int maxsize)
6654 #endif
6655 {
6656 (void)startsize;
6657 (void)maxsize;
6658 return NULL;
6659 }
6660
6661 #ifdef COMPILE_PCRE8
6662 PCRE_EXP_DECL void
6663 pcre_jit_stack_free(pcre_jit_stack *stack)
6664 #else
6665 PCRE_EXP_DECL void
6666 pcre16_jit_stack_free(pcre_jit_stack *stack)
6667 #endif
6668 {
6669 (void)stack;
6670 }
6671
6672 #ifdef COMPILE_PCRE8
6673 PCRE_EXP_DECL void
6674 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6675 #else
6676 PCRE_EXP_DECL void
6677 pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6678 #endif
6679 {
6680 (void)extra;
6681 (void)callback;
6682 (void)userdata;
6683 }
6684
6685 #endif
6686
6687 /* End of pcre_jit_compile.c */

  ViewVC Help
Powered by ViewVC 1.1.5